// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. package starturl_test import ( "net/http" "testing" "github.com/philippta/flyscrape" "github.com/philippta/flyscrape/modules/hook" "github.com/philippta/flyscrape/modules/starturl" "github.com/stretchr/testify/require" ) func TestStartURL(t *testing.T) { var url string var depth int mods := []flyscrape.Module{ &starturl.Module{URL: "http://www.example.com/foo/bar"}, hook.Module{ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper { return flyscrape.MockTransport(200, "") }, BuildRequestFn: func(r *flyscrape.Request) { url = r.URL depth = r.Depth }, }, } scraper := flyscrape.NewScraper() scraper.Modules = mods scraper.Run() require.Equal(t, "http://www.example.com/foo/bar", url) require.Equal(t, 0, depth) } func TestStartURL_MultipleStartingURLs(t *testing.T) { testCases := []struct { name string startURLModFn func() *starturl.Module urls []string }{ { name: ".URL and .URLs", startURLModFn: func() *starturl.Module { return &starturl.Module{ URL: "http://www.example.com/foo", URLs: []string{ "http://www.example.com/bar", "http://www.example.com/baz", }, } }, urls: []string{ "http://www.example.com/foo", "http://www.example.com/bar", "http://www.example.com/baz", }, }, { name: "only .URL", startURLModFn: func() *starturl.Module { return &starturl.Module{ URL: "http://www.example.com/foo", } }, urls: []string{ "http://www.example.com/foo", }, }, { name: "only .URLs", startURLModFn: func() *starturl.Module { return &starturl.Module{ URLs: []string{ "http://www.example.com/bar", "http://www.example.com/baz", }, } }, urls: []string{ "http://www.example.com/bar", "http://www.example.com/baz", }, }, { name: "empty", startURLModFn: func() *starturl.Module { return &starturl.Module{} }, urls: []string{}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { urls := []string{} mods := []flyscrape.Module{ tc.startURLModFn(), hook.Module{ AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper { return flyscrape.MockTransport(http.StatusOK, "") }, BuildRequestFn: func(r *flyscrape.Request) { urls = append(urls, r.URL) }, }, } scraper := flyscrape.NewScraper() scraper.Modules = mods scraper.Run() require.ElementsMatch(t, tc.urls, urls) }) } }