diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2025-01-10 13:09:50 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-10 13:09:50 +0100 |
| commit | 40e02d5d28e59dbeb6134afdce12536c602e6aa5 (patch) | |
| tree | 24c84c4084cf72e552804397eecc9f5bb3c2d4be /modules/followlinks/followlinks_test.go | |
| parent | bf99c233a18c3165e0d4d251b41224e5bc6eb93d (diff) | |
Implement manual following (#82)
Diffstat (limited to 'modules/followlinks/followlinks_test.go')
| -rw-r--r-- | modules/followlinks/followlinks_test.go | 40 |
1 file changed, 37 insertions, 3 deletions
```diff
diff --git a/modules/followlinks/followlinks_test.go b/modules/followlinks/followlinks_test.go
index af186f9..b09b0cd 100644
--- a/modules/followlinks/followlinks_test.go
+++ b/modules/followlinks/followlinks_test.go
@@ -57,7 +57,7 @@ func TestFollowSelector(t *testing.T) {
 	mods := []flyscrape.Module{
 		&starturl.Module{URL: "http://www.example.com/foo/bar"},
 		&followlinks.Module{
-			Follow: []string{".next a[href]"},
+			Follow: &[]string{".next a[href]"},
 		},
 		hook.Module{
 			AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
@@ -92,7 +92,7 @@ func TestFollowDataAttr(t *testing.T) {
 	mods := []flyscrape.Module{
 		&starturl.Module{URL: "http://www.example.com/foo/bar"},
 		&followlinks.Module{
-			Follow: []string{"[data-url]"},
+			Follow: &[]string{"[data-url]"},
 		},
 		hook.Module{
 			AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
@@ -125,7 +125,7 @@ func TestFollowMultiple(t *testing.T) {
 	mods := []flyscrape.Module{
 		&starturl.Module{URL: "http://www.example.com/foo/bar"},
 		&followlinks.Module{
-			Follow: []string{"a.prev", "a.next"},
+			Follow: &[]string{"a.prev", "a.next"},
 		},
 		hook.Module{
 			AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
@@ -151,3 +151,37 @@ func TestFollowMultiple(t *testing.T) {
 	require.Contains(t, urls, "http://www.example.com/foo/a")
 	require.Contains(t, urls, "http://www.example.com/foo/b")
 }
+
+func TestFollowNoFollow(t *testing.T) {
+	var urls []string
+	var mu sync.Mutex
+
+	mods := []flyscrape.Module{
+		&starturl.Module{URL: "http://www.example.com/foo/bar"},
+		&followlinks.Module{
+			Follow: &[]string{},
+		},
+		hook.Module{
+			AdaptTransportFn: func(rt http.RoundTripper) http.RoundTripper {
+				return flyscrape.MockTransport(200, `
+					<a href="/baz">Baz</a>
+					<a href="baz">Baz</a>
+					<div class="next">
+						<a href="http://www.google.com">Google</a>
+					</div>`)
+			},
+			ReceiveResponseFn: func(r *flyscrape.Response) {
+				mu.Lock()
+				urls = append(urls, r.Request.URL)
+				mu.Unlock()
+			},
+		},
+	}
+
+	scraper := flyscrape.NewScraper()
+	scraper.Modules = mods
+	scraper.Run()
+
+	require.Len(t, urls, 1)
+	require.Contains(t, urls, "http://www.example.com/foo/bar")
+}
```