diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-08-11 18:31:20 +0200 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-08-11 18:31:20 +0200 |
| commit | 062b36fe5725d1267c66db2e506b4131d78ce772 (patch) | |
| tree | 998e5260feb1babac8dae512b56d67d8f20f7266 /scrape_test.go | |
| parent | 7e4cf39a0ba6ccbd5cc036700a8b1ff9358ecc3d (diff) | |
simplify project structure
Diffstat (limited to 'scrape_test.go')
| -rw-r--r-- | scrape_test.go | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/scrape_test.go b/scrape_test.go new file mode 100644 index 0000000..5d6e578 --- /dev/null +++ b/scrape_test.go @@ -0,0 +1,67 @@ +package flyscrape_test + +import ( + "sort" + "testing" + + "flyscrape" + + "github.com/stretchr/testify/require" +) + +func TestScrape(t *testing.T) { + svc := flyscrape.Scraper{ + Concurrency: 10, + ScrapeOptions: flyscrape.ScrapeOptions{ + URL: "http://example.com/foo/bar", + Depth: 1, + }, + ScrapeFunc: func(params flyscrape.ScrapeParams) (any, error) { + return map[string]any{ + "url": params.URL, + }, nil + }, + FetchFunc: func(url string) (string, error) { + return `<html> + <body> + <a href="/baz">Baz</a> + <a href="baz">Baz</a> + <a href="http://www.google.com">Google</a> + </body> + </html>`, nil + }, + } + + var urls []string + for res := range svc.Scrape() { + urls = append(urls, res.URL) + } + sort.Strings(urls) + + require.Len(t, urls, 4) + require.Equal(t, "http://example.com/baz", urls[0]) + require.Equal(t, "http://example.com/foo/bar", urls[1]) + require.Equal(t, "http://example.com/foo/baz", urls[2]) + require.Equal(t, "http://www.google.com/", urls[3]) +} + +func TestFindLinks(t *testing.T) { + origin := "http://example.com/foo/bar" + html := ` + <html> + <body> + <a href="/baz">Baz</a> + <a href="baz">Baz</a> + <a href="http://www.google.com">Google</a> + <a href="javascript:void(0)">Google</a> + <a href="/foo#hello">Anchor</a> + </body> + </html>` + + links := flyscrape.Links(html, origin) + require.Len(t, links, 4) + require.Equal(t, "http://example.com/baz", links[0]) + require.Equal(t, "http://example.com/foo/baz", links[1]) + require.Equal(t, "http://www.google.com/", links[2]) + require.Equal(t, "http://example.com/foo", links[3]) +} |