From 062b36fe5725d1267c66db2e506b4131d78ce772 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Fri, 11 Aug 2023 18:31:20 +0200 Subject: simplify project structure --- scrape_test.go | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 scrape_test.go (limited to 'scrape_test.go') diff --git a/scrape_test.go b/scrape_test.go new file mode 100644 index 0000000..5d6e578 --- /dev/null +++ b/scrape_test.go @@ -0,0 +1,67 @@ +package flyscrape_test + +import ( + "sort" + "testing" + + "flyscrape" + + "github.com/stretchr/testify/require" +) + +func TestScrape(t *testing.T) { + svc := flyscrape.Scraper{ + Concurrency: 10, + ScrapeOptions: flyscrape.ScrapeOptions{ + URL: "http://example.com/foo/bar", + Depth: 1, + }, + ScrapeFunc: func(params flyscrape.ScrapeParams) (any, error) { + return map[string]any{ + "url": params.URL, + }, nil + }, + FetchFunc: func(url string) (string, error) { + return ` + + Baz + Baz + Google + + `, nil + }, + } + + var urls []string + for res := range svc.Scrape() { + urls = append(urls, res.URL) + } + sort.Strings(urls) + + require.Len(t, urls, 4) + require.Equal(t, "http://example.com/baz", urls[0]) + require.Equal(t, "http://example.com/foo/bar", urls[1]) + require.Equal(t, "http://example.com/foo/baz", urls[2]) + require.Equal(t, "http://www.google.com/", urls[3]) +} + +func TestFindLinks(t *testing.T) { + origin := "http://example.com/foo/bar" + html := ` + + + Baz + Baz + Google + Google + Anchor + + ` + + links := flyscrape.Links(html, origin) + require.Len(t, links, 4) + require.Equal(t, "http://example.com/baz", links[0]) + require.Equal(t, "http://example.com/foo/baz", links[1]) + require.Equal(t, "http://www.google.com/", links[2]) + require.Equal(t, "http://example.com/foo", links[3]) +} -- cgit v1.2.3