summary refs log tree commit diff
path: root/scrape_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'scrape_test.go')
-rw-r--r-- scrape_test.go 67
1 files changed, 67 insertions, 0 deletions
diff --git a/scrape_test.go b/scrape_test.go
new file mode 100644
index 0000000..5d6e578
--- /dev/null
+++ b/scrape_test.go
@@ -0,0 +1,67 @@
+package flyscrape_test
+
+import (
+ "sort"
+ "testing"
+
+ "flyscrape"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestScrape runs a Scraper against a stubbed fetcher and checks the set of
+// URLs it reports.
+//
+// Every fetch returns the same page, which contains a root-relative link, a
+// path-relative link and an absolute external link. The test expects the start
+// URL plus each of those links, in absolute form, to appear exactly once in
+// the results.
+func TestScrape(t *testing.T) {
+	// Canned response served for every requested URL.
+	const page = `<html>
+ <body>
+ <a href="/baz">Baz</a>
+ <a href="baz">Baz</a>
+ <a href="http://www.google.com">Google</a>
+ </body>
+ </html>`
+
+	scraper := flyscrape.Scraper{
+		Concurrency: 10,
+		ScrapeOptions: flyscrape.ScrapeOptions{
+			URL:   "http://example.com/foo/bar",
+			Depth: 1,
+		},
+		// Echo the visited URL back as the scraped payload.
+		ScrapeFunc: func(p flyscrape.ScrapeParams) (any, error) {
+			data := map[string]any{"url": p.URL}
+			return data, nil
+		},
+		FetchFunc: func(string) (string, error) {
+			return page, nil
+		},
+	}
+
+	// Results are collected and sorted before asserting, so the checks below
+	// do not depend on the order in which results are produced.
+	var got []string
+	for result := range scraper.Scrape() {
+		got = append(got, result.URL)
+	}
+	sort.Strings(got)
+
+	// Expected URLs in sorted order.
+	want := []string{
+		"http://example.com/baz",
+		"http://example.com/foo/bar",
+		"http://example.com/foo/baz",
+		"http://www.google.com/",
+	}
+	require.Len(t, got, len(want))
+	for i, expected := range want {
+		require.Equal(t, expected, got[i])
+	}
+}
+
+// TestFindLinks checks the links that flyscrape.Links returns for an HTML
+// document and an origin URL.
+//
+// The fixture holds five anchors but only four links are expected: the
+// javascript: href is expected to be absent, relative hrefs are expected in
+// absolute form based on the origin, and the "#hello" fragment is expected to
+// be dropped. The assertions are positional and follow the fixture's order.
+func TestFindLinks(t *testing.T) {
+	base := "http://example.com/foo/bar"
+	doc := `
+ <html>
+ <body>
+ <a href="/baz">Baz</a>
+ <a href="baz">Baz</a>
+ <a href="http://www.google.com">Google</a>
+ <a href="javascript:void(0)">Google</a>
+ <a href="/foo#hello">Anchor</a>
+ </body>
+ </html>`
+
+	// Expected links, listed in the order the anchors appear in the fixture.
+	want := []string{
+		"http://example.com/baz",
+		"http://example.com/foo/baz",
+		"http://www.google.com/",
+		"http://example.com/foo",
+	}
+
+	got := flyscrape.Links(doc, base)
+	require.Len(t, got, len(want))
+	for i, expected := range want {
+		require.Equal(t, expected, got[i])
+	}
+}