From bf99c233a18c3165e0d4d251b41224e5bc6eb93d Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Fri, 10 Jan 2025 12:49:32 +0100 Subject: Implement nested scraping (#81) --- js_test.go | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'js_test.go') diff --git a/js_test.go b/js_test.go index 0aeb9cd..4b08720 100644 --- a/js_test.go +++ b/js_test.go @@ -168,6 +168,98 @@ func TestJSScrapeNaN(t *testing.T) { require.Nil(t, result) } +func TestJSScrapeParamURL(t *testing.T) { + js := ` + export default function({ url }) { + return url; + } + ` + exports, err := flyscrape.Compile(js, nil) + require.NoError(t, err) + + result, err := exports.Scrape(flyscrape.ScrapeParams{ + HTML: html, + URL: "http://localhost/", + }) + require.NoError(t, err) + require.Equal(t, "http://localhost/", result) +} + +func TestJSScrapeParamAbsoluteURL(t *testing.T) { + js := ` + export default function({ absoluteURL }) { + return absoluteURL("/foo"); + } + ` + exports, err := flyscrape.Compile(js, nil) + require.NoError(t, err) + + result, err := exports.Scrape(flyscrape.ScrapeParams{ + HTML: html, + URL: "http://localhost/", + }) + require.NoError(t, err) + require.Equal(t, "http://localhost/foo", result) +} + +func TestJSScrapeParamScrape(t *testing.T) { + js := ` + export default function({ scrape }) { + return scrape("/foo", function({ url }) { + return { + url: url, + foo: "bar", + }; + }); + } + ` + exports, err := flyscrape.Compile(js, nil) + require.NoError(t, err) + + result, err := exports.Scrape(flyscrape.ScrapeParams{ + HTML: html, + URL: "http://localhost/", + Process: func(url string) ([]byte, error) { + return nil, nil + }, + }) + require.NoError(t, err) + require.Equal(t, map[string]any{ + "url": "http://localhost/foo", + "foo": "bar", + }, result) +} + +func TestJSScrapeParamScrapeDeep(t *testing.T) { + js := ` + export default function({ scrape }) { + return scrape("/foo/", function({ url, scrape }) { + return { + url: url, + deep: scrape("bar", function({ url }) { + return url; + }), + }; + }); + } + ` + exports, err := flyscrape.Compile(js, nil) + require.NoError(t, err) + + result, err := exports.Scrape(flyscrape.ScrapeParams{ + HTML: html, + URL: "http://localhost/", + Process: func(url string) ([]byte, error) { + return nil, nil + }, + }) + require.NoError(t, err) + require.Equal(t, map[string]any{ + "url": "http://localhost/foo/", + "deep": "http://localhost/foo/bar", + }, result) +} + func TestJSCompileError(t *testing.T) { exports, err := flyscrape.Compile("import foo;", nil) require.Error(t, err) -- cgit v1.2.3