// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. package flyscrape_test import ( "encoding/json" "testing" "github.com/dop251/goja" "github.com/philippta/flyscrape" "github.com/stretchr/testify/require" ) var html = `

headline

paragraph

` var script = ` export const config = { url: "https://localhost/", } export default function({ doc, url }) { return { headline: doc.find("h1").text(), body: doc.find("p").text(), url: url, } } ` func TestJSScrape(t *testing.T) { exports, err := flyscrape.Compile(script, nil) require.NoError(t, err) require.NotNil(t, exports) require.NotEmpty(t, exports.Config) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.(map[string]any) require.True(t, ok) require.Equal(t, "headline", m["headline"]) require.Equal(t, "paragraph", m["body"]) require.Equal(t, "http://localhost/", m["url"]) } func TestJSScrapeObject(t *testing.T) { js := ` export default function() { return {foo: "bar"} } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.(map[string]any) require.True(t, ok) require.Equal(t, "bar", m["foo"]) } func TestJSScrapeNull(t *testing.T) { js := ` export default function() { return null } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Nil(t, result) } func TestJSScrapeUndefined(t *testing.T) { js := ` export default function() { } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Nil(t, result) } func TestJSScrapeString(t *testing.T) { js := ` export default function() { return "foo" } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.(string) require.True(t, ok) require.Equal(t, "foo", m) } func TestJSScrapeArray(t *testing.T) { js := ` export default function() { return [1,2,3] } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.([]any) require.True(t, ok) require.Equal(t, float64(1), m[0]) require.Equal(t, float64(2), m[1]) require.Equal(t, float64(3), m[2]) } func TestJSScrapeNaN(t *testing.T) { js := ` export default function() { return NaN } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Nil(t, result) } func TestJSScrapeParamURL(t *testing.T) { js := ` export default function({ url }) { return url; } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Equal(t, "http://localhost/", result) } func TestJSScrapeParamAbsoluteURL(t *testing.T) { js := ` export default function({ absoluteURL }) { return absoluteURL("/foo"); } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Equal(t, "http://localhost/foo", result) } func TestJSScrapeParamScrape(t *testing.T) { js := ` export default function({ scrape }) { return scrape("/foo", function({ url }) { return { url: url, foo: "bar", }; }); } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", Process: func(url string) ([]byte, error) { return nil, nil }, }) require.NoError(t, err) require.Equal(t, map[string]any{ "url": "http://localhost/foo", "foo": "bar", }, result) } func TestJSScrapeParamScrapeDeep(t *testing.T) { js := ` export default function({ scrape }) { return scrape("/foo/", function({ url, scrape }) { return { url: url, deep: scrape("bar", function({ url }) { return url; }), }; }); } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", Process: func(url string) ([]byte, error) { return nil, nil }, }) require.NoError(t, err) require.Equal(t, map[string]any{ "url": "http://localhost/foo/", "deep": "http://localhost/foo/bar", }, result) } func TestJSCompileError(t *testing.T) { exports, err := flyscrape.Compile("import foo;", nil) require.Error(t, err) require.Nil(t, exports) var terr flyscrape.TransformError require.ErrorAs(t, err, &terr) require.Equal(t, terr, flyscrape.TransformError{ Line: 1, Column: 10, Text: `Expected "from" but found ";"`, }) } func TestJSConfig(t *testing.T) { js := ` export const config = { url: 'http://localhost/', depth: 5, allowedDomains: ['example.com'], } export default function() {} ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) require.NotNil(t, exports) require.NotEmpty(t, exports.Config()) type config struct { URL string `json:"url"` Depth int `json:"depth"` AllowedDomains []string `json:"allowedDomains"` } var cfg config err = json.Unmarshal(exports.Config(), &cfg) require.NoError(t, err) require.Equal(t, config{ URL: "http://localhost/", Depth: 5, AllowedDomains: []string{"example.com"}, }, cfg) } func TestJSImports(t *testing.T) { js := ` import A from "flyscrape" import { bar } from "flyscrape/foo" export const config = {} export default function() {} export const a = A.foo export const b = bar() ` imports := flyscrape.Imports{ "flyscrape": map[string]any{ "foo": 10, }, "flyscrape/foo": map[string]any{ "bar": func() string { return "baz" }, }, } exports, err := flyscrape.Compile(js, imports) require.NoError(t, err) require.NotNil(t, exports) require.Equal(t, int64(10), exports["a"].(int64)) require.Equal(t, "baz", exports["b"].(string)) } func TestJSArbitraryFunction(t *testing.T) { js := ` export const config = {} export default function() {} export function foo() { return "bar"; } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) require.NotNil(t, exports) foo := func() string { fn := exports["foo"].(func(goja.FunctionCall) goja.Value) return fn(goja.FunctionCall{}).String() } require.Equal(t, "bar", foo()) } func TestJSArbitraryConstString(t *testing.T) { js := ` export const config = {} export default function() {} export const foo = "bar" ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) require.NotNil(t, exports) require.Equal(t, "bar", exports["foo"].(string)) }