// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. package flyscrape_test import ( "encoding/json" "testing" "github.com/dop251/goja" "github.com/philippta/flyscrape" "github.com/stretchr/testify/require" ) var html = `

headline

paragraph

` var script = ` export const config = { url: "https://localhost/", } export default function({ doc, url }) { return { headline: doc.find("h1").text(), body: doc.find("p").text(), url: url, } } ` func TestJSScrape(t *testing.T) { exports, err := flyscrape.Compile(script, nil) require.NoError(t, err) require.NotNil(t, exports) require.NotEmpty(t, exports.Config) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.(map[string]any) require.True(t, ok) require.Equal(t, "headline", m["headline"]) require.Equal(t, "paragraph", m["body"]) require.Equal(t, "http://localhost/", m["url"]) } func TestJSScrapeObject(t *testing.T) { js := ` export default function() { return {foo: "bar"} } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.(map[string]any) require.True(t, ok) require.Equal(t, "bar", m["foo"]) } func TestJSScrapeNull(t *testing.T) { js := ` export default function() { return null } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Nil(t, result) } func TestJSScrapeUndefined(t *testing.T) { js := ` export default function() { } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Nil(t, result) } func TestJSScrapeString(t *testing.T) { js := ` export default function() { return "foo" } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.(string) require.True(t, ok) require.Equal(t, "foo", m) } func TestJSScrapeArray(t *testing.T) { js := ` export default function() { return [1,2,3] } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) m, ok := result.([]any) require.True(t, ok) require.Equal(t, float64(1), m[0]) require.Equal(t, float64(2), m[1]) require.Equal(t, float64(3), m[2]) } func TestJSScrapeNaN(t *testing.T) { js := ` export default function() { return NaN } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) result, err := exports.Scrape(flyscrape.ScrapeParams{ HTML: html, URL: "http://localhost/", }) require.NoError(t, err) require.Nil(t, result) } func TestJSCompileError(t *testing.T) { exports, err := flyscrape.Compile("import foo;", nil) require.Error(t, err) require.Nil(t, exports) var terr flyscrape.TransformError require.ErrorAs(t, err, &terr) require.Equal(t, terr, flyscrape.TransformError{ Line: 1, Column: 10, Text: `Expected "from" but found ";"`, }) } func TestJSConfig(t *testing.T) { js := ` export const config = { url: 'http://localhost/', depth: 5, allowedDomains: ['example.com'], } export default function() {} ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) require.NotNil(t, exports) require.NotEmpty(t, exports.Config()) type config struct { URL string `json:"url"` Depth int `json:"depth"` AllowedDomains []string `json:"allowedDomains"` } var cfg config err = json.Unmarshal(exports.Config(), &cfg) require.NoError(t, err) require.Equal(t, config{ URL: "http://localhost/", Depth: 5, AllowedDomains: []string{"example.com"}, }, cfg) } func TestJSImports(t *testing.T) { js := ` import A from "pkg-a" import { bar } from "pkg-a/pkg-b" export const config = {} export default function() {} export const a = A.foo export const b = bar() ` imports := flyscrape.Imports{ "pkg-a": map[string]any{ "foo": 10, }, "pkg-a/pkg-b": map[string]any{ "bar": func() string { return "baz" }, }, } exports, err := flyscrape.Compile(js, imports) require.NoError(t, err) require.NotNil(t, exports) require.Equal(t, int64(10), exports["a"].(int64)) require.Equal(t, "baz", exports["b"].(string)) } func TestJSArbitraryFunction(t *testing.T) { js := ` export const config = {} export default function() {} export function foo() { return "bar"; } ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) require.NotNil(t, exports) foo := func() string { fn := exports["foo"].(func(goja.FunctionCall) goja.Value) return fn(goja.FunctionCall{}).String() } require.Equal(t, "bar", foo()) } func TestJSArbitraryConstString(t *testing.T) { js := ` export const config = {} export default function() {} export const foo = "bar" ` exports, err := flyscrape.Compile(js, nil) require.NoError(t, err) require.NotNil(t, exports) require.Equal(t, "bar", exports["foo"].(string)) }