// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package flyscrape_test
import (
"encoding/json"
"testing"
"github.com/dop251/goja"
"github.com/philippta/flyscrape"
"github.com/stretchr/testify/require"
)
// html is the document passed as ScrapeParams.HTML by the tests in this file.
// It must contain an <h1> and a <p> element, because the package-level script
// selects exactly those two elements and TestJSScrape asserts on their text.
var html = `
<h1>headline</h1>
<p>paragraph</p>
`
// script is the scraping script compiled by TestJSScrape. It exports a config
// object holding a url, and a default function that returns the text of the
// "h1" and "p" elements of the document together with the url it was given.
var script = `
export const config = {
url: "https://localhost/",
}
export default function({ doc, url }) {
return {
headline: doc.find("h1").text(),
body: doc.find("p").text(),
url: url,
}
}
`
// TestJSScrape compiles the package-level script, checks that it exposes a
// non-empty config, and verifies the fields the default function extracts
// from the html fixture.
func TestJSScrape(t *testing.T) {
	exports, err := flyscrape.Compile(script, nil)
	require.NoError(t, err)
	require.NotNil(t, exports)
	// Config has to be called here: the bare method value exports.Config is
	// never empty, so asserting on it would pass no matter what was compiled.
	require.NotEmpty(t, exports.Config())

	result, err := exports.Scrape(flyscrape.ScrapeParams{
		HTML: html,
		URL:  "http://localhost/",
	})
	require.NoError(t, err)

	m, ok := result.(map[string]any)
	require.True(t, ok)
	require.Equal(t, "headline", m["headline"])
	require.Equal(t, "paragraph", m["body"])
	require.Equal(t, "http://localhost/", m["url"])
}
func TestJSScrapeObject(t *testing.T) {
js := `
export default function() {
return {foo: "bar"}
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
m, ok := result.(map[string]any)
require.True(t, ok)
require.Equal(t, "bar", m["foo"])
}
func TestJSScrapeNull(t *testing.T) {
js := `
export default function() {
return null
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
require.Nil(t, result)
}
func TestJSScrapeUndefined(t *testing.T) {
js := `
export default function() { }
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
require.Nil(t, result)
}
func TestJSScrapeString(t *testing.T) {
js := `
export default function() {
return "foo"
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
m, ok := result.(string)
require.True(t, ok)
require.Equal(t, "foo", m)
}
// TestJSScrapeArray verifies that an array of numbers returned by the default
// export comes back from Scrape as a []any of float64 values.
func TestJSScrapeArray(t *testing.T) {
	js := `
export default function() {
return [1,2,3]
}
`
	exports, err := flyscrape.Compile(js, nil)
	require.NoError(t, err)

	result, err := exports.Scrape(flyscrape.ScrapeParams{
		HTML: html,
		URL:  "http://localhost/",
	})
	require.NoError(t, err)

	m, ok := result.([]any)
	require.True(t, ok)
	// Compare the whole slice: indexing m[0..2] directly would panic on a
	// short result and would silently accept trailing extra elements.
	require.Equal(t, []any{float64(1), float64(2), float64(3)}, m)
}
func TestJSScrapeNaN(t *testing.T) {
js := `
export default function() {
return NaN
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
require.Nil(t, result)
}
func TestJSScrapeParamURL(t *testing.T) {
js := `
export default function({ url }) {
return url;
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
require.Equal(t, "http://localhost/", result)
}
func TestJSScrapeParamAbsoluteURL(t *testing.T) {
js := `
export default function({ absoluteURL }) {
return absoluteURL("/foo");
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
})
require.NoError(t, err)
require.Equal(t, "http://localhost/foo", result)
}
func TestJSScrapeParamScrape(t *testing.T) {
js := `
export default function({ scrape }) {
return scrape("/foo", function({ url }) {
return {
url: url,
foo: "bar",
};
});
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
Process: func(url string) ([]byte, error) {
return nil, nil
},
})
require.NoError(t, err)
require.Equal(t, map[string]any{
"url": "http://localhost/foo",
"foo": "bar",
}, result)
}
func TestJSScrapeParamScrapeDeep(t *testing.T) {
js := `
export default function({ scrape }) {
return scrape("/foo/", function({ url, scrape }) {
return {
url: url,
deep: scrape("bar", function({ url }) {
return url;
}),
};
});
}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
result, err := exports.Scrape(flyscrape.ScrapeParams{
HTML: html,
URL: "http://localhost/",
Process: func(url string) ([]byte, error) {
return nil, nil
},
})
require.NoError(t, err)
require.Equal(t, map[string]any{
"url": "http://localhost/foo/",
"deep": "http://localhost/foo/bar",
}, result)
}
// TestJSCompileError verifies that compiling an invalid script returns nil
// exports and an error that unwraps to a flyscrape.TransformError carrying the
// position and message of the syntax problem.
func TestJSCompileError(t *testing.T) {
	exports, err := flyscrape.Compile("import foo;", nil)
	require.Error(t, err)
	require.Nil(t, exports)

	var terr flyscrape.TransformError
	require.ErrorAs(t, err, &terr)
	// require.Equal takes (expected, actual); keeping that order makes the
	// failure output label the two values correctly.
	require.Equal(t, flyscrape.TransformError{
		Line:   1,
		Column: 10,
		Text:   `Expected "from" but found ";"`,
	}, terr)
}
func TestJSConfig(t *testing.T) {
js := `
export const config = {
url: 'http://localhost/',
depth: 5,
allowedDomains: ['example.com'],
}
export default function() {}
`
exports, err := flyscrape.Compile(js, nil)
require.NoError(t, err)
require.NotNil(t, exports)
require.NotEmpty(t, exports.Config())
type config struct {
URL string `json:"url"`
Depth int `json:"depth"`
AllowedDomains []string `json:"allowedDomains"`
}
var cfg config
err = json.Unmarshal(exports.Config(), &cfg)
require.NoError(t, err)
require.Equal(t, config{
URL: "http://localhost/",
Depth: 5,
AllowedDomains: []string{"example.com"},
}, cfg)
}
// TestJSImports verifies that modules supplied through flyscrape.Imports can
// be imported by the script (both as a default import and as a named import)
// and that values derived from them are visible as exports.
func TestJSImports(t *testing.T) {
	js := `
import A from "flyscrape"
import { bar } from "flyscrape/foo"
export const config = {}
export default function() {}
export const a = A.foo
export const b = bar()
`
	imports := flyscrape.Imports{
		"flyscrape": map[string]any{
			"foo": 10,
		},
		"flyscrape/foo": map[string]any{
			"bar": func() string {
				return "baz"
			},
		},
	}

	exports, err := flyscrape.Compile(js, imports)
	require.NoError(t, err)
	require.NotNil(t, exports)
	// Compare the interface values directly: an unchecked type assertion
	// would panic on a type mismatch instead of reporting a test failure.
	require.Equal(t, int64(10), exports["a"])
	require.Equal(t, "baz", exports["b"])
}
// TestJSArbitraryFunction verifies that a named function exported by the
// script is exposed in the exports map as a callable goja function and that
// calling it yields the script's return value.
func TestJSArbitraryFunction(t *testing.T) {
	js := `
export const config = {}
export default function() {}
export function foo() {
return "bar";
}
`
	exports, err := flyscrape.Compile(js, nil)
	require.NoError(t, err)
	require.NotNil(t, exports)

	// Use the comma-ok form so a missing or differently typed export fails
	// the test with a message rather than panicking.
	fn, ok := exports["foo"].(func(goja.FunctionCall) goja.Value)
	require.True(t, ok, "export foo should be a callable goja function")
	require.Equal(t, "bar", fn(goja.FunctionCall{}).String())
}
// TestJSArbitraryConstString verifies that a string constant exported by the
// script is exposed in the exports map as a Go string.
func TestJSArbitraryConstString(t *testing.T) {
	js := `
export const config = {}
export default function() {}
export const foo = "bar"
`
	exports, err := flyscrape.Compile(js, nil)
	require.NoError(t, err)
	require.NotNil(t, exports)
	// Compare the interface value directly: an unchecked .(string) assertion
	// would panic on a type mismatch instead of reporting a test failure.
	require.Equal(t, "bar", exports["foo"])
}