diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-11-20 17:07:28 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-11-20 17:07:28 +0100 |
| commit | 13322edf37510b6d3bb68a853368fd1a0a67a105 (patch) | |
| tree | af325e934f95deeee44cbdd126100dff912f0e5a | |
| parent | 47d58e6e0ebc44e7c00dffcc3b892932dc70eb3a (diff) | |
Gracefully handle NaN and Inf values (#21)
| -rw-r--r-- | js.go | 33 | ||||
| -rw-r--r-- | js_lib_test.go | 8 | ||||
| -rw-r--r-- | js_test.go | 38 | ||||
| -rw-r--r-- | modules/jsonprint/jsonprint.go | 6 |
4 files changed, 72 insertions, 13 deletions
```diff
diff --git a/js.go b/js.go
--- a/js.go
+++ b/js.go
@@ -135,22 +135,29 @@ func vm(src string, imports Imports) (Exports, error) {
 		exports[key] = obj.Get(key).Export()
 	}
 
-	exports["__scrape"] = scrape(vm)
+	exports["__scrape"], err = scrape(vm)
+	if err != nil {
+		return nil, err
+	}
 
 	return exports, nil
 }
 
-func scrape(vm *goja.Runtime) ScrapeFunc {
+func scrape(vm *goja.Runtime) (ScrapeFunc, error) {
 	var lock sync.Mutex
 
-	defaultfn, err := vm.RunString("module.exports.default")
+	if v, err := vm.RunString("module.exports.default"); err != nil || goja.IsUndefined(v) {
+		return nil, errors.New("default export is not defined")
+	}
+
+	defaultfn, err := vm.RunString("(o) => JSON.stringify(module.exports.default(o))")
 	if err != nil {
-		return func(ScrapeParams) (any, error) { return nil, errors.New("no scrape function defined") }
+		return nil, fmt.Errorf("failed to create scrape function: %w", err)
 	}
 
 	scrapefn, ok := defaultfn.Export().(func(goja.FunctionCall) goja.Value)
 	if !ok {
-		return func(ScrapeParams) (any, error) { return nil, errors.New("default export is not a function") }
+		return nil, errors.New("failed to export scrape funtion")
 	}
 
 	return func(p ScrapeParams) (any, error) {
@@ -183,9 +190,19 @@ func scrape(vm *goja.Runtime) ScrapeFunc {
 		o.Set("doc", doc)
 		o.Set("absoluteURL", absoluteURL)
 
-		ret := scrapefn(goja.FunctionCall{Arguments: []goja.Value{o}}).Export()
-		return ret, nil
-	}
+		ret := scrapefn(goja.FunctionCall{Arguments: []goja.Value{o}})
+		if goja.IsUndefined(ret) {
+			return nil, nil
+		}
+
+		var result any
+		if err := json.Unmarshal([]byte(ret.String()), &result); err != nil {
+			log.Println(err)
+			return nil, err
+		}
+
+		return result, nil
+	}, nil
 }
 
 func DocumentFromString(s string) (map[string]any, error) {
diff --git a/js_lib_test.go b/js_lib_test.go
index e375308..20e39da 100644
--- a/js_lib_test.go
+++ b/js_lib_test.go
@@ -20,6 +20,8 @@ func TestJSLibParse(t *testing.T) {
 
 	const doc = parse('<div class=foo>Hello world</div>')
 	export const text = doc.find(".foo").text()
+
+	export default function () {}
 	`
 
 	client := &http.Client{
@@ -45,6 +47,8 @@ func TestJSLibHTTPGet(t *testing.T) {
 	export const status = res.status;
 	export const error = res.error;
 	export const headers = res.headers;
+
+	export default function () {}
 	`
 
 	client := &http.Client{
@@ -86,6 +90,8 @@ func TestJSLibHTTPPostForm(t *testing.T) {
 	export const status = res.status;
 	export const error = res.error;
 	export const headers = res.headers;
+
+	export default function () {}
 	`
 
 	client := &http.Client{
@@ -134,6 +140,8 @@ func TestJSLibHTTPPostJSON(t *testing.T) {
 	export const status = res.status;
 	export const error = res.error;
 	export const headers = res.headers;
+
+	export default function () {}
 	`
 
 	client := &http.Client{
diff --git a/js_test.go b/js_test.go
--- a/js_test.go
+++ b/js_test.go
@@ -94,6 +94,21 @@ func TestJSScrapeNull(t *testing.T) {
 	require.Nil(t, result)
 }
 
+func TestJSScrapeUndefined(t *testing.T) {
+	js := `
+	export default function() { }
+	`
+	exports, err := flyscrape.Compile(js, nil)
+	require.NoError(t, err)
+
+	result, err := exports.Scrape(flyscrape.ScrapeParams{
+		HTML: html,
+		URL:  "http://localhost/",
+	})
+	require.NoError(t, err)
+	require.Nil(t, result)
+}
+
 func TestJSScrapeString(t *testing.T) {
 	js := `
 	export default function() {
@@ -131,9 +146,26 @@ func TestJSScrapeArray(t *testing.T) {
 
 	m, ok := result.([]any)
 	require.True(t, ok)
-	require.Equal(t, int64(1), m[0])
-	require.Equal(t, int64(2), m[1])
-	require.Equal(t, int64(3), m[2])
+	require.Equal(t, float64(1), m[0])
+	require.Equal(t, float64(2), m[1])
+	require.Equal(t, float64(3), m[2])
+}
+
+func TestJSScrapeNaN(t *testing.T) {
+	js := `
+	export default function() {
+		return NaN
+	}
+	`
+	exports, err := flyscrape.Compile(js, nil)
+	require.NoError(t, err)
+
+	result, err := exports.Scrape(flyscrape.ScrapeParams{
+		HTML: html,
+		URL:  "http://localhost/",
+	})
+	require.NoError(t, err)
+	require.Nil(t, result)
 }
 
 func TestJSCompileError(t *testing.T) {
diff --git a/modules/jsonprint/jsonprint.go b/modules/jsonprint/jsonprint.go
index a4be5c9..c40a8b9 100644
--- a/modules/jsonprint/jsonprint.go
+++ b/modules/jsonprint/jsonprint.go
@@ -41,9 +41,11 @@ func (m *Module) ReceiveResponse(resp *flyscrape.Response) {
 	o := output{
 		URL:       resp.Request.URL,
 		Data:      resp.Data,
-		Error:     resp.Error,
 		Timestamp: time.Now(),
 	}
+	if resp.Error != nil {
+		o.Error = resp.Error.Error()
+	}
 
 	fmt.Print(flyscrape.Prettify(o, " "))
 }
@@ -57,7 +59,7 @@ func (m *Module) Finalize() {
 type output struct {
 	URL       string    `json:"url,omitempty"`
 	Data      any       `json:"data,omitempty"`
-	Error     error     `json:"error,omitempty"`
+	Error     string    `json:"error,omitempty"`
 	Timestamp time.Time `json:"timestamp,omitempty"`
 }
 
```