From 7e4cf39a0ba6ccbd5cc036700a8b1ff9358ecc3d Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 10 Aug 2023 18:18:01 +0200 Subject: improve --- js/js.go | 102 +++++++++++++++++++++++++-------------------------------------- 1 file changed, 41 insertions(+), 61 deletions(-) (limited to 'js/js.go') diff --git a/js/js.go b/js/js.go index c5b8818..dd9c7a1 100644 --- a/js/js.go +++ b/js/js.go @@ -6,9 +6,10 @@ import ( "fmt" "math/rand" "os" - "path/filepath" + "strings" "time" + "flyscrape/flyscrape" "flyscrape/js/jsbundle" "github.com/evanw/esbuild/pkg/api" @@ -19,53 +20,23 @@ func init() { rand.Seed(time.Now().UnixNano()) } -type RunOptions struct { - HTML string -} - -type RunFunc func(RunOptions) any - -type Options struct { - URL string `json:"url"` -} - -func Compile(file string) (*Options, RunFunc, error) { +func Compile(file string) (*flyscrape.ScrapeOptions, flyscrape.ScrapeFunc, error) { src, err := build(file) if err != nil { return nil, nil, err } - os.WriteFile("out.js", []byte(src), 0o644) return vm(src) } func build(file string) (string, error) { - dir, err := os.MkdirTemp("", "flyscrape") + data, err := os.ReadFile(file) if err != nil { - return "", err + return "", fmt.Errorf("read %q: %w", file, err) } - defer os.RemoveAll(dir) - tmpfile := filepath.Join(dir, "flyscrape.js") - if err := os.WriteFile(tmpfile, jsbundle.Flyscrape, 0o644); err != nil { - return "", err - } - - resolve := api.Plugin{ - Name: "flyscrape", - Setup: func(build api.PluginBuild) { - build.OnResolve(api.OnResolveOptions{ - Filter: "^flyscrape$", - }, func(ora api.OnResolveArgs) (api.OnResolveResult, error) { - return api.OnResolveResult{Path: tmpfile}, nil - }) - }, - } - - res := api.Build(api.BuildOptions{ - EntryPoints: []string{file}, - Bundle: true, - Platform: api.PlatformNode, - Plugins: []api.Plugin{resolve}, + res := api.Transform(string(data), api.TransformOptions{ + Platform: api.PlatformBrowser, + Format: api.FormatIIFE, }) var errs []error @@ -76,51 +47,54 @@ func build(file string) (string, error) { return "", errors.Join(errs...) } - out := string(res.OutputFiles[0].Contents) - return out, nil + return string(res.Code), nil } -func vm(src string) (*Options, RunFunc, error) { - os.WriteFile("out.js", []byte(src), 0o644) - +func vm(src string) (*flyscrape.ScrapeOptions, flyscrape.ScrapeFunc, error) { ctx := v8.NewContext() ctx.RunScript("var module = {}", "main.js") - if _, err := ctx.RunScript(src, "main.js"); err != nil { + + if _, err := ctx.RunScript(removeIIFE(jsbundle.Flyscrape), "main.js"); err != nil { return nil, nil, fmt.Errorf("run bundled js: %w", err) } - - val, err := ctx.RunScript("module.exports.options", "main.js") - if err != nil { - return nil, nil, fmt.Errorf("export options: %w", err) + if _, err := ctx.RunScript(`const require = () => require_flyscrape();`, "main.js"); err != nil { + return nil, nil, fmt.Errorf("define require: %w", err) } - options, err := val.AsObject() - if err != nil { - return nil, nil, fmt.Errorf("cast options as object: %w", err) + if _, err := ctx.RunScript(removeIIFE(src), "main.js"); err != nil { + return nil, nil, fmt.Errorf("userscript: %w", err) } - var opts Options - url, err := options.Get("url") + var opts flyscrape.ScrapeOptions + + url, err := ctx.RunScript("options.url", "main.js") if err != nil { - return nil, nil, fmt.Errorf("getting url from options: %w", err) + return nil, nil, fmt.Errorf("eval: options.url: %w", err) } opts.URL = url.String() - run := func(ro RunOptions) any { + depth, err := ctx.RunScript("options.depth", "main.js") + if err != nil { + return nil, nil, fmt.Errorf("eval: options.depth: %w", err) + } + opts.Depth = int(depth.Integer()) + + scrape := func(params flyscrape.ScrapeParams) (flyscrape.M, error) { suffix := randSeq(10) - ctx.Global().Set("html_"+suffix, ro.HTML) - data, err := ctx.RunScript(fmt.Sprintf(`JSON.stringify(module.exports.default({html: html_%s}))`, suffix), "main.js") + ctx.Global().Set("html_"+suffix, params.HTML) + data, err := ctx.RunScript(fmt.Sprintf(`JSON.stringify(stdin_default({html: html_%s}))`, suffix), "main.js") if err != nil { - return err.Error() + return nil, err } - var obj any + var obj flyscrape.M if err := json.Unmarshal([]byte(data.String()), &obj); err != nil { - return err.Error() + return nil, err } - return obj + return obj, nil } - return &opts, run, nil + + return &opts, scrape, nil } func randSeq(n int) string { @@ -131,3 +105,9 @@ func randSeq(n int) string { } return string(b) } + +func removeIIFE(s string) string { + s = strings.TrimPrefix(s, "(() => {\n") + s = strings.TrimSuffix(s, "})();\n") + return s +} -- cgit v1.2.3