summary | refs | log | tree | commit | diff
path: root/js/js.go
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-10 18:18:01 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-10 18:18:01 +0200
commit: 7e4cf39a0ba6ccbd5cc036700a8b1ff9358ecc3d (patch)
tree: 0f48b46e70162bad117f9f50d297487dee33266f /js/js.go
parent: a9b61f84070cc7ca0d6e26f187c745619a91422a (diff)
improve
Diffstat (limited to 'js/js.go')
-rw-r--r-- js/js.go 102
1 files changed, 41 insertions, 61 deletions
diff --git a/js/js.go b/js/js.go
index c5b8818..dd9c7a1 100644
--- a/js/js.go
+++ b/js/js.go
@@ -6,9 +6,10 @@ import (
"fmt"
"math/rand"
"os"
- "path/filepath"
+ "strings"
"time"
+ "flyscrape/flyscrape"
"flyscrape/js/jsbundle"
"github.com/evanw/esbuild/pkg/api"
@@ -19,53 +20,23 @@ func init() {
rand.Seed(time.Now().UnixNano())
}
-type RunOptions struct {
- HTML string
-}
-
-type RunFunc func(RunOptions) any
-
-type Options struct {
- URL string `json:"url"`
-}
-
-func Compile(file string) (*Options, RunFunc, error) {
+func Compile(file string) (*flyscrape.ScrapeOptions, flyscrape.ScrapeFunc, error) {
src, err := build(file)
if err != nil {
return nil, nil, err
}
- os.WriteFile("out.js", []byte(src), 0o644)
return vm(src)
}
func build(file string) (string, error) {
- dir, err := os.MkdirTemp("", "flyscrape")
+ data, err := os.ReadFile(file)
if err != nil {
- return "", err
+ return "", fmt.Errorf("read %q: %w", file, err)
}
- defer os.RemoveAll(dir)
- tmpfile := filepath.Join(dir, "flyscrape.js")
- if err := os.WriteFile(tmpfile, jsbundle.Flyscrape, 0o644); err != nil {
- return "", err
- }
-
- resolve := api.Plugin{
- Name: "flyscrape",
- Setup: func(build api.PluginBuild) {
- build.OnResolve(api.OnResolveOptions{
- Filter: "^flyscrape$",
- }, func(ora api.OnResolveArgs) (api.OnResolveResult, error) {
- return api.OnResolveResult{Path: tmpfile}, nil
- })
- },
- }
-
- res := api.Build(api.BuildOptions{
- EntryPoints: []string{file},
- Bundle: true,
- Platform: api.PlatformNode,
- Plugins: []api.Plugin{resolve},
+ res := api.Transform(string(data), api.TransformOptions{
+ Platform: api.PlatformBrowser,
+ Format: api.FormatIIFE,
})
var errs []error
@@ -76,51 +47,54 @@ func build(file string) (string, error) {
return "", errors.Join(errs...)
}
- out := string(res.OutputFiles[0].Contents)
- return out, nil
+ return string(res.Code), nil
}
-func vm(src string) (*Options, RunFunc, error) {
- os.WriteFile("out.js", []byte(src), 0o644)
-
+func vm(src string) (*flyscrape.ScrapeOptions, flyscrape.ScrapeFunc, error) {
ctx := v8.NewContext()
ctx.RunScript("var module = {}", "main.js")
- if _, err := ctx.RunScript(src, "main.js"); err != nil {
+
+ if _, err := ctx.RunScript(removeIIFE(jsbundle.Flyscrape), "main.js"); err != nil {
return nil, nil, fmt.Errorf("run bundled js: %w", err)
}
-
- val, err := ctx.RunScript("module.exports.options", "main.js")
- if err != nil {
- return nil, nil, fmt.Errorf("export options: %w", err)
+ if _, err := ctx.RunScript(`const require = () => require_flyscrape();`, "main.js"); err != nil {
+ return nil, nil, fmt.Errorf("define require: %w", err)
}
- options, err := val.AsObject()
- if err != nil {
- return nil, nil, fmt.Errorf("cast options as object: %w", err)
+ if _, err := ctx.RunScript(removeIIFE(src), "main.js"); err != nil {
+ return nil, nil, fmt.Errorf("userscript: %w", err)
}
- var opts Options
- url, err := options.Get("url")
+ var opts flyscrape.ScrapeOptions
+
+ url, err := ctx.RunScript("options.url", "main.js")
if err != nil {
- return nil, nil, fmt.Errorf("getting url from options: %w", err)
+ return nil, nil, fmt.Errorf("eval: options.url: %w", err)
}
opts.URL = url.String()
- run := func(ro RunOptions) any {
+ depth, err := ctx.RunScript("options.depth", "main.js")
+ if err != nil {
+ return nil, nil, fmt.Errorf("eval: options.depth: %w", err)
+ }
+ opts.Depth = int(depth.Integer())
+
+ scrape := func(params flyscrape.ScrapeParams) (flyscrape.M, error) {
suffix := randSeq(10)
- ctx.Global().Set("html_"+suffix, ro.HTML)
- data, err := ctx.RunScript(fmt.Sprintf(`JSON.stringify(module.exports.default({html: html_%s}))`, suffix), "main.js")
+ ctx.Global().Set("html_"+suffix, params.HTML)
+ data, err := ctx.RunScript(fmt.Sprintf(`JSON.stringify(stdin_default({html: html_%s}))`, suffix), "main.js")
if err != nil {
- return err.Error()
+ return nil, err
}
- var obj any
+ var obj flyscrape.M
if err := json.Unmarshal([]byte(data.String()), &obj); err != nil {
- return err.Error()
+ return nil, err
}
- return obj
+ return obj, nil
}
- return &opts, run, nil
+
+ return &opts, scrape, nil
}
func randSeq(n int) string {
@@ -131,3 +105,9 @@ func randSeq(n int) string {
}
return string(b)
}
+
+func removeIIFE(s string) string {
+ s = strings.TrimPrefix(s, "(() => {\n")
+ s = strings.TrimSuffix(s, "})();\n")
+ return s
+}