summaryrefslogblamecommitdiff
path: root/js.go
blob: 7e1f7c03ad869e41eaf49ef8a732188cb282f7d4 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11



                                                                      
                 





                       
                 

                                          
                                           


                             
                  
 






                                               









                                                                       
                                                      
                              
                       
                                    
         


                      

                                                       

                                              



                                        





                                                        




                                               
                                    

 
                                                 

                                                   
 
                                                                                     
                                                                                
         
                                                                                                         
                                                                                 
         
                                                                            
                                                                           

         
                                                                      
                       
                                                                      
         
                            
                                                                     
         
 
                                                          
                                     
                                                             

                                                                                                                                                 
                               
                                       

                 
                           
                                                                                   
                                       

                 
                               
         
 
                                                









                                                                                 





                                               
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package flyscrape

import (
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"strings"

	"github.com/evanw/esbuild/pkg/api"
	"github.com/philippta/flyscrape/js"
	v8 "rogchap.com/v8go"
)

// Config is the raw JSON encoding of a script's `config` value, exactly as
// produced by JSON.stringify inside the script's JavaScript context.
type Config []byte

// ScrapeParams holds the inputs handed to a compiled script's scrape function.
type ScrapeParams struct {
	HTML string // exposed to the script as the `html` property of its argument
	URL  string // exposed to the script as the `url` property of its argument
}

// ScrapeFunc runs a compiled script against one set of ScrapeParams. The
// script's return value is serialized to JSON inside the JavaScript context
// and decoded back into generic Go values (maps, slices, strings, ...).
type ScrapeFunc func(ScrapeParams) (any, error)

// TransformError is a single diagnostic reported while transforming a script.
// Line and Column are left at zero when the diagnostic carried no source
// location.
type TransformError struct {
	Line   int
	Column int
	Text   string
}

// Error renders the diagnostic as "line:column: text".
func (e TransformError) Error() string {
	const layout = "%d:%d: %s"
	return fmt.Sprintf(layout, e.Line, e.Column, e.Text)
}

// Compile transforms the script source with build and then evaluates the
// result with vm. It returns the script's JSON-encoded config and a function
// that executes the script's scraper. A transform failure is returned
// unchanged, without evaluating anything.
func Compile(src string) (Config, ScrapeFunc, error) {
	bundle, err := build(src)
	if err != nil {
		return nil, nil, err
	}

	cfg, scrape, err := vm(bundle)
	return cfg, scrape, err
}

// build runs src through esbuild's Transform API, targeting the browser
// platform with IIFE output, and returns the generated code.
//
// When esbuild reports errors, every one of them is converted into a
// TransformError (with line and column filled in when a location is
// available) and the lot is returned as a single joined error.
func build(src string) (string, error) {
	opts := api.TransformOptions{
		Platform: api.PlatformBrowser,
		Format:   api.FormatIIFE,
	}
	out := api.Transform(src, opts)

	// Happy path first: nothing to report, hand back the generated code.
	if len(out.Errors) == 0 {
		return string(out.Code), nil
	}

	failures := make([]error, 0, len(out.Errors))
	for _, m := range out.Errors {
		te := TransformError{Text: m.Text}
		if loc := m.Location; loc != nil {
			te.Line, te.Column = loc.Line, loc.Column
		}
		failures = append(failures, te)
	}
	return "", errors.Join(failures...)
}

// vm evaluates a bundled script inside a fresh V8 context. It returns the
// script's JSON-encoded config together with a ScrapeFunc that runs the
// script's scraper inside that same context.
//
// Both the flyscrape bundle and src are passed through removeIIFE before
// being run. The later snippets refer to require_flyscrape, config and
// stdin_default by name, which relies on those declarations not being hidden
// inside an IIFE wrapper.
//
// An error is returned when any of the setup scripts fails or when config
// cannot be serialized to a JSON string. In that case the context and its
// isolate are released before returning.
func vm(src string) (Config, ScrapeFunc, error) {
	ctx := v8.NewContext()

	// On success the returned ScrapeFunc keeps ctx alive. On every failure
	// path nothing else references it, so release the context and the isolate
	// NewContext created for it instead of leaking them.
	ok := false
	defer func() {
		if !ok {
			ctx.Close()
			ctx.Isolate().Dispose()
		}
	}()

	if _, err := ctx.RunScript("var module = {}", "main.js"); err != nil {
		return nil, nil, fmt.Errorf("defining module object: %w", err)
	}
	if _, err := ctx.RunScript(removeIIFE(js.Flyscrape), "main.js"); err != nil {
		return nil, nil, fmt.Errorf("running flyscrape bundle: %w", err)
	}
	if _, err := ctx.RunScript(`const require = () => require_flyscrape();`, "main.js"); err != nil {
		return nil, nil, fmt.Errorf("creating require function: %w", err)
	}
	if _, err := ctx.RunScript(removeIIFE(src), "main.js"); err != nil {
		return nil, nil, fmt.Errorf("running user script: %w", err)
	}

	cfg, err := ctx.RunScript("JSON.stringify(config)", "main.js")
	if err != nil {
		return nil, nil, fmt.Errorf("reading config: %w", err)
	}
	// JSON.stringify yields undefined (not a string) for values it cannot
	// serialize, e.g. an undefined config.
	if !cfg.IsString() {
		return nil, nil, errors.New("config is not a string")
	}

	scrape := func(params ScrapeParams) (any, error) {
		// The inputs are handed to the script through uniquely named globals
		// so they never have to be escaped into JavaScript source text.
		suffix := randSeq(16)
		htmlKey, urlKey := "html_"+suffix, "url_"+suffix
		global := ctx.Global()

		// Remove the temporary globals once the call is done; otherwise every
		// scraped document would stay reachable from the global object for
		// the lifetime of the context.
		if err := global.Set(htmlKey, params.HTML); err != nil {
			return nil, fmt.Errorf("passing html to script: %w", err)
		}
		defer global.Delete(htmlKey)
		if err := global.Set(urlKey, params.URL); err != nil {
			return nil, fmt.Errorf("passing url to script: %w", err)
		}
		defer global.Delete(urlKey)

		data, err := ctx.RunScript(fmt.Sprintf(`JSON.stringify(stdin_default({html: %s, url: %s}))`, htmlKey, urlKey), "main.js")
		if err != nil {
			return nil, err
		}
		// A script that returns undefined (or a function) makes JSON.stringify
		// return undefined; report that directly instead of failing later with
		// a confusing JSON syntax error on the text "undefined".
		if !data.IsString() {
			return nil, errors.New("scrape result is not JSON serializable")
		}

		var obj any
		if err := json.Unmarshal([]byte(data.String()), &obj); err != nil {
			return nil, err
		}

		return obj, nil
	}

	ok = true
	return Config(cfg.String()), scrape, nil
}

// randSeq returns a string of n ASCII letters (a-z, A-Z), each picked with
// math/rand. It is used to build unique identifier suffixes and is not meant
// to be unpredictable.
func randSeq(n int) string {
	const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	out := make([]byte, n)
	for i := 0; i < n; i++ {
		out[i] = alphabet[rand.Intn(len(alphabet))]
	}
	return string(out)
}

// removeIIFE strips an arrow-function IIFE wrapper from s: a leading
// "(() => {\n" and a trailing "})();\n" are each removed when present.
// Anything that does not match those exact markers is left untouched.
func removeIIFE(s string) string {
	const (
		opening = "(() => {\n"
		closing = "})();\n"
	)

	if strings.HasPrefix(s, opening) {
		s = s[len(opening):]
	}
	if strings.HasSuffix(s, closing) {
		s = s[:len(s)-len(closing)]
	}
	return s
}