summaryrefslogtreecommitdiff
path: root/js_lib.go
diff options
context:
space:
mode:
Diffstat (limited to 'js_lib.go')
-rw-r--r--js_lib.go108
1 files changed, 106 insertions, 2 deletions
diff --git a/js_lib.go b/js_lib.go
index abfe07c..ce4500a 100644
--- a/js_lib.go
+++ b/js_lib.go
@@ -9,13 +9,27 @@ import (
"encoding/json"
"fmt"
"io"
+ "log"
+ "mime"
"net/http"
gourl "net/url"
+ "os"
+ "path/filepath"
"strings"
+
+ "golang.org/x/sync/errgroup"
)
-func NewJSLibrary(client *http.Client) Imports {
- return Imports{
+func NewJSLibrary(client *http.Client) (imports Imports, wait func()) {
+ downloads := &errgroup.Group{}
+
+ // Allow 5 parallel downloads. Why 5?
+ // Docker downloads 3 layers in parallel.
+ // My Chrome downloads up to 6 files in parallel.
+ // 5 feels like a reasonable number.
+ downloads.SetLimit(5)
+
+ im := Imports{
"flyscrape": map[string]any{
"parse": jsParse(),
},
@@ -23,8 +37,11 @@ func NewJSLibrary(client *http.Client) Imports {
"get": jsHTTPGet(client),
"postForm": jsHTTPPostForm(client),
"postJSON": jsHTTPPostJSON(client),
+ "download": jsHTTPDownload(client, downloads),
},
}
+
+ return im, func() { downloads.Wait() }
}
func jsParse() func(html string) map[string]any {
@@ -85,6 +102,93 @@ func jsHTTPPostJSON(client *http.Client) func(url string, data any) map[string]a
}
}
+func jsHTTPDownload(client *http.Client, g *errgroup.Group) func(url string, dst string) {
+ fileExists := func(name string) bool {
+ _, err := os.Stat(name)
+ return err == nil
+ }
+
+ isDir := func(path string) bool {
+ if strings.HasSuffix(path, "/") {
+ return true
+ }
+ if filepath.Ext(path) == "" {
+ return true
+ }
+ s, err := os.Stat(path)
+ return err == nil && s.IsDir()
+ }
+
+ suggestedFilename := func(url, contentDisp string) string {
+ filename := filepath.Base(url)
+
+ if contentDisp == "" {
+ return filename
+ }
+
+ _, params, err := mime.ParseMediaType(contentDisp)
+ if err != nil {
+ return filename
+ }
+
+ name, ok := params["filename"]
+ if !ok || name == "" {
+ return filename
+ }
+
+ return filepath.Base(name)
+ }
+
+ return func(url string, dst string) {
+ g.Go(func() error {
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ log.Printf("error downloading file %q: %v", url, err)
+ return nil
+ }
+ req.Header.Add(HeaderBypassCache, "true")
+
+ resp, err := client.Do(req)
+ if err != nil {
+ log.Printf("error downloading file %q: %v", url, err)
+ return nil
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+ log.Printf("error downloading file %q: unexpected status code %d", url, resp.StatusCode)
+ return nil
+ }
+
+ dst, err = filepath.Abs(dst)
+ if err != nil {
+ log.Printf("error downloading file %q: abs path failed: %v", url, err)
+ return nil
+ }
+
+ if isDir(dst) {
+ name := suggestedFilename(url, resp.Header.Get("Content-Disposition"))
+ dst = filepath.Join(dst, name)
+ }
+
+ if fileExists(dst) {
+ return nil
+ }
+
+ os.MkdirAll(filepath.Dir(dst), 0o755)
+ f, err := os.Create(dst)
+ if err != nil {
+ log.Printf("error downloading file %q: file save failed: %v", url, err)
+ return nil
+ }
+ defer f.Close()
+
+ io.Copy(f, resp.Body)
+ return nil
+ })
+ }
+}
+
func jsFetch(client *http.Client, req *http.Request) (obj map[string]any) {
obj = map[string]any{
"body": "",