summary | refs | log | tree | commit | diff
path: root/scrape/parser.go
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com>, 2023-08-10 18:18:01 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com>, 2023-08-10 18:18:01 +0200
commit: 7e4cf39a0ba6ccbd5cc036700a8b1ff9358ecc3d (patch)
tree: 0f48b46e70162bad117f9f50d297487dee33266f /scrape/parser.go
parent: a9b61f84070cc7ca0d6e26f187c745619a91422a (diff)
improve
Diffstat (limited to 'scrape/parser.go')
-rw-r--r-- scrape/parser.go 70
1 files changed, 0 insertions, 70 deletions
diff --git a/scrape/parser.go b/scrape/parser.go
deleted file mode 100644
index 3304b77..0000000
--- a/scrape/parser.go
+++ /dev/null
@@ -1,70 +0,0 @@
-package scrape
-
-import (
- "encoding/json"
- "strings"
-
- "github.com/PuerkitoBio/goquery"
-)
-
-func ParseFromJSON(html, input string) any {
- var inputJSON map[string]any
- json.Unmarshal([]byte(input), &inputJSON)
- return Parse(html, inputJSON)
-}
-
-func Parse(html string, fields map[string]any) any {
- return queryMap(Doc(html), fields)
-}
-
-func AddMeta(result any, key string, value any) {
- switch res := result.(type) {
- case []map[string]any:
- for i := range res {
- res[i][key] = value
- }
- case map[string]any:
- res[key] = value
- }
-}
-
-func walk(s *goquery.Selection, fields map[string]any) map[string]any {
- out := map[string]any{}
- for k, v := range fields {
- if strings.HasPrefix(k, "#") {
- continue
- }
-
- switch val := v.(type) {
- case string:
- segs := strings.SplitN(k, "#", 2)
- if len(segs) == 2 && segs[1] == "html" {
- out[segs[0]] = QueryHTML(s, val)
- } else if len(segs) == 2 {
- out[segs[0]] = QueryAttr(s, val, segs[1])
- } else {
- out[k] = Query(s, val)
- }
-
- case map[string]any:
- out[k] = queryMap(s, val)
- }
- }
- return out
-}
-
-func queryMap(s *goquery.Selection, fields map[string]any) any {
- if sel, ok := fields["#each"].(string); ok {
- rows := []map[string]any{}
- QueryFunc(s, sel, func(s *goquery.Selection) {
- rows = append(rows, walk(s, fields))
- })
- return rows
- }
-
- if sel, ok := fields["#element"].(string); ok {
- return walk(s.Find(sel), fields)
- }
-
- return walk(s, fields)
-}