diff options
Diffstat (limited to 'scrape/parser.go')
| -rw-r--r-- | scrape/parser.go | 70 |
1 files changed, 0 insertions, 70 deletions
diff --git a/scrape/parser.go b/scrape/parser.go deleted file mode 100644 index 3304b77..0000000 --- a/scrape/parser.go +++ /dev/null @@ -1,70 +0,0 @@ -package scrape - -import ( - "encoding/json" - "strings" - - "github.com/PuerkitoBio/goquery" -) - -func ParseFromJSON(html, input string) any { - var inputJSON map[string]any - json.Unmarshal([]byte(input), &inputJSON) - return Parse(html, inputJSON) -} - -func Parse(html string, fields map[string]any) any { - return queryMap(Doc(html), fields) -} - -func AddMeta(result any, key string, value any) { - switch res := result.(type) { - case []map[string]any: - for i := range res { - res[i][key] = value - } - case map[string]any: - res[key] = value - } -} - -func walk(s *goquery.Selection, fields map[string]any) map[string]any { - out := map[string]any{} - for k, v := range fields { - if strings.HasPrefix(k, "#") { - continue - } - - switch val := v.(type) { - case string: - segs := strings.SplitN(k, "#", 2) - if len(segs) == 2 && segs[1] == "html" { - out[segs[0]] = QueryHTML(s, val) - } else if len(segs) == 2 { - out[segs[0]] = QueryAttr(s, val, segs[1]) - } else { - out[k] = Query(s, val) - } - - case map[string]any: - out[k] = queryMap(s, val) - } - } - return out -} - -func queryMap(s *goquery.Selection, fields map[string]any) any { - if sel, ok := fields["#each"].(string); ok { - rows := []map[string]any{} - QueryFunc(s, sel, func(s *goquery.Selection) { - rows = append(rows, walk(s, fields)) - }) - return rows - } - - if sel, ok := fields["#element"].(string); ok { - return walk(s.Find(sel), fields) - } - - return walk(s, fields) -} |