summary refs log tree commit diff
path: root/scrape/query.go
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-07-27 19:03:41 +0200
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-07-27 19:03:41 +0200
commit a9b61f84070cc7ca0d6e26f187c745619a91422a (patch)
tree d69b67142b6de860d7da23bd5ff8c62af0aaca1e /scrape/query.go
init
Diffstat (limited to 'scrape/query.go')
-rw-r--r-- scrape/query.go 41
1 files changed, 41 insertions, 0 deletions
diff --git a/scrape/query.go b/scrape/query.go
new file mode 100644
index 0000000..1fe5ea4
--- /dev/null
+++ b/scrape/query.go
@@ -0,0 +1,41 @@
+package scrape
+
+import (
+ "strings"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+// emptyDoc is the document obtained by parsing an empty input. Doc returns
+// its Selection when parsing the caller's HTML fails.
+//
+// The error from NewDocumentFromReader is deliberately discarded here.
+// NOTE(review): presumably parsing an empty in-memory reader cannot fail; if
+// it ever did, emptyDoc would be nil and Doc's fallback would dereference it.
+var emptyDoc, _ = goquery.NewDocumentFromReader(strings.NewReader(""))
+
+// Doc parses the given HTML string and returns the Selection of the
+// resulting document.
+//
+// Parse errors are not reported to the caller: when parsing fails, the
+// Selection of the package-level emptyDoc is returned instead.
+func Doc(html string) *goquery.Selection {
+	if parsed, err := goquery.NewDocumentFromReader(strings.NewReader(html)); err == nil {
+		return parsed.Selection
+	}
+	// Parsing failed; fall back to the shared empty document.
+	return emptyDoc.Selection
+}
+
+// Query finds the descendants of s that match selector, takes the first
+// match, and returns its text with leading and trailing whitespace removed.
+func Query(s *goquery.Selection, selector string) string {
+	first := s.Find(selector).First()
+	return strings.TrimSpace(first.Text())
+}
+
+// QueryAttr finds the descendants of s that match selector, takes the first
+// match, and returns the value of its attr attribute with leading and
+// trailing whitespace removed. The empty string is passed to AttrOr as the
+// default, so that is the value used when the attribute is not available.
+func QueryAttr(s *goquery.Selection, selector, attr string) string {
+	match := s.Find(selector).First()
+	return strings.TrimSpace(match.AttrOr(attr, ""))
+}
+
+// QueryHTML finds the descendants of s that match selector and returns the
+// outer HTML produced by goquery.OuterHtml for that selection, with leading
+// and trailing whitespace removed. If rendering fails, the error is dropped
+// and the empty string is returned.
+//
+// NOTE(review): goquery documents OuterHtml as rendering only the first item
+// of the selection, which would make this consistent with Query and
+// QueryAttr even though First is not called here — confirm against the
+// goquery version in use.
+func QueryHTML(s *goquery.Selection, selector string) string {
+	matches := s.Find(selector)
+	if markup, err := goquery.OuterHtml(matches); err == nil {
+		return strings.TrimSpace(markup)
+	}
+	return ""
+}
+
+// QueryFunc finds the descendants of s that match selector and invokes f
+// once per matched element, passing that element's Selection. The index that
+// Each supplies is not forwarded to f.
+func QueryFunc(s *goquery.Selection, selector string, f func(*goquery.Selection)) {
+	// Adapt f to the (index, selection) callback signature expected by Each.
+	visit := func(_ int, match *goquery.Selection) {
+		f(match)
+	}
+	s.Find(selector).Each(visit)
+}