diff options
Diffstat (limited to 'scrape/query.go')
| -rw-r--r-- | scrape/query.go | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/scrape/query.go b/scrape/query.go new file mode 100644 index 0000000..1fe5ea4 --- /dev/null +++ b/scrape/query.go @@ -0,0 +1,41 @@ +package scrape + +import ( + "strings" + + "github.com/PuerkitoBio/goquery" +) + +var emptyDoc, _ = goquery.NewDocumentFromReader(strings.NewReader("")) + +func Doc(html string) *goquery.Selection { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(html)) + if err != nil { + return emptyDoc.Selection + } + return doc.Selection +} + +func Query(s *goquery.Selection, selector string) string { + val := s.Find(selector).First().Text() + return strings.TrimSpace(val) +} + +func QueryAttr(s *goquery.Selection, selector, attr string) string { + val := s.Find(selector).First().AttrOr(attr, "") + return strings.TrimSpace(val) +} + +func QueryHTML(s *goquery.Selection, selector string) string { + val, err := goquery.OuterHtml(s.Find(selector)) + if err != nil { + return "" + } + return strings.TrimSpace(val) +} + +func QueryFunc(s *goquery.Selection, selector string, f func(*goquery.Selection)) { + s.Find(selector).Each(func(i int, s *goquery.Selection) { + f(s) + }) +} |