summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rafael Bodill <rafi@users.noreply.github.com> 2024-11-24 13:59:48 +0200
committer: GitHub <noreply@github.com> 2024-11-24 12:59:48 +0100
commit: d2aec146ac5a2aef4a87813be47e5e1dc7404c51 (patch)
tree: 7407bd1f69e999f1409d82c1e9d3fce80a353845
parent: 54a8ea0bc36d6bb401b942bb2644c316c55c4ff9 (diff)
Add tag name and more traversal methods (#73)
-rw-r--r-- README.md 18
-rw-r--r-- js.go 5
2 files changed, 23 insertions, 0 deletions
diff --git a/README.md b/README.md
index d10ce44..233e2ca 100644
--- a/README.md
+++ b/README.md
@@ -273,6 +273,7 @@ export default function ({ doc, url, absoluteURL }) {
const el = doc.find(".element")
el.text() // "Hey"
el.html() // `<div class="element">Hey</div>`
+el.name() // "div"
el.attr("foo") // "bar"
el.hasAttr("foo") // true
el.hasClass("element") // true
@@ -296,6 +297,23 @@ items.get(1).parent() // <ul>...</ul>
items.get(1).siblings() // [<li class="a">Item 1</li>, <li>Item 2</li>, <li>Item 3</li>]
items.map(item => item.text()) // ["Item 1", "Item 2", "Item 3"]
items.filter(item => item.hasClass("a")) // [<li class="a">Item 1</li>]
+
+// <div>
+// <h2 id="aleph">Aleph</h2>
+// <p>Aleph</p>
+// <h2 id="beta">Beta</h2>
+// <p>Beta</p>
+// <h2 id="gamma">Gamma</h2>
+// <p>Gamma</p>
+// </div>
+const header = doc.find("div h2")
+
+header.get(1).prev() // <p>Aleph</p>
+header.get(1).prevAll() // [<p>Aleph</p>, <h2 id="aleph">Aleph</h2>]
+header.get(1).prevUntil('div,h1,h2,h3') // <p>Aleph</p>
+header.get(1).next() // <p>Beta</p>
+header.get(1).nextAll() // [<p>Beta</p>, <h2 id="gamma">Gamma</h2>, <p>Gamma</p>]
+header.get(1).nextUntil('div,h1,h2,h3') // <p>Beta</p>
```
## Flyscrape API
diff --git a/js.go b/js.go
index 3b8ec20..50c89ca 100644
--- a/js.go
+++ b/js.go
@@ -225,6 +225,7 @@ func Document(sel *goquery.Selection) map[string]any {
o := map[string]any{}
o["WARNING"] = "Forgot to call text(), html() or attr()?"
o["text"] = sel.Text
+ o["name"] = func() string { return sel.Get(0).Data }
o["html"] = func() string { h, _ := goquery.OuterHtml(sel); return h }
o["attr"] = func(name string) string { v, _ := sel.Attr(name); return v }
o["hasAttr"] = func(name string) bool { _, ok := sel.Attr(name); return ok }
@@ -235,7 +236,11 @@ func Document(sel *goquery.Selection) map[string]any {
o["get"] = func(index int) map[string]any { return Document(sel.Eq(index)) }
o["find"] = func(s string) map[string]any { return Document(sel.Find(s)) }
o["next"] = func() map[string]any { return Document(sel.Next()) }
+ o["nextAll"] = func() map[string]any { return Document(sel.NextAll()) }
+ o["nextUntil"] = func(s string) map[string]any { return Document(sel.NextUntil(s)) }
o["prev"] = func() map[string]any { return Document(sel.Prev()) }
+ o["prevAll"] = func() map[string]any { return Document(sel.PrevAll()) }
+ o["prevUntil"] = func(s string) map[string]any { return Document(sel.PrevUntil(s)) }
o["siblings"] = func() map[string]any { return Document(sel.Siblings()) }
o["children"] = func() map[string]any { return Document(sel.Children()) }
o["parent"] = func() map[string]any { return Document(sel.Parent()) }