diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2025-01-10 12:49:32 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-10 12:49:32 +0100 |
| commit | bf99c233a18c3165e0d4d251b41224e5bc6eb93d (patch) | |
| tree | d32f0fd0770a049552cdd0d51e9402d594e9a35e /examples | |
| parent | 924184f37ef0d3e244f8e8991c259affb45d0ae2 (diff) | |
Implement nested scraping (#81)
Diffstat (limited to 'examples')
| -rw-r--r-- | examples/hackernews_with_comments.js | 23 |
1 file changed, 23 insertions, 0 deletions
```diff
diff --git a/examples/hackernews_with_comments.js b/examples/hackernews_with_comments.js
new file mode 100644
index 0000000..8d9cfb5
--- /dev/null
+++ b/examples/hackernews_with_comments.js
@@ -0,0 +1,23 @@
+export const config = {
+    url: "https://news.ycombinator.com/",
+};
+
+export default function({ doc, scrape }) {
+    const post = doc.find(".athing.submission").first();
+    const title = post.find(".titleline > a").text();
+    const commentsLink = post.next().find("a").last().attr("href");
+
+    const comments = scrape(commentsLink, function({ doc }) {
+        return doc.find(".comtr").map(comment => {
+            return {
+                author: comment.find(".hnuser").text(),
+                text: comment.find(".commtext").text(),
+            };
+        });
+    });
+
+    return {
+        title,
+        comments,
+    };
+}
```