// Example scraper: read the first submission on the Hacker News front page
// and collect the comments from that submission's discussion page.
//
// Provenance: introduced as examples/hackernews_with_comments.js by commit
// bf99c233a18c3165e0d4d251b41224e5bc6eb93d, "Implement nested scraping (#81)"
// (Philipp Tanlak, 2025-01-10).

/** Scraper configuration: the URL the scrape starts from. */
export const config = {
  url: "https://news.ycombinator.com/",
};

/**
 * Extracts the first submission's title and the comments of its thread.
 *
 * @param {object} context - Arguments supplied by the host scraping runtime.
 * @param {object} context.doc - Queryable document for the page at
 *   `config.url`; used here via `find`, `first`, `next`, `last`, `attr`,
 *   `text` and `map`.
 * @param {Function} context.scrape - Called as `scrape(url, callback)`; its
 *   return value is used directly as the comment list. NOTE(review):
 *   presumably it fetches `url` and returns whatever `callback` returns for
 *   the fetched document — confirm against the runtime's documentation.
 * @returns {{title: *, comments: *}} The submission title and one
 *   `{ author, text }` entry per `.comtr` element of the discussion page, or
 *   an empty list when no comments link could be found.
 */
export default function scrapeFirstSubmissionWithComments({ doc, scrape }) {
  const post = doc.find(".athing.submission").first();
  const title = post.find(".titleline > a").text();

  // The element following the submission holds its metadata links; the last
  // anchor in it is taken to be the link to the discussion page.
  const commentsLink = post.next().find("a").last().attr("href");

  // Without a link there is nothing to follow: skip the nested scrape rather
  // than handing an empty or undefined URL to the runtime.
  if (!commentsLink) {
    return { title, comments: [] };
  }

  const comments = scrape(commentsLink, ({ doc: commentsDoc }) =>
    commentsDoc.find(".comtr").map((comment) => ({
      author: comment.find(".hnuser").text(),
      text: comment.find(".commtext").text(),
    })),
  );

  return { title, comments };
}