summary | refs | log | tree | commit | diff
path: root/examples/hackernews_with_comments.js
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2025-01-10 12:49:32 +0100
committer GitHub <noreply@github.com> 2025-01-10 12:49:32 +0100
commit bf99c233a18c3165e0d4d251b41224e5bc6eb93d (patch)
tree d32f0fd0770a049552cdd0d51e9402d594e9a35e /examples/hackernews_with_comments.js
parent 924184f37ef0d3e244f8e8991c259affb45d0ae2 (diff)
Implement nested scraping (#81)
Diffstat (limited to 'examples/hackernews_with_comments.js')
-rw-r--r-- examples/hackernews_with_comments.js 23
1 files changed, 23 insertions, 0 deletions
diff --git a/examples/hackernews_with_comments.js b/examples/hackernews_with_comments.js
new file mode 100644
index 0000000..8d9cfb5
--- /dev/null
+++ b/examples/hackernews_with_comments.js
@@ -0,0 +1,23 @@
+export const config = { // Exported scraper settings; only `url` is set here.
+ url: "https://news.ycombinator.com/", // Hacker News front page — presumably the page the scraper loads first (confirm against the runner).
+};
+
+export default function({ doc, scrape }) { // Default export: reads the first submission's title from `doc`, gathers its comments via `scrape`, and returns both.
+ const post = doc.find(".athing.submission").first(); // First match of the submission selector.
+ const title = post.find(".titleline > a").text(); // Text of the title link inside that submission.
+ const commentsLink = post.next().find("a").last().attr("href"); // href of the last <a> in the element after `post` — presumably HN's subtext row, where the last link is the comments page (TODO confirm).
+
+ const comments = scrape(commentsLink, function({ doc }) { // This callback's `doc` shadows the outer one; presumably it is the document loaded from commentsLink (nested scraping) — confirm.
+ return doc.find(".comtr").map(comment => { // One result object per `.comtr` match.
+ return {
+ author: comment.find(".hnuser").text(), // Text of the `.hnuser` element within the comment.
+ text: comment.find(".commtext").text(), // Text of the `.commtext` element within the comment.
+ };
+ });
+ });
+
+ return {
+ title,
+ comments, // Whatever `scrape` returned for the callback above.
+ };
+}