diff options
| author | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-08-16 19:05:24 +0200 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-08-16 19:05:24 +0200 |
| commit | d82e66800478219dd924c6969bd91dbfe004fc9d (patch) | |
| tree | 2c8211096fa38d619c7f5260b0193aea3c1a56ff /js/template.js | |
| parent | 807796ad35b48c58f61f6c058e12ec10078fd0e3 (diff) | |
create new command
Diffstat (limited to 'js/template.js')
| -rw-r--r-- | js/template.js | 27 |
1 files changed, 27 insertions, 0 deletions
// js/template.js — example scraping script built on the "flyscrape" module.
import { parse } from "flyscrape";

/**
 * Scraper settings exported alongside the default scrape function.
 * The meaning of each field is described by its inline comment.
 */
export const options = {
  url: "https://news.ycombinator.com/", // Specify the URL to start scraping from.
  depth: 1, // Specify how deep links should be followed (0 = no follow).
  allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains to follow.
};

/**
 * Parse the leading base-10 integer out of `text`.
 *
 * @param {string} text - Raw text extracted from the page.
 * @returns {number|null} The parsed integer, or `null` when `text` does not
 *   start with a number (instead of leaking `NaN` into the result).
 */
function toInt(text) {
  // Always pass the radix so the input is never interpreted as non-decimal.
  const value = parseInt(text, 10);
  return Number.isNaN(value) ? null : value;
}

/**
 * Extract the page title and the list of entries from one fetched page.
 *
 * @param {object} page
 * @param {string} page.html - HTML source of the page, handed to `parse`.
 * @param {string} page.url - URL of the page (currently unused by this template).
 * @returns {{title: string, entries: Array<{title: string, url: (string|undefined), rank: (number|null), points: (number|null)}>}}
 *   `rank` / `points` are `null` for rows where the corresponding text is
 *   missing or not numeric.
 */
export default function ({ html, url }) {
  const $ = parse(html);

  return {
    title: $("title").text(),
    entries: $(".athing")
      .toArray()
      .map((entry) => {
        const link = $(entry).find(".titleline > a");
        const rank = $(entry).find(".rank");
        // The score is looked up in the element that FOLLOWS the entry row,
        // not inside the entry row itself.
        const points = $(entry).next().find(".score");

        return {
          title: link.text(), // Extract the title text.
          url: link.attr("href"), // Extract the link href.
          rank: toInt(rank.text().slice(0, -1)), // Drop the last character, then parse the rank.
          points: toInt(points.text().replace(" points", "")), // Strip the " points" suffix, then parse.
        };
      }),
  };
}