diff options
Diffstat (limited to 'js/template.js')
| -rw-r--r-- | js/template.js | 38 |
1 files changed, 0 insertions, 38 deletions
// Recovered from the deletion diff of js/template.js (blob 1a030e5).
// Scraping template: collects the entries listed on the configured start page.
import { parse } from 'flyscrape';

/** Scraper settings exported alongside the default scrape function. */
export const config = {
  url: 'https://news.ycombinator.com/', // Specify the URL to start scraping from.
  depth: 0,                             // Specify how deep links should be followed. (default = 0, no follow)
  allowedDomains: [],                   // Specify the allowed domains. ['*'] for all. (default = domain from url)
  blockedDomains: [],                   // Specify the blocked domains. (default = none)
  allowedURLs: [],                      // Specify the allowed URLs as regex. (default = all allowed)
  blockedURLs: [],                      // Specify the blocked URLs as regex. (default = none blocked)
  proxy: '',                            // Specify the HTTP(S) proxy to use. (default = no proxy)
  rate: 100,                            // Specify the rate in requests per second. (default = 100)
};

/**
 * Parses the leading base-10 integer of `text`.
 *
 * NaN has no JSON representation (JSON.stringify emits null for it), so a
 * missing number is reported as an explicit null instead.
 *
 * @param {string} text - Text expected to start with decimal digits.
 * @returns {number|null} The parsed integer, or null when `text` has none.
 */
function toInt(text) {
  const value = Number.parseInt(text, 10);
  return Number.isNaN(value) ? null : value;
}

/**
 * Extracts the page title and all news entries from one scraped page.
 *
 * @param {object} page
 * @param {string} page.html - Markup of the scraped page, handed to `parse`.
 * @param {string} page.url - URL of the scraped page (not used here).
 * @returns {{title: string, entries: object[]}|null} The extracted data, or
 *   null when the page contains no `.athing` elements.
 */
export default function({ html, url }) {
  const $ = parse(html);
  const entries = $('.athing').toArray();

  if (entries.length === 0) {
    return null; // Omits scraped pages without entries.
  }

  return {
    title: $('title').text(), // Extract the page title.
    entries: entries.map((entry) => { // Extract all news entries.
      const row = $(entry);
      const link = row.find('.titleline > a');
      const rank = row.find('.rank');
      // The score is looked up in the element that follows the entry itself.
      const points = row.next().find('.score');

      return {
        title: link.text(),                                 // Extract the title text.
        url: link.attr('href'),                             // Extract the link href.
        rank: toInt(rank.text().slice(0, -1)),              // Drop the trailing character, then parse the rank.
        points: toInt(points.text().replace(' points', '')), // null when the entry shows no score.
      };
    }),
  };
}