summary | refs | log | tree | commit | diff
path: root/js/template.js
diff options
context:
space:
mode:
author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-17 20:13:29 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-17 20:13:29 +0200
commit: 8a44dc0856d7bf2cdc2eafa8594f4a47d488c3fd (patch)
tree: 3943b4b5b17fce915727f89e038b0ae6199420d6 /js/template.js
parent: 8812c84f32c74ac2f44af1abdb7e4e6f7cbf10b0 (diff)
omit empty scrape results
Diffstat (limited to 'js/template.js')
-rw-r--r-- js/template.js 17
1 files changed, 12 insertions, 5 deletions
diff --git a/js/template.js b/js/template.js
index a2b4518..f75df28 100644
--- a/js/template.js
+++ b/js/template.js
@@ -2,17 +2,24 @@ import { parse } from "flyscrape";
export const options = {
url: "https://news.ycombinator.com/", // Specify the URL to start scraping from.
- depth: 1, // Specify how deep links should be followed (0 = no follow).
- allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains to follow.
- rate: 100, // Specify the request rate in requests per second.
+ depth: 1, // Specify how deep links should be followed. (default = 0, no follow)
+ allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains. (default = domain from url)
+ blockedDomains: [], // Specify the blocked domains. (default = none)
+ rate: 100, // Specify the rate in requests per second. (default = 100)
}
export default function({ html, url }) {
const $ = parse(html);
+ const title = $('title');
+ const entries = $('.athing').toArray();
+
+ if (entries.length == 0) {
+ return null; // Omits scraped pages without entries.
+ }
return {
- title: $('title').text(),
- entries: $('.athing').toArray().map(entry => {
+ title: title.text(), // Extract the page title.
+ entries: entries.map(entry => { // Extract all news entries.
const link = $(entry).find('.titleline > a');
const rank = $(entry).find('.rank');
const points = $(entry).next().find('.score');