omit empty scrape results

author: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-17 20:13:29 +0200
committer: Philipp Tanlak <philipp.tanlak@gmail.com> 2023-08-17 20:13:29 +0200
commit: 8a44dc0856d7bf2cdc2eafa8594f4a47d488c3fd (patch)
tree: 3943b4b5b17fce915727f89e038b0ae6199420d6 /js/template.js
parent: 8812c84f32c74ac2f44af1abdb7e4e6f7cbf10b0 (diff)
1 files changed, 12 insertions, 5 deletions
diff --git a/js/template.js b/js/template.js
index a2b4518..f75df28 100644
--- a/js/template.js
+++ b/js/template.js
@@ -2,17 +2,24 @@ import { parse } from "flyscrape";
 
 export const options = {
     url: "https://news.ycombinator.com/",     // Specify the URL to start scraping from.
-    depth: 1,                                 // Specify how deep links should be followed (0 = no follow).
-    allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains to follow.
-    rate: 100,                                // Specify the request rate in requests per second.
+    depth: 1,                                 // Specify how deep links should be followed.  (default = 0, no follow)
+    allowedDomains: ["news.ycombinator.com"], // Specify the allowed domains.                (default = domain from url)
+    blockedDomains: [],                       // Specify the blocked domains.                (default = none)
+    rate: 100,                                // Specify the rate in requests per second.    (default = 100)
 }
 
 export default function({ html, url }) {
     const $ = parse(html);
+    const title = $('title');
+    const entries = $('.athing').toArray();
+
+    if (entries.length == 0) {
+        return null; // Omits scraped pages without entries.
+    }
 
     return {
-        title: $('title').text(),
-        entries: $('.athing').toArray().map(entry => {
+        title: title.text(),                                            // Extract the page title.
+        entries: entries.map(entry => {                                 // Extract all news entries.
             const link = $(entry).find('.titleline > a');
             const rank = $(entry).find('.rank');
             const points = $(entry).next().find('.score');
author	Philipp Tanlak <philipp.tanlak@gmail.com>	2023-08-17 20:13:29 +0200
committer	Philipp Tanlak <philipp.tanlak@gmail.com>	2023-08-17 20:13:29 +0200
commit	8a44dc0856d7bf2cdc2eafa8594f4a47d488c3fd (patch)
tree	3943b4b5b17fce915727f89e038b0ae6199420d6 /js/template.js
parent	8812c84f32c74ac2f44af1abdb7e4e6f7cbf10b0 (diff)