| author | rafiramadhana <rf.ramadhana@gmail.com> | 2023-11-16 05:36:22 +0700 |
|---|---|---|
| committer | Philipp Tanlak <philipp.tanlak@gmail.com> | 2023-11-15 23:48:30 +0100 |
| commit | 3e01902887bdc52e743ef6cec53a5c89cb5637f0 | |
| tree | 607f77cf8b9b00ab73e1003331eebefae7abc9ae | |
| parent | beadfd1db3d2398b9b1e66d60779a7b2649af044 | |
Update documentation
| -rw-r--r-- | README.md | 24 |
| -rw-r--r-- | examples/multiple_starting_urls.js | 25 |
2 files changed, 39 insertions, 10 deletions
````diff
@@ -118,16 +118,20 @@ Below is an example scraping script that showcases the capabilities of flyscrape
 
 ```javascript
 export const config = {
-    url: "https://example.com/",   // Specify the URL to start scraping from.
-    depth: 0,                      // Specify how deep links should be followed.  (default = 0, no follow)
-    follow: [],                    // Specify the CSS selectors to follow.        (default = ["a[href]"])
-    allowedDomains: [],            // Specify the allowed domains. ['*'] for all. (default = domain from url)
-    blockedDomains: [],            // Specify the blocked domains.                (default = none)
-    allowedURLs: [],               // Specify the allowed URLs as regex.          (default = all allowed)
-    blockedURLs: [],               // Specify the blocked URLs as regex.          (default = none)
-    rate: 100,                     // Specify the rate in requests per second.    (default = no rate limit)
-    proxies: [],                   // Specify the HTTP(S) proxy URLs.             (default = no proxy)
-    cache: "file",                 // Enable file-based request caching.          (default = no cache)
+    url: "https://example.com/",   // Specify the URL to start scraping from.
+    urls: [                        // Specify the URL(s) to start scraping from. If both .url and .urls
+        "https://example.com/foo", //   are provided, all of the specified URLs will be scraped.
+        "https://example.com/foo",
+    ],
+    depth: 0,                      // Specify how deep links should be followed.  (default = 0, no follow)
+    follow: [],                    // Specify the CSS selectors to follow.        (default = ["a[href]"])
+    allowedDomains: [],            // Specify the allowed domains. ['*'] for all. (default = domain from url)
+    blockedDomains: [],            // Specify the blocked domains.                (default = none)
+    allowedURLs: [],               // Specify the allowed URLs as regex.          (default = all allowed)
+    blockedURLs: [],               // Specify the blocked URLs as regex.          (default = none)
+    rate: 100,                     // Specify the rate in requests per second.    (default = no rate limit)
+    proxies: [],                   // Specify the HTTP(S) proxy URLs.             (default = no proxy)
+    cache: "file",                 // Enable file-based request caching.          (default = no cache)
 };
 
 export function setup() {
diff --git a/examples/multiple_starting_urls.js b/examples/multiple_starting_urls.js
new file mode 100644
index 0000000..5cb7ac9
--- /dev/null
+++ b/examples/multiple_starting_urls.js
@@ -0,0 +1,25 @@
+export const config = {
+    urls: [
+        "https://news.ycombinator.com/show",
+        "https://news.ycombinator.com/ask",
+    ],
+};
+
+export default function ({ doc, absoluteURL }) {
+    const posts = doc.find(".athing");
+
+    return {
+        posts: posts.map((post) => {
+            const link = post.find(".titleline > a");
+            const meta = post.next();
+
+            return {
+                url: absoluteURL(link.attr("href")),
+                user: meta.find(".hnuser").text(),
+                title: link.text(),
+                points: meta.find(".score").text().replace(" points", ""),
+                created: meta.find(".age").attr("title"),
+            };
+        }),
+    };
+}
````
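For quick reference outside the diff, here is a minimal standalone sketch of the new `urls` option as documented above. The file name and the second starting URL are hypothetical, and the script only uses APIs that appear in this commit's own example (`doc.find`, `.map`, `.attr`, and `absoluteURL`); treat it as an illustration of the documented config, not part of the commit.

```javascript
// multiple_urls_sketch.js (hypothetical file name)
// A minimal sketch of the `urls` option documented in this commit:
// every URL listed is used as a starting point, and the default
// export runs once for each scraped page.
export const config = {
    urls: [
        "https://example.com/foo",
        "https://example.com/bar", // hypothetical second starting URL
    ],
};

export default function ({ doc, absoluteURL }) {
    // Collect all links on the page, resolved to absolute URLs.
    return {
        links: doc.find("a[href]").map((a) => absoluteURL(a.attr("href"))),
    };
}
```

Assuming flyscrape's usual CLI, this would run as `flyscrape run multiple_urls_sketch.js`, emitting one JSON record per starting URL.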