summaryrefslogtreecommitdiff
path: root/README.md
diff options
context:
space:
mode:
authorPhilipp Tanlak <philipp.tanlak@gmail.com>2025-01-10 12:49:32 +0100
committerGitHub <noreply@github.com>2025-01-10 12:49:32 +0100
commitbf99c233a18c3165e0d4d251b41224e5bc6eb93d (patch)
treed32f0fd0770a049552cdd0d51e9402d594e9a35e /README.md
parent924184f37ef0d3e244f8e8991c259affb45d0ae2 (diff)
Implement nested scraping (#81)
Diffstat (limited to 'README.md')
-rw-r--r--README.md19
1 files changed, 15 insertions, 4 deletions
diff --git a/README.md b/README.md
index 233e2ca..6a3290c 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ Flyscrape is a command-line web scraping tool designed for those without <br />a
- **Scriptable:** Use JavaScript to write your data extraction logic.
- **System Cookies:** Give Flyscrape access to your browser's cookie store.
- **Browser Mode:** Render JavaScript-heavy pages using a headless browser.
+- **Nested Scraping:** Extract data from linked pages within a single scrape.
## Overview
@@ -259,10 +260,20 @@ export const config = {
},
};
-export default function ({ doc, url, absoluteURL }) {
- // doc - Contains the parsed HTML document
- // url - Contains the scraped URL
- // absoluteURL(...) - Transforms relative URLs into absolute URLs
+export default function ({ doc, url, absoluteURL, scrape }) {
+ // doc
+ // Contains the parsed HTML document.
+
+ // url
+ // Contains the scraped URL.
+
+ // absoluteURL("/foo")
+ // Transforms a relative URL into an absolute URL.
+
+ // scrape(url, function({ doc, url, absoluteURL, scrape }) {
+ // return { ... };
+ // })
+ // Scrapes a linked page and returns the scrape result.
}
```