From bf99c233a18c3165e0d4d251b41224e5bc6eb93d Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Fri, 10 Jan 2025 12:49:32 +0100 Subject: Implement nested scraping (#81) --- README.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'README.md') diff --git a/README.md b/README.md index 233e2ca..6a3290c 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Flyscrape is a command-line web scraping tool designed for those without
a - **Scriptable:** Use JavaScript to write your data extraction logic. - **System Cookies:** Give Flyscrape access to your browsers cookie store. - **Browser Mode:** Render JavaScript heavy pages using a headless Browser. +- **Nested Scraping:** Extract data from linked pages within a single scrape. ## Overview @@ -259,10 +260,20 @@ export const config = { }, }; -export default function ({ doc, url, absoluteURL }) { - // doc - Contains the parsed HTML document - // url - Contains the scraped URL - // absoluteURL(...) - Transforms relative URLs into absolute URLs +export default function ({ doc, url, absoluteURL, scrape }) { + // doc + // Contains the parsed HTML document. + + // url + // Contains the scraped URL. + + // absoluteURL("/foo") + // Transforms a relative URL into an absolute URL. + + // scrape(url, function({ doc, url, absoluteURL, scrape }) { + // return { ... }; + // }) + // Scrapes a linked page and returns the scrape result. } ``` -- cgit v1.2.3