From b1e2c8fd5cb5dfa46bc440a12eafaf56cd844b1c Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Mon, 24 Nov 2025 20:54:57 +0100 Subject: Docs --- content/docs/configuration/link-following.md | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 content/docs/configuration/link-following.md (limited to 'content/docs/configuration/link-following.md') diff --git a/content/docs/configuration/link-following.md b/content/docs/configuration/link-following.md new file mode 100644 index 0000000..b9755f7 --- /dev/null +++ b/content/docs/configuration/link-following.md @@ -0,0 +1,33 @@ +--- +title: 'Link Following' +weight: 5 +--- + +The `follow` config option allows you to specify a list of CSS selectors that determine which links the scraper should follow. + +When no value is provided, the scraper will follow all links found with the `a[href]` selector. + +```javascript {filename="Configuration"} +export const config = { + url: "http://example.com/", + follow: [ + ".pagination > a[href]", + ".nav a[href]", + ], + // ... +}; +``` + +## Following non-`href` attributes + +For special cases where the link is not found in the `href` attribute, you can specify a selector that ends with a different attribute, such as `[data-url]`. + +```javascript {filename="Configuration"} +export const config = { + url: "http://example.com/", + follow: [ + ".articles > div[data-url]", + ], + // ... +}; +``` -- cgit v1.2.3