From 0daefa86b400efe08245f4f2a386f7341b76b24e Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 19 Oct 2023 17:54:18 +0200 Subject: docs: Add documentation --- docs/configuration/link-following.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 docs/configuration/link-following.md (limited to 'docs/configuration/link-following.md') diff --git a/docs/configuration/link-following.md b/docs/configuration/link-following.md new file mode 100644 index 0000000..6522ce8 --- /dev/null +++ b/docs/configuration/link-following.md @@ -0,0 +1,29 @@ +# Link Following + +The `follow` config option allows you to specify a list of CSS selectors that determine which links the scraper should follow. + +When no value is provided, the scraper will follow all links found with the `a[href]` selector. + +Example: + +```javascript +export const config = { + url: "http://example.com/", + follow: [".pagination > a[href]", ".nav a[href]"], + // ... +}; +``` + +### Following non-`href` attributes + +For special cases where the link is not found in the `href` attribute, you can specify a selector that ends with a different attribute. + +Example: + +```javascript +export const config = { + url: "http://example.com/", + follow: [".articles > div[data-url]"], + // ... +}; +``` -- cgit v1.2.3