From 0daefa86b400efe08245f4f2a386f7341b76b24e Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 19 Oct 2023 17:54:18 +0200 Subject: docs: Add documentation --- docs/configuration/url-filter.md | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/configuration/url-filter.md (limited to 'docs/configuration/url-filter.md') diff --git a/docs/configuration/url-filter.md b/docs/configuration/url-filter.md new file mode 100644 index 0000000..e2feda8 --- /dev/null +++ b/docs/configuration/url-filter.md @@ -0,0 +1,42 @@ +# URL Filter + +The `allowedURLs` and `blockedURLs` config options allow you to specify a list of URL patterns (in the form of regular expressions) that are allowed or blocked during scraping. + +```javascript +export const options = { + url: "http://example.com/", + allowedURLs: ["/articles/.*", "/authors/.*"], + blockedURLs: ["/authors/admin"], + // ... +}; +``` + +### `allowedURLs` + +This config option controls which URLs are allowed to be visited during scraping. When no value is provided, all URLs are allowed to be visited unless otherwise blocked. + +When a list of URL patterns is provided, only URLs matching one or more of these patterns are allowed to be visited. + +Example: + +```javascript +export const options = { + url: "http://example.com/", + allowedURLs: ["/products/"], +}; +``` + +### `blockedURLs` + +This config option controls which URLs are blocked from being visited during scraping. + +When a list of URL patterns is provided, URLs matching one or more of these patterns are blocked from being visited. + +Example: + +```javascript +export const options = { + url: "http://example.com/", + blockedURLs: ["/restricted"], +}; +``` -- cgit v1.2.3