From 0daefa86b400efe08245f4f2a386f7341b76b24e Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 19 Oct 2023 17:54:18 +0200 Subject: docs: Add documentation --- docs/configuration/domain-filter.md | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 docs/configuration/domain-filter.md (limited to 'docs/configuration/domain-filter.md') diff --git a/docs/configuration/domain-filter.md b/docs/configuration/domain-filter.md new file mode 100644 index 0000000..e8adc30 --- /dev/null +++ b/docs/configuration/domain-filter.md @@ -0,0 +1,44 @@ +# Domain Filter + +The `allowedDomains` and `blockedDomains` config options allow you to specify a list of domains which are accessible or blocked during scraping. + +```javascript +export const options = { + url: "http://example.com/", + allowedDomains: ["subdomain.example.com"], + // ... +}; +``` + +### `allowedDomains` + +This config option controls which additional domains are allowed to be visited during scraping. The domain of the initial URL is always allowed. + +You can also allow all domains to be accessible by setting `allowedDomains` to `["*"]`. To then further restrict access, you can specify `blockedDomains`. + +Example: + +```javascript +export const options = { + url: "http://example.com/", + allowedDomains: ["*"], + // ... +}; +``` + +### `blockedDomains` + +This config option controls which additional domains are blocked from being accessed. By default all domains other than the domain of the initial URL or those specified in `allowedDomains` are blocked. + +You can best use `blockedDomains` in conjunction with `allowedDomains: ["*"]`, allowing the scraping process to access all domains except what's specified in `blockedDomains`. + +Example: + +```javascript +export const options = { + url: "http://example.com/", + allowedDomains: ["*"], + blockedDomains: ["google.com", "bing.com"], + // ... +}; +``` -- cgit v1.2.3