From b1e2c8fd5cb5dfa46bc440a12eafaf56cd844b1c Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Mon, 24 Nov 2025 20:54:57 +0100 Subject: Docs --- public/docs/configuration/url-filter/index.html | 610 ++++++++++++++++++++++++ 1 file changed, 610 insertions(+) create mode 100644 public/docs/configuration/url-filter/index.html (limited to 'public/docs/configuration/url-filter') diff --git a/public/docs/configuration/url-filter/index.html b/public/docs/configuration/url-filter/index.html new file mode 100644 index 0000000..264627f --- /dev/null +++ b/public/docs/configuration/url-filter/index.html @@ -0,0 +1,610 @@ + + + + + + + + + + + + + + + URL Filter – Flyscrape + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+
+
+ +
URL Filter
+
+ +
+

URL Filter

+

The allowedURLs and blockedURLs config options allow you to specify a list of URL patterns (in the form of regular expressions) that are allowed or blocked during scraping.

+
Configuration
export const options = {
+  url: "http://example.com/",
+  allowedURLs: ["/articles/.*", "/authors/.*"],
+  blockedURLs: ["/authors/admin"],
+  // ...
+};
+ +
+
+

Allowed URLs +

This config option controls which URLs are allowed to be visited during scraping. When no value is provided, all URLs are allowed to be visited unless otherwise blocked.

+

When a list of URL patterns is provided, only URLs matching one or more of these patterns are allowed to be visited.

+
Configuration
export const options = {
+  url: "http://example.com/",
+  allowedURLs: ["/products/"],
+};
+ +
+
+

Blocked URLs +

This config option controls which URLs are blocked from being visited during scraping.

+

When a list of URL patterns is provided, URLs matching one or more of these patterns are blocked from being visited.

+
Configuration
export const options = {
+  url: "http://example.com/",
+  blockedURLs: ["/restricted"],
+};
+ +
+
+ +
+
+ + +
+
+
+ + + + + + + + + + -- cgit v1.2.3