From 6aa52bdbe2cefdbc9219abfb4399afa0d492913d Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Sat, 18 Nov 2023 22:49:26 +0100 Subject: Support passing config options as CLI arguments (#15) --- README.md | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'README.md') diff --git a/README.md b/README.md index d7c701c..2f84843 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,16 @@ ```javascript export const config = { url: "https://news.ycombinator.com/", + // urls: [], // Specify additional URLs to start from. (default = none) + // depth: 0, // Specify how deep links should be followed. (default = 0, no follow) + // follow: [], // Specify the CSS selectors to follow. (default = ["a[href]"]) + // allowedDomains: [], // Specify the allowed domains. ['*'] for all. (default = domain from url) + // blockedDomains: [], // Specify the blocked domains. (default = none) + // allowedURLs: [], // Specify the allowed URLs as regex. (default = all allowed) + // blockedURLs: [], // Specify the blocked URLs as regex. (default = none) + // rate: 100, // Specify the rate in requests per second. (default = no rate limit) + // proxies: [], // Specify the HTTP(S) proxy URLs. (default = no proxy) + // cache: "file", // Enable file-based request caching. (default = no cache) } export default function ({ doc, absoluteURL }) { @@ -99,17 +109,23 @@ To compile flyscrape from source, follow these steps: ## Usage ``` -flyscrape is a standalone and scriptable web scraper for efficiently extracting data from websites. - Usage: - flyscrape [arguments] + flyscrape run SCRIPT [config flags] + +Examples: + + # Run the script. + $ flyscrape run example.js + + # Set the URL as argument. + $ flyscrape run example.js --url "http://other.com" -Commands: + # Enable proxy support. 
+ $ flyscrape run example.js --proxies "http://someproxy:8043" - new creates a sample scraping script - run runs a scraping script - dev watches and re-runs a scraping script + # Follow paginated links. + $ flyscrape run example.js --depth 5 --follow ".next-button > a" ``` ## Configuration -- cgit v1.2.3