diff options
| author | Rafi Ramadhana <42462215+rafiramadhana@users.noreply.github.com> | 2023-11-23 18:58:41 +0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-11-23 12:58:41 +0100 |
| commit | cbdbbd249239345f88bea031beb55e84c2f47688 (patch) | |
| tree | f3b001d36c0da83a1827eb8008615f5f2577a638 /README.md | |
| parent | 13322edf37510b6d3bb68a853368fd1a0a67a105 (diff) | |
Add custom request header (#18)
Diffstat (limited to 'README.md')
| -rw-r--r-- | README.md | 28 |
1 files changed, 16 insertions, 12 deletions
@@ -134,20 +134,24 @@ Below is an example scraping script that showcases the capabilities of flyscrape
 ```javascript
 export const config = {
-    url: "https://example.com/", // Specify the URL to start scraping from.
-    urls: [ // Specify the URL(s) to start scraping from. If both `url` and `urls`
-        "https://example.com/foo", // are provided, all of the specified URLs will be scraped.
+    url: "https://example.com/", // Specify the URL to start scraping from.
+    urls: [ // Specify the URL(s) to start scraping from. If both `url` and `urls`
+        "https://example.com/foo", // are provided, all of the specified URLs will be scraped.
         "https://example.com/bar",
     ],
-    depth: 0, // Specify how deep links should be followed. (default = 0, no follow)
-    follow: [], // Speficy the css selectors to follow (default = ["a[href]"])
-    allowedDomains: [], // Specify the allowed domains. ['*'] for all. (default = domain from url)
-    blockedDomains: [], // Specify the blocked domains. (default = none)
-    allowedURLs: [], // Specify the allowed URLs as regex. (default = all allowed)
-    blockedURLs: [], // Specify the blocked URLs as regex. (default = none)
-    rate: 100, // Specify the rate in requests per second. (default = no rate limit)
-    proxies: [], // Specify the HTTP(S) proxy URLs. (default = no proxy)
-    cache: "file", // Enable file-based request caching. (default = no cache)
+    depth: 0, // Specify how deep links should be followed. (default = 0, no follow)
+    follow: [], // Speficy the css selectors to follow (default = ["a[href]"])
+    allowedDomains: [], // Specify the allowed domains. ['*'] for all. (default = domain from url)
+    blockedDomains: [], // Specify the blocked domains. (default = none)
+    allowedURLs: [], // Specify the allowed URLs as regex. (default = all allowed)
+    blockedURLs: [], // Specify the blocked URLs as regex. (default = none)
+    rate: 100, // Specify the rate in requests per second. (default = no rate limit)
+    proxies: [], // Specify the HTTP(S) proxy URLs. (default = no proxy)
+    cache: "file", // Enable file-based request caching. (default = no cache)
+    headers: { // Specify the HTTP request header. (default = none)
+        "Authorization": "Basic ZGVtbzpwQDU1dzByZA==",
+        "User-Agent": "Gecko/1.0",
+    },
 };
 export function setup() {