summary | refs | log | tree | commit | diff
path: root/README.md
diff options
context:
space:
mode:
author Philipp Tanlak <philipp.tanlak@gmail.com> 2023-11-15 16:31:50 +0100
committer Philipp Tanlak <philipp.tanlak@gmail.com> 2023-11-15 23:54:03 +0100
commit 94da9293f63e46712b0a890e1e0eab4153fdb3f9 (patch)
tree de81e6d00f7e1a5215d18557e772e7f1131d218b /README.md
parent 3e01902887bdc52e743ef6cec53a5c89cb5637f0 (diff)
Add file download functionality
Diffstat (limited to 'README.md')
-rw-r--r-- README.md 23
1 files changed, 20 insertions, 3 deletions
diff --git a/README.md b/README.md
index 4d46bbd..d7c701c 100644
--- a/README.md
+++ b/README.md
@@ -119,10 +119,10 @@ Below is an example scraping script that showcases the capabilities of flyscrape
```javascript
export const config = {
url: "https://example.com/", // Specify the URL to start scraping from.
- urls: [ // Specify the URL(S) to start scraping from. If both .url and .urls
+ urls: [ // Specify the URL(s) to start scraping from. If both `url` and `urls`
"https://example.com/foo", // are provided, all of the specified URLs will be scraped.
- "https://example.com/foo",
- ]
+ "https://example.com/bar",
+ ],
depth: 0, // Specify how deep links should be followed. (default = 0, no follow)
follow: [], // Specify the CSS selectors to follow (default = ["a[href]"])
allowedDomains: [], // Specify the allowed domains. ['*'] for all. (default = domain from url)
@@ -180,6 +180,8 @@ items.filter(item => item.hasClass("a")) // [<li class="a">Item 1</li>]
## Flyscrape API
+### Document Parsing
+
```javascript
import { parse } from "flyscrape";
@@ -187,6 +189,8 @@ const doc = parse(`<div class="foo">bar</div>`);
const text = doc.find(".foo").text();
```
+### Basic HTTP Requests
+
```javascript
import http from "flyscrape/http";
@@ -214,7 +218,20 @@ const response = http.postJSON("https://example.com", {
}
```
+### File Downloads
+
+```javascript
+import { download } from "flyscrape/http";
+
+download("http://example.com/image.jpg") // downloads as "image.jpg"
+download("http://example.com/image.jpg", "other.jpg") // downloads as "other.jpg"
+download("http://example.com/image.jpg", "dir/") // downloads as "dir/image.jpg"
+// If the server offers a filename via the Content-Disposition header and no
+// destination filename is provided, Flyscrape will honor the suggested filename.
+// E.g. `Content-Disposition: attachment; filename="archive.zip"`
+download("http://example.com/generate_archive.php", "dir/") // downloads as "dir/archive.zip"
+```
## Issues and Suggestions