diff options
Diffstat (limited to 'examples/useragents')
| -rw-r--r-- | examples/useragents/chrome.js | 50 | ||||
| -rw-r--r-- | examples/useragents/edge.js | 12 | ||||
| -rw-r--r-- | examples/useragents/firefox.js | 13 | ||||
| -rw-r--r-- | examples/useragents/opera.js | 15 |
4 files changed, 90 insertions, 0 deletions
diff --git a/examples/useragents/chrome.js b/examples/useragents/chrome.js
new file mode 100644
index 0000000..c3a6fcd
--- /dev/null
+++ b/examples/useragents/chrome.js
@@ -0,0 +1,50 @@
+import { parse } from "flyscrape";
+
+export const config = {
+  url: "https://chromereleases.googleblog.com/search/label/Stable%20updates",
+  follow: [".blog-pager-older-link"],
+  depth: 30,
+  cache: "file",
+};
+
+// Each pattern captures a version in group 1 ("1.2.3.4" or "1.2.3.4/.5"),
+// optionally followed by ")", and then the platform label it applies to.
+const regexes = [
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Windows)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Linux)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (iOS)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Android)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (ChromeOS)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac,Linux)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac and Linux)/,
+  /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)?\s\(Platform version:\s[\d\.]+\)\sfor\smost\s(ChromeOS)/,
+];
+
+// Expands "1.2.3.4/.5" into ["1.2.3.4", "1.2.3.5"]; a plain version yields itself.
+function expandVersions(versionStr) {
+  const parts = versionStr.split("/");
+  if (parts.length === 2) {
+    parts[1] = parts[0].substring(0, parts[0].lastIndexOf(".")) + parts[1];
+  }
+  return parts;
+}
+
+// Returns the versions matched in the "p:nth-child(1)" text of every ".post".
+export default function ({ doc, absoluteURL }) {
+  return doc.find(".post").map(post => {
+    const body = parse(post.find(".post-content").text()).find("p:nth-child(1)").text().trim();
+
+    const versions = new Set();
+    for (const regex of regexes) {
+      const matches = body.match(regex);
+      if (matches) {
+        expandVersions(matches[1]).forEach(version => versions.add(version));
+      }
+    }
+
+    // A Set is always truthy and Array.prototype.flat() does not unpack it,
+    // so hand back a plain array that can be filtered and flattened.
+    return [...versions];
+  }).filter(found => found.length > 0).flat();
+}
diff --git a/examples/useragents/edge.js b/examples/useragents/edge.js
new file mode 100644
index 0000000..1e76131
--- /dev/null
+++ b/examples/useragents/edge.js
@@ -0,0 +1,12 @@
+import { parse } from "flyscrape";
+
+export const config = {
+  url: "https://learn.microsoft.com/en-us/deployedge/microsoft-edge-release-schedule",
+};
+
+// Returns the non-empty text of every link found inside a table on the page.
+export default function ({ doc, absoluteURL }) {
+  return doc.find("table a")
+    .map(link => link.text())
+    .filter(Boolean);
+}
diff --git a/examples/useragents/firefox.js b/examples/useragents/firefox.js
new file mode 100644
index 0000000..20d4d3c
--- /dev/null
+++ b/examples/useragents/firefox.js
@@ -0,0 +1,13 @@
+import { parse } from "flyscrape";
+
+export const config = {
+  url: "https://www.mozilla.org/en-US/firefox/releases/",
+};
+
+// Returns the text of each release-list link whose leading number is 60 or higher.
+export default function ({ doc, absoluteURL }) {
+  return doc.find(".c-release-list a")
+    .map(link => link.text())
+    .filter(Boolean)
+    .filter(version => parseFloat(version) >= 60);
+}
diff --git a/examples/useragents/opera.js b/examples/useragents/opera.js
new file mode 100644
index 0000000..56e192c
--- /dev/null
+++ b/examples/useragents/opera.js
@@ -0,0 +1,15 @@
+export const config = {
+  urls: range("https://blogs.opera.com/desktop/changelog-for-{}/", 60, 110),
+};
+
+// Returns the first space-separated word of every ".content h4" heading.
+export default function ({ doc, absoluteURL }) {
+  return doc.find(".content h4")
+    .map(heading => heading.text().split(" ")[0].trim())
+    .filter(Boolean);
+}
+
+// Builds one URL per integer in [from, to] by substituting it for "{}" in `url`.
+function range(url, from, to) {
+  return Array.from({ length: to - from + 1 }, (_, i) => url.replace("{}", i + from));
+}