summary | refs | log | tree | commit | diff
path: root/examples/useragents
diff options
context:
space:
mode:
Diffstat (limited to 'examples/useragents')
-rw-r--r-- examples/useragents/chrome.js | 50
-rw-r--r-- examples/useragents/edge.js | 12
-rw-r--r-- examples/useragents/firefox.js | 13
-rw-r--r-- examples/useragents/opera.js | 15
4 files changed, 90 insertions, 0 deletions
diff --git a/examples/useragents/chrome.js b/examples/useragents/chrome.js
new file mode 100644
index 0000000..c3a6fcd
--- /dev/null
+++ b/examples/useragents/chrome.js
@@ -0,0 +1,50 @@
+import { parse } from "flyscrape";
+
+export const config = {
+ url: "https://chromereleases.googleblog.com/search/label/Stable%20updates", // start page: the blog's "Stable updates" label listing
+ follow: [".blog-pager-older-link"], // presumably the selector of the "older posts" pager link to follow; confirm against flyscrape docs
+ depth: 30, // presumably the maximum number of follow hops; confirm against flyscrape docs
+ cache: "file", // presumably selects flyscrape's file-backed response cache; confirm against flyscrape docs
+};
+
+/**
+ * Collects the Chrome version numbers announced in each ".post" of the page.
+ * @param {object} ctx - Scrape context; only `doc` (queried via find()) is used.
+ * @returns {string[]} Flat list of version strings, deduplicated within each post.
+ */
+export default function ({ doc, absoluteURL }) {
+  // Each pattern captures "a.b.c.d", optionally followed by "/.e", announced for one platform.
+  const regexes = [
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Windows)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Linux)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (iOS)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Android)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (ChromeOS)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac,Linux)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)? for (Mac and Linux)/,
+    /(\d+\.\d+\.\d+\.\d+(\/\.\d+)?)\)?\s\(Platform version:\s[\d\.]+\)\sfor\smost\s(ChromeOS)/,
+  ];
+
+  return doc.find(".post").map(post => {
+    const body = parse(post.find(".post-content").text()).find("p:nth-child(1)").text().trim();
+    const versions = new Set();
+    for (const regex of regexes) {
+      const matches = body.match(regex);
+      if (!matches) {
+        continue;
+      }
+      const vv = matches[1].split("/");
+      if (vv.length === 2) {
+        // "a.b.c.d/.e" abbreviates a.b.c.d and a.b.c.e: rebuild the second version in full.
+        vv[1] = vv[0].substring(0, vv[0].lastIndexOf(".")) + vv[1];
+      }
+      for (const version of vv) {
+        versions.add(version);
+      }
+    }
+
+    // Return an array, not the Set itself: flat() below only flattens arrays.
+    return [...versions];
+  }).flat();
+}
diff --git a/examples/useragents/edge.js b/examples/useragents/edge.js
new file mode 100644
index 0000000..1e76131
--- /dev/null
+++ b/examples/useragents/edge.js
@@ -0,0 +1,12 @@
+import { parse } from "flyscrape";
+
+export const config = {
+ url: "https://learn.microsoft.com/en-us/deployedge/microsoft-edge-release-schedule", // the only page configured; no follow rules are set
+};
+
+// Returns the non-empty text of every link found inside a table on the page.
+export default function ({ doc, absoluteURL }) {
+  const anchors = doc.find("table a");
+  const labels = anchors.map(anchor => anchor.text());
+  return labels.filter(Boolean);
+}
diff --git a/examples/useragents/firefox.js b/examples/useragents/firefox.js
new file mode 100644
index 0000000..20d4d3c
--- /dev/null
+++ b/examples/useragents/firefox.js
@@ -0,0 +1,13 @@
+import { parse } from "flyscrape";
+
+export const config = {
+ url: "https://www.mozilla.org/en-US/firefox/releases/", // the only page configured; no follow rules are set
+};
+
+// Returns the text of each ".c-release-list a" link whose leading number is at least 60.
+export default function ({ doc, absoluteURL }) {
+  const releaseLinks = doc.find(".c-release-list a");
+  const labels = releaseLinks.map(anchor => anchor.text());
+  const named = labels.filter(Boolean);
+  return named.filter(label => parseFloat(label) >= 60);
+}
diff --git a/examples/useragents/opera.js b/examples/useragents/opera.js
new file mode 100644
index 0000000..56e192c
--- /dev/null
+++ b/examples/useragents/opera.js
@@ -0,0 +1,15 @@
+
+export const config = {
+ urls: range("https://blogs.opera.com/desktop/changelog-for-{}/", 60, 110), // one changelog URL per number from 60 through 110, built by range() defined in this file
+};
+
+// Returns the first space-delimited word of each ".content h4" heading, skipping empty results.
+export default function ({ doc, absoluteURL }) {
+  const headings = doc.find(".content h4");
+  const firstWords = headings.map(heading => heading.text().split(" ")[0].trim());
+  return firstWords.filter(Boolean);
+}
+
+function range(url, from, to) { // one URL per integer in [from, to], each replacing the first "{}" in url
+  return Array.from({ length: to - from + 1 }, (_, offset) => url.replace("{}", offset + from));
+}