Skip to content

Commit

Permalink
make some attempts in case of problems with access in the usage script
Browse files Browse the repository at this point in the history
  • Loading branch information
zloirock committed Aug 18, 2021
1 parent 9cd8119 commit 81acbc8
Showing 1 changed file with 33 additions and 42 deletions.
75 changes: 33 additions & 42 deletions scripts/usage.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,68 +2,58 @@
import puppeteer from 'puppeteer';
import pTimeout from 'p-timeout';

async function getTopK(i) {
const res = await fetch(`https://stuffgate.com/stuff/website/top-${ i }000-sites`);
const html = await res.text();
const [, table] = html.match(/Analyze<\/th><\/tr><\/thead><tbody><tr>(.+)<\/tr><\/tbody><\/table><\/div>/s);
return Array.from(table.matchAll(/<a href=["']([^"']+)["'] target='_blank'>/g), ([, href]) => href);
}

const { cyan, green, gray, red } = chalk;
const { TimeoutError } = puppeteer.errors;
const timeout = new TimeoutError();
const sites = [];
let limit = argv.l || 100;
let index = 0;
const timeout = new puppeteer.errors.TimeoutError();
const msie = 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko';
const limit = argv.l || 100;
const attempts = new Map();
let sites = [];
let tested = 0;
let withCoreJS = 0;

// parse Alexa rank
for (let i = 1, top = Math.ceil(limit / 1e3); i <= top; i++) {
sites.push(...await getTopK(i));
const response = await fetch(`https://stuffgate.com/stuff/website/top-${ i }000-sites`);
const html = await response.text();
const [, table] = html.match(/Analyze<\/th><\/tr><\/thead><tbody><tr>(.+)<\/tr><\/tbody><\/table><\/div>/s);
sites.push(...Array.from(table.matchAll(/<a href=["']([^"']+)["'] target='_blank'>/g), match => match[1]));
}

limit = Math.min(limit, sites.length);
sites = sites.slice(0, Math.min(limit, sites.length)).reverse();

await Promise.all(Array(os.cpus().length).fill(0).map(async (i, n) => {
// run in parallel
await Promise.all(Array(os.cpus().length).fill(0).map(async i => {
let browser, page, site, name;
await sleep(n * 3e3);

async function evaluate() {
// seems js on some sites hangs, so added a time limit
async function check(ua) {
await page.setUserAgent(ua);
await page.goto(site);
// seems js hangs on some sites, so added a time limit
const { core, vm, vl } = await pTimeout(page.evaluate(`({
core: !!window['__core-js_shared__'] || !!window.core,
vm: window['__core-js_shared__']?.versions,
vl: window.core?.version,
})`), 1e4, timeout);
const versions = vm ? vm.map(({ version, mode }) => `${ version } (${ mode } mode)`) : vl ? [vl] : [];

return { core, versions };
}

async function check(ua) {
await page.setUserAgent(ua);
await page.goto(site);
const result = await evaluate();
if (result.core) return result;
await page.waitForTimeout(1e4);
return evaluate();
}

while (index < limit) try {
// restart it each some pages for prevent possible `puppeteer` crash and memory leaks
if (!(i++ % 32)) {
while (sites.length) try {
site = sites.pop();
name = site.replace(/^https?:\/\//, '');
// restart browser each some pages for prevent possible `puppeteer` crash and memory leaks
if (!(i++ % 32) || !browser) {
if (browser) await browser.close();
browser = await puppeteer.launch();
}
page = await browser.newPage();
page.setDefaultNavigationTimeout(12e4);
site = sites[index++];
name = site.replace(/^https?:\/\//, '');

let { core, versions } = await check((await browser.userAgent()).replaceAll('Headless', ''));
if (!core) ({ // double check, try with another UA
core, versions,
} = await check('Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko'));
// double check, try with another UA
if (!core) ({ core, versions } = await check(msie));

await page.close();

tested++;
if (core) withCoreJS++;
Expand All @@ -73,13 +63,14 @@ await Promise.all(Array(os.cpus().length).fill(0).map(async (i, n) => {
? `${ cyan(versions.length) } versions: ${ cyan(versions.join(', ')) }`
: `version ${ cyan(versions[0]) }` }`)
: gray('`core-js` is not detected') }`);

await page.close();
} catch (error) {
if (error instanceof TimeoutError || (error instanceof Error && error.message.startsWith('net::'))) {
console.log(`${ cyan(`${ name }:`) } ${ red('problems with access') }`);
}
} catch {
const attempting = (attempts.get(site) | 0) + 1;
attempts.set(site, attempting);
if (attempting < 4) sites.push(site);
else console.log(red(`${ cyan(`${ name }:`) } problems with access`));
await sleep(3e3);
}

return browser.close();
}));

Expand Down

0 comments on commit 81acbc8

Please sign in to comment.