Skip to content

Commit

Permalink
Dev environment runs on M1 mac
Browse files Browse the repository at this point in the history
Puppeteer does not play well with M1 Macs, or with ARM in general.
While they [just released](puppeteer/puppeteer#7546) experimental
support for ARM Macs, they still don't support [ARM machines on Debian/Docker](puppeteer/puppeteer#7546 (comment)).

This PR switches from Puppeteer to [Zombie](https://github.com/assaf/zombie). Ideally
I would like to move back to Puppeteer once it supports ARM in Docker, as Zombie does not appear to be actively maintained.
  • Loading branch information
olaven committed May 12, 2022
1 parent 7a5824b commit ea01032
Show file tree
Hide file tree
Showing 7 changed files with 299 additions and 101 deletions.
6 changes: 2 additions & 4 deletions docker-compose.yml
Expand Up @@ -19,9 +19,7 @@ services:
- LOG_LEVEL=$LOG_LEVEL
working_dir: /paperpod
api:
build:
context: .
dockerfile: ./packages/api/Dockerfile
build: .
env_file:
- .env
ports:
Expand Down Expand Up @@ -78,7 +76,7 @@ services:
- .env
ports:
- $WEB_PORT:$WEB_PORT
command: yarn web dev
command: yarn web dev --port $WEB_PORT
volumes:
- .:/paperpod
environment:
Expand Down
5 changes: 0 additions & 5 deletions packages/api/Dockerfile

This file was deleted.

1 change: 0 additions & 1 deletion packages/authentication/src/app.ts
Expand Up @@ -5,7 +5,6 @@ import { subscriptionManagementRoutes } from "./routes/public/subscription-manag
import { userRoutes } from "./routes/public/user-routes";

export const publicAuthenticationApp = server.app

.appWithEnvironment(
server.app.appWithBodyParser(server.app.appWithCookieParser())
)
Expand Down
4 changes: 2 additions & 2 deletions packages/converter/package.json
Expand Up @@ -14,9 +14,9 @@
"@paperpod/common": "0.0.1",
"node-kall": "^1.0.83",
"pdf-parse": "^1.1.1",
"puppeteer": "^5.4.1",
"serialize-xml": "^0.4.0",
"unfluff": "^3.2.0"
"unfluff": "^3.2.0",
"zombie": "^6.1.4"
},
"devDependencies": {
"@types/faker": "^5.5.3",
Expand Down
56 changes: 37 additions & 19 deletions packages/converter/src/text/web.ts
@@ -1,6 +1,7 @@
import unfluff from "unfluff";
import puppeteer from "puppeteer";
import { models } from "@paperpod/common";
import { logger } from "@paperpod/common";
import zombie from "zombie";

/**
* returns the publication timestamp, if any.
Expand All @@ -22,30 +23,47 @@ export const extractTextFromWeb = async (
author: extracted.author().join(", "),
description: extracted.description(),
publication_time: date(extracted),
added_time: new Date()
added_time: new Date(),
};
};

/**
* Using Puppeteer (or a browser-emulator in general) makes it possible for
* me to access text that is not directly provided, but client side rendered.
* Using Zombie (or a browser-emulator in general) makes it possible
* to access text that is not directly provided, but client side rendered.
*/
const getHtml = async (url: string) => {
const browser = await puppeteer.launch({
executablePath: process.env.PUPPETEER_EXEC_PATH,
headless: true,
//FIXME: security considerations without sandbox? Read up on this.
args: ["--disable-setuid-sandbox", "--no-sandbox"],
ignoreHTTPSErrors: true,
});
const getHtml = (url: string): Promise<string> =>
new Promise((resolve, _reject) => {
const browser = new zombie({
debug: true,
waitFor: 15000,
});

const page = await browser.newPage();
await page.goto(url);
browser.visit(url, function () {
const html = browser.html();
logger.debug({ message: `Got HTML from zombie`, html, url });
resolve(html);
});

await page.waitFor("*");
/*
NOTE: replaced Puppeteer with Zombie because Puppeteer
does not run on ARM systems in Docker.
[They're working on it though](https://github.com/garris/BackstopJS/issues/1300#issuecomment-1096969710)
const html = await page.content();
await browser.close();
TODO: Consider switching back to puppeteer; Zombie is not actively maintained.
return html;
};
const browser = await puppeteer.launch({
executablePath: process.env.PUPPETEER_EXEC_PATH,
headless: true,
//FIXME: security considerations without sandbox? Read up on this.
args: ["--disable-setuid-sandbox", "--no-sandbox"],
ignoreHTTPSErrors: true,
});
const page = await browser.newPage();
await page.goto(url);
await page.waitFor("*");
const html = await page.content();
await browser.close();
*/
});
1 change: 0 additions & 1 deletion packages/extension/src/background/fetch_token.ts
Expand Up @@ -25,7 +25,6 @@ const run = async () => {
}
};

logger.debug("AM her at all");
run().then(() => {
logger.debug(`Running token background script`);
});

0 comments on commit ea01032

Please sign in to comment.