Unbork unfurler concurrency implementation

This commit is contained in:
Mononaut 2022-08-12 16:41:55 +00:00
parent 0631f357b6
commit 1a903d3efb
No known key found for this signature in database
GPG key ID: 61B952CAF4838F94
2 changed files with 64 additions and 33 deletions

View file

@ -1,5 +1,5 @@
import * as puppeteer from 'puppeteer'; import * as puppeteer from 'puppeteer';
import ConcurrencyImplementation, { ResourceData } from 'puppeteer-cluster/dist/concurrency/ConcurrencyImplementation'; import ConcurrencyImplementation from 'puppeteer-cluster/dist/concurrency/ConcurrencyImplementation';
import { timeoutExecute } from 'puppeteer-cluster/dist/util'; import { timeoutExecute } from 'puppeteer-cluster/dist/util';
import config from '../config'; import config from '../config';
@ -8,17 +8,24 @@ const mempoolHost = config.MEMPOOL.HTTP_HOST + (config.MEMPOOL.HTTP_PORT ? ':' +
const BROWSER_TIMEOUT = 8000; const BROWSER_TIMEOUT = 8000;
// maximum lifetime of a single page session // maximum lifetime of a single page session
const maxAgeMs = (config.PUPPETEER.MAX_PAGE_AGE || (24 * 60 * 60)) * 1000; const maxAgeMs = (config.PUPPETEER.MAX_PAGE_AGE || (24 * 60 * 60)) * 1000;
const maxConcurrency = config.PUPPETEER.CLUSTER_SIZE;
interface repairablePage extends puppeteer.Page { interface RepairablePage extends puppeteer.Page {
repairRequested?: boolean; repairRequested?: boolean;
language?: string | null; language?: string | null;
createdAt?: number;
free?: boolean;
index?: number;
}
interface ResourceData {
page: RepairablePage;
} }
export default class ReusablePage extends ConcurrencyImplementation { export default class ReusablePage extends ConcurrencyImplementation {
protected browser: puppeteer.Browser | null = null; protected browser: puppeteer.Browser | null = null;
protected currentPage: repairablePage | null = null; protected pages: RepairablePage[] = [];
protected pageCreatedAt: number = 0;
private repairing: boolean = false; private repairing: boolean = false;
private repairRequested: boolean = false; private repairRequested: boolean = false;
private openInstances: number = 0; private openInstances: number = 0;
@ -46,40 +53,70 @@ export default class ReusablePage extends ConcurrencyImplementation {
} }
try { try {
this.browser = await this.puppeteer.launch(this.options) as puppeteer.Browser; await this.init();
} catch (err) { } catch (err) {
throw new Error('Unable to restart chrome.'); throw new Error('Unable to restart chrome.');
} }
this.currentPage = null;
this.repairRequested = false; this.repairRequested = false;
this.repairing = false; this.repairing = false;
this.waitingForRepairResolvers.forEach(resolve => resolve()); this.waitingForRepairResolvers.forEach(resolve => resolve());
this.waitingForRepairResolvers = []; this.waitingForRepairResolvers = [];
await this.createResources();
} }
public async init() { public async init() {
this.browser = await this.puppeteer.launch(this.options); this.browser = await this.puppeteer.launch(this.options);
const promises = []
for (let i = 0; i < maxConcurrency; i++) {
const newPage = await this.initPage();
newPage.index = this.pages.length;
console.log('initialized page ', newPage.index);
this.pages.push(newPage);
}
} }
public async close() { public async close() {
await (this.browser as puppeteer.Browser).close(); await (this.browser as puppeteer.Browser).close();
} }
protected async initPage(): Promise<RepairablePage> {
const page = await (this.browser as puppeteer.Browser).newPage() as RepairablePage;
page.language = null;
page.createdAt = Date.now();
const defaultUrl = mempoolHost + '/preview/block/1';
page.on('pageerror', (err) => {
page.repairRequested = true;
});
await page.goto(defaultUrl, { waitUntil: "load" });
page.free = true;
return page
}
protected async createResources(): Promise<ResourceData> { protected async createResources(): Promise<ResourceData> {
if (!this.currentPage) { const page = this.pages.find(p => p.free);
this.currentPage = await (this.browser as puppeteer.Browser).newPage(); if (!page) {
this.currentPage.language = null; console.log('no free pages!')
this.pageCreatedAt = Date.now(); throw new Error('no pages available');
const defaultUrl = mempoolHost + '/preview/block/1'; } else {
this.currentPage.on('pageerror', (err) => { page.free = false;
this.repairRequested = true; return { page };
});
await this.currentPage.goto(defaultUrl, { waitUntil: "load" });
} }
return { }
page: this.currentPage
protected async repairPage(page) {
// create a new page
const newPage = await this.initPage();
newPage.free = true;
// replace the old page
newPage.index = page.index;
this.pages.splice(page.index, 1, newPage);
// clean up the old page
try {
await page.goto('about:blank', {timeout: 200}); // prevents memory leak (maybe?)
} catch (e) {
console.log('unexpected page repair error');
} }
await page.close();
return newPage;
} }
public async workerInstance() { public async workerInstance() {
@ -97,8 +134,15 @@ export default class ReusablePage extends ConcurrencyImplementation {
close: async () => { close: async () => {
this.openInstances -= 1; // decrement first in case of error this.openInstances -= 1; // decrement first in case of error
if (resources?.page != null) {
if (resources.page.repairRequested || (Date.now() - (resources.page.createdAt || 0) > maxAgeMs)) {
resources.page = await this.repairPage(resources.page);
} else {
resources.page.free = true;
}
}
if (this.repairRequested || this.currentPage?.repairRequested || (Date.now() - this.pageCreatedAt > maxAgeMs)) { if (this.repairRequested) {
await this.repair(); await this.repair();
} }
}, },
@ -108,9 +152,7 @@ export default class ReusablePage extends ConcurrencyImplementation {
close: async () => {}, close: async () => {},
repair: async () => { repair: async () => {
console.log('Repair requested'); await this.repairPage(resources.page);
this.repairRequested = true;
await this.repair();
}, },
}; };
} }

View file

@ -51,8 +51,6 @@ class Server {
this.server.listen(config.SERVER.HTTP_PORT, () => { this.server.listen(config.SERVER.HTTP_PORT, () => {
console.log(`Mempool Unfurl Server is running on port ${config.SERVER.HTTP_PORT}`); console.log(`Mempool Unfurl Server is running on port ${config.SERVER.HTTP_PORT}`);
}); });
this.initClusterPages();
} }
async stopServer() { async stopServer() {
@ -70,16 +68,7 @@ class Server {
this.app.get('*', (req, res) => { return this.renderHTML(req, res) }) this.app.get('*', (req, res) => { return this.renderHTML(req, res) })
} }
async initClusterPages() {
for (let i = 0; i < config.PUPPETEER.CLUSTER_SIZE; i++) {
this.cluster?.execute({ action: 'init' });
}
}
async clusterTask({ page, data: { url, path, action } }) { async clusterTask({ page, data: { url, path, action } }) {
if (action === 'init') {
return;
}
try { try {
const urlParts = parseLanguageUrl(path); const urlParts = parseLanguageUrl(path);
if (page.language !== urlParts.lang) { if (page.language !== urlParts.lang) {