mirror of
https://github.com/mempool/mempool.git
synced 2025-01-18 21:32:55 +01:00
Merge pull request #4653 from mempool/mononaut/healthier-checks
Staggered esplora fallback health checks
This commit is contained in:
commit
49553b7bae
@ -4,6 +4,7 @@ import http from 'http';
|
||||
import { AbstractBitcoinApi } from './bitcoin-api-abstract-factory';
|
||||
import { IEsploraApi } from './esplora-api.interface';
|
||||
import logger from '../../logger';
|
||||
import { Common } from '../common';
|
||||
|
||||
interface FailoverHost {
|
||||
host: string,
|
||||
@ -15,11 +16,13 @@ interface FailoverHost {
|
||||
outOfSync?: boolean,
|
||||
unreachable?: boolean,
|
||||
preferred?: boolean,
|
||||
checked: boolean,
|
||||
}
|
||||
|
||||
class FailoverRouter {
|
||||
activeHost: FailoverHost;
|
||||
fallbackHost: FailoverHost;
|
||||
maxHeight: number = 0;
|
||||
hosts: FailoverHost[];
|
||||
multihost: boolean;
|
||||
pollInterval: number = 60000;
|
||||
@ -34,6 +37,7 @@ class FailoverRouter {
|
||||
this.hosts = (config.ESPLORA.FALLBACK || []).map(domain => {
|
||||
return {
|
||||
host: domain,
|
||||
checked: false,
|
||||
rtts: [],
|
||||
rtt: Infinity,
|
||||
failures: 0,
|
||||
@ -46,6 +50,7 @@ class FailoverRouter {
|
||||
failures: 0,
|
||||
socket: !!config.ESPLORA.UNIX_SOCKET_PATH,
|
||||
preferred: true,
|
||||
checked: false,
|
||||
};
|
||||
this.fallbackHost = this.activeHost;
|
||||
this.hosts.unshift(this.activeHost);
|
||||
@ -74,66 +79,87 @@ class FailoverRouter {
|
||||
clearTimeout(this.pollTimer);
|
||||
}
|
||||
|
||||
const results = await Promise.allSettled(this.hosts.map(async (host) => {
|
||||
if (host.socket) {
|
||||
return this.pollConnection.get<number>('/blocks/tip/height', { socketPath: host.host, timeout: config.ESPLORA.FALLBACK_TIMEOUT });
|
||||
} else {
|
||||
return this.pollConnection.get<number>(host.host + '/blocks/tip/height', { timeout: config.ESPLORA.FALLBACK_TIMEOUT });
|
||||
}
|
||||
}));
|
||||
const maxHeight = results.reduce((max, result) => Math.max(max, result.status === 'fulfilled' ? result.value?.data || 0 : 0), 0);
|
||||
const start = Date.now();
|
||||
|
||||
// update rtts & sync status
|
||||
for (let i = 0; i < results.length; i++) {
|
||||
const host = this.hosts[i];
|
||||
const result = results[i].status === 'fulfilled' ? (results[i] as PromiseFulfilledResult<AxiosResponse<number, any>>).value : null;
|
||||
if (result) {
|
||||
const height = result.data;
|
||||
const rtt = result.config['meta'].rtt;
|
||||
host.rtts.unshift(rtt);
|
||||
host.rtts.slice(0, 5);
|
||||
host.rtt = host.rtts.reduce((acc, l) => acc + l, 0) / host.rtts.length;
|
||||
host.latestHeight = height;
|
||||
if (height == null || isNaN(height) || (maxHeight - height > 2)) {
|
||||
host.outOfSync = true;
|
||||
for (const host of this.hosts) {
|
||||
try {
|
||||
const result = await (host.socket
|
||||
? this.pollConnection.get<number>('/blocks/tip/height', { socketPath: host.host, timeout: config.ESPLORA.FALLBACK_TIMEOUT })
|
||||
: this.pollConnection.get<number>(host.host + '/blocks/tip/height', { timeout: config.ESPLORA.FALLBACK_TIMEOUT })
|
||||
);
|
||||
if (result) {
|
||||
const height = result.data;
|
||||
this.maxHeight = Math.max(height, this.maxHeight);
|
||||
const rtt = result.config['meta'].rtt;
|
||||
host.rtts.unshift(rtt);
|
||||
host.rtts.slice(0, 5);
|
||||
host.rtt = host.rtts.reduce((acc, l) => acc + l, 0) / host.rtts.length;
|
||||
host.latestHeight = height;
|
||||
if (height == null || isNaN(height) || (this.maxHeight - height > 2)) {
|
||||
host.outOfSync = true;
|
||||
} else {
|
||||
host.outOfSync = false;
|
||||
}
|
||||
host.unreachable = false;
|
||||
} else {
|
||||
host.outOfSync = false;
|
||||
host.outOfSync = true;
|
||||
host.unreachable = true;
|
||||
host.rtts = [];
|
||||
host.rtt = Infinity;
|
||||
}
|
||||
host.unreachable = false;
|
||||
} else {
|
||||
} catch (e) {
|
||||
host.outOfSync = true;
|
||||
host.unreachable = true;
|
||||
host.rtts = [];
|
||||
host.rtt = Infinity;
|
||||
}
|
||||
host.checked = true;
|
||||
|
||||
|
||||
// switch if the current host is out of sync or significantly slower than the next best alternative
|
||||
const rankOrder = this.sortHosts();
|
||||
// switch if the current host is out of sync or significantly slower than the next best alternative
|
||||
if (this.activeHost.outOfSync || this.activeHost.unreachable || (this.activeHost !== rankOrder[0] && rankOrder[0].preferred) || (!this.activeHost.preferred && this.activeHost.rtt > (rankOrder[0].rtt * 2) + 50)) {
|
||||
if (this.activeHost.unreachable) {
|
||||
logger.warn(`π¨π¨π¨ Unable to reach ${this.activeHost.host}, failing over to next best alternative π¨π¨π¨`);
|
||||
} else if (this.activeHost.outOfSync) {
|
||||
logger.warn(`π¨π¨π¨ ${this.activeHost.host} has fallen behind, failing over to next best alternative π¨π¨π¨`);
|
||||
} else {
|
||||
logger.debug(`π οΈ ${this.activeHost.host} is no longer the best esplora host π οΈ`);
|
||||
}
|
||||
this.electHost();
|
||||
}
|
||||
await Common.sleep$(50);
|
||||
}
|
||||
|
||||
this.sortHosts();
|
||||
const rankOrder = this.updateFallback();
|
||||
logger.debug(`Tomahawk ranking:\n${rankOrder.map((host, index) => this.formatRanking(index, host, this.activeHost, this.maxHeight)).join('\n')}`);
|
||||
|
||||
logger.debug(`Tomahawk ranking:\n${this.hosts.map((host, index) => this.formatRanking(index, host, this.activeHost, maxHeight)).join('\n')}`);
|
||||
const elapsed = Date.now() - start;
|
||||
|
||||
// switch if the current host is out of sync or significantly slower than the next best alternative
|
||||
if (this.activeHost.outOfSync || this.activeHost.unreachable || (this.activeHost !== this.hosts[0] && this.hosts[0].preferred) || (!this.activeHost.preferred && this.activeHost.rtt > (this.hosts[0].rtt * 2) + 50)) {
|
||||
if (this.activeHost.unreachable) {
|
||||
logger.warn(`π¨π¨π¨ Unable to reach ${this.activeHost.host}, failing over to next best alternative π¨π¨π¨`);
|
||||
} else if (this.activeHost.outOfSync) {
|
||||
logger.warn(`π¨π¨π¨ ${this.activeHost.host} has fallen behind, failing over to next best alternative π¨π¨π¨`);
|
||||
} else {
|
||||
logger.debug(`π οΈ ${this.activeHost.host} is no longer the best esplora host π οΈ`);
|
||||
}
|
||||
this.electHost();
|
||||
}
|
||||
|
||||
this.pollTimer = setTimeout(() => { this.pollHosts(); }, this.pollInterval);
|
||||
this.pollTimer = setTimeout(() => { this.pollHosts(); }, Math.max(1, this.pollInterval - elapsed));
|
||||
}
|
||||
|
||||
private formatRanking(index: number, host: FailoverHost, active: FailoverHost, maxHeight: number): string {
|
||||
const heightStatus = host.outOfSync ? 'π«' : (host.latestHeight && host.latestHeight < maxHeight ? 'π§' : 'β
');
|
||||
return `${host === active ? 'βοΈ' : ' '} ${host.rtt < Infinity ? Math.round(host.rtt).toString().padStart(5, ' ') + 'ms' : ' - '} ${host.unreachable ? 'π₯' : 'β
'} | block: ${host.latestHeight || '??????'} ${heightStatus} | ${host.host} ${host === active ? 'βοΈ' : ' '}`;
|
||||
const heightStatus = !host.checked ? 'β³' : (host.outOfSync ? 'π«' : (host.latestHeight && host.latestHeight < maxHeight ? 'π§' : 'β
'));
|
||||
return `${host === active ? 'βοΈ' : ' '} ${host.rtt < Infinity ? Math.round(host.rtt).toString().padStart(5, ' ') + 'ms' : ' - '} ${!host.checked ? 'β³' : (host.unreachable ? 'π₯' : 'β
')} | block: ${host.latestHeight || '??????'} ${heightStatus} | ${host.host} ${host === active ? 'βοΈ' : ' '}`;
|
||||
}
|
||||
|
||||
private updateFallback(): FailoverHost[] {
|
||||
const rankOrder = this.sortHosts();
|
||||
if (rankOrder.length > 1 && rankOrder[0] === this.activeHost) {
|
||||
this.fallbackHost = rankOrder[1];
|
||||
} else {
|
||||
this.fallbackHost = rankOrder[0];
|
||||
}
|
||||
return rankOrder;
|
||||
}
|
||||
|
||||
// sort hosts by connection quality, and update default fallback
|
||||
private sortHosts(): void {
|
||||
private sortHosts(): FailoverHost[] {
|
||||
// sort by connection quality
|
||||
this.hosts.sort((a, b) => {
|
||||
return this.hosts.slice().sort((a, b) => {
|
||||
if ((a.unreachable || a.outOfSync) === (b.unreachable || b.outOfSync)) {
|
||||
if (a.preferred === b.preferred) {
|
||||
// lower rtt is best
|
||||
@ -145,19 +171,14 @@ class FailoverRouter {
|
||||
return (a.unreachable || a.outOfSync) ? 1 : -1;
|
||||
}
|
||||
});
|
||||
if (this.hosts.length > 1 && this.hosts[0] === this.activeHost) {
|
||||
this.fallbackHost = this.hosts[1];
|
||||
} else {
|
||||
this.fallbackHost = this.hosts[0];
|
||||
}
|
||||
}
|
||||
|
||||
// depose the active host and choose the next best replacement
|
||||
private electHost(): void {
|
||||
this.activeHost.outOfSync = true;
|
||||
this.activeHost.failures = 0;
|
||||
this.sortHosts();
|
||||
this.activeHost = this.hosts[0];
|
||||
const rankOrder = this.sortHosts();
|
||||
this.activeHost = rankOrder[0];
|
||||
logger.warn(`Switching esplora host to ${this.activeHost.host}`);
|
||||
}
|
||||
|
||||
|
Loadingβ¦
Reference in New Issue
Block a user