Merge pull request #4653 from mempool/mononaut/healthier-checks

Staggered esplora fallback health checks
This commit is contained in:
wiz 2024-02-08 07:23:39 -05:00 committed by GitHub
commit 49553b7bae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -4,6 +4,7 @@ import http from 'http';
import { AbstractBitcoinApi } from './bitcoin-api-abstract-factory';
import { IEsploraApi } from './esplora-api.interface';
import logger from '../../logger';
import { Common } from '../common';
interface FailoverHost {
host: string,
@ -15,11 +16,13 @@ interface FailoverHost {
outOfSync?: boolean,
unreachable?: boolean,
preferred?: boolean,
checked: boolean,
}
class FailoverRouter {
activeHost: FailoverHost;
fallbackHost: FailoverHost;
maxHeight: number = 0;
hosts: FailoverHost[];
multihost: boolean;
pollInterval: number = 60000;
@ -34,6 +37,7 @@ class FailoverRouter {
this.hosts = (config.ESPLORA.FALLBACK || []).map(domain => {
return {
host: domain,
checked: false,
rtts: [],
rtt: Infinity,
failures: 0,
@ -46,6 +50,7 @@ class FailoverRouter {
failures: 0,
socket: !!config.ESPLORA.UNIX_SOCKET_PATH,
preferred: true,
checked: false,
};
this.fallbackHost = this.activeHost;
this.hosts.unshift(this.activeHost);
@ -74,66 +79,87 @@ class FailoverRouter {
clearTimeout(this.pollTimer);
}
const results = await Promise.allSettled(this.hosts.map(async (host) => {
if (host.socket) {
return this.pollConnection.get<number>('/blocks/tip/height', { socketPath: host.host, timeout: config.ESPLORA.FALLBACK_TIMEOUT });
} else {
return this.pollConnection.get<number>(host.host + '/blocks/tip/height', { timeout: config.ESPLORA.FALLBACK_TIMEOUT });
}
}));
const maxHeight = results.reduce((max, result) => Math.max(max, result.status === 'fulfilled' ? result.value?.data || 0 : 0), 0);
const start = Date.now();
// update rtts & sync status
for (let i = 0; i < results.length; i++) {
const host = this.hosts[i];
const result = results[i].status === 'fulfilled' ? (results[i] as PromiseFulfilledResult<AxiosResponse<number, any>>).value : null;
if (result) {
const height = result.data;
const rtt = result.config['meta'].rtt;
host.rtts.unshift(rtt);
host.rtts.slice(0, 5);
host.rtt = host.rtts.reduce((acc, l) => acc + l, 0) / host.rtts.length;
host.latestHeight = height;
if (height == null || isNaN(height) || (maxHeight - height > 2)) {
host.outOfSync = true;
for (const host of this.hosts) {
try {
const result = await (host.socket
? this.pollConnection.get<number>('/blocks/tip/height', { socketPath: host.host, timeout: config.ESPLORA.FALLBACK_TIMEOUT })
: this.pollConnection.get<number>(host.host + '/blocks/tip/height', { timeout: config.ESPLORA.FALLBACK_TIMEOUT })
);
if (result) {
const height = result.data;
this.maxHeight = Math.max(height, this.maxHeight);
const rtt = result.config['meta'].rtt;
host.rtts.unshift(rtt);
host.rtts.slice(0, 5);
host.rtt = host.rtts.reduce((acc, l) => acc + l, 0) / host.rtts.length;
host.latestHeight = height;
if (height == null || isNaN(height) || (this.maxHeight - height > 2)) {
host.outOfSync = true;
} else {
host.outOfSync = false;
}
host.unreachable = false;
} else {
host.outOfSync = false;
host.outOfSync = true;
host.unreachable = true;
host.rtts = [];
host.rtt = Infinity;
}
host.unreachable = false;
} else {
} catch (e) {
host.outOfSync = true;
host.unreachable = true;
host.rtts = [];
host.rtt = Infinity;
}
host.checked = true;
// switch if the current host is out of sync or significantly slower than the next best alternative
const rankOrder = this.sortHosts();
// switch if the current host is out of sync or significantly slower than the next best alternative
if (this.activeHost.outOfSync || this.activeHost.unreachable || (this.activeHost !== rankOrder[0] && rankOrder[0].preferred) || (!this.activeHost.preferred && this.activeHost.rtt > (rankOrder[0].rtt * 2) + 50)) {
if (this.activeHost.unreachable) {
logger.warn(`🚨🚨🚨 Unable to reach ${this.activeHost.host}, failing over to next best alternative 🚨🚨🚨`);
} else if (this.activeHost.outOfSync) {
logger.warn(`🚨🚨🚨 ${this.activeHost.host} has fallen behind, failing over to next best alternative 🚨🚨🚨`);
} else {
logger.debug(`πŸ› οΈ ${this.activeHost.host} is no longer the best esplora host πŸ› οΈ`);
}
this.electHost();
}
await Common.sleep$(50);
}
this.sortHosts();
const rankOrder = this.updateFallback();
logger.debug(`Tomahawk ranking:\n${rankOrder.map((host, index) => this.formatRanking(index, host, this.activeHost, this.maxHeight)).join('\n')}`);
logger.debug(`Tomahawk ranking:\n${this.hosts.map((host, index) => this.formatRanking(index, host, this.activeHost, maxHeight)).join('\n')}`);
const elapsed = Date.now() - start;
// switch if the current host is out of sync or significantly slower than the next best alternative
if (this.activeHost.outOfSync || this.activeHost.unreachable || (this.activeHost !== this.hosts[0] && this.hosts[0].preferred) || (!this.activeHost.preferred && this.activeHost.rtt > (this.hosts[0].rtt * 2) + 50)) {
if (this.activeHost.unreachable) {
logger.warn(`🚨🚨🚨 Unable to reach ${this.activeHost.host}, failing over to next best alternative 🚨🚨🚨`);
} else if (this.activeHost.outOfSync) {
logger.warn(`🚨🚨🚨 ${this.activeHost.host} has fallen behind, failing over to next best alternative 🚨🚨🚨`);
} else {
logger.debug(`πŸ› οΈ ${this.activeHost.host} is no longer the best esplora host πŸ› οΈ`);
}
this.electHost();
}
this.pollTimer = setTimeout(() => { this.pollHosts(); }, this.pollInterval);
this.pollTimer = setTimeout(() => { this.pollHosts(); }, Math.max(1, this.pollInterval - elapsed));
}
private formatRanking(index: number, host: FailoverHost, active: FailoverHost, maxHeight: number): string {
const heightStatus = host.outOfSync ? '🚫' : (host.latestHeight && host.latestHeight < maxHeight ? '🟧' : 'βœ…');
return `${host === active ? '⭐️' : ' '} ${host.rtt < Infinity ? Math.round(host.rtt).toString().padStart(5, ' ') + 'ms' : ' - '} ${host.unreachable ? 'πŸ”₯' : 'βœ…'} | block: ${host.latestHeight || '??????'} ${heightStatus} | ${host.host} ${host === active ? '⭐️' : ' '}`;
const heightStatus = !host.checked ? '⏳' : (host.outOfSync ? '🚫' : (host.latestHeight && host.latestHeight < maxHeight ? '🟧' : 'βœ…'));
return `${host === active ? '⭐️' : ' '} ${host.rtt < Infinity ? Math.round(host.rtt).toString().padStart(5, ' ') + 'ms' : ' - '} ${!host.checked ? '⏳' : (host.unreachable ? 'πŸ”₯' : 'βœ…')} | block: ${host.latestHeight || '??????'} ${heightStatus} | ${host.host} ${host === active ? '⭐️' : ' '}`;
}
private updateFallback(): FailoverHost[] {
const rankOrder = this.sortHosts();
if (rankOrder.length > 1 && rankOrder[0] === this.activeHost) {
this.fallbackHost = rankOrder[1];
} else {
this.fallbackHost = rankOrder[0];
}
return rankOrder;
}
// sort hosts by connection quality, and update default fallback
private sortHosts(): void {
private sortHosts(): FailoverHost[] {
// sort by connection quality
this.hosts.sort((a, b) => {
return this.hosts.slice().sort((a, b) => {
if ((a.unreachable || a.outOfSync) === (b.unreachable || b.outOfSync)) {
if (a.preferred === b.preferred) {
// lower rtt is best
@ -145,19 +171,14 @@ class FailoverRouter {
return (a.unreachable || a.outOfSync) ? 1 : -1;
}
});
if (this.hosts.length > 1 && this.hosts[0] === this.activeHost) {
this.fallbackHost = this.hosts[1];
} else {
this.fallbackHost = this.hosts[0];
}
}
// depose the active host and choose the next best replacement
private electHost(): void {
this.activeHost.outOfSync = true;
this.activeHost.failures = 0;
this.sortHosts();
this.activeHost = this.hosts[0];
const rankOrder = this.sortHosts();
this.activeHost = rankOrder[0];
logger.warn(`Switching esplora host to ${this.activeHost.host}`);
}