#!/usr/bin/env python3
# Copyright (c) 2017-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test recovery from a crash during chainstate writing.
|
2017-06-15 23:08:48 +02:00
|
|
|
|
|
|
|
- 4 nodes
|
|
|
|
* node0, node1, and node2 will have different dbcrash ratios, and different
|
|
|
|
dbcache sizes
|
|
|
|
* node3 will be a regular node, with no crashing.
|
|
|
|
* The nodes will not connect to each other.
|
|
|
|
|
|
|
|
- use default test framework starting chain. initialize starting_tip_height to
|
|
|
|
tip height.
|
|
|
|
|
|
|
|
- Main loop:
|
|
|
|
* generate lots of transactions on node3, enough to fill up a block.
|
|
|
|
* uniformly randomly pick a tip height from starting_tip_height to
|
|
|
|
tip_height; with probability 1/(height_difference+4), invalidate this block.
|
|
|
|
* mine enough blocks to overtake tip_height at start of loop.
|
|
|
|
* for each node in [node0,node1,node2]:
|
|
|
|
- for each mined block:
|
|
|
|
* submit block to node
|
|
|
|
* if node crashed on/after submitting:
|
|
|
|
- restart until recovery succeeds
|
2017-06-29 15:52:13 +02:00
|
|
|
- check that utxo matches node3 using gettxoutsetinfo"""

import errno
import http.client
import random
import time

from test_framework.blocktools import COINBASE_MATURITY
from test_framework.messages import (
    COIN,
)
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
    assert_equal,
)
from test_framework.wallet import (
    MiniWallet,
    getnewdestination,
)


class ChainstateWriteCrashTest(BitcoinTestFramework):
    def set_test_params(self):
        self.num_nodes = 4
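        # Use a generous RPC timeout: nodes in this test can be slow to respond
        # while recovering or flushing their chainstate after a simulated crash.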
        self.rpc_timeout = 480
        self.supports_cli = False

        # Set -maxmempool=0 to turn off mempool memory sharing with dbcache
        self.base_args = [
            "-limitdescendantsize=0",
            "-maxmempool=0",
            "-dbbatchsize=200000",
        ]

        # Set different crash ratios and cache sizes. Note that not all of
        # -dbcache goes to the in-memory coins cache.
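        # -dbcrashratio is a hidden debug option: a node started with
        # -dbcrashratio=n crashes with probability roughly 1/n each time it
        # flushes its chainstate to disk.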
        self.node0_args = ["-dbcrashratio=8", "-dbcache=4"] + self.base_args
        self.node1_args = ["-dbcrashratio=16", "-dbcache=8"] + self.base_args
        self.node2_args = ["-dbcrashratio=24", "-dbcache=16"] + self.base_args

        # Node3 is a normal node with default args, except that it will mine
        # full blocks and txs with "dust" outputs
        self.node3_args = ["-blockmaxweight=4000000", "-dustrelayfee=0"]
        self.extra_args = [self.node0_args, self.node1_args, self.node2_args, self.node3_args]

    def setup_network(self):
        self.add_nodes(self.num_nodes, extra_args=self.extra_args)
        self.start_nodes()
        # Leave them unconnected, we'll use submitblock directly in this test

    def restart_node(self, node_index, expected_tip):
        """Start up a given node id, wait for the tip to reach the given block hash, and calculate the utxo hash.

        Exceptions on startup should indicate node crash (due to -dbcrashratio), in which case we try again. Give up
        after 120 seconds. Returns the utxo hash of the given node."""

        time_start = time.time()
        while time.time() - time_start < 120:
            try:
                # Any of these RPC calls could throw due to node crash
                self.start_node(node_index)
                self.nodes[node_index].waitforblock(expected_tip)
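                # gettxoutsetinfo forces a chainstate flush (see the NOTE in
                # sync_node3blocks), so this call can itself trigger another
                # simulated crash.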
                utxo_hash = self.nodes[node_index].gettxoutsetinfo()['hash_serialized_2']
                return utxo_hash
            except Exception:
                # An exception here should mean the node is about to crash.
                # If bitcoind exits, then try again. wait_for_node_exit()
                # should raise an exception if bitcoind doesn't exit.
                self.wait_for_node_exit(node_index, timeout=10)
                self.crashed_on_restart += 1
                time.sleep(1)

        # If we got here, bitcoind isn't coming back up on restart. Could be a
        # bug in bitcoind, or we've gotten unlucky with our dbcrash ratio --
        # perhaps we generated a test case that blew up our cache?
        # TODO: If this happens a lot, we should try to restart without -dbcrashratio
        # and make sure that recovery happens.
        raise AssertionError(f"Unable to successfully restart node {node_index} in allotted time")

    def submit_block_catch_error(self, node_index, block):
        """Try submitting a block to the given node.

        Catch any exceptions that indicate the node has crashed.
        Returns True if the block was submitted successfully; False otherwise."""

        try:
            self.nodes[node_index].submitblock(block)
            return True
        except (http.client.CannotSendRequest, http.client.RemoteDisconnected) as e:
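            # The HTTP connection to the node dropped mid-request, which is
            # what a crash during submitblock looks like from the RPC client.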
            self.log.debug(f"node {node_index} submitblock raised exception: {e}")
            return False
        except OSError as e:
            self.log.debug(f"node {node_index} submitblock raised OSError exception: errno={e.errno}")
            if e.errno in [errno.EPIPE, errno.ECONNREFUSED, errno.ECONNRESET]:
                # The node has likely crashed
                return False
            else:
                # Unexpected exception, raise
                raise

    def sync_node3blocks(self, block_hashes):
        """Use submitblock to sync node3's chain with the other nodes

        If submitblock fails, restart the node and get the new utxo hash.

        If any nodes crash while updating, we'll compare utxo hashes to
        ensure recovery was successful."""

        node3_utxo_hash = self.nodes[3].gettxoutsetinfo()['hash_serialized_2']

        # Retrieve all the blocks from node3
        blocks = []
        for block_hash in block_hashes:
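            # getblock with verbosity=0 returns the raw hex-serialized block,
            # which is the format submitblock expects.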
            blocks.append([block_hash, self.nodes[3].getblock(block_hash, 0)])

        # Deliver each block to each other node
        for i in range(3):
            nodei_utxo_hash = None
            self.log.debug(f"Syncing blocks to node {i}")
            for (block_hash, block) in blocks:
                # Get the block from node3, and submit to node_i
                self.log.debug(f"submitting block {block_hash}")
                if not self.submit_block_catch_error(i, block):
                    # TODO: more carefully check that the crash is due to -dbcrashratio
                    # (change the exit code perhaps, and check that here?)
                    self.wait_for_node_exit(i, timeout=30)
                    self.log.debug(f"Restarting node {i} after block hash {block_hash}")
                    nodei_utxo_hash = self.restart_node(i, block_hash)
                    assert nodei_utxo_hash is not None
                    self.restart_counts[i] += 1
                else:
                    # Clear it out after successful submitblock calls -- the cached
                    # utxo hash will no longer be correct
                    nodei_utxo_hash = None

            # Check that the utxo hash matches node3's utxo set
            # NOTE: we only check the utxo set if we had to restart the node
            # after the last block submitted:
            # - checking the utxo hash causes a cache flush, which we don't
            # want to do every time; so
            # - we only update the utxo cache after a node restart, since flushing
            # the cache is a no-op at that point
            if nodei_utxo_hash is not None:
                self.log.debug(f"Checking txoutsetinfo matches for node {i}")
                assert_equal(nodei_utxo_hash, node3_utxo_hash)

    def verify_utxo_hash(self):
        """Verify that the utxo hash of each node matches node3.

        Restart any nodes that crash while querying."""

        node3_utxo_hash = self.nodes[3].gettxoutsetinfo()['hash_serialized_2']
        self.log.info("Verifying utxo hash matches for all nodes")

        for i in range(3):
            try:
                nodei_utxo_hash = self.nodes[i].gettxoutsetinfo()['hash_serialized_2']
            except OSError:
                # probably a crash on db flushing
                nodei_utxo_hash = self.restart_node(i, self.nodes[3].getbestblockhash())
            assert_equal(nodei_utxo_hash, node3_utxo_hash)

    def generate_small_transactions(self, node, count, utxo_list):
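        """Create up to count transactions on node, each spending two utxos
        from utxo_list and creating three new outputs."""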
        FEE = 1000  # TODO: replace this with node relay fee based calculation
        num_transactions = 0
        random.shuffle(utxo_list)
        while len(utxo_list) >= 2 and num_transactions < count:
            utxos_to_spend = [utxo_list.pop() for _ in range(2)]
            input_amount = int(sum([utxo['value'] for utxo in utxos_to_spend]) * COIN)
            if input_amount < FEE:
                # Sanity check -- if we chose inputs that are too small, skip
                continue

            self.wallet.send_self_transfer_multi(
                from_node=node,
                utxos_to_spend=utxos_to_spend,
                num_outputs=3,
                fee_per_output=FEE // 3,
            )
            num_transactions += 1

    def run_test(self):
        self.wallet = MiniWallet(self.nodes[3])
        initial_height = self.nodes[3].getblockcount()
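        # Mine COINBASE_MATURITY blocks so the MiniWallet has mature, spendable
        # outputs to work with; the nodes are unconnected, so skip block
        # propagation (sync_fun=self.no_op).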
        self.generate(self.nodes[3], COINBASE_MATURITY, sync_fun=self.no_op)

        # Track test coverage statistics
        self.restart_counts = [0, 0, 0]  # Track the restarts for nodes 0-2
        self.crashed_on_restart = 0  # Track count of crashes during recovery

        # Start by creating a lot of utxos on node3
        utxo_list = []
        for _ in range(5):
            utxo_list.extend(self.wallet.send_self_transfer_multi(from_node=self.nodes[3], num_outputs=1000)['new_utxos'])
        self.generate(self.nodes[3], 1, sync_fun=self.no_op)
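        # All of the transactions above should be included in the block just
        # mined, leaving node3's mempool empty.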
        assert_equal(len(self.nodes[3].getrawmempool()), 0)
        self.log.info(f"Prepped {len(utxo_list)} utxo entries")

        # Sync these blocks with the other nodes
        block_hashes_to_sync = []
        for height in range(initial_height + 1, self.nodes[3].getblockcount() + 1):
            block_hashes_to_sync.append(self.nodes[3].getblockhash(height))

        self.log.debug(f"Syncing {len(block_hashes_to_sync)} blocks with other nodes")
        # Syncing the blocks could cause nodes to crash, so the test begins here.
        self.sync_node3blocks(block_hashes_to_sync)

        starting_tip_height = self.nodes[3].getblockcount()

        # Main test loop:
        # each time through the loop, generate a bunch of transactions,
        # and then either mine a single new block on the tip, or some-sized reorg.
        for i in range(40):
            self.log.info(f"Iteration {i}, generating 2500 transactions {self.restart_counts}")
            # Generate a bunch of small-ish transactions
            self.generate_small_transactions(self.nodes[3], 2500, utxo_list)
            # Pick a random block between current tip, and starting tip
            current_height = self.nodes[3].getblockcount()
            random_height = random.randint(starting_tip_height, current_height)
            self.log.debug(f"At height {current_height}, considering height {random_height}")
            if random_height > starting_tip_height:
                # Randomly reorg from this point with some probability (1/4 for
                # tip, 1/5 for tip-1, ...)
                if random.random() < 1.0 / (current_height + 4 - random_height):
                    self.log.debug(f"Invalidating block at height {random_height}")
                    self.nodes[3].invalidateblock(self.nodes[3].getblockhash(random_height))

            # Now generate new blocks until we pass the old tip height
            self.log.debug("Mining longer tip")
            block_hashes = []
            while current_height + 1 > self.nodes[3].getblockcount():
                block_hashes.extend(self.generatetoaddress(
                    self.nodes[3],
                    nblocks=min(10, current_height + 1 - self.nodes[3].getblockcount()),
                    # new address to avoid mining a block that has just been invalidated
                    address=getnewdestination()[2],
                    sync_fun=self.no_op,
                ))
            self.log.debug(f"Syncing {len(block_hashes)} new blocks...")
            self.sync_node3blocks(block_hashes)
            self.wallet.rescan_utxos()
            utxo_list = self.wallet.get_utxos()
            self.log.debug(f"MiniWallet utxo count: {len(utxo_list)}")

        # Check that the utxo hashes agree with node3
        # Useful side effect: each utxo cache gets flushed here, so that we
        # won't get crashes on shutdown at the end of the test.
        self.verify_utxo_hash()

        # Check the test coverage
        self.log.info(f"Restarted nodes: {self.restart_counts}; crashes on restart: {self.crashed_on_restart}")

        # If no nodes were restarted, we didn't test anything.
        assert self.restart_counts != [0, 0, 0]

        # Make sure we tested the case of crash-during-recovery.
        assert self.crashed_on_restart > 0

        # Warn if any of the nodes escaped restart.
        for i in range(3):
            if self.restart_counts[i] == 0:
                self.log.warning(f"Node {i} never crashed during utxo flush!")


if __name__ == "__main__":
    ChainstateWriteCrashTest().main()