Faster -reindex by initially deserializing only headers

When a block is initially read from a blk*.dat file during reindexing,
it can be added to the block index only if all of its ancestor blocks
have been added, which is rare. If the block's ancestors have not been
added, the block must be re-read from disk later when it can be added.

This commit: During the initial block read, deserialize only its header,
rather than the entire block, since this is sufficient to determine
if its parent (and thus all its ancestors) has been added. This is a
performance improvement.
This commit is contained in:
Larry Ruane 2020-08-07 15:07:21 -06:00
parent c72de9990a
commit db929893ef
2 changed files with 78 additions and 18 deletions

View File

@ -4393,6 +4393,8 @@ void Chainstate::LoadExternalBlockFile(
try {
// This takes over fileIn and calls fclose() on it in the CBufferedFile destructor
CBufferedFile blkdat(fileIn, 2*MAX_BLOCK_SERIALIZED_SIZE, MAX_BLOCK_SERIALIZED_SIZE+8, SER_DISK, CLIENT_VERSION);
// nRewind indicates where to resume scanning in case something goes wrong,
// such as a block fails to deserialize.
uint64_t nRewind = blkdat.GetPos();
while (!blkdat.eof()) {
if (ShutdownRequested()) return;
@ -4416,28 +4418,30 @@ void Chainstate::LoadExternalBlockFile(
continue;
} catch (const std::exception&) {
// no valid block header found; don't complain
// (this happens at the end of every blk.dat file)
break;
}
try {
// read block
uint64_t nBlockPos = blkdat.GetPos();
// read block header
const uint64_t nBlockPos{blkdat.GetPos()};
if (dbp)
dbp->nPos = nBlockPos;
blkdat.SetLimit(nBlockPos + nSize);
std::shared_ptr<CBlock> pblock = std::make_shared<CBlock>();
CBlock& block = *pblock;
blkdat >> block;
nRewind = blkdat.GetPos();
uint256 hash = block.GetHash();
CBlockHeader header;
blkdat >> header;
const uint256 hash{header.GetHash()};
// Skip the rest of this block (this may read from disk into memory); position to the marker before the
// next block, but it's still possible to rewind to the start of the current block (without a disk read).
nRewind = nBlockPos + nSize;
blkdat.SkipTo(nRewind);
{
LOCK(cs_main);
// detect out of order blocks, and store them for later
if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(block.hashPrevBlock)) {
if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(header.hashPrevBlock)) {
LogPrint(BCLog::REINDEX, "%s: Out of order block %s, parent %s not known\n", __func__, hash.ToString(),
block.hashPrevBlock.ToString());
header.hashPrevBlock.ToString());
if (dbp && blocks_with_unknown_parent) {
blocks_with_unknown_parent->emplace(block.hashPrevBlock, *dbp);
blocks_with_unknown_parent->emplace(header.hashPrevBlock, *dbp);
}
continue;
}
@ -4445,13 +4449,19 @@ void Chainstate::LoadExternalBlockFile(
// process in case the block isn't known yet
const CBlockIndex* pindex = m_blockman.LookupBlockIndex(hash);
if (!pindex || (pindex->nStatus & BLOCK_HAVE_DATA) == 0) {
BlockValidationState state;
if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
nLoaded++;
}
if (state.IsError()) {
break;
}
// This block can be processed immediately; rewind to its start, read and deserialize it.
blkdat.SetPos(nBlockPos);
std::shared_ptr<CBlock> pblock{std::make_shared<CBlock>()};
blkdat >> *pblock;
nRewind = blkdat.GetPos();
BlockValidationState state;
if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
nLoaded++;
}
if (state.IsError()) {
break;
}
} else if (hash != params.GetConsensus().hashGenesisBlock && pindex->nHeight % 1000 == 0) {
LogPrint(BCLog::REINDEX, "Block Import: already had block %s at height %d\n", hash.ToString(), pindex->nHeight);
}

View File

@ -7,9 +7,12 @@
- Start a single node and generate 3 blocks.
- Stop the node and restart it with -reindex. Verify that the node has reindexed up to block 3.
- Stop the node and restart it with -reindex-chainstate. Verify that the node has reindexed up to block 3.
- Verify that out-of-order blocks are correctly processed, see LoadExternalBlockFile()
"""
import os
from test_framework.test_framework import BitcoinTestFramework
from test_framework.p2p import MAGIC_BYTES
from test_framework.util import assert_equal
@ -27,11 +30,58 @@ class ReindexTest(BitcoinTestFramework):
assert_equal(self.nodes[0].getblockcount(), blockcount) # start_node is blocking on reindex
self.log.info("Success")
# Check that blocks can be processed out of order
def out_of_order(self):
# The previous test created 12 blocks
assert_equal(self.nodes[0].getblockcount(), 12)
self.stop_nodes()
# In this test environment, blocks will always be in order (since
# we're generating them rather than getting them from peers), so to
# test out-of-order handling, swap blocks 1 and 2 on disk.
blk0 = os.path.join(self.nodes[0].datadir, self.nodes[0].chain, 'blocks', 'blk00000.dat')
with open(blk0, 'r+b') as bf:
# Read at least the first few blocks (including genesis)
b = bf.read(2000)
# Find the offsets of blocks 2, 3, and 4 (the first 3 blocks beyond genesis)
# by searching for the regtest marker bytes (see pchMessageStart).
def find_block(b, start):
return b.find(MAGIC_BYTES["regtest"], start)+4
genesis_start = find_block(b, 0)
assert_equal(genesis_start, 4)
b2_start = find_block(b, genesis_start)
b3_start = find_block(b, b2_start)
b4_start = find_block(b, b3_start)
# Blocks 2 and 3 should be the same size.
assert_equal(b3_start-b2_start, b4_start-b3_start)
# Swap the second and third blocks (don't disturb the genesis block).
bf.seek(b2_start)
bf.write(b[b3_start:b4_start])
bf.write(b[b2_start:b3_start])
# The reindexing code should detect and accommodate out of order blocks.
with self.nodes[0].assert_debug_log([
'LoadExternalBlockFile: Out of order block',
'LoadExternalBlockFile: Processing out of order child',
]):
extra_args = [["-reindex"]]
self.start_nodes(extra_args)
# All blocks should be accepted and processed.
assert_equal(self.nodes[0].getblockcount(), 12)
def run_test(self):
self.reindex(False)
self.reindex(True)
self.reindex(False)
self.reindex(True)
self.out_of_order()
if __name__ == '__main__':
ReindexTest().main()