Merge bitcoin/bitcoin#25574: validation: Improve error handling when VerifyDB dosn't finish successfully

0af16e7134 doc: add release note for #25574 (Martin Zumsande)
57ef2a4812 validation: report if pruning prevents completion of verification (Martin Zumsande)
0c7785bb25 init, validation: Improve handling if VerifyDB() fails due to insufficient dbcache (Martin Zumsande)
d6f781f1cf validation: return VerifyDBResult::INTERRUPTED if verification was interrupted (Martin Zumsande)
6360b5302d validation: Change return value of VerifyDB to enum type (Martin Zumsande)

Pull request description:

  `VerifyDB()` can fail to complete due to insufficient dbcache at the level 3 checks. This PR improves the error handling in this case in the following ways:
  - The rpc `-verifychain` now returns false if the check can't be completed due to insufficient cache
  - During init, we only log a warning if the default values for `-checkblocks` and `-checklevel` are taken and the check doesn't complete. However, if the user actively specifies one of these args, we return with an InitError if we can't complete the check.

  This PR also changes `-verifychain` RPC to return `false` if the verification didn't finish due to missing block data (pruning) or due to being interrupted by the node being shutdown.

  Previously, this PR also included a fix for a possible assert during verification - this was done in #27009 (now merged).

ACKs for top commit:
  achow101:
    ACK 0af16e7134
  ryanofsky:
    Code review ACK 0af16e7134. Only small suggested changes since the last review, like renaming some of the enum values. I did leave more suggestions, but they are not very important and could be followups
  john-moffett:
    ACK 0af16e7134
  MarcoFalke:
    lgtm re-ACK 0af16e7134 🎚

Tree-SHA512: 84b4f767cf9bfbafef362312757c9bf765b41ae3977f4ece840e40c52a2266b1457832df0cdf70440be0aac2168d9b58fc817238630b0b6812f3836ca950bc0e
This commit is contained in:
Andrew Chow 2023-02-22 14:10:18 -05:00
commit 832fa2d238
No known key found for this signature in database
GPG Key ID: 17565732E08E5E41
9 changed files with 86 additions and 28 deletions

View File

@ -0,0 +1,13 @@
Updated settings
----------------
If the `-checkblocks` or `-checklevel` options are explicitly provided by the
user, but the verification checks cannot be completed due to an insufficient
dbcache, Bitcoin Core will now return an error at startup. (#25574)
RPC
---
The `-verifychain` RPC will now return `false` if the checks didn't fail,
but couldn't be completed at the desired depth and level. This could be due
to missing data while pruning, due to an insufficient dbcache or due to
the node being shutdown before the call could finish. (#25574)

View File

@ -1495,6 +1495,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
options.prune = chainman.m_blockman.IsPruneMode();
options.check_blocks = args.GetIntArg("-checkblocks", DEFAULT_CHECKBLOCKS);
options.check_level = args.GetIntArg("-checklevel", DEFAULT_CHECKLEVEL);
options.require_full_verification = args.IsArgSet("-checkblocks") || args.IsArgSet("-checklevel");
options.check_interrupt = ShutdownRequested;
options.coins_error_cb = [] {
uiInterface.ThreadSafeMessageBox(
@ -1526,7 +1527,7 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
}
}
if (status == node::ChainstateLoadStatus::FAILURE_INCOMPATIBLE_DB) {
if (status == node::ChainstateLoadStatus::FAILURE_INCOMPATIBLE_DB || status == node::ChainstateLoadStatus::FAILURE_INSUFFICIENT_DBCACHE) {
return InitError(error);
}

View File

@ -192,12 +192,23 @@ ChainstateLoadResult VerifyLoadedChainstate(ChainstateManager& chainman, const C
"Only rebuild the block database if you are sure that your computer's date and time are correct")};
}
if (!CVerifyDB().VerifyDB(
*chainstate, chainman.GetConsensus(), chainstate->CoinsDB(),
options.check_level,
options.check_blocks)) {
VerifyDBResult result = CVerifyDB().VerifyDB(
*chainstate, chainman.GetConsensus(), chainstate->CoinsDB(),
options.check_level,
options.check_blocks);
switch (result) {
case VerifyDBResult::SUCCESS:
case VerifyDBResult::INTERRUPTED:
case VerifyDBResult::SKIPPED_MISSING_BLOCKS:
break;
case VerifyDBResult::CORRUPTED_BLOCK_DB:
return {ChainstateLoadStatus::FAILURE, _("Corrupted block database detected")};
}
case VerifyDBResult::SKIPPED_L3_CHECKS:
if (options.require_full_verification) {
return {ChainstateLoadStatus::FAILURE_INSUFFICIENT_DBCACHE, _("Insufficient dbcache for block verification")};
}
break;
} // no default case, so the compiler can warn about missing cases
}
}

View File

@ -25,6 +25,7 @@ struct ChainstateLoadOptions {
bool reindex{false};
bool reindex_chainstate{false};
bool prune{false};
bool require_full_verification{true};
int64_t check_blocks{DEFAULT_CHECKBLOCKS};
int64_t check_level{DEFAULT_CHECKLEVEL};
std::function<bool()> check_interrupt;
@ -35,7 +36,13 @@ struct ChainstateLoadOptions {
//! case, and treat other cases as errors. More complex applications may want to
//! try reindexing in the generic failure case, and pass an interrupt callback
//! and exit cleanly in the interrupted case.
enum class ChainstateLoadStatus { SUCCESS, FAILURE, FAILURE_INCOMPATIBLE_DB, INTERRUPTED };
enum class ChainstateLoadStatus {
SUCCESS,
FAILURE,
FAILURE_INCOMPATIBLE_DB,
FAILURE_INSUFFICIENT_DBCACHE,
INTERRUPTED,
};
//! Chainstate load status code and optional error string.
using ChainstateLoadResult = std::tuple<ChainstateLoadStatus, bilingual_str>;

View File

@ -1110,7 +1110,7 @@ static RPCHelpMan verifychain()
{"nblocks", RPCArg::Type::NUM, RPCArg::DefaultHint{strprintf("%d, 0=all", DEFAULT_CHECKBLOCKS)}, "The number of blocks to check."},
},
RPCResult{
RPCResult::Type::BOOL, "", "Verified or not"},
RPCResult::Type::BOOL, "", "Verification finished successfully. If false, check debug.log for reason."},
RPCExamples{
HelpExampleCli("verifychain", "")
+ HelpExampleRpc("verifychain", "")
@ -1125,7 +1125,7 @@ static RPCHelpMan verifychain()
Chainstate& active_chainstate = chainman.ActiveChainstate();
return CVerifyDB().VerifyDB(
active_chainstate, chainman.GetParams().GetConsensus(), active_chainstate.CoinsTip(), check_level, check_depth);
active_chainstate, chainman.GetParams().GetConsensus(), active_chainstate.CoinsTip(), check_level, check_depth) == VerifyDBResult::SUCCESS;
},
};
}

View File

@ -222,6 +222,7 @@ void TestingSetup::LoadVerifyActivateChainstate()
options.prune = chainman.m_blockman.IsPruneMode();
options.check_blocks = m_args.GetIntArg("-checkblocks", DEFAULT_CHECKBLOCKS);
options.check_level = m_args.GetIntArg("-checklevel", DEFAULT_CHECKLEVEL);
options.require_full_verification = m_args.IsArgSet("-checkblocks") || m_args.IsArgSet("-checklevel");
auto [status, error] = LoadChainstate(chainman, m_cache_sizes, options);
assert(status == node::ChainstateLoadStatus::SUCCESS);

View File

@ -4060,7 +4060,7 @@ CVerifyDB::~CVerifyDB()
uiInterface.ShowProgress("", 100, false);
}
bool CVerifyDB::VerifyDB(
VerifyDBResult CVerifyDB::VerifyDB(
Chainstate& chainstate,
const Consensus::Params& consensus_params,
CCoinsView& coinsview,
@ -4069,7 +4069,7 @@ bool CVerifyDB::VerifyDB(
AssertLockHeld(cs_main);
if (chainstate.m_chain.Tip() == nullptr || chainstate.m_chain.Tip()->pprev == nullptr) {
return true;
return VerifyDBResult::SUCCESS;
}
// Verify blocks in the best chain
@ -4084,6 +4084,7 @@ bool CVerifyDB::VerifyDB(
int nGoodTransactions = 0;
BlockValidationState state;
int reportDone = 0;
bool skipped_no_block_data{false};
bool skipped_l3_checks{false};
LogPrintf("Verification progress: 0%%\n");
@ -4103,25 +4104,29 @@ bool CVerifyDB::VerifyDB(
if ((chainstate.m_blockman.IsPruneMode() || is_snapshot_cs) && !(pindex->nStatus & BLOCK_HAVE_DATA)) {
// If pruning or running under an assumeutxo snapshot, only go
// back as far as we have data.
LogPrintf("VerifyDB(): block verification stopping at height %d (pruning, no data)\n", pindex->nHeight);
LogPrintf("VerifyDB(): block verification stopping at height %d (no data). This could be due to pruning or use of an assumeutxo snapshot.\n", pindex->nHeight);
skipped_no_block_data = true;
break;
}
CBlock block;
// check level 0: read from disk
if (!ReadBlockFromDisk(block, pindex, consensus_params)) {
return error("VerifyDB(): *** ReadBlockFromDisk failed at %d, hash=%s", pindex->nHeight, pindex->GetBlockHash().ToString());
LogPrintf("Verification error: ReadBlockFromDisk failed at %d, hash=%s\n", pindex->nHeight, pindex->GetBlockHash().ToString());
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
// check level 1: verify block validity
if (nCheckLevel >= 1 && !CheckBlock(block, state, consensus_params)) {
return error("%s: *** found bad block at %d, hash=%s (%s)\n", __func__,
pindex->nHeight, pindex->GetBlockHash().ToString(), state.ToString());
LogPrintf("Verification error: found bad block at %d, hash=%s (%s)\n",
pindex->nHeight, pindex->GetBlockHash().ToString(), state.ToString());
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
// check level 2: verify undo validity
if (nCheckLevel >= 2 && pindex) {
CBlockUndo undo;
if (!pindex->GetUndoPos().IsNull()) {
if (!UndoReadFromDisk(undo, pindex)) {
return error("VerifyDB(): *** found bad undo data at %d, hash=%s\n", pindex->nHeight, pindex->GetBlockHash().ToString());
LogPrintf("Verification error: found bad undo data at %d, hash=%s\n", pindex->nHeight, pindex->GetBlockHash().ToString());
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
}
}
@ -4133,7 +4138,8 @@ bool CVerifyDB::VerifyDB(
assert(coins.GetBestBlock() == pindex->GetBlockHash());
DisconnectResult res = chainstate.DisconnectBlock(block, pindex, coins);
if (res == DISCONNECT_FAILED) {
return error("VerifyDB(): *** irrecoverable inconsistency in block data at %d, hash=%s", pindex->nHeight, pindex->GetBlockHash().ToString());
LogPrintf("Verification error: irrecoverable inconsistency in block data at %d, hash=%s\n", pindex->nHeight, pindex->GetBlockHash().ToString());
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
if (res == DISCONNECT_UNCLEAN) {
nGoodTransactions = 0;
@ -4145,14 +4151,16 @@ bool CVerifyDB::VerifyDB(
skipped_l3_checks = true;
}
}
if (ShutdownRequested()) return true;
if (ShutdownRequested()) return VerifyDBResult::INTERRUPTED;
}
if (pindexFailure) {
return error("VerifyDB(): *** coin database inconsistencies found (last %i blocks, %i good transactions before that)\n", chainstate.m_chain.Height() - pindexFailure->nHeight + 1, nGoodTransactions);
LogPrintf("Verification error: coin database inconsistencies found (last %i blocks, %i good transactions before that)\n", chainstate.m_chain.Height() - pindexFailure->nHeight + 1, nGoodTransactions);
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
if (skipped_l3_checks) {
LogPrintf("Skipped verification of level >=3 (insufficient database cache size). Consider increasing -dbcache.\n");
}
// store block count as we move pindex at check level >= 4
int block_count = chainstate.m_chain.Height() - pindex->nHeight;
@ -4168,18 +4176,27 @@ bool CVerifyDB::VerifyDB(
uiInterface.ShowProgress(_("Verifying blocks…").translated, percentageDone, false);
pindex = chainstate.m_chain.Next(pindex);
CBlock block;
if (!ReadBlockFromDisk(block, pindex, consensus_params))
return error("VerifyDB(): *** ReadBlockFromDisk failed at %d, hash=%s", pindex->nHeight, pindex->GetBlockHash().ToString());
if (!chainstate.ConnectBlock(block, state, pindex, coins)) {
return error("VerifyDB(): *** found unconnectable block at %d, hash=%s (%s)", pindex->nHeight, pindex->GetBlockHash().ToString(), state.ToString());
if (!ReadBlockFromDisk(block, pindex, consensus_params)) {
LogPrintf("Verification error: ReadBlockFromDisk failed at %d, hash=%s\n", pindex->nHeight, pindex->GetBlockHash().ToString());
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
if (ShutdownRequested()) return true;
if (!chainstate.ConnectBlock(block, state, pindex, coins)) {
LogPrintf("Verification error: found unconnectable block at %d, hash=%s (%s)\n", pindex->nHeight, pindex->GetBlockHash().ToString(), state.ToString());
return VerifyDBResult::CORRUPTED_BLOCK_DB;
}
if (ShutdownRequested()) return VerifyDBResult::INTERRUPTED;
}
}
LogPrintf("Verification: No coin database inconsistencies in last %i blocks (%i transactions)\n", block_count, nGoodTransactions);
return true;
if (skipped_l3_checks) {
return VerifyDBResult::SKIPPED_L3_CHECKS;
}
if (skipped_no_block_data) {
return VerifyDBResult::SKIPPED_MISSING_BLOCKS;
}
return VerifyDBResult::SUCCESS;
}
/** Apply the effects of a block on the utxo cache, ignoring that it may already have been applied. */

View File

@ -349,12 +349,20 @@ bool HasValidProofOfWork(const std::vector<CBlockHeader>& headers, const Consens
/** Return the sum of the work on a given set of headers */
arith_uint256 CalculateHeadersWork(const std::vector<CBlockHeader>& headers);
enum class VerifyDBResult {
SUCCESS,
CORRUPTED_BLOCK_DB,
INTERRUPTED,
SKIPPED_L3_CHECKS,
SKIPPED_MISSING_BLOCKS,
};
/** RAII wrapper for VerifyDB: Verify consistency of the block and coin databases */
class CVerifyDB {
public:
CVerifyDB();
~CVerifyDB();
bool VerifyDB(
[[nodiscard]] VerifyDBResult VerifyDB(
Chainstate& chainstate,
const Consensus::Params& consensus_params,
CCoinsView& coinsview,

View File

@ -223,8 +223,8 @@ class PruneTest(BitcoinTestFramework):
def reorg_back(self):
# Verify that a block on the old main chain fork has been pruned away
assert_raises_rpc_error(-1, "Block not available (pruned data)", self.nodes[2].getblock, self.forkhash)
with self.nodes[2].assert_debug_log(expected_msgs=['block verification stopping at height', '(pruning, no data)']):
self.nodes[2].verifychain(checklevel=4, nblocks=0)
with self.nodes[2].assert_debug_log(expected_msgs=['block verification stopping at height', '(no data)']):
assert not self.nodes[2].verifychain(checklevel=4, nblocks=0)
self.log.info(f"Will need to redownload block {self.forkheight}")
# Verify that we have enough history to reorg back to the fork point