blockstorage: Return on fatal block file flush error

By returning an error code if `FlushBlockFile` fails, the caller now has
to explicitly handle block file flushing errors. Before this change
such errors were non-explicitly ignored without a clear rationale.

Prior to this patch `FlushBlockFile` may have failed silently in
`Chainstate::FlushStateToDisk`. Improve this with a log line. Also add a
TODO comment to flesh out whether returning early in the case of an
error is appropriate or not. Returning early might be appropriate to
prohibit `WriteBlockIndexDB` from writing a block index entry that does
not refer to a fully flushed block.

Besides `Chainstate::FlushStateToDisk`, `FlushBlockFile` is also called
by `FindBlockPos`. Don't change the abort behavior there, since we don't
want to fail the function if the flushing of already written blocks
fails. Instead, just document it.
This commit is contained in:
TheCharlatan 2023-07-25 11:32:09 +02:00
parent 5671c15f45
commit f0207e0030
No known key found for this signature in database
GPG key ID: 9B79B45691DB4173
3 changed files with 27 additions and 5 deletions

View file

@ -546,8 +546,9 @@ void BlockManager::FlushUndoFile(int block_file, bool finalize)
}
}
void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
{
bool success = true;
LOCK(cs_LastBlockFile);
if (m_blockfile_info.size() < 1) {
@ -555,17 +556,19 @@ void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
// chainstate init, when we call ChainstateManager::MaybeRebalanceCaches() (which
// then calls FlushStateToDisk()), resulting in a call to this function before we
// have populated `m_blockfile_info` via LoadBlockIndexDB().
return;
return true;
}
assert(static_cast<int>(m_blockfile_info.size()) > m_last_blockfile);
FlatFilePos block_pos_old(m_last_blockfile, m_blockfile_info[m_last_blockfile].nSize);
if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) {
m_opts.notifications.flushError("Flushing block file to disk failed. This is likely the result of an I/O error.");
success = false;
}
// we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks,
// e.g. during IBD or a sync after a node going offline
if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo);
return success;
}
uint64_t BlockManager::CalculateCurrentUsage()
@ -658,7 +661,19 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne
if (!fKnown) {
LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s\n", m_last_blockfile, m_blockfile_info[m_last_blockfile].ToString());
}
FlushBlockFile(!fKnown, finalize_undo);
// Do not propagate the return code. The flush concerns a previous block
// and undo file that has already been written to. If a flush fails
// here, and we crash, there is no expected additional block data
// inconsistency arising from the flush failure here. However, the undo
// data may be inconsistent after a crash if the flush is called during
// a reindex. A flush error might also leave some of the data files
// untrimmed.
if (!FlushBlockFile(!fKnown, finalize_undo)) {
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning,
"Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n",
m_last_blockfile, !fKnown, finalize_undo, nFile);
}
m_last_blockfile = nFile;
m_undo_height_in_last_blockfile = 0; // No undo data yet in the new file, so reset our undo-height tracking.
}

View file

@ -91,7 +91,10 @@ private:
*/
bool LoadBlockIndex()
EXCLUSIVE_LOCKS_REQUIRED(cs_main);
void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false);
/** Return false if block file flushing fails. */
[[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false);
void FlushUndoFile(int block_file, bool finalize = false);
[[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown);
bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize);

View file

@ -2511,7 +2511,11 @@ bool Chainstate::FlushStateToDisk(
LOG_TIME_MILLIS_WITH_CATEGORY("write block and undo data to disk", BCLog::BENCH);
// First make sure all block and undo data is flushed to disk.
m_blockman.FlushBlockFile();
// TODO: Handle return error, or add detailed comment why it is
// safe to not return an error upon failure.
if (!m_blockman.FlushBlockFile()) {
LogPrintLevel(BCLog::VALIDATION, BCLog::Level::Warning, "%s: Failed to flush block file.\n", __func__);
}
}
// Then update all block file information (which may refer to block and undo files).