Merge #17925: Improve UpdateTransactionsFromBlock with Epochs

bd5a026928 Make UpdateTransactionsFromBlock use Epochs (Jeremy Rubin)
2ccb7cca4a Add Epoch Guards to CTxMemPoolEntry and CTxMemPool (Jeremy Rubin)

Pull request description:

  UpdateTransactionsFromBlock is called during a re-org. When a re-org occurs, all of the transactions in the mempool may be descendants of a transaction that was in a pre-reorg block. In the worst case, this causes us to propagate updates to every transaction in the mempool.

  Because we construct a `setEntries setChildren`, which is backed by a `std::set`, this algorithm can take `O(N log N)` time in the worst case.

  By using an epoch-based visitor pattern, we can limit this to `O(N)` worst-case behavior.

  Epochs are also less resource-intensive than almost any set-based alternative (e.g., a hash set) because they are allocation-free.
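
  For intuition, here is a minimal, self-contained sketch of the epoch idea (names and structure are simplified and hypothetical; the real implementation in the diff below additionally uses an RAII `EpochGuard` and asserts that a guard is held while `visited()` is used):

  ```cpp
  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct Entry {
      mutable uint64_t m_epoch{0}; // epoch in which this entry was last visited
  };

  class Pool {
      mutable uint64_t m_epoch{0}; // bumped once per traversal
  public:
      // Start a new traversal: every entry now counts as unvisited again.
      void StartTraversal() const { ++m_epoch; }

      // Return false on the first visit in the current traversal, true afterwards.
      // O(1) per call and allocation-free, unlike inserting into a std::set.
      bool visited(const Entry& e) const
      {
          const bool seen = e.m_epoch >= m_epoch;
          e.m_epoch = m_epoch;
          return seen;
      }
  };

  int main()
  {
      Pool pool;
      std::vector<Entry> entries(2);

      pool.StartTraversal();
      assert(!pool.visited(entries[0])); // first visit
      assert(pool.visited(entries[0]));  // duplicate work skipped
      pool.StartTraversal();             // fresh epoch invalidates old marks
      assert(!pool.visited(entries[0]));
  }
  ```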

  This PR is related to https://github.com/bitcoin/bitcoin/pull/17268; it is a small subset of those changes, refactored slightly to ease review. If this PR is reviewed and merged, I will follow up with more PRs (similar to #17268) to improve the mempool.

ACKs for top commit:
  sdaftuar:
    ACK bd5a026928
  adamjonas:
    Just to summarize for those looking to review - as of bd5a026 there are 3 ACKs (@sdaftuar, @ariard, and @hebasto) and one "looks good" from @ajtowns with no NACKs or any show-stopping concerns raised.
  ajtowns:
    ACK bd5a026928 (code review)
  ariard:
    Code review ACK bd5a026
  hebasto:
    ACK bd5a026928, modulo some nits and a typo.

Tree-SHA512: f0d2291085019ffb4e1119edeb9f4a89c1a572d1cb5b4bdf5743dd0152e721e1935f5155dcae84e1e5bda5ffdf6224c488c1e200bd33bedca9f5ca22d5f5139f
Commit b2df21b32c by Wladimir J. van der Laan, 2020-02-03 11:28:39 +01:00
2 changed files with 87 additions and 14 deletions

src/txmempool.cpp

@@ -23,7 +23,7 @@ CTxMemPoolEntry::CTxMemPoolEntry(const CTransactionRef& _tx, const CAmount& _nFe
                                  int64_t _nTime, unsigned int _entryHeight,
                                  bool _spendsCoinbase, int64_t _sigOpsCost, LockPoints lp)
     : tx(_tx), nFee(_nFee), nTxWeight(GetTransactionWeight(*tx)), nUsageSize(RecursiveDynamicUsage(tx)), nTime(_nTime), entryHeight(_entryHeight),
-    spendsCoinbase(_spendsCoinbase), sigOpCost(_sigOpsCost), lockPoints(lp)
+    spendsCoinbase(_spendsCoinbase), sigOpCost(_sigOpsCost), lockPoints(lp), m_epoch(0)
 {
     nCountWithDescendants = 1;
     nSizeWithDescendants = GetTxSize();
@@ -122,8 +122,6 @@ void CTxMemPool::UpdateTransactionsFromBlock(const std::vector<uint256> &vHashes
     // setMemPoolChildren will be updated, an assumption made in
     // UpdateForDescendants.
     for (const uint256 &hash : reverse_iterate(vHashesToUpdate)) {
-        // we cache the in-mempool children to avoid duplicate updates
-        setEntries setChildren;
         // calculate children from mapNextTx
         txiter it = mapTx.find(hash);
         if (it == mapTx.end()) {
@@ -132,17 +130,21 @@ void CTxMemPool::UpdateTransactionsFromBlock(const std::vector<uint256> &vHashes
         auto iter = mapNextTx.lower_bound(COutPoint(hash, 0));
         // First calculate the children, and update setMemPoolChildren to
         // include them, and update their setMemPoolParents to include this tx.
+        // we cache the in-mempool children to avoid duplicate updates
+        {
+            const auto epoch = GetFreshEpoch();
             for (; iter != mapNextTx.end() && iter->first->hash == hash; ++iter) {
                 const uint256 &childHash = iter->second->GetHash();
                 txiter childIter = mapTx.find(childHash);
                 assert(childIter != mapTx.end());
                 // We can skip updating entries we've encountered before or that
                 // are in the block (which are already accounted for).
-                if (setChildren.insert(childIter).second && !setAlreadyIncluded.count(childHash)) {
+                if (!visited(childIter) && !setAlreadyIncluded.count(childHash)) {
                     UpdateChild(it, childIter, true);
                     UpdateParent(childIter, it, true);
                 }
             }
+        } // release epoch guard for UpdateForDescendants
         UpdateForDescendants(it, mapMemPoolDescendantsToUpdate, setAlreadyIncluded);
     }
 }
@@ -325,7 +327,7 @@ void CTxMemPoolEntry::UpdateAncestorState(int64_t modifySize, CAmount modifyFee,
 }
 
 CTxMemPool::CTxMemPool(CBlockPolicyEstimator* estimator)
-    : nTransactionsUpdated(0), minerPolicyEstimator(estimator)
+    : nTransactionsUpdated(0), minerPolicyEstimator(estimator), m_epoch(0), m_has_epoch_guard(false)
 {
     _clear(); //lock free clear
@@ -1105,4 +1107,23 @@ void CTxMemPool::SetIsLoaded(bool loaded)
     m_is_loaded = loaded;
 }
 
+CTxMemPool::EpochGuard CTxMemPool::GetFreshEpoch() const
+{
+    return EpochGuard(*this);
+}
+
+CTxMemPool::EpochGuard::EpochGuard(const CTxMemPool& in) : pool(in)
+{
+    assert(!pool.m_has_epoch_guard);
+    ++pool.m_epoch;
+    pool.m_has_epoch_guard = true;
+}
+
+CTxMemPool::EpochGuard::~EpochGuard()
+{
+    // prevents stale results being used
+    ++pool.m_epoch;
+    pool.m_has_epoch_guard = false;
+}
+
 SaltedTxidHasher::SaltedTxidHasher() : k0(GetRand(std::numeric_limits<uint64_t>::max())), k1(GetRand(std::numeric_limits<uint64_t>::max())) {}

src/txmempool.h

@@ -129,6 +129,7 @@ public:
     int64_t GetSigOpCostWithAncestors() const { return nSigOpCostWithAncestors; }
 
     mutable size_t vTxHashesIdx; //!< Index in mempool's vTxHashes
+    mutable uint64_t m_epoch; //!< epoch when last touched, useful for graph algorithms
 };
 
 // Helpers for modifying CTxMemPool::mapTx, which is a boost multi_index.
@@ -453,6 +454,8 @@ private:
     mutable int64_t lastRollingFeeUpdate;
     mutable bool blockSinceLastRollingFeeBump;
     mutable double rollingMinimumFeeRate; //!< minimum fee to get into the pool, decreases exponentially
+    mutable uint64_t m_epoch;
+    mutable bool m_has_epoch_guard;
 
     void trackPackageRemoved(const CFeeRate& rate) EXCLUSIVE_LOCKS_REQUIRED(cs);
@@ -736,6 +739,55 @@ private:
      * removal.
      */
     void removeUnchecked(txiter entry, MemPoolRemovalReason reason) EXCLUSIVE_LOCKS_REQUIRED(cs);
+public:
+    /** EpochGuard: RAII-style guard for using epoch-based graph traversal algorithms.
+     * When walking ancestors or descendants, we generally want to avoid
+     * visiting the same transactions twice. Some traversal algorithms use
+     * std::set (or setEntries) to deduplicate the transactions we visit.
+     * However, use of std::set is algorithmically undesirable because it both
+     * adds an asymptotic factor of O(log n) to traversal cost and triggers O(n)
+     * more dynamic memory allocations.
+     * In many algorithms we can replace std::set with an internal mempool
+     * counter to track the time (or, "epoch") that we began a traversal, and
+     * check + update a per-transaction epoch for each transaction we look at to
+     * determine if that transaction has not yet been visited during the current
+     * traversal's epoch.
+     * Algorithms using std::set can be replaced on a one-by-one basis; the two
+     * techniques are not fundamentally incompatible across the codebase.
+     * Generally speaking, however, the remaining use of std::set for mempool
+     * traversal should be viewed as a TODO for replacement with an epoch-based
+     * traversal, rather than a preference for std::set over epochs in that
+     * algorithm.
+     */
+    class EpochGuard {
+        const CTxMemPool& pool;
+    public:
+        EpochGuard(const CTxMemPool& in);
+        ~EpochGuard();
+    };
+    // N.B. GetFreshEpoch modifies mutable state via the EpochGuard construction
+    // (and later destruction)
+    EpochGuard GetFreshEpoch() const EXCLUSIVE_LOCKS_REQUIRED(cs);
+
+    /** visited marks a CTxMemPoolEntry as having been traversed
+     * during the lifetime of the most recently created EpochGuard
+     * and returns false if we are the first visitor, true otherwise.
+     *
+     * An EpochGuard must be held when visited is called or an assert will be
+     * triggered.
+     */
+    bool visited(txiter it) const EXCLUSIVE_LOCKS_REQUIRED(cs) {
+        assert(m_has_epoch_guard);
+        bool ret = it->m_epoch >= m_epoch;
+        it->m_epoch = std::max(it->m_epoch, m_epoch);
+        return ret;
+    }
+
+    bool visited(Optional<txiter> it) const EXCLUSIVE_LOCKS_REQUIRED(cs) {
+        assert(m_has_epoch_guard);
+        return !it || visited(*it);
+    }
 };
 
 /**