diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index cb6187eeca3..52880529f6f 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -13,6 +13,7 @@ #include #include +#include namespace cluster_linearize { @@ -415,7 +416,8 @@ public: }; /** The queue of work items. */ - std::vector queue; + VecDeque queue; + queue.reserve(std::max(256, 2 * m_todo.Count())); // Create an initial entry with m_todo as undecided. Also use it as best if not provided, // so that during the work processing loop below, and during the add_fn/split_fn calls, we @@ -445,7 +447,10 @@ public: // Make sure there are undecided transactions left to split on. if (und.None()) return; - // Actually construct a new work item on the queue. + // Actually construct a new work item on the queue. Due to the switch to DFS when queue + // space runs out (see below), we know that no reallocation of the queue should ever + // occur. + Assume(queue.size() < queue.capacity()); queue.emplace_back(std::move(inc), std::move(und)); }; @@ -479,10 +484,33 @@ public: }; // Work processing loop. + // + // New work items are always added at the back of the queue, but items to process use a + // hybrid approach where they can be taken from the front or the back. + // + // Depth-first search (DFS) corresponds to always taking from the back of the queue. This + // is very memory-efficient (linear in the number of transactions). Breadth-first search + // (BFS) corresponds to always taking from the front, which potentially uses more memory + // (up to exponential in the transaction count), but seems to work better in practice. + // + // The approach here combines the two: use BFS until the queue grows too large, at which + // point we temporarily switch to DFS until the size shrinks again. while (!queue.empty()) { + // Processing the first queue item, and then using DFS for everything it gives rise to, + // may increase the queue size by the number of undecided elements in there, minus 1 + // for the first queue item being removed. Thus, only when that pushes the queue over + // its capacity can we not process from the front (BFS), and should we use DFS. + while (queue.size() - 1 + queue.front().und.Count() > queue.capacity()) { + if (!iterations_left) break; + auto elem = queue.back(); + queue.pop_back(); + split_fn(std::move(elem)); + } + + // Process one entry from the front of the queue (BFS exploration) if (!iterations_left) break; - auto elem = queue.back(); - queue.pop_back(); + auto elem = queue.front(); + queue.pop_front(); split_fn(std::move(elem)); }