diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index cb6187eeca3..52880529f6f 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -13,6 +13,7 @@
 #include <utility>
 
 #include <util/feefrac.h>
+#include <util/vecdeque.h>
 
 namespace cluster_linearize {
 
@@ -415,7 +416,8 @@ public:
         };
 
         /** The queue of work items. */
-        std::vector<WorkItem> queue;
+        VecDeque<WorkItem> queue;
+        queue.reserve(std::max<size_t>(256, 2 * m_todo.Count()));
 
         // Create an initial entry with m_todo as undecided. Also use it as best if not provided,
         // so that during the work processing loop below, and during the add_fn/split_fn calls, we
@@ -445,7 +447,10 @@ public:
             // Make sure there are undecided transactions left to split on.
             if (und.None()) return;
 
-            // Actually construct a new work item on the queue.
+            // Actually construct a new work item on the queue. Due to the switch to DFS when queue
+            // space runs out (see below), we know that no reallocation of the queue should ever
+            // occur.
+            Assume(queue.size() < queue.capacity());
             queue.emplace_back(std::move(inc), std::move(und));
         };
 
@@ -479,10 +484,33 @@ public:
         };
 
         // Work processing loop.
+        //
+        // New work items are always added at the back of the queue, but items to process use a
+        // hybrid approach where they can be taken from the front or the back.
+        //
+        // Depth-first search (DFS) corresponds to always taking from the back of the queue. This
+        // is very memory-efficient (linear in the number of transactions). Breadth-first search
+        // (BFS) corresponds to always taking from the front, which potentially uses more memory
+        // (up to exponential in the transaction count), but seems to work better in practice.
+        //
+        // The approach here combines the two: use BFS until the queue grows too large, at which
+        // point we temporarily switch to DFS until the size shrinks again.
         while (!queue.empty()) {
+            // Processing the first queue item, and then using DFS for everything it gives rise to,
+            // may increase the queue size by the number of undecided elements in there, minus 1
+            // for the first queue item being removed. Thus, only when that pushes the queue over
+            // its capacity can we not process from the front (BFS), and should we use DFS.
+            while (queue.size() - 1 + queue.front().und.Count() > queue.capacity()) {
+                if (!iterations_left) break;
+                auto elem = queue.back();
+                queue.pop_back();
+                split_fn(std::move(elem));
+            }
+
+            // Process one entry from the front of the queue (BFS exploration)
             if (!iterations_left) break;
-            auto elem = queue.back();
-            queue.pop_back();
+            auto elem = queue.front();
+            queue.pop_front();
             split_fn(std::move(elem));
         }