Merge bitcoin/bitcoin#30197: fuzz: bound some miniscript operations to avoid fuzz timeouts

bc34bc2888 fuzz: limit the number of nested wrappers in descriptors (Antoine Poinsot) 8d7340105f fuzz: limit the number of sub-fragments per fragment for descriptors (Antoine Poinsot) Pull request description: Some of the logic in the miniscript module is quadratic. It only becomes an issue for very large uninteresting descriptors (like a `thresh` with 130k sub-fragments or a fragment with more than 60k nested `j:` wrappers). This PR fixes the two types of fuzz timeouts reported by Marco in https://github.com/bitcoin/bitcoin/issues/28812 by trying to pinpoint the problematic descriptors through a simple analysis of the string, without limiting the size of the string itself. This is the same approach as was adopted for limiting the depth of derivation paths. ACKs for top commit: dergoegge: utACK bc34bc2888 stickies-v: Light ACK bc34bc2888 marcofleon: Code review ACK bc34bc2888. The added comments are useful, thanks for those. Tested on the three inputs in https://github.com/bitcoin/bitcoin/issues/28812 that caused the timeouts. Tree-SHA512: 8811c7b225684c5ecc1eb1256cf39dfa60d4518161e70210086c8a01b38927481ebe747af86aa5f4803187672d43fadabcfdfbf4e3b049738d629a25143f0e77
2025-02-20 14:05:23 +01:00 · 2024-07-15 14:11:14 +01:00 · 2024-07-15 14:11:14 +01:00 · 262260ce1e
commit 262260ce1e
parent 84063a4c4c bc34bc2888
3 changed files with 91 additions and 1 deletions
--- a/src/test/fuzz/descriptor_parse.cpp
+++ b/src/test/fuzz/descriptor_parse.cpp
@ -72,6 +72,14 @@ FUZZ_TARGET(mocked_descriptor_parse, .init = initialize_mocked_descriptor_parse)
    // out strings which could correspond to a descriptor containing a too large derivation path.
    if (HasDeepDerivPath(buffer)) return;

+    // Some fragments can take a virtually unlimited number of sub-fragments (thresh, multi_a) but
+    // may perform quadratic operations on them. Limit the number of sub-fragments per fragment.
+    if (HasTooManySubFrag(buffer)) return;
+
+    // The script building logic performs quadratic copies in the number of nested wrappers. Limit
+    // the number of nested wrappers per fragment.
+    if (HasTooManyWrappers(buffer)) return;
+
    const std::string mocked_descriptor{buffer.begin(), buffer.end()};
    if (const auto descriptor = MOCKED_DESC_CONVERTER.GetDescriptor(mocked_descriptor)) {
        FlatSigningProvider signing_provider;
@ -83,8 +91,10 @@ FUZZ_TARGET(mocked_descriptor_parse, .init = initialize_mocked_descriptor_parse)

 FUZZ_TARGET(descriptor_parse, .init = initialize_descriptor_parse)
 {
-    // See comment above for rationale.
+    // See comments above for rationales.
    if (HasDeepDerivPath(buffer)) return;
+    if (HasTooManySubFrag(buffer)) return;
+    if (HasTooManyWrappers(buffer)) return;

    const std::string descriptor(buffer.begin(), buffer.end());
    FlatSigningProvider signing_provider;
--- a/src/test/fuzz/util/descriptor.cpp
+++ b/src/test/fuzz/util/descriptor.cpp
@ -4,6 +4,9 @@

 #include <test/fuzz/util/descriptor.h>

+#include <ranges>
+#include <stack>
+
 void MockedDescriptorConverter::Init() {
    // The data to use as a private key or a seed for an xprv.
    std::array<std::byte, 32> key_data{std::byte{1}};
@ -84,3 +87,59 @@ bool HasDeepDerivPath(const FuzzBufferType& buff, const int max_depth)
    }
    return false;
 }
+
+bool HasTooManySubFrag(const FuzzBufferType& buff, const int max_subs, const size_t max_nested_subs)
+{
+    // We use a stack because there may be many nested sub-frags.
+    std::stack<int> counts;
+    for (const auto& ch: buff) {
+        // The fuzzer may generate an input with a ton of parentheses. Rule out pathological cases.
+        if (counts.size() > max_nested_subs) return true;
+
+        if (ch == '(') {
+            // A new fragment was opened, create a new sub-count for it and start as one since any fragment with
+            // parentheses has at least one sub.
+            counts.push(1);
+        } else if (ch == ',' && !counts.empty()) {
+            // When encountering a comma, account for an additional sub in the last opened fragment. If it exceeds the
+            // limit, bail.
+            if (++counts.top() > max_subs) return true;
+        } else if (ch == ')' && !counts.empty()) {
+            // Fragment closed! Drop its sub count and resume to counting the number of subs for its parent.
+            counts.pop();
+        }
+    }
+    return false;
+}
+
+bool HasTooManyWrappers(const FuzzBufferType& buff, const int max_wrappers)
+{
+    // The number of nested wrappers. Nested wrappers are always characters which follow each other so we don't have to
+    // use a stack as we do above when counting the number of sub-fragments.
+    std::optional<int> count;
+
+    // We want to detect nested wrappers. A wrapper is a character prepended to a fragment, separated by a colon. There
+    // may be more than one wrapper, in which case the colon is not repeated. For instance `jjjjj:pk()`.  To count
+    // wrappers we iterate in reverse and use the colon to detect the end of a wrapper expression and count how many
+    // characters there are since the beginning of the expression. We stop counting when we encounter a character
+    // indicating the beginning of a new expression.
+    for (const auto ch: buff | std::views::reverse) {
+        // A colon, start counting.
+        if (ch == ':') {
+            // The colon itself is not a wrapper so we start at 0.
+            count = 0;
+        } else if (count) {
+            // If we are counting wrappers, stop when we crossed the beginning of the wrapper expression. Otherwise keep
+            // counting and bail if we reached the limit.
+            // A wrapper may only ever occur as the first sub of a descriptor/miniscript expression ('('), as the
+            // first Taproot leaf in a pair ('{') or as the nth sub in each case (',').
+            if (ch == ',' || ch == '(' || ch == '{') {
+                count.reset();
+            } else if (++*count > max_wrappers) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
--- a/src/test/fuzz/util/descriptor.h
+++ b/src/test/fuzz/util/descriptor.h
@ -55,4 +55,25 @@ constexpr int MAX_DEPTH{2};
 */
 bool HasDeepDerivPath(const FuzzBufferType& buff, const int max_depth = MAX_DEPTH);

+//! Default maximum number of sub-fragments.
+constexpr int MAX_SUBS{1'000};
+//! Maximum number of nested sub-fragments we'll allow in a descriptor.
+constexpr size_t MAX_NESTED_SUBS{10'000};
+
+/**
+ * Whether the buffer, if it represents a valid descriptor, contains a fragment with more
+ * sub-fragments than the given maximum.
+ */
+bool HasTooManySubFrag(const FuzzBufferType& buff, const int max_subs = MAX_SUBS,
+                       const size_t max_nested_subs = MAX_NESTED_SUBS);
+
+//! Default maximum number of wrappers per fragment.
+constexpr int MAX_WRAPPERS{100};
+
+/**
+ * Whether the buffer, if it represents a valid descriptor, contains a fragment with more
+ * wrappers than the given maximum.
+ */
+bool HasTooManyWrappers(const FuzzBufferType& buff, const int max_wrappers = MAX_WRAPPERS);
+
 #endif // BITCOIN_TEST_FUZZ_UTIL_DESCRIPTOR_H