Merge #16887: Abstract out some of the descriptor Span-parsing helpers

bb36372b8f test: add unit tests for Span-parsing helpers (Sebastian Falbesoner)
5e69aeec3f Add documenting comments to spanparsing.h (Pieter Wuille)
230d43fdbc Abstract out some of the descriptor Span-parsing helpers (Pieter Wuille)

Pull request description:

  As suggested here: https://github.com/bitcoin/bitcoin/pull/16800#issuecomment-531605482.

  This moves the Span parsing functions out of the descriptor module, making them more easily usable for other parsers (in particular, in preparation for miniscript parsing).

ACKs for top commit:
  MarcoFalke:
    ACK bb36372b8f

Tree-SHA512: b5c5c11a9bc3f0a1c2c4cfa22755654ecfb8d4b69da0dc1fb9f04e1556dc0f6ffd87ad153600963279ac465d587d7971b53d240ced802d12693682411ac73deb
This commit is contained in:
MarcoFalke 2019-10-10 12:32:57 -04:00
commit befdef8aee
No known key found for this signature in database
GPG Key ID: D2EA4850E7528B25
5 changed files with 252 additions and 57 deletions

View File

@ -208,6 +208,7 @@ BITCOIN_CORE_H = \
util/bytevectorhash.h \
util/error.h \
util/fees.h \
util/spanparsing.h \
util/system.h \
util/macros.h \
util/memory.h \
@ -505,6 +506,7 @@ libbitcoin_util_a_SOURCES = \
util/moneystr.cpp \
util/rbf.cpp \
util/threadnames.cpp \
util/spanparsing.cpp \
util/strencodings.cpp \
util/string.cpp \
util/time.cpp \

View File

@ -11,6 +11,7 @@
#include <span.h>
#include <util/bip32.h>
#include <util/spanparsing.h>
#include <util/system.h>
#include <util/strencodings.h>
@ -640,63 +641,6 @@ enum class ParseScriptContext {
P2WSH,
};
/** Parse a constant. If successful, sp is updated to skip the constant and return true. */
bool Const(const std::string& str, Span<const char>& sp)
{
if ((size_t)sp.size() >= str.size() && std::equal(str.begin(), str.end(), sp.begin())) {
sp = sp.subspan(str.size());
return true;
}
return false;
}
/** Parse a function call. If successful, sp is updated to be the function's argument(s). */
bool Func(const std::string& str, Span<const char>& sp)
{
if ((size_t)sp.size() >= str.size() + 2 && sp[str.size()] == '(' && sp[sp.size() - 1] == ')' && std::equal(str.begin(), str.end(), sp.begin())) {
sp = sp.subspan(str.size() + 1, sp.size() - str.size() - 2);
return true;
}
return false;
}
/** Return the expression that sp begins with, and update sp to skip it. */
Span<const char> Expr(Span<const char>& sp)
{
int level = 0;
auto it = sp.begin();
while (it != sp.end()) {
if (*it == '(') {
++level;
} else if (level && *it == ')') {
--level;
} else if (level == 0 && (*it == ')' || *it == ',')) {
break;
}
++it;
}
Span<const char> ret = sp.first(it - sp.begin());
sp = sp.subspan(it - sp.begin());
return ret;
}
/** Split a string on every instance of sep, returning a vector. */
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep)
{
std::vector<Span<const char>> ret;
auto it = sp.begin();
auto start = it;
while (it != sp.end()) {
if (*it == sep) {
ret.emplace_back(start, it);
start = it + 1;
}
++it;
}
ret.emplace_back(start, it);
return ret;
}
/** Parse a key path, being passed a split list of elements (the first element is ignored). */
NODISCARD bool ParseKeyPath(const std::vector<Span<const char>>& split, KeyPath& out, std::string& error)
{
@ -723,6 +667,8 @@ NODISCARD bool ParseKeyPath(const std::vector<Span<const char>>& split, KeyPath&
/** Parse a public key that excludes origin information. */
std::unique_ptr<PubkeyProvider> ParsePubkeyInner(const Span<const char>& sp, bool permit_uncompressed, FlatSigningProvider& out, std::string& error)
{
using namespace spanparsing;
auto split = Split(sp, '/');
std::string str(split[0].begin(), split[0].end());
if (str.size() == 0) {
@ -782,6 +728,8 @@ std::unique_ptr<PubkeyProvider> ParsePubkeyInner(const Span<const char>& sp, boo
/** Parse a public key including origin information (if enabled). */
std::unique_ptr<PubkeyProvider> ParsePubkey(const Span<const char>& sp, bool permit_uncompressed, FlatSigningProvider& out, std::string& error)
{
using namespace spanparsing;
auto origin_split = Split(sp, ']');
if (origin_split.size() > 2) {
error = "Multiple ']' characters found for a single pubkey";
@ -816,6 +764,8 @@ std::unique_ptr<PubkeyProvider> ParsePubkey(const Span<const char>& sp, bool per
/** Parse a script in a particular context. */
std::unique_ptr<DescriptorImpl> ParseScript(Span<const char>& sp, ParseScriptContext ctx, FlatSigningProvider& out, std::string& error)
{
using namespace spanparsing;
auto expr = Expr(sp);
bool sorted_multi = false;
if (Func("pk", expr)) {
@ -1012,6 +962,8 @@ std::unique_ptr<DescriptorImpl> InferScript(const CScript& script, ParseScriptCo
/** Check a descriptor checksum, and update desc to be the checksum-less part. */
bool CheckChecksum(Span<const char>& sp, bool require_checksum, std::string& error, std::string* out_checksum = nullptr)
{
using namespace spanparsing;
auto check_split = Split(sp, '#');
if (check_split.size() > 2) {
error = "Multiple '#' symbols";

View File

@ -12,6 +12,7 @@
#include <util/strencodings.h>
#include <util/string.h>
#include <util/time.h>
#include <util/spanparsing.h>
#include <stdint.h>
#include <thread>
@ -1572,4 +1573,127 @@ BOOST_AUTO_TEST_CASE(test_Capitalize)
BOOST_CHECK_EQUAL(Capitalize("\x00\xfe\xff"), "\x00\xfe\xff");
}
static std::string SpanToStr(Span<const char>& span)
{
return std::string(span.begin(), span.end());
}
BOOST_AUTO_TEST_CASE(test_spanparsing)
{
using namespace spanparsing;
std::string input;
Span<const char> sp;
bool success;
// Const(...): parse a constant, update span to skip it if successful
input = "MilkToastHoney";
sp = MakeSpan(input);
success = Const("", sp); // empty
BOOST_CHECK(success);
BOOST_CHECK_EQUAL(SpanToStr(sp), "MilkToastHoney");
success = Const("Milk", sp);
BOOST_CHECK(success);
BOOST_CHECK_EQUAL(SpanToStr(sp), "ToastHoney");
success = Const("Bread", sp);
BOOST_CHECK(!success);
success = Const("Toast", sp);
BOOST_CHECK(success);
BOOST_CHECK_EQUAL(SpanToStr(sp), "Honey");
success = Const("Honeybadger", sp);
BOOST_CHECK(!success);
success = Const("Honey", sp);
BOOST_CHECK(success);
BOOST_CHECK_EQUAL(SpanToStr(sp), "");
// Func(...): parse a function call, update span to argument if successful
input = "Foo(Bar(xy,z()))";
sp = MakeSpan(input);
success = Func("FooBar", sp);
BOOST_CHECK(!success);
success = Func("Foo(", sp);
BOOST_CHECK(!success);
success = Func("Foo", sp);
BOOST_CHECK(success);
BOOST_CHECK_EQUAL(SpanToStr(sp), "Bar(xy,z())");
success = Func("Bar", sp);
BOOST_CHECK(success);
BOOST_CHECK_EQUAL(SpanToStr(sp), "xy,z()");
success = Func("xy", sp);
BOOST_CHECK(!success);
// Expr(...): return expression that span begins with, update span to skip it
Span<const char> result;
input = "(n*(n-1))/2";
sp = MakeSpan(input);
result = Expr(sp);
BOOST_CHECK_EQUAL(SpanToStr(result), "(n*(n-1))/2");
BOOST_CHECK_EQUAL(SpanToStr(sp), "");
input = "foo,bar";
sp = MakeSpan(input);
result = Expr(sp);
BOOST_CHECK_EQUAL(SpanToStr(result), "foo");
BOOST_CHECK_EQUAL(SpanToStr(sp), ",bar");
input = "(aaaaa,bbbbb()),c";
sp = MakeSpan(input);
result = Expr(sp);
BOOST_CHECK_EQUAL(SpanToStr(result), "(aaaaa,bbbbb())");
BOOST_CHECK_EQUAL(SpanToStr(sp), ",c");
input = "xyz)foo";
sp = MakeSpan(input);
result = Expr(sp);
BOOST_CHECK_EQUAL(SpanToStr(result), "xyz");
BOOST_CHECK_EQUAL(SpanToStr(sp), ")foo");
input = "((a),(b),(c)),xxx";
sp = MakeSpan(input);
result = Expr(sp);
BOOST_CHECK_EQUAL(SpanToStr(result), "((a),(b),(c))");
BOOST_CHECK_EQUAL(SpanToStr(sp), ",xxx");
// Split(...): split a string on every instance of sep, return vector
std::vector<Span<const char>> results;
input = "xxx";
results = Split(MakeSpan(input), 'x');
BOOST_CHECK_EQUAL(results.size(), 4);
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "");
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "");
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
input = "one#two#three";
results = Split(MakeSpan(input), '-');
BOOST_CHECK_EQUAL(results.size(), 1);
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "one#two#three");
input = "one#two#three";
results = Split(MakeSpan(input), '#');
BOOST_CHECK_EQUAL(results.size(), 3);
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "one");
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "two");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "three");
input = "*foo*bar*";
results = Split(MakeSpan(input), '*');
BOOST_CHECK_EQUAL(results.size(), 4);
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "");
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "foo");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "bar");
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
}
BOOST_AUTO_TEST_SUITE_END()

67
src/util/spanparsing.cpp Normal file
View File

@ -0,0 +1,67 @@
// Copyright (c) 2018 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <util/spanparsing.h>
#include <span.h>
#include <string>
#include <vector>
namespace spanparsing {
bool Const(const std::string& str, Span<const char>& sp)
{
if ((size_t)sp.size() >= str.size() && std::equal(str.begin(), str.end(), sp.begin())) {
sp = sp.subspan(str.size());
return true;
}
return false;
}
bool Func(const std::string& str, Span<const char>& sp)
{
if ((size_t)sp.size() >= str.size() + 2 && sp[str.size()] == '(' && sp[sp.size() - 1] == ')' && std::equal(str.begin(), str.end(), sp.begin())) {
sp = sp.subspan(str.size() + 1, sp.size() - str.size() - 2);
return true;
}
return false;
}
Span<const char> Expr(Span<const char>& sp)
{
int level = 0;
auto it = sp.begin();
while (it != sp.end()) {
if (*it == '(') {
++level;
} else if (level && *it == ')') {
--level;
} else if (level == 0 && (*it == ')' || *it == ',')) {
break;
}
++it;
}
Span<const char> ret = sp.first(it - sp.begin());
sp = sp.subspan(it - sp.begin());
return ret;
}
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep)
{
std::vector<Span<const char>> ret;
auto it = sp.begin();
auto start = it;
while (it != sp.end()) {
if (*it == sep) {
ret.emplace_back(start, it);
start = it + 1;
}
++it;
}
ret.emplace_back(start, it);
return ret;
}
} // namespace spanparsing

50
src/util/spanparsing.h Normal file
View File

@ -0,0 +1,50 @@
// Copyright (c) 2018 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_UTIL_SPANPARSING_H
#define BITCOIN_UTIL_SPANPARSING_H
#include <span.h>
#include <string>
#include <vector>
namespace spanparsing {
/** Parse a constant.
*
* If sp's initial part matches str, sp is updated to skip that part, and true is returned.
* Otherwise sp is unmodified and false is returned.
*/
bool Const(const std::string& str, Span<const char>& sp);
/** Parse a function call.
*
* If sp's initial part matches str + "(", and sp ends with ")", sp is updated to be the
* section between the braces, and true is returned. Otherwise sp is unmodified and false
* is returned.
*/
bool Func(const std::string& str, Span<const char>& sp);
/** Extract the expression that sp begins with.
*
* This function will return the initial part of sp, up to (but not including) the first
* comma or closing brace, skipping ones that are surrounded by braces. So for example,
* for "foo(bar(1),2),3" the initial part "foo(bar(1),2)" will be returned. sp will be
* updated to skip the initial part that is returned.
*/
Span<const char> Expr(Span<const char>& sp);
/** Split a string on every instance of sep, returning a vector.
*
* If sep does not occur in sp, a singleton with the entirety of sp is returned.
*
* Note that this function does not care about braces, so splitting
* "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
*/
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep);
} // namespace spanparsing
#endif // BITCOIN_UTIL_SPANPARSING_H