util: Return empty vector on invalid hex encoding

This commit is contained in:
MarcoFalke 2023-02-27 13:39:17 +01:00
parent fa3549a77b
commit faab273e06
No known key found for this signature in database
GPG key ID: CE2B75697E69A548
3 changed files with 34 additions and 17 deletions

View file

@ -140,42 +140,52 @@ BOOST_AUTO_TEST_CASE(parse_hex)
// Basic test vector // Basic test vector
result = ParseHex("04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f"); result = ParseHex("04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f");
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end()); BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
result = TryParseHex<uint8_t>("04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f").value();
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
// Spaces between bytes must be supported // Spaces between bytes must be supported
result = ParseHex("12 34 56 78"); result = ParseHex("12 34 56 78");
BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78); BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
result = TryParseHex<uint8_t>("12 34 56 78").value();
BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
// Leading space must be supported (used in BerkeleyEnvironment::Salvage) // Leading space must be supported (used in BerkeleyEnvironment::Salvage)
result = ParseHex(" 89 34 56 78"); result = ParseHex(" 89 34 56 78");
BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78); BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
result = TryParseHex<uint8_t>(" 89 34 56 78").value();
BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
// Mixed case and spaces are supported // Mixed case and spaces are supported
result = ParseHex(" Ff aA "); result = ParseHex(" Ff aA ");
BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa); BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa);
result = TryParseHex<uint8_t>(" Ff aA ").value();
BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa);
// Empty string is supported // Empty string is supported
result = ParseHex(""); result = ParseHex("");
BOOST_CHECK(result.size() == 0); BOOST_CHECK(result.size() == 0);
result = TryParseHex<uint8_t>("").value();
BOOST_CHECK(result.size() == 0);
// Spaces between nibbles is treated as end // Spaces between nibbles is treated as invalid
result = ParseHex("AAF F"); BOOST_CHECK_EQUAL(ParseHex("AAF F").size(), 0);
BOOST_CHECK(result.size() == 1 && result[0] == 0xaa); BOOST_CHECK(!TryParseHex("AAF F").has_value());
// Embedded null is treated as end // Embedded null is treated as invalid
const std::string with_embedded_null{" 11 "s const std::string with_embedded_null{" 11 "s
" \0 " " \0 "
" 22 "s}; " 22 "s};
BOOST_CHECK_EQUAL(with_embedded_null.size(), 11); BOOST_CHECK_EQUAL(with_embedded_null.size(), 11);
result = ParseHex(with_embedded_null); BOOST_CHECK_EQUAL(ParseHex(with_embedded_null).size(), 0);
BOOST_CHECK(result.size() == 1 && result[0] == 0x11); BOOST_CHECK(!TryParseHex(with_embedded_null).has_value());
// Stop parsing at invalid value // Non-hex is treated as invalid
result = ParseHex("1234 invalid 1234"); BOOST_CHECK_EQUAL(ParseHex("1234 invalid 1234").size(), 0);
BOOST_CHECK(result.size() == 2 && result[0] == 0x12 && result[1] == 0x34); BOOST_CHECK(!TryParseHex("1234 invalid 1234").has_value());
// Truncated input is treated as end // Truncated input is treated as invalid
result = ParseHex("12 3"); BOOST_CHECK_EQUAL(ParseHex("12 3").size(), 0);
BOOST_CHECK(result.size() == 1 && result[0] == 0x12); BOOST_CHECK(!TryParseHex("12 3").has_value());
} }
BOOST_AUTO_TEST_CASE(util_HexStr) BOOST_AUTO_TEST_CASE(util_HexStr)

View file

@ -78,18 +78,19 @@ bool IsHexNumber(std::string_view str)
} }
template <typename Byte> template <typename Byte>
std::vector<Byte> ParseHex(std::string_view str) std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
{ {
std::vector<Byte> vch; std::vector<Byte> vch;
auto it = str.begin(); auto it = str.begin();
while (it != str.end() && it + 1 != str.end()) { while (it != str.end()) {
if (IsSpace(*it)) { if (IsSpace(*it)) {
++it; ++it;
continue; continue;
} }
auto c1 = HexDigit(*(it++)); auto c1 = HexDigit(*(it++));
if (it == str.end()) return std::nullopt;
auto c2 = HexDigit(*(it++)); auto c2 = HexDigit(*(it++));
if (c1 < 0 || c2 < 0) break; if (c1 < 0 || c2 < 0) return std::nullopt;
vch.push_back(Byte(c1 << 4) | Byte(c2)); vch.push_back(Byte(c1 << 4) | Byte(c2));
} }
return vch; return vch;

View file

@ -57,9 +57,15 @@ enum class ByteUnit : uint64_t {
* @return A new string without unsafe chars * @return A new string without unsafe chars
*/ */
std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT); std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
/** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. */ /** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. Returns nullopt on invalid input. */
template <typename Byte = std::byte>
std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
/** Like TryParseHex, but returns an empty vector on invalid input. */
template <typename Byte = uint8_t> template <typename Byte = uint8_t>
std::vector<Byte> ParseHex(std::string_view str); std::vector<Byte> ParseHex(std::string_view hex_str)
{
return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{});
}
signed char HexDigit(char c); signed char HexDigit(char c);
/* Returns true if each character in str is a hex character, and has an even /* Returns true if each character in str is a hex character, and has an even
* number of hex digits.*/ * number of hex digits.*/