Merge bitcoin/bitcoin#30546: util: Use consteval checked format string in FatalErrorf, LogConnectFailure

fa5bc450d5 util: Use compile-time check for LogConnectFailure (MarcoFalke)
fa7087b896 util: Use compile-time check for FatalErrorf (MarcoFalke)
faa62c0112 util: Add ConstevalFormatString (MarcoFalke)
fae7b83eb5 lint: Remove forbidden functions from lint-format-strings.py (MarcoFalke)

Pull request description:

  The `test/lint/lint-format-strings.py` was designed to count the number of format specifiers and assert that they are equal to the number of parameters passed to the format function. The goal seems reasonable, but the implementation has many problems:

  * It is written in Python, meaning that C++ code can not be parsed correctly. Currently it relies on brittle regex and string parsing.
  * Apart from the parsing errors, there are also many logic errors. For example, `count_format_specifiers` allows a mix of positional specifiers and non-positional specifiers, which can lead to runtime format bugs. Also, `count_format_specifiers` silently skipped over "special" format specifiers, which are valid in tinyformat, which again can lead to runtime format bugs being undetected.
  * The brittle logic has a history of breaking in pull requests that are otherwise fine. This causes the CI to fail and the pull request being blocked from progress until the bug in the linter is fixed, or the code is rewritten to work around the bug.
  * It is only run in the CI, or when the developer invokes the script. It would be better if the developer got the error message at compile-time, directly when writing the code.

  Fix all issues by using a `consteval` checked format string in `FatalErrorf` and `LogConnectFailure`.

  This is the first step toward https://github.com/bitcoin/bitcoin/issues/30530 and a follow-up will apply the approach to the other places.

ACKs for top commit:
  stickies-v:
    re-ACK fa5bc450d5
  l0rinc:
    ACK fa5bc450d5
  hodlinator:
    ACK fa5bc450d5
  ryanofsky:
    Code review ACK fa5bc450d5

Tree-SHA512: d6189096b16083143687ed1b1559cf4f92f97dd87bc5d00673e44f4fb9fce7bb7b215cfdfc39b6e6a24f0b75a79a03ededce966639e554f7172e1fc22cf015ae
This commit is contained in:
Ryan Ofsky 2024-09-12 12:37:36 -04:00
commit e46bebb444
No known key found for this signature in database
GPG Key ID: 46800E30FC748A66
8 changed files with 166 additions and 20 deletions

View File

@ -1,4 +1,4 @@
// Copyright (c) 2017-2022 The Bitcoin Core developers
// Copyright (c) 2017-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
@ -14,6 +14,7 @@
#include <node/database_args.h>
#include <node/interface_ui.h>
#include <tinyformat.h>
#include <util/string.h>
#include <util/thread.h>
#include <util/translation.h>
#include <validation.h> // For g_chainman
@ -27,7 +28,7 @@ constexpr auto SYNC_LOG_INTERVAL{30s};
constexpr auto SYNC_LOCATOR_WRITE_INTERVAL{30s};
template <typename... Args>
void BaseIndex::FatalErrorf(const char* fmt, const Args&... args)
void BaseIndex::FatalErrorf(util::ConstevalFormatString<sizeof...(Args)> fmt, const Args&... args)
{
auto message = tfm::format(fmt, args...);
node::AbortNode(m_chain->context()->shutdown, m_chain->context()->exit_status, Untranslated(message), m_chain->context()->warnings.get());

View File

@ -1,4 +1,4 @@
// Copyright (c) 2017-2022 The Bitcoin Core developers
// Copyright (c) 2017-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
@ -7,6 +7,7 @@
#include <dbwrapper.h>
#include <interfaces/chain.h>
#include <util/string.h>
#include <util/threadinterrupt.h>
#include <validationinterface.h>
@ -94,7 +95,7 @@ private:
virtual bool AllowPrune() const = 0;
template <typename... Args>
void FatalErrorf(const char* fmt, const Args&... args);
void FatalErrorf(util::ConstevalFormatString<sizeof...(Args)> fmt, const Args&... args);
protected:
std::unique_ptr<interfaces::Chain> m_chain;

View File

@ -557,7 +557,8 @@ std::unique_ptr<Sock> CreateSockOS(int domain, int type, int protocol)
std::function<std::unique_ptr<Sock>(int, int, int)> CreateSock = CreateSockOS;
template<typename... Args>
static void LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args) {
static void LogConnectFailure(bool manual_connection, util::ConstevalFormatString<sizeof...(Args)> fmt, const Args&... args)
{
std::string error_message = tfm::format(fmt, args...);
if (manual_connection) {
LogPrintf("%s\n", error_message);

View File

@ -132,6 +132,7 @@ add_executable(test_bitcoin
txvalidation_tests.cpp
txvalidationcache_tests.cpp
uint256_tests.cpp
util_string_tests.cpp
util_tests.cpp
util_threadnames_tests.cpp
validation_block_tests.cpp

View File

@ -0,0 +1,86 @@
// Copyright (c) 2024-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <util/string.h>
#include <boost/test/unit_test.hpp>
using namespace util;
BOOST_AUTO_TEST_SUITE(util_string_tests)
// Helper to allow compile-time sanity checks while providing the number of
// args directly. Normally PassFmt<sizeof...(Args)> would be used.
template <unsigned NumArgs>
inline void PassFmt(util::ConstevalFormatString<NumArgs> fmt)
{
// This was already executed at compile-time, but is executed again at run-time to avoid -Wunused.
decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt);
}
template <unsigned WrongNumArgs>
inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error)
{
using ErrType = const char*;
auto check_throw{[error](const ErrType& str) { return str == error; }};
BOOST_CHECK_EXCEPTION(util::ConstevalFormatString<WrongNumArgs>::Detail_CheckNumFormatSpecifiers(wrong_fmt), ErrType, check_throw);
}
BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
{
PassFmt<0>("");
PassFmt<0>("%%");
PassFmt<1>("%s");
PassFmt<0>("%%s");
PassFmt<0>("s%%");
PassFmt<1>("%%%s");
PassFmt<1>("%s%%");
PassFmt<0>(" 1$s");
PassFmt<1>("%1$s");
PassFmt<1>("%1$s%1$s");
PassFmt<2>("%2$s");
PassFmt<2>("%2$s 4$s %2$s");
PassFmt<129>("%129$s 999$s %2$s");
PassFmt<1>("%02d");
PassFmt<1>("%+2s");
PassFmt<1>("%.6i");
PassFmt<1>("%5.2f");
PassFmt<1>("%#x");
PassFmt<1>("%1$5i");
PassFmt<1>("%1$-5i");
PassFmt<1>("%1$.5i");
// tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'.
PassFmt<1>("%123%");
PassFmt<1>("%123%s");
PassFmt<1>("%_");
PassFmt<1>("%\n");
// The `*` specifier behavior is unsupported and can lead to runtime
// errors when used in a ConstevalFormatString. Please refer to the
// note in the ConstevalFormatString docs.
PassFmt<1>("%*c");
PassFmt<2>("%2$*3$d");
PassFmt<1>("%.*f");
auto err_mix{"Format specifiers must be all positional or all non-positional!"};
FailFmtWithError<1>("%s%1$s", err_mix);
auto err_num{"Format specifier count must match the argument count!"};
FailFmtWithError<1>("", err_num);
FailFmtWithError<0>("%s", err_num);
FailFmtWithError<2>("%s", err_num);
FailFmtWithError<0>("%1$s", err_num);
FailFmtWithError<2>("%1$s", err_num);
auto err_0_pos{"Positional format specifier must have position of at least 1"};
FailFmtWithError<1>("%$s", err_0_pos);
FailFmtWithError<1>("%$", err_0_pos);
FailFmtWithError<0>("%0$", err_0_pos);
FailFmtWithError<0>("%0$s", err_0_pos);
auto err_term{"Format specifier incorrectly terminated by end of string"};
FailFmtWithError<1>("%", err_term);
FailFmtWithError<1>("%1$", err_term);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -1,4 +1,4 @@
// Copyright (c) 2019-2022 The Bitcoin Core developers
// Copyright (c) 2019-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
@ -6,6 +6,7 @@
#define BITCOIN_UTIL_STRING_H
#include <span.h>
#include <tinyformat.h>
#include <array>
#include <cstdint>
@ -17,6 +18,67 @@
#include <vector>
namespace util {
/**
* @brief A wrapper for a compile-time partially validated format string
*
* This struct can be used to enforce partial compile-time validation of format
* strings, to reduce the likelihood of tinyformat throwing exceptions at
* run-time. Validation is partial to try and prevent the most common errors
* while avoiding re-implementing the entire parsing logic.
*
* @note Counting of `*` dynamic width and precision fields (such as `%*c`,
* `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as
* they are not used in the codebase. Usage of these fields is not counted and
* can lead to run-time exceptions. Code wanting to use the `*` specifier can
* side-step this struct and call tinyformat directly.
*/
template <unsigned num_params>
struct ConstevalFormatString {
const char* const fmt;
consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); }
constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str)
{
unsigned count_normal{0}; // Number of "normal" specifiers, like %s
unsigned count_pos{0}; // Max number in positional specifier, like %8$s
for (auto it{str.begin()}; it < str.end();) {
if (*it != '%') {
++it;
continue;
}
if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
if (*it == '%') {
// Percent escape: %%
++it;
continue;
}
unsigned maybe_num{0};
while ('0' <= *it && *it <= '9') {
maybe_num *= 10;
maybe_num += *it - '0';
++it;
};
if (*it == '$') {
// Positional specifier, like %8$s
if (maybe_num == 0) throw "Positional format specifier must have position of at least 1";
count_pos = std::max(count_pos, maybe_num);
if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
} else {
// Non-positional specifier, like %s
++count_normal;
++it;
}
// The remainder "[flags][width][.precision][length]type" of the
// specifier is not checked. Parsing continues with the next '%'.
}
if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!";
unsigned count{count_normal | count_pos};
if (num_params != count) throw "Format specifier count must match the argument count!";
}
};
void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute);
/** Split a string on any char found in separators, returning a vector.
@ -173,4 +235,12 @@ template <typename T1, size_t PREFIX_LEN>
}
} // namespace util
namespace tinyformat {
template <typename... Args>
std::string format(util::ConstevalFormatString<sizeof...(Args)> fmt, const Args&... args)
{
return format(fmt.fmt, args...);
}
} // namespace tinyformat
#endif // BITCOIN_UTIL_STRING_H

View File

@ -16,10 +16,7 @@ import re
import sys
FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS = [
'FatalErrorf,0',
'fprintf,1',
'tfm::format,1', # Assuming tfm::::format(std::ostream&, ...
'LogConnectFailure,1',
'LogError,0',
'LogWarning,0',
'LogInfo,0',
@ -27,14 +24,7 @@ FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS = [
'LogTrace,1',
'LogPrintf,0',
'LogPrintLevel,2',
'printf,0',
'snprintf,2',
'sprintf,1',
'strprintf,0',
'vfprintf,1',
'vprintf,1',
'vsnprintf,1',
'vsprintf,1',
'WalletLogPrintf,0',
]
RUN_LINT_FILE = 'test/lint/run-lint-format-strings.py'

View File

@ -13,10 +13,6 @@ import re
import sys
FALSE_POSITIVES = [
("src/dbwrapper.cpp", "vsnprintf(p, limit - p, format, backup_ap)"),
("src/index/base.cpp", "FatalErrorf(const char* fmt, const Args&... args)"),
("src/index/base.h", "FatalErrorf(const char* fmt, const Args&... args)"),
("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"),
("src/clientversion.cpp", "strprintf(_(COPYRIGHT_HOLDERS).translated, COPYRIGHT_HOLDERS_SUBSTITUTION)"),
("src/test/translation_tests.cpp", "strprintf(format, arg)"),
("src/validationinterface.cpp", "LogDebug(BCLog::VALIDATION, fmt \"\\n\", __VA_ARGS__)"),