Return EXIT_FAILURE on post-init fatal errors

It seems odd to return `EXIT_SUCCESS` when the node aborted
execution due to a fatal internal error or any post-init problem
that triggers an unrequested shutdown.

For example: block or coins database I/O errors, a failure to
disconnect a block, or a failure in the import thread (an error
while loading external blocks), among others.

Co-authored-by: Ryan Ofsky <ryan@ofsky.org>
This commit is contained in:
furszy 2023-05-20 10:51:17 -03:00
parent 3c06926cf2
commit 3b2c61e819
No known key found for this signature in database
GPG Key ID: 5DD23CCC686AA623
9 changed files with 32 additions and 14 deletions

View File

@ -169,7 +169,7 @@ static bool AppInit(NodeContext& node, int argc, char* argv[])
// Set this early so that parameter interactions go to console
InitLogging(args);
InitParameterInteraction(args);
if (!AppInitBasicSetup(args)) {
if (!AppInitBasicSetup(args, node.exit_status)) {
// InitError will have been called with detailed error, which ends up on console
return false;
}
@ -238,6 +238,8 @@ static bool AppInit(NodeContext& node, int argc, char* argv[])
SetSyscallSandboxPolicy(SyscallSandboxPolicy::SHUTOFF);
if (fRet) {
WaitForShutdown();
} else {
node.exit_status = EXIT_FAILURE;
}
Interrupt(node);
Shutdown(node);
@ -264,5 +266,5 @@ MAIN_FUNCTION
// Connect bitcoind signal handlers
noui_connect();
return (AppInit(node, argc, argv) ? EXIT_SUCCESS : EXIT_FAILURE);
return AppInit(node, argc, argv) ? node.exit_status.load() : EXIT_FAILURE;
}

View File

@ -800,7 +800,7 @@ std::set<BlockFilterType> g_enabled_filter_types;
std::terminate();
};
bool AppInitBasicSetup(const ArgsManager& args)
bool AppInitBasicSetup(const ArgsManager& args, std::atomic<int>& exit_status)
{
// ********************************************************* Step 1: setup
#ifdef _MSC_VER
@ -814,7 +814,7 @@ bool AppInitBasicSetup(const ArgsManager& args)
// Enable heap terminate-on-corruption
HeapSetInformation(nullptr, HeapEnableTerminationOnCorruption, nullptr, 0);
#endif
if (!InitShutdownState()) {
if (!InitShutdownState(exit_status)) {
return InitError(Untranslated("Initializing wait-for-shutdown state failed."));
}

View File

@ -38,7 +38,7 @@ void InitParameterInteraction(ArgsManager& args);
* @note This can be done before daemonization. Do not call Shutdown() if this function fails.
* @pre Parameters should be parsed and config file should be read.
*/
bool AppInitBasicSetup(const ArgsManager& args);
bool AppInitBasicSetup(const ArgsManager& args, std::atomic<int>& exit_status);
/**
* Initialization: parameter interaction.
* @note This can be done before daemonization. Do not call Shutdown() if this function fails.

View File

@ -7,7 +7,9 @@
#include <kernel/context.h>
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <functional>
#include <memory>
#include <vector>
@ -65,6 +67,7 @@ struct NodeContext {
std::unique_ptr<CScheduler> scheduler;
std::function<void()> rpc_interruption_point = [] {};
std::unique_ptr<KernelNotifications> notifications;
std::atomic<int> exit_status{EXIT_SUCCESS};
//! Declare default constructor and destructor that are not inline, so code
//! instantiating the NodeContext struct doesn't need to #include class

View File

@ -92,7 +92,7 @@ public:
uint32_t getLogCategories() override { return LogInstance().GetCategoryMask(); }
bool baseInitialize() override
{
if (!AppInitBasicSetup(args())) return false;
if (!AppInitBasicSetup(args(), Assert(context())->exit_status)) return false;
if (!AppInitParameterInteraction(args(), /*use_syscall_sandbox=*/false)) return false;
m_context->kernel = std::make_unique<kernel::Context>();

View File

@ -11,6 +11,7 @@
#include <logging.h>
#include <node/interface_ui.h>
#include <util/check.h>
#include <util/tokenpipe.h>
#include <warnings.h>
@ -20,6 +21,8 @@
#include <condition_variable>
#endif
static std::atomic<int>* g_exit_status{nullptr};
bool AbortNode(const std::string& strMessage, bilingual_str user_message)
{
SetMiscWarning(Untranslated(strMessage));
@ -28,6 +31,7 @@ bool AbortNode(const std::string& strMessage, bilingual_str user_message)
user_message = _("A fatal internal error occurred, see debug.log for details");
}
InitError(user_message);
Assert(g_exit_status)->store(EXIT_FAILURE);
StartShutdown();
return false;
}
@ -44,8 +48,9 @@ static TokenPipeEnd g_shutdown_r;
static TokenPipeEnd g_shutdown_w;
#endif
bool InitShutdownState()
bool InitShutdownState(std::atomic<int>& exit_status)
{
g_exit_status = &exit_status;
#ifndef WIN32
std::optional<TokenPipe> pipe = TokenPipe::Make();
if (!pipe) return false;

View File

@ -8,13 +8,15 @@
#include <util/translation.h> // For bilingual_str
#include <atomic>
/** Abort with a message */
bool AbortNode(const std::string& strMessage, bilingual_str user_message = bilingual_str{});
/** Initialize shutdown state. This must be called before using either StartShutdown(),
* AbortShutdown() or WaitForShutdown(). Calling ShutdownRequested() is always safe.
*/
bool InitShutdownState();
bool InitShutdownState(std::atomic<int>& exit_status);
/** Request shutdown of the application. */
void StartShutdown();

View File

@ -40,7 +40,7 @@ class AbortNodeTest(BitcoinTestFramework):
# Check that node0 aborted
self.log.info("Waiting for crash")
self.nodes[0].wait_until_stopped(timeout=5)
self.nodes[0].wait_until_stopped(timeout=5, expect_error=True)
self.log.info("Node crashed - now verifying restart fails")
self.nodes[0].assert_start_raises_init_error()

View File

@ -365,7 +365,7 @@ class TestNode():
if wait_until_stopped:
self.wait_until_stopped()
def is_node_stopped(self):
def is_node_stopped(self, expected_ret_code=None):
"""Checks whether the node has stopped.
Returns True if the node has stopped. False otherwise.
@ -377,8 +377,13 @@ class TestNode():
return False
# process has stopped. Assert that it didn't return an error code.
assert return_code == 0, self._node_msg(
"Node returned non-zero exit code (%d) when stopping" % return_code)
# unless 'expected_ret_code' is provided.
if expected_ret_code is not None:
assert return_code == expected_ret_code, self._node_msg(
"Node returned unexpected exit code (%d) vs (%d) when stopping" % (return_code, expected_ret_code))
else:
assert return_code == 0, self._node_msg(
"Node returned non-zero exit code (%d) when stopping" % return_code)
self.running = False
self.process = None
self.rpc_connected = False
@ -386,8 +391,9 @@ class TestNode():
self.log.debug("Node stopped")
return True
def wait_until_stopped(self, timeout=BITCOIND_PROC_WAIT_TIMEOUT):
wait_until_helper(self.is_node_stopped, timeout=timeout, timeout_factor=self.timeout_factor)
def wait_until_stopped(self, timeout=BITCOIND_PROC_WAIT_TIMEOUT, expect_error=False):
expected_ret_code = 1 if expect_error else None # Whether node shutdown return EXIT_FAILURE or EXIT_SUCCESS
wait_until_helper(lambda: self.is_node_stopped(expected_ret_code=expected_ret_code), timeout=timeout, timeout_factor=self.timeout_factor)
def replace_in_config(self, replacements):
"""