From 8fa04cb13a4bf62a42b18e7c0c79151d13cb494b Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:57:39 +0100 Subject: [PATCH 1/6] qa refactor: wait_for_rpc_connection - Treat OSErrors the same ConnectionResetError is an OSError as well (ECONNRESET), no reason to have a separate except-block for it. Also improves comments for other exceptions and make condition above more Pythonic. --- test/functional/test_framework/test_node.py | 24 ++++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index f7d6ba78d23..329dc58fb01 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -311,23 +311,21 @@ class TestNode(): self.rpc_connected = True self.url = self.rpc.rpc_url return - except JSONRPCException as e: # Initialization phase + except JSONRPCException as e: + # Suppress these in favor of a later outcome (success, FailedToStartError, or timeout). # -28 RPC in warmup - # -342 Service unavailable, RPC server started but is shutting down due to error - if e.error['code'] != -28 and e.error['code'] != -342: + # -342 Service unavailable, could be starting up or shutting down + if e.error['code'] not in [-28, -342]: raise # unknown JSON RPC exception - except ConnectionResetError: - # This might happen when the RPC server is in warmup, but shut down before the call to getblockcount - # succeeds. Try again to properly raise the FailedToStartError - pass except OSError as e: - if e.errno == errno.ETIMEDOUT: - pass # Treat identical to ConnectionResetError - elif e.errno == errno.ECONNREFUSED: - pass # Port not yet open? - else: + # Suppress similarly to the above JSONRPCException errors. + if e.errno not in [ errno.ECONNRESET, # This might happen when the RPC server is in warmup, + # but shut down before the call to getblockcount succeeds. + errno.ETIMEDOUT, # Treat identical to ECONNRESET + errno.ECONNREFUSED ]: # Port not yet open? raise # unknown OS error - except ValueError as e: # cookie file not found and no rpcuser or rpcpassword; bitcoind is still starting + except ValueError as e: + # Suppress if cookie file isn't generated yet and no rpcuser or rpcpassword; bitcoind may be starting. if "No RPC credentials" not in str(e): raise time.sleep(1.0 / poll_per_s) From 20e19e866ba3bea63b4152981f5a1176b58678a1 Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:56:16 +0100 Subject: [PATCH 2/6] qa: Include ignored errors in RPC connection timeout When an RPC connection attempt with bitcoind times out, include which ignored errors occurred in the exception message. May provide clues of what has gone wrong. --- test/functional/test_framework/test_node.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 329dc58fb01..9f5369fe3cb 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2017-2022 The Bitcoin Core developers +# Copyright (c) 2017-present The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. """Class for bitcoind node under test""" @@ -265,6 +265,8 @@ class TestNode(): """Sets up an RPC connection to the bitcoind process. Returns False if unable to connect.""" # Poll at a rate of four times per second poll_per_s = 4 + suppressed_errors = collections.defaultdict(int) + latest_error = "" for _ in range(poll_per_s * self.rpc_timeout): if self.process.poll() is not None: # Attach abrupt shutdown error/s to the exception message @@ -317,6 +319,8 @@ class TestNode(): # -342 Service unavailable, could be starting up or shutting down if e.error['code'] not in [-28, -342]: raise # unknown JSON RPC exception + suppressed_errors[f"JSONRPCException {e.error['code']}"] += 1 + latest_error = repr(e) except OSError as e: # Suppress similarly to the above JSONRPCException errors. if e.errno not in [ errno.ECONNRESET, # This might happen when the RPC server is in warmup, @@ -324,12 +328,16 @@ class TestNode(): errno.ETIMEDOUT, # Treat identical to ECONNRESET errno.ECONNREFUSED ]: # Port not yet open? raise # unknown OS error + suppressed_errors[f"OSError {errno.errorcode[e.errno]}"] += 1 + latest_error = repr(e) except ValueError as e: # Suppress if cookie file isn't generated yet and no rpcuser or rpcpassword; bitcoind may be starting. if "No RPC credentials" not in str(e): raise + suppressed_errors["missing_credentials"] += 1 + latest_error = repr(e) time.sleep(1.0 / poll_per_s) - self._raise_assertion_error("Unable to connect to bitcoind after {}s".format(self.rpc_timeout)) + self._raise_assertion_error(f"Unable to connect to bitcoind after {self.rpc_timeout}s (ignored errors: {str(dict(suppressed_errors))}, latest error: {latest_error})") def wait_for_cookie_credentials(self): """Ensures auth cookie credentials can be read, e.g. for testing CLI with -rpcwait before RPC connection is up.""" From 2347ce8f018b1b9b8d6ef1933a778023fd228b7f Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Wed, 29 Jan 2025 14:37:07 +0100 Subject: [PATCH 3/6] qa: Workaround Windows issue with socket timeouts --- test/functional/test_framework/test_node.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 9f5369fe3cb..63c00f3d614 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -322,13 +322,18 @@ class TestNode(): suppressed_errors[f"JSONRPCException {e.error['code']}"] += 1 latest_error = repr(e) except OSError as e: + error_num = e.errno + # Workaround issue observed on Windows, Python v3.13.1 where socket timeouts don't have errno set. + if error_num is None: + assert isinstance(e, TimeoutError) + error_num = errno.ETIMEDOUT # Suppress similarly to the above JSONRPCException errors. - if e.errno not in [ errno.ECONNRESET, # This might happen when the RPC server is in warmup, - # but shut down before the call to getblockcount succeeds. - errno.ETIMEDOUT, # Treat identical to ECONNRESET - errno.ECONNREFUSED ]: # Port not yet open? + if error_num not in [ errno.ECONNRESET, # This might happen when the RPC server is in warmup, + # but shut down before the call to getblockcount succeeds. + errno.ETIMEDOUT, # Treat identical to ECONNRESET + errno.ECONNREFUSED ]: # Port not yet open? raise # unknown OS error - suppressed_errors[f"OSError {errno.errorcode[e.errno]}"] += 1 + suppressed_errors[f"OSError {errno.errorcode[error_num]}"] += 1 latest_error = repr(e) except ValueError as e: # Suppress if cookie file isn't generated yet and no rpcuser or rpcpassword; bitcoind may be starting. From 8c3118f11694a3c3644df54620fa9ca066142de4 Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:18:24 +0100 Subject: [PATCH 4/6] qa: Avoid calling stop-RPC if not connected Avoid calling the non-functioning RPC interface which would trigger knock-on exceptions. Instead raise an exception describing the root cause of the error. (No longer log and swallow http.client.CannotSendRequest). --- test/functional/test_framework/test_node.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 63c00f3d614..d6d5c4be434 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -8,7 +8,6 @@ import contextlib import decimal import errno from enum import Enum -import http.client import json import logging import os @@ -307,10 +306,11 @@ class TestNode(): # overhead is trivial, and the added guarantees are worth # the minimal performance cost. self.log.debug("RPC successfully started") + # Set rpc_connected even if we are in use_cli mode so that we know we can call self.stop() if needed. + self.rpc_connected = True if self.use_cli: return self.rpc = rpc - self.rpc_connected = True self.url = self.rpc.rpc_url return except JSONRPCException as e: @@ -400,14 +400,14 @@ class TestNode(): if not self.running: return self.log.debug("Stopping node") - try: + if self.rpc_connected: # Do not use wait argument when testing older nodes, e.g. in wallet_backwards_compatibility.py if self.version_is_at_least(180000): self.stop(wait=wait) else: self.stop() - except http.client.CannotSendRequest: - self.log.exception("Unable to stop node.") + else: + raise RuntimeError(self._node_msg(f"Cannot call stop-RPC as we don't have an RPC connection to process {self.process.pid}, wait_for_rpc_connection() failed or was never called.")) # If there are any running perf processes, stop them. for profile_name in tuple(self.perf_subprocesses.keys()): From 7d994d0564cdfee766e90890165783150c926baf Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:26:39 +0100 Subject: [PATCH 5/6] qa: Add feature_framework_startup_failures.py --- .../feature_framework_startup_failures.py | 117 ++++++++++++++++++ test/functional/test_runner.py | 3 +- 2 files changed, 119 insertions(+), 1 deletion(-) create mode 100755 test/functional/feature_framework_startup_failures.py diff --git a/test/functional/feature_framework_startup_failures.py b/test/functional/feature_framework_startup_failures.py new file mode 100755 index 00000000000..db40a7c8651 --- /dev/null +++ b/test/functional/feature_framework_startup_failures.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025-present The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +""" +Verify various startup failures only raise one exception since multiple +exceptions being raised muddies the waters of what actually went wrong. +We should maintain this bar of only raising one exception as long as +additional maintenance and complexity is low. + +Test relaunches itself into a child processes in order to trigger failure +without the parent process' BitcoinTestFramework also failing. +""" + +from test_framework.util import rpc_port +from test_framework.test_framework import BitcoinTestFramework + +import re +import subprocess +import sys + +class FeatureFrameworkRPCFailure(BitcoinTestFramework): + def set_test_params(self): + # Only run a node for child processes + self.num_nodes = 1 if any(o is not None for o in [self.options.internal_rpc_timeout, + self.options.internal_extra_args, + self.options.internal_start_stop]) else 0 + + if self.options.internal_rpc_timeout is not None: + self.rpc_timeout = self.options.internal_rpc_timeout + if self.options.internal_extra_args: + self.extra_args = [[self.options.internal_extra_args]] + + def add_options(self, parser): + parser.add_argument("--internal-rpc_timeout", dest="internal_rpc_timeout", help="ONLY TO BE USED WHEN TEST RELAUNCHES ITSELF") + parser.add_argument("--internal-extra_args", dest="internal_extra_args", help="ONLY TO BE USED WHEN TEST RELAUNCHES ITSELF") + parser.add_argument("--internal-start_stop", dest="internal_start_stop", help="ONLY TO BE USED WHEN TEST RELAUNCHES ITSELF") + + def setup_network(self): + # Avoid doing anything if num_nodes == 0, otherwise we fail. + if self.num_nodes > 0: + if self.options.internal_start_stop: + self.add_nodes(self.num_nodes, self.extra_args) + self.nodes[0].start() + self.nodes[0].stop_node() + else: + BitcoinTestFramework.setup_network(self) + + def _run_test_internal(self, args, expected_exception): + try: + result = subprocess.run([sys.executable, __file__] + args, encoding="utf-8", stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=10 * self.options.timeout_factor) + except subprocess.TimeoutExpired as e: + print(f"Unexpected timeout, subprocess output:\n{e.output}\nSubprocess output end", file=sys.stderr) + raise + + success = True + + traceback_count = len(re.findall("Traceback", result.stdout)) + if traceback_count != 1: + self.log.error(f"Found {traceback_count}/1 tracebacks - expecting exactly one with no knock-on exceptions.") + success = False + + matching_exception_count = len(re.findall(expected_exception, result.stdout)) + if matching_exception_count != 1: + self.log.error(f"Found {matching_exception_count}/1 occurrences of the specific exception: {expected_exception}") + success = False + + test_failure_msg_count = len(re.findall("Test failed. Test logging available at", result.stdout)) + if test_failure_msg_count != 1: + self.log.error(f"Found {test_failure_msg_count}/1 test failure output messages.") + success = False + + if not success: + raise AssertionError(f"Child test didn't contain expected errors.\n:\n{result.stdout}\n\n") + + def test_instant_rpc_timeout(self): + self.log.info("Verifying timeout in connecting to bitcoind's RPC interface results in only one exception.") + self._run_test_internal( + ["--internal-rpc_timeout=0"], + "AssertionError: \\[node 0\\] Unable to connect to bitcoind after 0s" + ) + + def test_wrong_rpc_port(self): + self.log.info("Verifying inability to connect to bitcoind's RPC interface due to wrong port results in one exception containing at least one OSError.") + self._run_test_internal( + # Lower the timeout so we don't wait that long. + [f"--internal-rpc_timeout={int(max(3, self.options.timeout_factor))}", + # Override RPC port to something TestNode isn't expecting so that we + # are unable to establish an RPC connection. + f"--internal-extra_args=-rpcport={rpc_port(2)}"], + r"AssertionError: \[node 0\] Unable to connect to bitcoind after \d+s \(ignored errors: {[^}]*'OSError \w+'?: \d+[^}]*}, latest error: \w+\([^)]+\)\)" + ) + + def test_init_error(self): + self.log.info("Verify startup failure due to invalid arg results in only one exception.") + self._run_test_internal( + ["--internal-extra_args=-nonexistentarg"], + "FailedToStartError: \\[node 0\\] bitcoind exited with status 1 during initialization. Error: Error parsing command line arguments: Invalid parameter -nonexistentarg" + ) + + def test_start_stop(self): + self.log.info("Verify start() then stop_node() on a node without wait_for_rpc_connection() in between raises a clear exception.") + self._run_test_internal( + ["--internal-start_stop=1"], + r"RuntimeError: \[node 0\] Cannot call stop-RPC as we don't have an RPC connection to process \d+, wait_for_rpc_connection\(\) failed or was never called." + ) + + def run_test(self): + if self.options.internal_rpc_timeout is None and self.options.internal_extra_args is None: + self.test_instant_rpc_timeout() + self.test_wrong_rpc_port() + self.test_init_error() + self.test_start_stop() + + +if __name__ == '__main__': + FeatureFrameworkRPCFailure(__file__).main() diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py index b51e40483e6..7472d6930f4 100755 --- a/test/functional/test_runner.py +++ b/test/functional/test_runner.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2014-2022 The Bitcoin Core developers +# Copyright (c) 2014-present The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. """Run regression test suite. @@ -408,6 +408,7 @@ BASE_SCRIPTS = [ 'p2p_handshake.py --v2transport', 'feature_dirsymlinks.py', 'feature_help.py', + 'feature_framework_startup_failures.py', 'feature_shutdown.py', 'wallet_migration.py', 'p2p_ibd_txrelay.py', From c107aa8c873cb3c031b341baef487638133823f1 Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Wed, 12 Feb 2025 23:52:17 +0100 Subject: [PATCH 6/6] qa: Avoid logging node stopping info when a test has 0 nodes (This applies to the test in the parent commit). --- test/functional/test_framework/test_framework.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/functional/test_framework/test_framework.py b/test/functional/test_framework/test_framework.py index 921f12d9fb4..2e99284aa3e 100755 --- a/test/functional/test_framework/test_framework.py +++ b/test/functional/test_framework/test_framework.py @@ -323,11 +323,11 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass): self.log.debug('Closing down network thread') self.network_thread.close() - if self.success == TestStatus.FAILED: - self.log.info("Not stopping nodes as test failed. The dangling processes will be cleaned up later.") - else: - self.log.info("Stopping nodes") - if self.nodes: + if self.nodes: + if self.success == TestStatus.FAILED: + self.log.info("Not stopping nodes as test failed. The dangling processes will be cleaned up later.") + else: + self.log.info("Stopping nodes") self.stop_nodes() should_clean_up = (