Merge bitcoin/bitcoin#23737: test: make feature_init more robust

8904f17ea7 test: enable txindex in feature_init (James O'Beirne)
93db6d8422 test: feature_init: retain debug.log and improve detection (James O'Beirne)
24fcf6e435 test: feature_init: tweak all .ldb files of a certain type (James O'Beirne)
a8ffbc01db test: add TestNode.wait_for_debug_log (James O'Beirne)
a2fb62b632 test: introduce TestNode.debug_log_bytes (James O'Beirne)

Pull request description:

  Hopefully fixes #23646.

  This makes a few changes to feature_init (along the lines of the discussion here: https://github.com/bitcoin/bitcoin/pull/23289#discussion_r766615073). The changes are detailed in the specific commits, but a summary might be:
  - more robust log-line detection (at the expense of maybe being slower)
  - retain all debug.log content
  - perturb .ldb files in a more complete way

Top commit has no ACKs.

Tree-SHA512: 9c8900998fdb96d3cc92bd30c4eeda2a47e9787e6432546d458c17f24eb9e2b6b3eb573db225172888ca71d7a62260f497f83827597b982b9e35c5452cb6e514
This commit is contained in:
MarcoFalke 2022-01-03 08:49:21 +01:00
commit d69af93223
No known key found for this signature in database
GPG Key ID: CE2B75697E69A548
2 changed files with 67 additions and 52 deletions

View File

@ -44,14 +44,9 @@ class InitStressTest(BitcoinTestFramework):
def sigterm_node():
node.process.terminate()
node.process.wait()
node.debug_log_path.unlink()
node.debug_log_path.touch()
def check_clean_start():
"""Ensure that node restarts successfully after various interrupts."""
# TODO: add -txindex=1 to fully test index initiatlization.
# See https://github.com/bitcoin/bitcoin/pull/23289#discussion_r735159180 for
# a discussion of the related bug.
node.start()
node.wait_for_rpc_connection()
assert_equal(200, node.getblockcount())
@ -71,56 +66,33 @@ class InitStressTest(BitcoinTestFramework):
'net thread start',
'addcon thread start',
'loadblk thread start',
# TODO: reenable - see above TODO
# 'txindex thread start',
'msghand thread start'
'txindex thread start',
'msghand thread start',
'net thread start',
'addcon thread start',
]
if self.is_wallet_compiled():
lines_to_terminate_after.append('Verifying wallet')
for terminate_line in lines_to_terminate_after:
self.log.info(f"Starting node and will exit after line '{terminate_line}'")
node.start(
# TODO: add -txindex=1 to fully test index initiatlization.
# extra_args=['-txindex=1'],
)
logfile = open(node.debug_log_path, 'r', encoding='utf8')
node.start(extra_args=['-txindex=1'])
MAX_SECS_TO_WAIT = 30
start = time.time()
num_lines = 0
while True:
line = logfile.readline()
if line:
num_lines += 1
if line and terminate_line.lower() in line.lower():
self.log.debug(f"Terminating node after {num_lines} log lines seen")
sigterm_node()
break
if (time.time() - start) > MAX_SECS_TO_WAIT:
raise AssertionError(
f"missed line {terminate_line}; terminating now after {num_lines} lines")
if node.process.poll() is not None:
raise AssertionError(f"node failed to start (line: '{terminate_line}')")
num_total_logs = node.wait_for_debug_log([terminate_line], ignore_case=True)
self.log.debug(f"Terminating node after {num_total_logs} log lines seen")
sigterm_node()
check_clean_start()
num_total_logs = len(node.debug_log_path.read_text().splitlines())
self.stop_node(0)
self.log.info(
f"Terminate at some random point in the init process (max logs: {num_total_logs})")
for _ in range(40):
terminate_after = random.randint(1, num_total_logs)
self.log.debug(f"Starting node and will exit after {terminate_after} lines")
node.start(
# TODO: add -txindex=1 to fully test index initiatlization.
# extra_args=['-txindex=1'],
)
num_logs = len(Path(node.debug_log_path).read_text().splitlines())
additional_lines = random.randint(1, num_total_logs)
self.log.debug(f"Starting node and will exit after {additional_lines} lines")
node.start(extra_args=['-txindex=1'])
logfile = open(node.debug_log_path, 'r', encoding='utf8')
MAX_SECS_TO_WAIT = 10
@ -132,7 +104,8 @@ class InitStressTest(BitcoinTestFramework):
if line:
num_lines += 1
if num_lines >= terminate_after or (time.time() - start) > MAX_SECS_TO_WAIT:
if num_lines >= (num_logs + additional_lines) or \
(time.time() - start) > MAX_SECS_TO_WAIT:
self.log.debug(f"Terminating node after {num_lines} log lines seen")
sigterm_node()
break
@ -152,11 +125,12 @@ class InitStressTest(BitcoinTestFramework):
}
for file_patt, err_fragment in files_to_disturb.items():
target_file = list(node.chain_path.glob(file_patt))[0]
target_files = list(node.chain_path.glob(file_patt))
self.log.info(f"Tweaking file to ensure failure {target_file}")
bak_path = str(target_file) + ".bak"
target_file.rename(bak_path)
for target_file in target_files:
self.log.info(f"Tweaking file to ensure failure {target_file}")
bak_path = str(target_file) + ".bak"
target_file.rename(bak_path)
# TODO: at some point, we should test perturbing the files instead of removing
# them, e.g.
@ -170,14 +144,16 @@ class InitStressTest(BitcoinTestFramework):
# investigate doing this later.
node.assert_start_raises_init_error(
# TODO: add -txindex=1 to fully test index initiatlization.
# extra_args=['-txindex=1'],
extra_args=['-txindex=1'],
expected_msg=err_fragment,
match=ErrorMatch.PARTIAL_REGEX,
)
self.log.info(f"Restoring file from {bak_path} and restarting")
Path(bak_path).rename(target_file)
for target_file in target_files:
bak_path = str(target_file) + ".bak"
self.log.debug(f"Restoring file from {bak_path} and restarting")
Path(bak_path).rename(target_file)
check_clean_start()
self.stop_node(0)

View File

@ -389,14 +389,17 @@ class TestNode():
def debug_log_path(self) -> Path:
return self.chain_path / 'debug.log'
def debug_log_bytes(self) -> int:
with open(self.debug_log_path, encoding='utf-8') as dl:
dl.seek(0, 2)
return dl.tell()
@contextlib.contextmanager
def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2):
if unexpected_msgs is None:
unexpected_msgs = []
time_end = time.time() + timeout * self.timeout_factor
with open(self.debug_log_path, encoding='utf-8') as dl:
dl.seek(0, 2)
prev_size = dl.tell()
prev_size = self.debug_log_bytes()
yield
@ -419,6 +422,42 @@ class TestNode():
time.sleep(0.05)
self._raise_assertion_error('Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(str(expected_msgs), print_log))
def wait_for_debug_log(self, expected_msgs, timeout=10, ignore_case=False) -> int:
"""
Block until we see a particular debug log message fragment or until we exceed the timeout.
Return:
the number of log lines we encountered when matching
"""
time_end = time.time() + timeout * self.timeout_factor
prev_size = self.debug_log_bytes()
re_flags = re.MULTILINE | (re.IGNORECASE if ignore_case else 0)
while True:
found = True
with open(self.debug_log_path, encoding='utf-8') as dl:
dl.seek(prev_size)
log = dl.read()
for expected_msg in expected_msgs:
if re.search(re.escape(expected_msg), log, flags=re_flags) is None:
found = False
if found:
num_logs = len(log.splitlines())
return num_logs
if time.time() >= time_end:
print_log = " - " + "\n - ".join(log.splitlines())
break
# No sleep here because we want to detect the message fragment as fast as
# possible.
self._raise_assertion_error(
'Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(
str(expected_msgs), print_log))
return -1 # useless return to satisfy linter
@contextlib.contextmanager
def profile_with_perf(self, profile_name: str):
"""