From 11eaabdbe627a206796de7e70776adc74b2c863e Mon Sep 17 00:00:00 2001 From: Christian Decker Date: Thu, 28 Sep 2017 13:01:47 +0930 Subject: [PATCH] pytest: Stopping daemon cleanly We used to simply kill the daemon, which in some cases could result in half-written crashlogs and similar artifacts such as half-completed RPC calls. Now we ask lightningd to stop nicely, give it some time and only then kill it. We also return the returncode of the daemon. Signed-off-by: Christian Decker --- tests/test_lightningd.py | 7 +++-- tests/utils.py | 56 ++++++++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/tests/test_lightningd.py b/tests/test_lightningd.py index cc3e5e30f..9f7c5b838 100644 --- a/tests/test_lightningd.py +++ b/tests/test_lightningd.py @@ -126,7 +126,7 @@ class NodeFactory(object): def killall(self): for n in self.nodes: - n.daemon.stop() + n.stop() class BaseLightningDTests(unittest.TestCase): @@ -1541,9 +1541,8 @@ class LightningDTests(BaseLightningDTests): time.sleep(1) assert l1.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 99990000 assert l2.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 10000 - # Stop l2, l1 will reattempt to connect - l2.daemon.stop() + l2.stop() # Wait for l1 to notice wait_for(lambda: not l1.rpc.getpeers()['peers'][0]['connected']) @@ -1562,7 +1561,7 @@ class LightningDTests(BaseLightningDTests): assert l2.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 20000 # Finally restart l1, and make sure it remembers - l1.daemon.stop() + l1.stop() l1.daemon.start() assert l1.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 99980000 diff --git a/tests/utils.py b/tests/utils.py index 6a8f8ee35..bdb26bd3f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -47,7 +47,7 @@ class TailableProc(object): self.proc = None self.outputDir = outputDir self.logsearch_start = 0 - + def start(self): """Start the underlying process and start monitoring it. """ @@ -58,17 +58,28 @@ class TailableProc(object): self.thread.start() self.running = True - def stop(self): + def stop(self, timeout=10): if self.outputDir: logpath = os.path.join(self.outputDir, 'log') with open(logpath, 'w') as f: for l in self.logs: f.write(l + '\n') self.proc.terminate() - self.proc.kill() + + # Now give it some time to react to the signal + rc = self.proc.wait(timeout) + + if rc is None: + self.proc.kill() + self.proc.wait() self.thread.join() + if failed: + raise(ValueError("Process '{}' did not cleanly shutdown".format(self.proc.pid))) + + return self.proc.returncode + def tail(self): """Tail the stdout of the process and remember it. @@ -175,7 +186,7 @@ class BitcoinD(TailableProc): def __init__(self, bitcoin_dir="/tmp/bitcoind-test", rpcport=18332): TailableProc.__init__(self, bitcoin_dir) - + self.bitcoin_dir = bitcoin_dir self.rpcport = rpcport self.prefix = 'bitcoind' @@ -231,12 +242,14 @@ class LightningD(TailableProc): self.wait_for_log("Creating IPv6 listener on port") logging.info("LightningD started") - def stop(self): - # If it's already crashing, wait a bit for log dump. - if os.path.isfile(os.path.join(self.lightning_dir, 'crash.log')): - time.sleep(2) - TailableProc.stop(self) - logging.info("LightningD stopped") + def wait(self, timeout=10): + """Wait for the daemon to stop for up to timeout seconds + + Returns the returncode of the process, None if the process did + not return before the timeout triggers. + """ + self.proc.wait(timeout) + return self.proc.returncode class LightningNode(object): def __init__(self, daemon, rpc, btc, executor): @@ -262,7 +275,7 @@ class LightningNode(object): t = threading.Thread(target=call_connect) t.daemon = True t.start() - + def wait_connected(): # Up to 10 seconds to get tx into mempool. start_time = time.time() @@ -316,3 +329,24 @@ class LightningNode(object): on cleanup""" self.known_fail = True + def stop(self, timeout=10): + """ Attempt to do a clean shutdown, but kill if it hangs + """ + + # Tell the daemon to stop + try: + # May fail if the process already died + self.rpc.stop() + except: + pass + + rc = self.daemon.wait(timeout) + + # If it did not stop be more insistent + if rc is None: + rc = self.daemon.stop() + + if rc != 0: + raise ValueError("Node did not exit cleanly, rc={}".format(rc)) + else: + return rc