mirror of
https://github.com/ElementsProject/lightning.git
synced 2025-01-07 14:29:33 +01:00
3f3a48dae9
The optimistic lock prevents multiple instances of c-lightning from making concurrent modifications to the database. That would be unsafe as it messes up the state in the DB. The optimistic lock is implemented by checking whether a gated update on the previous value of the `data_version` actually results in an update. If that's not the case the DB has been changed under our feet. The lock provides linearizability of DB modifications: if a database is changed under the feet of a running process that process will `abort()`, which from a global point of view is as if it had crashed right after the last successful commit. Any process that also changed the DB must've started between the last successful commit and the unsuccessful one since otherwise its counters would not have matched (which would also have aborted that transaction). So this reduces all the possible timelines to an equivalent where the first process died, and the second process recovered from the DB. This is not that interesting for `sqlite3` where we are also protected via the PID file, but when running on multiple hosts against the same DB, e.g., with `postgres`, this protection becomes important. Changelog-Added: DB: Optimistic locking prevents instances from running concurrently against the same database, providing linear consistency to changes.
161 lines
6.7 KiB
Python
161 lines
6.7 KiB
Python
from fixtures import * # noqa: F401,F403
|
|
from fixtures import TEST_NETWORK
|
|
from pyln.client import RpcError
|
|
from utils import wait_for, sync_blockheight, COMPAT
|
|
import os
|
|
import pytest
|
|
import time
|
|
import unittest
|
|
|
|
|
|
@unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "This test is based on a sqlite3 snapshot")
@unittest.skipIf(TEST_NETWORK != 'regtest', "The DB migration is network specific due to the chain var.")
def test_db_dangling_peer_fix(node_factory, bitcoind):
    """Start l1 from the `dangling-peer.sqlite3.xz` snapshot and fund a channel to it.

    l1 is booted from a stored database snapshot; a fresh node l2 (whose id
    must match the entry recorded in that snapshot) is then connected, and l2
    opens a channel towards l1.

    The snapshot is a sqlite3 file, so the test is skipped for any other
    `TEST_DB_PROVIDER`, using the same guard as `test_scid_upgrade`.
    """
    # Make sure bitcoind doesn't think it's going backwards
    bitcoind.generate_block(104)

    # This was taken from test_fail_unconfirmed() node.
    l1 = node_factory.get_node(dbfile='dangling-peer.sqlite3.xz')
    l2 = node_factory.get_node()

    # Must match entry in db
    assert l2.info['id'] == '022d223620a359a47ff7f7ac447c85c46c923da53389221a0054c11c1e3ca31d59'

    # This time it should work! (Connect *in* since l1 thinks it has UTXOs
    # it doesn't have).
    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
    # Make sure l2 has registered the connection before funding
    l2.daemon.wait_for_log('Handed peer, entering loop')
    l2.fund_channel(l1, 200000, wait_for_active=True)
|
|
|
|
|
|
@unittest.skipIf(TEST_NETWORK != 'regtest', "Address is network specific")
def test_block_backfill(node_factory, bitcoind, chainparams):
    """Check that blocks skipped at startup are backfilled on demand for gossip.

    In normal operation a node only processes blocks from its start height
    (or rescan height) onwards. Gossip, however, may reference older blocks:
    when a channel_announcement points at a blockheight that has not been
    processed yet, the whole block is fetched, its P2WSH outputs are
    extracted and `bitcoin gettxout` is asked about each of them. The block
    header then lands in the `blocks` table and the unspent outputs in the
    `utxoset` table.

    Scenario: l1 and l2 open a channel at height X, and an unrelated P2WSH
    payment (the "needle") is confirmed in the same block so the completeness
    of the backfill can be checked. A third node, l3, is started about 100
    blocks later and connects to l1. Before connecting it must not know
    block X; after syncing gossip it must have backfilled block X together
    with the P2WSH outputs in it.
    """
    # The channel is opened by hand further down: `line_graph` with
    # `fundchannel=True` generates blocks, which would make it impossible to
    # get another tx into the same block as the funding tx.
    l1, l2 = node_factory.line_graph(2, fundchannel=False)

    # Give l1 something to fund the channel with
    funding_addr = l1.rpc.newaddr()['bech32']
    bitcoind.rpc.sendtoaddress(funding_addr, 1)
    bitcoind.generate_block(1)
    wait_for(lambda: len(l1.rpc.listfunds()['outputs']) == 1)

    # Send the needle we will look for in l3's utxoset later on, then open
    # the channel so both transactions sit in the mempool together.
    needle_addr = 'bcrt1qtwxd8wg5eanumk86vfeujvp48hfkgannf77evggzct048wggsrxsum2pmm'
    bitcoind.rpc.sendtoaddress(needle_addr, 0.00031337)
    l1.rpc.fundchannel(l2.info['id'], 10**6, announce=True)
    wait_for(lambda: len(bitcoind.rpc.getrawmempool()) == 2)

    # Confirm both, and put some distance between the funding block and the
    # l3 wallet birth date
    bitcoind.generate_block(100)
    wait_for(lambda: len(l1.rpc.listnodes()['nodes']) == 2)

    # Start the tester node. Once connected to l1, l3 should sync the gossip
    # and call out to `bitcoind` to backfill the block.
    l3 = node_factory.get_node()

    def stored_heights():
        # Heights of all blocks currently recorded in l3's `blocks` table
        return {row['height'] for row in l3.db_query("SELECT height FROM blocks")}

    # 103 is the height the test expects the backfilled block to have
    assert 103 not in stored_heights()

    l3.rpc.connect(l1.info['id'], 'localhost', l1.port)

    # Once l3 knows both nodes the block must have been backfilled
    wait_for(lambda: len(l3.rpc.listnodes()['nodes']) == 2)
    assert 103 in stored_heights()

    # The needle added to the haystack above must have been picked up too
    utxo_amounts = [row['satoshis'] for row in l3.db_query("SELECT satoshis FROM utxoset")]
    assert 31337 in utxo_amounts

    # Make sure that l3 doesn't ask for more gossip and get a reply about
    # the closed channel (hence Bad gossip msgs in log).
    l3.daemon.wait_for_log('seeker: state = NORMAL')

    # Finally close the channel and make sure `l3` cleans up correctly
    close_txid = l1.rpc.close(l2.info['id'])['txid']
    bitcoind.generate_block(1, wait_for_mempool=close_txid)
    wait_for(lambda: len(l3.rpc.listchannels()['channels']) == 0)
|
|
|
|
|
|
# Test that the max-channel-id is set correctly between
|
|
# restarts (with forgotten channel)
|
|
def test_max_channel_id(node_factory, bitcoind):
    """Open a new channel to a peer after the previous one was closed and forgotten.

    Sequence: create and announce a channel between l1 and l2, close it,
    mine 101 blocks and wait until neither node lists any peer, restart l2,
    reconnect, and fund a second channel. The second funding should succeed.
    """
    l1, l2 = node_factory.line_graph(2, fundchannel=True, wait_for_announce=True)
    sync_blockheight(bitcoind, [l1, l2])

    # Now shutdown cleanly.
    l1.rpc.close(l2.info['id'], 0)

    # Both sides have to reach the completed-closing state (l1 first, then l2)
    for node in (l1, l2):
        node.daemon.wait_for_log(' to CLOSINGD_COMPLETE')

    # And should put closing into mempool.
    l1.wait_for_channel_onchain(l2.info['id'])
    l2.wait_for_channel_onchain(l1.info['id'])

    # Mine enough blocks for the channel to be forgotten: afterwards neither
    # node should list a peer anymore.
    bitcoind.generate_block(101)
    for node in (l1, l2):
        # Bind `node` as a default so the lambda does not depend on the loop variable
        wait_for(lambda n=node: n.rpc.listpeers()['peers'] == [])

    # Stop l2, and restart
    l2.stop()
    l2.start()

    # Reconnect
    l1.rpc.connect(l2.info['id'], 'localhost', l2.port)

    # Fundchannel again, should succeed.
    l1.rpc.fundchannel(l2.info['id'], 10**5)
|
|
|
|
|
|
@unittest.skipIf(not COMPAT, "needs COMPAT to convert obsolete db")
@unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "This test is based on a sqlite3 snapshot")
@unittest.skipIf(TEST_NETWORK != 'regtest', "The network must match the DB snapshot")
def test_scid_upgrade(node_factory, bitcoind):
    """Start a node from an old-style snapshot and check its stored short channel ids.

    The node is booted from `oldstyle-scids.sqlite3.xz`. After startup, the
    short channel id in `channels` and the fail channel in `payments` must
    both read back as '103x1x1'.
    """
    bitcoind.generate_block(1)

    # Created through the power of sed "s/X'\([0-9]*\)78\([0-9]*\)78\([0-9]*\)'/X'\13A\23A\3'/"
    # (judging by that expression, hex 78 'x' separators were rewritten to hex 3A ':')
    l1 = node_factory.get_node(dbfile='oldstyle-scids.sqlite3.xz')

    channel_rows = l1.db_query('SELECT short_channel_id from channels;')
    assert channel_rows == [{'short_channel_id': '103x1x1'}]

    payment_rows = l1.db_query('SELECT failchannel from payments;')
    assert payment_rows == [{'failchannel': '103x1x1'}]
|
|
|
|
|
|
def test_optimistic_locking(node_factory, bitcoind):
    """Modify the DB behind a running node's back and expect the node to crash.

    A node is started and given time to settle its writes, which opens a
    window in which the test bumps `data_version` in the `vars` table
    directly. The next RPC call that writes to the DB is then expected to
    lose its connection, and the node's log must report the failed
    optimistic lock.
    """
    # The node is expected to die and log breakage, so allow both
    l1 = node_factory.get_node(may_fail=True, allow_broken_log=True)

    sync_blockheight(bitcoind, [l1])
    l1.rpc.getinfo()
    # Wait for the node to settle its writes before interfering
    time.sleep(1)

    bump_data_version = "UPDATE vars SET intval = intval + 1 WHERE name = 'data_version';"
    l1.db.execute(bump_data_version)

    # Now trigger any DB write and we should be crashing.
    with pytest.raises(RpcError, match=r'Connection to RPC server lost.'):
        l1.rpc.newaddr()

    lock_failure_logged = l1.daemon.is_in_log(r'Optimistic lock on the database failed')
    assert lock_failure_logged
|