Merge bitcoin/bitcoin#30607: contrib: support reading XORed blocks in linearize-data.py script

77ff0ec1f1 contrib: support reading XORed blocks in linearize-data.py script (Sebastian Falbesoner)

Pull request description:

  This PR is a small follow-up for #28052, adding support for the block linearization script to handle XORed blocksdir *.dat files. Note that if no xor.dat file exists, the XOR pattern is set to all-zeros, in order to still support blockdirs that have been created with versions earlier than 28.x.
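
  The obfuscation is a plain repeating XOR: every byte of a blk*.dat file is XORed with the key byte at (absolute file offset mod key length), where the 8-byte key is stored in the blocksdir's xor.dat. As a rough standalone sketch of that decoding (not part of this change; the datadir path and helper names below are only illustrative):

      import os

      NUM_XOR_BYTES = 8  # From InitBlocksdirXorKey::xor_key.size()

      def read_xor_key(blocks_path):
          try:
              with open(os.path.join(blocks_path, "xor.dat"), "rb") as xor_file:
                  return xor_file.read(NUM_XOR_BYTES)
          except FileNotFoundError:
              return bytes(NUM_XOR_BYTES)  # pre-v28 blocksdir: data is stored as-is

      def xor_decode(data, key, offset):
          # XOR each byte with the key byte selected by its absolute file offset
          return bytes(b ^ key[(offset + i) % len(key)] for i, b in enumerate(data))

      blocks_path = os.path.expanduser("~/.bitcoin/blocks")  # assumed default datadir
      key = read_xor_key(blocks_path)
      with open(os.path.join(blocks_path, "blk00000.dat"), "rb") as f:
          prefix = xor_decode(f.read(8), key, offset=0)
      print(prefix[:4].hex())  # expect the network magic, f9beb4d9 on mainnet

  With the all-zero fallback key the XOR is a no-op, which is how block directories written by pre-v28 nodes keep working unchanged.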

  Partly fixes issue #30599.

ACKs for top commit:
  achow101:
    ACK 77ff0ec1f1
  tdb3:
    ACK 77ff0ec1f1
  hodlinator:
    ACK 77ff0ec1f1

Tree-SHA512: 011eb02e2411de373cbbf4b26db4640fc693a20be8c2430529fba6e36a3a3abfdfdc3b005d330f9ec2846bfad9bfbf34231c574ba99289ef37dd51a68e6e7f3d
Ava Chow 2024-08-12 15:03:35 -04:00
commit ba5fdd1a68
2 changed files with 23 additions and 9 deletions

contrib/linearize/linearize-data.py
@@ -76,6 +76,16 @@ def getFirstBlockFileId(block_dir_path):
     blkId = int(firstBlkFn[3:8])
     return blkId
 
+def read_xor_key(blocks_path):
+    NUM_XOR_BYTES = 8 # From InitBlocksdirXorKey::xor_key.size()
+    try:
+        xor_filename = os.path.join(blocks_path, "xor.dat")
+        with open(xor_filename, "rb") as xor_file:
+            return xor_file.read(NUM_XOR_BYTES)
+    # support also blockdirs created with pre-v28 versions, where no xor key exists yet
+    except FileNotFoundError:
+        return bytes([0] * NUM_XOR_BYTES)
+
 # Block header and extent on disk
 BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size'])
@@ -95,6 +105,7 @@ class BlockDataCopier:
         self.outFname = None
         self.blkCountIn = 0
         self.blkCountOut = 0
+        self.xor_key = read_xor_key(self.settings['input'])
 
         self.lastDate = datetime.datetime(2000, 1, 1)
         self.highTS = 1408893517 - 315360000
@@ -113,6 +124,13 @@ class BlockDataCopier:
         self.outOfOrderData = {}
         self.outOfOrderSize = 0 # running total size for items in outOfOrderData
 
+    def read_xored(self, f, size):
+        offset = f.tell()
+        data = bytearray(f.read(size))
+        for i in range(len(data)):
+            data[i] ^= self.xor_key[(i + offset) % len(self.xor_key)]
+        return bytes(data)
+
     def writeBlock(self, inhdr, blk_hdr, rawblock):
         blockSizeOnDisk = len(inhdr) + len(blk_hdr) + len(rawblock)
         if not self.fileOutput and ((self.outsz + blockSizeOnDisk) > self.maxOutSz):
@@ -165,7 +183,7 @@ class BlockDataCopier:
         '''Fetch block contents from disk given extents'''
         with open(self.inFileName(extent.fn), "rb") as f:
             f.seek(extent.offset)
-            return f.read(extent.size)
+            return self.read_xored(f, extent.size)
 
     def copyOneBlock(self):
         '''Find the next block to be written in the input, and copy it to the output.'''
@@ -190,7 +208,7 @@ class BlockDataCopier:
                     print("Premature end of block data")
                     return
 
-            inhdr = self.inF.read(8)
+            inhdr = self.read_xored(self.inF, 8)
             if (not inhdr or (inhdr[0] == "\0")):
                 self.inF.close()
                 self.inF = None
@@ -207,7 +225,7 @@ class BlockDataCopier:
             inLenLE = inhdr[4:]
             su = struct.unpack("<I", inLenLE)
             inLen = su[0] - 80 # length without header
-            blk_hdr = self.inF.read(80)
+            blk_hdr = self.read_xored(self.inF, 80)
             inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen)
 
             self.hash_str = calc_hash_str(blk_hdr)
@@ -224,7 +242,7 @@ class BlockDataCopier:
             if self.blkCountOut == blkHeight:
                 # If in-order block, just copy
-                rawblock = self.inF.read(inLen)
+                rawblock = self.read_xored(self.inF, inLen)
                 self.writeBlock(inhdr, blk_hdr, rawblock)
 
                 # See if we can catch up to prior out-of-order blocks
@@ -237,7 +255,7 @@ class BlockDataCopier:
                     # If there is space in the cache, read the data
                     # Reading the data in file sequence instead of seeking and fetching it later is preferred,
                     # but we don't want to fill up memory
-                    self.outOfOrderData[blkHeight] = self.inF.read(inLen)
+                    self.outOfOrderData[blkHeight] = self.read_xored(self.inF, inLen)
                     self.outOfOrderSize += inLen
                 else: # If no space in cache, seek forward
                     self.inF.seek(inLen, os.SEEK_CUR)

test/functional/feature_loadblock.py
@@ -26,10 +26,6 @@ class LoadblockTest(BitcoinTestFramework):
         self.setup_clean_chain = True
         self.num_nodes = 2
         self.supports_cli = False
-        self.extra_args = [
-            ["-blocksxor=0"], # TODO: The linearize scripts should be adjusted to apply any XOR
-            [],
-        ]
 
     def run_test(self):
         self.nodes[1].setnetworkactive(state=False)