1
0
mirror of https://github.com/lightning/bolts.git synced 2024-11-19 01:50:03 +01:00

tools/extract-formats.py: rewrite, change output.

Editing the previous mess was horrific.  I gave up and rewrote using a
generator.

Changes to output:
1. subtypes and tlvs now handled.
2. The output format now has explicit prefixes, so readers don't have
   to rely on number of fields to interpret data.
3. Each field is split into type and count; count is empty if there's
   no '*x'.
4. TLV stream typenames are repeated; TLV record type names are not
   necessarily unique.
5. The unused offset field is removed.
6. No arguments taken: everything is always printed, and you can grep if you
   only want some.

[ Fixup by <niftynei@gmail.com> ]
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2019-06-17 13:42:30 +09:30
parent a82dd143f1
commit d6e477be43
2 changed files with 187 additions and 74 deletions

View File

@ -9,5 +9,5 @@ python:
- "3.5"
- "3.6"
script:
- (set -e; for i in 0?-*.md; do echo "Extracting $i"; python3 tools/extract-formats.py --message-types --message-fields $i; done)
- (set -e; for i in 0?-*.md; do echo "Extracting $i"; python3 tools/extract-formats.py $i; done)
- tools/spellcheck.sh --check [0-9][0-9]-*.md

View File

@ -3,94 +3,207 @@
# Released by Rusty Russell under CC0:
# https://creativecommons.org/publicdomain/zero/1.0/
# Outputs:
#
# Standard message types:
# msgtype,<msgname>,<value>[,<option>]
# msgdata,<msgname>,<fieldname>,<typename>,[<count>][,<option>]
#
# TLV types:
# tlvtype,<tlvstreamname>,<tlvname>,<value>[,<option>]
# tlvdata,<tlvstreamname>,<tlvname>,<fieldname>,<typename>,[<count>][,<option>]
#
# Subtypes:
# subtype,<msgname>[,<option>]
# subtypedata,<msgname>,<fieldname>,<typename>,[<count>][,<option>]
from optparse import OptionParser
import sys
import re
import fileinput
typeline = re.compile(
'1\. type: (?P<value>[-0-9A-Za-z_|]+) \(`(?P<name>[A-Za-z2_]+)`\)( \(`?(?P<option>[^)`]*)`\))?')
tlvline = re.compile(
'1\. tlvs: `(?P<name>[A-Za-z2_]+)`( \(`?(?P<option>[^)`]*)`\))?')
subtypeline = re.compile(
'1\. subtype: `(?P<name>[A-Za-z2_]+)`( \(`?(?P<option>[^)`]*)`\))?')
dataline = re.compile(
'\s+\* \[`(?P<typefield>[-_a-zA-Z0-9*+]+)`:`(?P<name>[_a-z0-9]+)`\]( \(`?(?P<option>[^)`]*)`?\))?')
def main(options, args=None, output=sys.stdout, lines=None):
# Example inputs:
# 1. type: 17 (`error`) (`optionXXX`)
# 2. data:
# * [`8`:`channel_id`]
# * [`4`:`len`]
# * [`len`:`data`] (optionXXX)
#
# 1. type: PERM|NODE|3 (`required_node_feature_missing`)
message = None
havedata = None
typeline = re.compile(
'1\. type: (?P<value>[-0-9A-Za-z_|]+) \(`(?P<name>[A-Za-z_]+)`\)( \(`?(?P<option>[^)`]*)`?\))?')
dataline = re.compile(
'\s+\* \[`(?P<size>[_a-z0-9*+]+)`:`(?P<name>[_a-z0-9]+)`\]( \(`?(?P<option>[^)`]*)`?\))?')
# Generator to give us one line at a time.
def next_line(args, lines):
if lines is None:
lines = fileinput.input(args)
for i, line in enumerate(lines):
line = line.rstrip()
linenum = i+1
yield i, line.rstrip()
match = typeline.fullmatch(line)
if match:
if message is not None:
raise ValueError('{}:Found a message while I was already in a '
'message'.format(linenum))
message = match.group('name')
if options.output_types:
print("{},{}".format(
match.group('name'),
match.group('value')), file=output)
havedata = None
elif message is not None and havedata is None:
if line != '2. data:':
message = None
havedata = True
dataoff = 0
off_extraterms = ""
elif message is not None and havedata is not None:
match = dataline.fullmatch(line)
if match:
if options.output_fields:
print("{},{}{},{},{}".format(
message,
dataoff,
off_extraterms,
match.group('name'),
match.group('size')), file=output, end='')
if match.group('option'):
print(",{}".format(match.group('option')), file=output)
# Helper to print a line to output with optional ,option
def print_csv(output, fmt, option):
print(fmt, file=output, end='')
if option:
print(',{}'.format(option), file=output)
else:
print('', file=output)
# Size can be variable.
try:
dataoff += int(match.group('size'))
except ValueError:
# Offset has variable component.
off_extraterms = off_extraterms + "+" + match.group('size')
else:
message = None
# 1. type: 17 (`error`) (`optionXXX`)
# 2. data:
# * [`short_channel_id`:`channel_id`]
# * [`u16`:`num_inputs`]
# * [`num_inputs*sha256`:`input_info`]
# * [`u32`:`len`] (optionYYY)
# * [`len*byte`:`data`] (optionYYY)
#
# output:
# msgtype,error,17,optionXXX
# msgdata,error,channel_id,short_channel_id,
# msgdata,error,num_inputs,u16,
# msgdata,error,input_info,sha256,num_inputs
# msgdata,error,len,u32,,optionYYY
# msgdata,error,data,byte,len,optionYYY
#
# 1. type: PERM|NODE|3 (`required_node_feature_missing`)
#
# output:
# msgtype,required_node_feature_missing,PERM|NODE|3
#
# 1. type: 261 (`query_short_channel_ids`) (`gossip_queries`)
# 2. data:
# * [`chain_hash`:`chain_hash`]
# * [`u16`:`len`]
# * [`len*byte`:`encoded_short_ids`]
# * [`tlvs`:`query_short_channel_ids_tlvs`]
#
# output:
# msgtype,query_short_channel_ids,261,gossip_queries
# msgdata,query_short_channel_ids,chain_hash,chain_hash,
# msgdata,query_short_channel_ids,len,u16,
# msgdata,query_short_channel_ids,encoded_short_ids,byte,len
# msgdata,query_short_channel_ids,query_short_channel_ids_tlvs,tlvs,
def parse_type(genline, output, name, value, option, in_tlv=None):
_, line = next(genline)
if in_tlv:
type_prefix='tlvtype,{}'.format(in_tlv)
data_prefix='tlvdata,{}'.format(in_tlv)
else:
type_prefix='msgtype'
data_prefix='msgdata'
print_csv(output, '{},{},{}'.format(type_prefix, name, value), option)
# Expect a data: line before values, if any
if line.lstrip() != '2. data:':
return
while True:
i, line = next(genline)
match = dataline.fullmatch(line)
if not match:
break
if '*' in match.group('typefield'):
num,typename = match.group('typefield').split('*')
else:
num,typename = ("", match.group('typefield'))
print_csv(output,
"{},{},{},{},{}"
.format(data_prefix, name, match.group('name'), typename, num),
match.group('option'))
# 1. tlvs: `query_short_channel_ids_tlvs`
# 2. types:
# 1. type: 1 (`query_flags`)
# 2. data:
# * [`byte`:`encoding_type`]
# * [`tlv_len-1*byte`:`encoded_query_flags`]
#
# output:
# tlvtype,query_short_channel_ids_tlvs,query_flags,1
# tlvdata,query_short_channel_ids_tlvs,query_flags,encoding_type,byte,
# tlvdata,query_short_channel_ids_tlvs,query_flags,encoded_query_flags,byte,tlv_len-1
def parse_tlv(genline, output, name, option):
i, line = next(genline)
# Expect a types: line after tlvs.
if line != '2. types:':
raise ValueError('{}: Expected "2. types:" line'.format(i))
while True:
_, line = next(genline)
# Inside tlv, types are indented.
match = typeline.fullmatch(line.lstrip())
if not match:
break
parse_type(genline, output, match.group('name'), match.group('value'), match.group('option'), name)
# 1. subtype: `input_info`
# 2. data:
# * [`u64`:`satoshis`]
# * [`sha256`:`prevtx_txid`]
#
# output:
# subtype,input_info
# subtypedata,input_info,satoshis,u64,
# subtypedata,input_info,prevtx_txid,sha256,
def parse_subtype(genline, output, name, option):
i, line = next(genline)
# Expect a data: line after subtype.
if line != '2. data:':
raise ValueError('{}: Expected "2. data:" line'.format(i))
print_csv(output, 'subtype,{}'.format(name), option)
while True:
i, line = next(genline)
match = dataline.fullmatch(line)
if not match:
break
if '*' in match.group('typefield'):
num,typename = match.group('typefield').split('*')
else:
num,typename = ("", match.group('typefield'))
print_csv(output,
"{},{},{},{},{}"
.format('subtypedata', name, match.group('name'), typename, num),
match.group('option'))
def main(options, args=None, output=sys.stdout, lines=None):
genline = next_line(args, lines)
try:
while True:
_, line = next(genline)
match = typeline.fullmatch(line)
if match:
parse_type(genline, output, match.group('name'), match.group('value'), match.group('option'))
continue
match = tlvline.fullmatch(line)
if match:
parse_tlv(genline, output, match.group('name'), match.group('option'))
continue
match = subtypeline.fullmatch(line)
if match:
parse_subtype(genline, output, match.group('name'), match.group('option'))
continue
except StopIteration:
pass
if __name__ == "__main__":
parser = OptionParser()
parser.add_option(
"--message-types",
action="store_true",
dest="output_types",
default=False,
help="Output MESSAGENAME,VALUE for every message"
)
parser.add_option(
"--message-fields",
action="store_true",
dest="output_fields",
default=False,
help="Output MESSAGENAME,OFFSET,FIELDNAME,SIZE for every message"
)
(options, args) = parser.parse_args()
main(options, args)