Script for extracting structured protocol messages

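This refactors extract-formats.py so that other scripts can import it and call its `main()` function, while its normal command-line behaviour is retained. The new script (structured.py) parses the CSV output of extract-formats.py and prints a JSON representation of an OrderedDict that maps each message name to its type and fields. This could be used as a basis for writing parsers.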
2025-02-22 14:22:46 +01:00 · 2017-07-02 23:01:23 +02:00 · 2017-07-02 23:01:23 +02:00 · 22544d7789
commit 22544d7789
parent a11d234e18
2 changed files with 115 additions and 63 deletions
--- a/tools/extract-formats.py
+++ b/tools/extract-formats.py
@ -30,7 +30,7 @@ def guess_alignment(message,name,sizestr):
    if 'signature' in name:
       return 1
-    
+
    # Size can be variable.
    try:
        size = int(sizestr)
@ -51,73 +51,80 @@ def guess_alignment(message,name,sizestr):
    return 1
-parser = OptionParser()
+def main(options, args=None, output=sys.stdout, lines=None):
-parser.add_option("--message-types",
+    # Example inputs:
-                  action="store_true", dest="output_types", default=False,
+    # 1. type: 17 (`error`)
-                  help="Output MESSAGENAME,VALUE for every message")
+    # 2. data:
-parser.add_option("--check-alignment",
+    #    * [`8`:`channel_id`]
-                  action="store_true", dest="check_alignment", default=False,
+    #    * [`4`:`len`]
-                  help="Check alignment for every member of each message")
+    #    * [`len`:`data`]
-parser.add_option("--message-fields",
+    #
-                  action="store_true", dest="output_fields", default=False,
+    # 1. type: PERM|NODE|3 (`required_node_feature_missing`)
-                  help="Output MESSAGENAME,OFFSET,FIELDNAME,SIZE for every message")
+    message = None
    havedata = None
    typeline = re.compile('1\. type: (?P<value>[-0-9A-Za-z_|]+) \(`(?P<name>[A-Za-z_]+)`\)')
    dataline = re.compile('\s+\* \[`(?P<size>[_a-z0-9*+]+)`:`(?P<name>[_a-z0-9]+)`\]')
-(options, args) = parser.parse_args()
+    if lines is None:
        lines = fileinput.input(args)
-# Example inputs:
+    for i,line in enumerate(lines):
-# 1. type: 17 (`error`)
+        line = line.rstrip()
-# 2. data:
+        linenum = i+1
 #    * [`8`:`channel_id`]
 #    * [`4`:`len`]
 #    * [`len`:`data`]
 #
 # 1. type: PERM|NODE|3 (`required_node_feature_missing`)
 message = None
 havedata = None
 typeline = re.compile('1\. type: (?P<value>[-0-9A-Za-z_|]+) \(`(?P<name>[A-Za-z_]+)`\)')
 dataline = re.compile('\s+\* \[`(?P<size>[_a-z0-9*+]+)`:`(?P<name>[_a-z0-9]+)`\]')
-for i,line in enumerate(fileinput.input(args)):
+        match = typeline.fullmatch(line)
    line = line.rstrip()
    linenum = i+1
    match = typeline.fullmatch(line)
    if match:
        if message is not None:
            raise ValueError('{}:Found a message while I was already in a message'.format(linenum))
        message = match.group('name')
        if options.output_types:
            print("{},{}".format(match.group('name'), match.group('value')))
        havedata = None
        alignoff = False
    elif message is not None and havedata is None:
        if line != '2. data:':
            message = None
        havedata = True
        dataoff = 0
        off_extraterms = ""
        maxalign = 1
    elif message is not None and havedata is not None:
        match = dataline.fullmatch(line)
        if match:
-            align = guess_alignment(message, match.group('name'), match.group('size'))
+            if message is not None:
                raise ValueError('{}:Found a message while I was already in a message'.format(linenum))
            message = match.group('name')
            if options.output_types:
                print("{},{}".format(match.group('name'), match.group('value')), file=output)
            havedata = None
            alignoff = False
        elif message is not None and havedata is None:
            if line != '2. data:':
                message = None
            havedata = True
            dataoff = 0
            off_extraterms = ""
            maxalign = 1
        elif message is not None and havedata is not None:
            match = dataline.fullmatch(line)
            if match:
                align = guess_alignment(message, match.group('name'), match.group('size'))
-            # Do not check alignment if we previously had a variable
+                # Do not check alignment if we previously had a variable
-            # length field in the message
+                # length field in the message
-            if off_extraterms != "":
+                if off_extraterms != "":
-                alignoff = True
+                    alignoff = True
-            if not alignoff and options.check_alignment and dataoff % align != 0:
+                if not alignoff and options.check_alignment and dataoff % align != 0:
-                raise ValueError('{}:message {} field {} Offset {} not aligned on {} boundary:'.format(linenum, message, match.group('name'), dataoff, align))
+                    raise ValueError('{}:message {} field {} Offset {} not aligned on {} boundary:'.format(linenum, message, match.group('name'), dataoff, align))
-            if options.output_fields:
+                if options.output_fields:
-                print("{},{}{},{},{}".format(message,dataoff,off_extraterms,match.group('name'),match.group('size')))
+                    print("{},{}{},{},{}".format(message,dataoff,off_extraterms,match.group('name'),match.group('size')), file=output)
-            # Size can be variable.
+                # Size can be variable.
-            try:
+                try:
-                dataoff += int(match.group('size'))
+                    dataoff += int(match.group('size'))
-            except ValueError:
+                except ValueError:
-                # Offset has variable component.
+                    # Offset has variable component.
-                off_extraterms = off_extraterms + "+" + match.group('size')
+                    off_extraterms = off_extraterms + "+" + match.group('size')
-        else:
+            else:
-            message = None
+                message = None
 if __name__ == "__main__":
    # Script entry point: declare the three boolean switches, parse the
    # command line and pass the result to main().
    parser = OptionParser()
    # One row per switch: (flag, attribute set on the options object, help text).
    switches = (
        ("--message-types", "output_types",
         "Output MESSAGENAME,VALUE for every message"),
        ("--check-alignment", "check_alignment",
         "Check alignment for every member of each message"),
        ("--message-fields", "output_fields",
         "Output MESSAGENAME,OFFSET,FIELDNAME,SIZE for every message"),
    )
    for flag, dest, help_text in switches:
        # Every switch is off unless it is given on the command line.
        parser.add_option(flag, action="store_true", dest=dest,
                          default=False, help=help_text)
    options, args = parser.parse_args()
    main(options, args)
--- a/tools/structured.py
+++ b/tools/structured.py
@ -0,0 +1,45 @@
 formats = __import__("extract-formats")
 from io import StringIO
 import glob
 import collections
 import json
 class Options:
  """Fixed option set handed to the extract-formats main() function.

  Carries the same three attribute names that the command-line parser of
  extract-formats.py produces, so it can be passed where parsed options
  are expected.
  """
  # Do not verify field alignment.
  check_alignment = False
  # Emit one line per message field.
  output_fields = True
  # Emit one line per message type.
  output_types = True
 options = Options()
 # Flat list of CSV lines produced by extract-formats for all matching files,
 # in sorted file order.
 csv = []
 # Defined up front so the name exists even when no file matches the glob.
 output = StringIO()
 for path in sorted(glob.glob("../??-*.md")):
  # Use a fresh buffer for every file: getvalue() returns everything ever
  # written to a buffer, so reusing one would re-append the lines of all
  # previously processed files on each iteration.
  output = StringIO()
  with open(path) as f:
    formats.main(options, output=output, lines=f)
  csvstr = output.getvalue().strip()
  # Files that contain no message definitions contribute nothing.
  if csvstr:
    csv += csvstr.split("\n")
 # Build an ordered mapping: message name -> {"type": ..., "payload": fields},
 # where fields maps each field name to its position and length (all values
 # are kept as the strings found in the CSV lines).
 resmap = collections.OrderedDict()
 currentmsgname = None
 currentmsgfields = collections.OrderedDict()
 typenum = None
 for line in csv:
  parts = line.split(",")
  if len(parts) == 2:
    # Two columns: a message header "MESSAGENAME,VALUE".
    currentmsgname, typenum = parts
    currentmsgfields = collections.OrderedDict()
    # Register the message immediately; the payload dict is filled in place
    # by the field lines that follow. Registering only when the *next*
    # header shows up would silently drop the last message of the input.
    resmap[currentmsgname] = collections.OrderedDict([("type", typenum), ("payload", currentmsgfields)])
    continue
  # Anything else must be a field line "MESSAGENAME,OFFSET,FIELDNAME,SIZE"
  # that belongs to the message currently being collected.
  assert currentmsgname == parts[0], line
  assert len(parts) == 4, line
  _, position, fieldname, length = parts
  currentmsgfields[fieldname] = {"position": position, "length": length}
 if __name__ == "__main__":
  # indent=1 produces the same output as the former indent=True (True == 1).
  print(json.dumps(resmap, indent=1))