From 22544d778949128bc5a0816b1eca5b43d549d5e4 Mon Sep 17 00:00:00 2001
From: Janus <ysangkok@gmail.com>
Date: Sun, 2 Jul 2017 23:01:23 +0200
Subject: [PATCH] Script for extracting structured protocol messages

This refactors extract-formats.py so that other scripts can import it and call main(), while retaining its normal command-line behaviour.
The new script (structured.py) parses the CSV output and prints a JSON representation of the resulting OrderedDict (message name -> type and fields).
This could be used to write parsers.
---
 tools/extract-formats.py | 133 ++++++++++++++++++++-------------------
 tools/structured.py      |  45 +++++++++++++
 2 files changed, 115 insertions(+), 63 deletions(-)
 create mode 100644 tools/structured.py
diff --git a/tools/extract-formats.py b/tools/extract-formats.py
index e8c720c..8956561 100755
--- a/tools/extract-formats.py
+++ b/tools/extract-formats.py
@@ -30,7 +30,7 @@ def guess_alignment(message,name,sizestr):
 
     if 'signature' in name:
        return 1
-    
+
     # Size can be variable.
     try:
         size = int(sizestr)
@@ -51,73 +51,80 @@ def guess_alignment(message,name,sizestr):
 
     return 1
 
-parser = OptionParser()
-parser.add_option("--message-types",
-                  action="store_true", dest="output_types", default=False,
-                  help="Output MESSAGENAME,VALUE for every message")
-parser.add_option("--check-alignment",
-                  action="store_true", dest="check_alignment", default=False,
-                  help="Check alignment for every member of each message")
-parser.add_option("--message-fields",
-                  action="store_true", dest="output_fields", default=False,
-                  help="Output MESSAGENAME,OFFSET,FIELDNAME,SIZE for every message")
+def main(options, args=None, output=sys.stdout, lines=None):
+    # Example inputs:
+    # 1. type: 17 (`error`)
+    # 2. data:
+    #    * [`8`:`channel_id`]
+    #    * [`4`:`len`]
+    #    * [`len`:`data`]
+    #
+    # 1. type: PERM|NODE|3 (`required_node_feature_missing`)
+    message = None
+    havedata = None
+    typeline = re.compile('1\. type: (?P<value>[-0-9A-Za-z_|]+) \(`(?P<name>[A-Za-z_]+)`\)')
+    dataline = re.compile('\s+\* \[`(?P<size>[_a-z0-9*+]+)`:`(?P<name>[_a-z0-9]+)`\]')
 
-(options, args) = parser.parse_args()
+    if lines is None:
+        lines = fileinput.input(args)
 
-# Example inputs:
-# 1. type: 17 (`error`)
-# 2. data:
-#    * [`8`:`channel_id`]
-#    * [`4`:`len`]
-#    * [`len`:`data`]
-#
-# 1. type: PERM|NODE|3 (`required_node_feature_missing`)
-message = None
-havedata = None
-typeline = re.compile('1\. type: (?P<value>[-0-9A-Za-z_|]+) \(`(?P<name>[A-Za-z_]+)`\)')
-dataline = re.compile('\s+\* \[`(?P<size>[_a-z0-9*+]+)`:`(?P<name>[_a-z0-9]+)`\]')
+    for i,line in enumerate(lines):
+        line = line.rstrip()
+        linenum = i+1
 
-for i,line in enumerate(fileinput.input(args)):
-    line = line.rstrip()
-    linenum = i+1
-
-    match = typeline.fullmatch(line)
-    if match:
-        if message is not None:
-            raise ValueError('{}:Found a message while I was already in a message'.format(linenum))
-        message = match.group('name')
-        if options.output_types:
-            print("{},{}".format(match.group('name'), match.group('value')))
-        havedata = None
-        alignoff = False
-    elif message is not None and havedata is None:
-        if line != '2. data:':
-            message = None
-        havedata = True
-        dataoff = 0
-        off_extraterms = ""
-        maxalign = 1
-    elif message is not None and havedata is not None:
-        match = dataline.fullmatch(line)
+        match = typeline.fullmatch(line)
         if match:
-            align = guess_alignment(message, match.group('name'), match.group('size'))
+            if message is not None:
+                raise ValueError('{}:Found a message while I was already in a message'.format(linenum))
+            message = match.group('name')
+            if options.output_types:
+                print("{},{}".format(match.group('name'), match.group('value')), file=output)
+            havedata = None
+            alignoff = False
+        elif message is not None and havedata is None:
+            if line != '2. data:':
+                message = None
+            havedata = True
+            dataoff = 0
+            off_extraterms = ""
+            maxalign = 1
+        elif message is not None and havedata is not None:
+            match = dataline.fullmatch(line)
+            if match:
+                align = guess_alignment(message, match.group('name'), match.group('size'))
 
-            # Do not check alignment if we previously had a variable
-            # length field in the message
-            if off_extraterms != "":
-                alignoff = True
+                # Do not check alignment if we previously had a variable
+                # length field in the message
+                if off_extraterms != "":
+                    alignoff = True
 
-            if not alignoff and options.check_alignment and dataoff % align != 0:
-                raise ValueError('{}:message {} field {} Offset {} not aligned on {} boundary:'.format(linenum, message, match.group('name'), dataoff, align))
+                if not alignoff and options.check_alignment and dataoff % align != 0:
+                    raise ValueError('{}:message {} field {} Offset {} not aligned on {} boundary:'.format(linenum, message, match.group('name'), dataoff, align))
 
-            if options.output_fields:
-                print("{},{}{},{},{}".format(message,dataoff,off_extraterms,match.group('name'),match.group('size')))
+                if options.output_fields:
+                    print("{},{}{},{},{}".format(message,dataoff,off_extraterms,match.group('name'),match.group('size')), file=output)
 
-            # Size can be variable.
-            try:
-                dataoff += int(match.group('size'))
-            except ValueError:
-                # Offset has variable component.
-                off_extraterms = off_extraterms + "+" + match.group('size')
-        else:
-            message = None
+                # Size can be variable.
+                try:
+                    dataoff += int(match.group('size'))
+                except ValueError:
+                    # Offset has variable component.
+                    off_extraterms = off_extraterms + "+" + match.group('size')
+            else:
+                message = None
+
+if __name__=="__main__":
+    parser = OptionParser()
+    parser.add_option("--message-types",
+                      action="store_true", dest="output_types", default=False,
+                      help="Output MESSAGENAME,VALUE for every message")
+    parser.add_option("--check-alignment",
+                      action="store_true", dest="check_alignment", default=False,
+                      help="Check alignment for every member of each message")
+    parser.add_option("--message-fields",
+                      action="store_true", dest="output_fields", default=False,
+                      help="Output MESSAGENAME,OFFSET,FIELDNAME,SIZE for every message")
+
+    (options, args) = parser.parse_args()
+
+    main(options, args)
diff --git a/tools/structured.py b/tools/structured.py
new file mode 100644
index 0000000..f95bda0
--- /dev/null
+++ b/tools/structured.py
@@ -0,0 +1,45 @@
+"""Collect the messages found in ../??-*.md into `resmap`; print it as JSON when run as a script."""
+formats = __import__("extract-formats")  # hyphenated module name: a plain `import` cannot load it
+from io import StringIO
+import glob
+import collections
+import json
+
+class Options(object):
+  """Stand-in for the optparse options object that formats.main() reads."""
+  output_types = True
+  output_fields = True
+  check_alignment = False
+
+options = Options()
+csv = []
+
+for i in sorted(glob.glob("../??-*.md")):
+  # Fresh buffer per file: a shared one would hand back every earlier file's rows again.
+  output = StringIO()
+  with open(i) as f:
+    formats.main(options, output=output, lines=f.readlines())
+  csvstr = output.getvalue().strip()
+  if csvstr == "": continue
+  csv += csvstr.split("\n")
+
+resmap = collections.OrderedDict()
+
+currentmsgname = typenum = None
+currentmsgfields = {}
+for line in csv:
+  parts = line.split(",")
+  if len(parts) == 2:
+    # "NAME,VALUE" row opens a message; register it immediately so the last one is not lost.
+    currentmsgname, typenum = parts
+    currentmsgfields = collections.OrderedDict()
+    resmap[currentmsgname] = collections.OrderedDict([("type", typenum), ("payload", currentmsgfields)])
+    continue
+  # Otherwise a "NAME,OFFSET,FIELDNAME,SIZE" row that must belong to the current message.
+  assert currentmsgname == parts[0], line
+  assert len(parts) == 4, line
+  _, position, fieldname, length = parts
+  currentmsgfields[fieldname] = {"position": position, "length": length}
+
+if __name__ == "__main__":
+  print(json.dumps(resmap, indent=True))