From 149787e7c5c4d9cf5abd1a6bdb3074eae34de1b5 Mon Sep 17 00:00:00 2001
From: Jack Jansen <jack.jansen@cwi.nl>
Date: Sun, 17 Jul 2005 00:15:46 +0000
Subject: [PATCH] Added an option to the scanner to generated marked-up HTML
 from the input file. This should make it a lot easier (I hope) to get the
 regular expressions right.

---
 Tools/bgen/bgen/scantools.py | 167 +++++++++++++++++++++++++++++++++--
 1 file changed, 161 insertions(+), 6 deletions(-)
diff --git a/Tools/bgen/bgen/scantools.py b/Tools/bgen/bgen/scantools.py
index 4cf1cf97c84..aea43ea1f7a 100644
--- a/Tools/bgen/bgen/scantools.py
+++ b/Tools/bgen/bgen/scantools.py
@@ -32,6 +32,76 @@ except ImportError:
 
 Error = "scantools.Error"
 
+BEGINHTMLREPORT="""<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<html>
+<head>
+<style type="text/css">
+.unmatched {  }
+.commentstripping { color: grey; text-decoration: line-through }
+.comment { text-decoration: line-through }
+.notcomment { color: black }
+.incomplete { color: maroon }
+.constant { color: green }
+.pyconstant { background-color: yellow }
+.blconstant { background-color: yellow; color: red }
+.declaration { color: blue }
+.pydeclaration { background-color: yellow }
+.type { font-style: italic }
+.name { font-weight: bold }
+.value { font-style: italic }
+.arglist { text-decoration: underline }
+.blacklisted { background-color: yellow; color: red }
+</style>
+<title>Bgen scan report</title>
+</head>
+<body>
+<h1>Bgen scan report</h1>
+<h2>Legend</h2>
+<p>This scan report is intended to help you debug the regular expressions
+used by the bgen scanner. It consists of the original ".h" header file(s)
+marked up to show you what the regular expressions in the bgen parser matched
+for each line. NOTE: comments in the original source files may or may not be
+shown.</p>
+<p>The typographic conventions of this file are as follows:</p>
+<dl>
+<dt>comment stripping</dt>
+<dd><pre><span class="commentstripping"><span class="notcomment">comment stripping is </span><span class="comment">/* marked up */</span><span class="notcomment"> and the line is repeated if needed</span></span></pre>
+<p>If anything here does not appear to happen correctly look at
+<tt>comment1_pat</tt> and <tt>comment2_pat</tt>.</p>
+</dd>
+<dt>constant definitions</dt>
+<dd><pre><span class="constant">#define <span class="name">name</span> <span class="value">value</span></pre>
+<p>Highlights name and value of the constant. Governed by <tt>sym_pat</tt>.</p>
+</dd>
+<dt>function declaration</dt>
+<dd><pre><span class="declaration"><span class="type">char *</span><span class="name">rindex</span><span class="arglist">(<span class="type">const char *</span><span class="name">s</span>, <span class="type">int </span><span class="name">c</span>)</span>;</span></pre>
+<p>Highlights type, name and argument list. <tt>type_pat</tt>,
+<tt>name_pat</tt> and <tt>args_pat</tt> are combined into <tt>whole_pat</tt>, which
+is what is used here.</p></dd>
+</dd>
+<dt>incomplete match for function declaration</dt>
+<dd><pre><span class="incomplete"><span class="type">char *</span>foo;</span></pre>
+<p>The beginning of this looked promising, but it did not match a function declaration.
+In other words, it matched <tt>head_pat</tt> but not <tt>whole_pat</tt>. If the next
+declaration has also been gobbled up you need to look at <tt>end_pat</tt>.</p>
+</dd>
+<dt>unrecognized input</dt>
+<dd><pre><span class="unmatched">#include "type.h"</span></pre>
+<p>If there are function declarations the scanner has missed (i.e. things
+are in this class but you want them to be declarations) you need to adapt
+<tt>head_pat</tt>.
+</dd>
+</dl>
+<h2>Output</h2>
+<pre>
+<span class="unmatched">
+"""
+ENDHTMLREPORT="""</span>
+</pre>
+</body>
+</html>
+"""
+
 class Scanner:
 
     # Set to 1 in subclass to debug your scanner patterns.
@@ -232,9 +302,11 @@ if missing: raise "Missing Types"
         self.specmine = 0
         self.defsmine = 0
         self.scanmine = 0
+        self.htmlmine = 0
         self.specfile = sys.stdout
         self.defsfile = None
         self.scanfile = sys.stdin
+        self.htmlfile = None
         self.lineno = 0
         self.line = ""
 
@@ -286,6 +358,7 @@ if missing: raise "Missing Types"
         self.closespec()
         self.closedefs()
         self.closescan()
+        self.closehtml()
 
     def closespec(self):
         tmp = self.specmine and self.specfile
@@ -301,6 +374,12 @@ if missing: raise "Missing Types"
         tmp = self.scanmine and self.scanfile
         self.scanfile = None
         if tmp: tmp.close()
+        
+    def closehtml(self):
+        if self.htmlfile: self.htmlfile.write(ENDHTMLREPORT)
+        tmp = self.htmlmine and self.htmlfile
+        self.htmlfile = None
+        if tmp: tmp.close()
 
     def setoutput(self, spec, defs = None):
         self.closespec()
@@ -324,6 +403,19 @@ if missing: raise "Missing Types"
             self.defsfile = file
             self.defsmine = mine
 
+    def sethtmloutput(self, htmlfile):
+        self.closehtml()
+        if htmlfile:
+            if type(htmlfile) == StringType:
+                file = self.openoutput(htmlfile)
+                mine = 1
+            else:
+                file = htmlfile
+                mine = 0
+            self.htmlfile = file
+            self.htmlmine = mine
+            self.htmlfile.write(BEGINHTMLREPORT)
+            
     def openoutput(self, filename):
         try:
             file = open(filename, 'w')
@@ -408,11 +500,17 @@ if missing: raise "Missing Types"
                     self.report("LINE: %r" % (line,))
                 match = self.comment1.match(line)
                 if match:
+                    self.htmlreport(line, klass='commentstripping', ranges=[(
+                        match.start('rest'), match.end('rest'), 'notcomment')])
                     line = match.group('rest')
                     if self.debug:
                         self.report("\tafter comment1: %r" % (line,))
                 match = self.comment2.match(line)
                 while match:
+                    if match:
+                        self.htmlreport(line, klass='commentstripping', ranges=[
+                            (match.start('rest1'), match.end('rest1'), 'notcomment'),
+                            (match.start('rest2'), match.end('rest2'), 'notcomment')])
                     line = match.group('rest1')+match.group('rest2')
                     if self.debug:
                         self.report("\tafter comment2: %r" % (line,))
@@ -422,7 +520,7 @@ if missing: raise "Missing Types"
                     if match:
                         if self.debug:
                             self.report("\tmatches sym.")
-                        self.dosymdef(match)
+                        self.dosymdef(match, line)
                         continue
                 match = self.head.match(line)
                 if match:
@@ -430,19 +528,26 @@ if missing: raise "Missing Types"
                         self.report("\tmatches head.")
                     self.dofuncspec()
                     continue
+                self.htmlreport(line, klass='unmatched')
         except EOFError:
             self.error("Uncaught EOF error")
         self.reportusedtypes()
 
-    def dosymdef(self, match):
+    def dosymdef(self, match, line):
         name, defn = match.group('name', 'defn')
+        self.htmlreport(line, klass='constant', ranges=[
+            (match.start('name'), match.end('name'), 'name'),
+            (match.start('defn'), match.end('defn'), 'value')])
         defn = escape8bit(defn)
         if self.debug:
             self.report("\tsym: name=%r, defn=%r" % (name, defn))
         if not name in self.blacklistnames:
-            self.defsfile.write("%s = %s\n" % (name, defn))
+            oline = "%s = %s\n" % (name, defn)
+            self.defsfile.write(oline)
+            self.htmlreport(oline, klass="pyconstant")
         else:
             self.defsfile.write("# %s = %s\n" % (name, defn))
+            self.htmlreport("** no output: name is blacklisted", klass="blconstant")
         # XXXX No way to handle greylisted names
 
     def dofuncspec(self):
@@ -473,19 +578,30 @@ if missing: raise "Missing Types"
         if not match:
             self.report("Bad raw spec: %r", raw)
             if self.debug:
-                if not self.type.search(raw):
+                match = self.type.search(raw)
+                if not match:
                     self.report("(Type already doesn't match)")
+                    self.htmlreport(raw, klass='incomplete', ranges=[(
+                        match.start('type'), match.end('type'), 'type')])
                 else:
                     self.report("(but type matched)")
+                    self.htmlreport(raw, klass='incomplete')
             return
         type, name, args = match.group('type', 'name', 'args')
+        ranges=[
+                (match.start('type'), match.end('type'), 'type'),
+                (match.start('name'), match.end('name'), 'name'),
+                (match.start('args'), match.end('args'), 'arglist')]
+        self.htmlreport(raw, klass='declaration', ranges=ranges)
         modifiers = self.getmodifiers(match)
         type = self.pythonizename(type)
         name = self.pythonizename(name)
         if self.checkduplicate(name):
+            self.htmlreport("*** no output generated: duplicate name", klass="blacklisted")
             return
         self.report("==> %s %s <==", type, name)
         if self.blacklisted(type, name):
+            self.htmlreport("*** no output generated: function name or return type blacklisted", klass="blacklisted")
             self.report("*** %s %s blacklisted", type, name)
             return
         returnlist = [(type, name, 'ReturnMode')]
@@ -494,6 +610,7 @@ if missing: raise "Missing Types"
         arglist = self.extractarglist(args)
         arglist = self.repairarglist(name, arglist)
         if self.unmanageable(type, name, arglist):
+            self.htmlreport("*** no output generated: some argument blacklisted", klass="blacklisted")
             ##for arg in arglist:
             ##  self.report("    %r", arg)
             self.report("*** %s %s unmanageable", type, name)
@@ -611,8 +728,12 @@ if missing: raise "Missing Types"
             classname, listname = self.destination(tp, name, arglist, modifiers)
         else:
             classname, listname = self.destination(tp, name, arglist)
-        if not classname or not listname: return
-        if not self.specfile: return
+        if not classname or not listname: 
+            self.htmlreport("*** no output generated: self.destination() returned None", klass="blacklisted")
+            return
+        if not self.specfile: 
+            self.htmlreport("*** no output generated: no output file specified", klass="blacklisted")
+            return
         self.specfile.write("f = %s(%s, %r,\n" % (classname, tp, name))
         for atype, aname, amode in arglist:
             self.typeused(atype, amode)
@@ -623,6 +744,12 @@ if missing: raise "Missing Types"
         self.generatemodifiers(classname, name, modifiers)
         self.specfile.write(")\n")
         self.specfile.write("%s.append(f)\n\n" % listname)
+        if self.htmlfile:
+            oline = "Adding to %s:\n%s(returntype=%s, name=%r" % (listname, classname, tp, name)
+            for atype, aname, amode in arglist:
+                oline += ",\n    (%s, %r, %s)" % (atype, aname, amode)
+            oline += ")\n"
+            self.htmlreport(oline, klass="pydeclaration")
 
     def destination(self, type, name, arglist):
         return "FunctionGenerator", "functions"
@@ -646,6 +773,34 @@ if missing: raise "Missing Types"
                 return 1
         return 0
 
+    def htmlreport(self, line, klass=None, ranges=None):
+        if not self.htmlfile: return
+        if ranges is None:
+            ranges = []
+        if klass:
+            ranges.insert(0, (0, len(line), klass))
+        oline = ''
+        i = 0
+        for c in line:
+            for b, e, name in ranges:
+                if b == i:
+                    oline += '<span class="%s">' % name
+                if e == i:
+                    oline += '</span>'
+            i += 1
+
+            if c == '<': oline += '&lt;'
+            elif c == '>': oline += '&gt;'
+            else: oline += c
+        for b, e, name in ranges:
+            if b >= i:
+                oline += '<span class="%s">' % name
+            if e >= i:
+                oline += '</span>'
+        if not line or line[-1] != '\n':
+            oline += '\n'
+        self.htmlfile.write(oline)
+        
 class Scanner_PreUH3(Scanner):
     """Scanner for Universal Headers before release 3"""
     def initpatterns(self):