From 7d39055f7faa7650b474113eda813b48baeab5b2 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <python@rcn.com>
Date: Mon, 9 Jul 2012 23:52:08 -0700
Subject: [PATCH] Add simple LaTeX markup

---
 Tools/scripts/highlight.py | 98 +++++++++++++++++++++++++++++++++-----
 1 file changed, 85 insertions(+), 13 deletions(-)
diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py
index ebf96b7f2a4..763be837e3b 100755
--- a/Tools/scripts/highlight.py
+++ b/Tools/scripts/highlight.py
@@ -1,12 +1,11 @@
 #!/usr/bin/env python3
 '''Add syntax highlighting to Python source code'''
 
-__all__ = ['analyze_python', 'ansi_highlight', 'default_ansi',
-           'html_highlight', 'build_html_page', 'default_css', 'default_html']
-
 __author__ = 'Raymond Hettinger'
 
-import keyword, tokenize, cgi, functools
+import keyword, tokenize, cgi, re, functools
+
+#### Analyze Python Source #################################
 
 def is_builtin(s):
     'Return True if s is the name of a builtin'
@@ -60,6 +59,20 @@ def analyze_python(source):
     line_upto_token, written = combine_range(lines, written, (erow, ecol))
     yield line_upto_token, '', ''
 
+#### Raw Output  ###########################################
+
+def raw_highlight(classified_text):
+    'Dump classified text as one "kind:  repr(text)" line per fragment'
+    rows = []
+    for leadin, kind, token in classified_text:
+        if leadin:      # unclassified text preceding the token
+            rows += ['          plain:  %r\n' % leadin]
+        if token:
+            rows += ['%15s:  %r\n' % (kind, token)]
+    return ''.join(rows)
+
+#### ANSI Output ###########################################
+
 default_ansi = {
     'comment': ('\033[0;31m', '\033[0m'),
     'string': ('\033[0;32m', '\033[0m'),
@@ -80,6 +93,8 @@ def ansi_highlight(classified_text, colors=default_ansi):
         result += [line_upto_token, opener, line_thru_token, closer]
     return ''.join(result)
 
+#### HTML Output ###########################################
+
 def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
     'Convert classified text to an HTML fragment'
     result = [opener]
@@ -131,6 +146,59 @@ def build_html_page(classified_text, title='python',
     title = cgi.escape(title)
     return html.format(title=title, css=css_str, body=result)
 
+#### LaTeX Output ##########################################
+
+default_latex_colors = dict(
+    # Maps each token kind to the color name used in \color{...}
+    comment='red',
+    string='green', docstring='green',
+    keyword='orange',
+    builtin='purple',
+    definition='orange',
+    defname='blue',
+    operator='brown',
+)
+
+default_latex_document = r'''
+\documentclass{article}
+\usepackage{alltt}
+\usepackage{color}
+\usepackage[usenames,dvipsnames]{xcolor}
+\usepackage[cm]{fullpage}
+\begin{document}
+\center{\LARGE{%(title)s}}
+\begin{alltt}
+%(body)s
+\end{alltt}
+\end{document}
+'''  # %-format template: expects a mapping with 'title' and 'body' keys
+
+def latex_escape(s):
+    'Return s with each LaTeX special character swapped for its escaped form'
+    # http://en.wikibooks.org/wiki/LaTeX/Basics#Special_Characters
+    # A single translate pass never re-escapes the backslashes it inserts.
+    return s.translate(str.maketrans({
+        '\\': r'\textbackslash{}', '^': r'\textasciicircum{}', '~': r'\~{}',
+        '#': r'\#', '$': r'\$', '%': r'\%', '&': r'\&', '_': r'\_',
+        '{': r'\{', '}': r'\}',
+    }))
+
+def latex_highlight(classified_text, title = 'python',
+                    colors = default_latex_colors,
+                    document = default_latex_document):
+    '''Create a complete LaTeX document with colorized source code.
+
+    title is escaped for the heading, colors maps each token kind to a color
+    name, and document is a %-format template with 'title' and 'body' keys.
+    '''
+    result = []
+    for line_upto_token, kind, line_thru_token in classified_text:
+        token = latex_escape(line_thru_token)
+        if kind:
+            token = r'{\color{%s}%s}' % (colors[kind], token)
+        result += [latex_escape(line_upto_token), token]
+    return document % dict(title=latex_escape(title), body=''.join(result))
+
 
 if __name__ == '__main__':
     import sys, argparse, webbrowser, os, textwrap
@@ -152,6 +220,10 @@ if __name__ == '__main__':
 
                   # Create a complete HTML file
                   $ ./highlight.py -c myfile.py > myfile.html
+
+                  # Create a PDF using LaTeX
+                  $ ./highlight.py -l myfile.py | pdflatex
+
             '''))
     parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
             help = 'file containing Python sourcecode')
@@ -159,10 +231,12 @@ if __name__ == '__main__':
             help = 'launch a browser to show results')
     parser.add_argument('-c', '--complete', action = 'store_true',
             help = 'build a complete html webpage')
+    parser.add_argument('-l', '--latex', action = 'store_true',
+            help = 'build a LaTeX document')
+    parser.add_argument('-r', '--raw', action = 'store_true',
+            help = 'raw parse of categorized text')
     parser.add_argument('-s', '--section', action = 'store_true',
             help = 'show an HTML section rather than a complete webpage')
-    parser.add_argument('-v', '--verbose', action = 'store_true',
-            help = 'display categorized text to stderr')
     args = parser.parse_args()
 
     if args.section and (args.browser or args.complete):
@@ -174,16 +248,14 @@ if __name__ == '__main__':
         source = f.read()
     classified_text = analyze_python(source)
 
-    if args.verbose:
-        classified_text = list(classified_text)
-        for line_upto_token, kind, line_thru_token in classified_text:
-            sys.stderr.write('%15s:  %r\n' % ('leadin', line_upto_token))
-            sys.stderr.write('%15s:  %r\n\n' % (kind, line_thru_token))
-
-    if args.complete or args.browser:
+    if args.raw:
+        encoded = raw_highlight(classified_text)
+    elif args.complete or args.browser:
         encoded = build_html_page(classified_text, title=sourcefile)
     elif args.section:
         encoded = html_highlight(classified_text)
+    elif args.latex:
+        encoded = latex_highlight(classified_text, title=sourcefile)
     else:
         encoded = ansi_highlight(classified_text)