From 7d39055f7faa7650b474113eda813b48baeab5b2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 9 Jul 2012 23:52:08 -0700 Subject: [PATCH] Add simple LaTeX markup --- Tools/scripts/highlight.py | 98 +++++++++++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 13 deletions(-) diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py index ebf96b7f2a4..763be837e3b 100755 --- a/Tools/scripts/highlight.py +++ b/Tools/scripts/highlight.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 '''Add syntax highlighting to Python source code''' -__all__ = ['analyze_python', 'ansi_highlight', 'default_ansi', - 'html_highlight', 'build_html_page', 'default_css', 'default_html'] - __author__ = 'Raymond Hettinger' -import keyword, tokenize, cgi, functools +import keyword, tokenize, cgi, re, functools + +#### Analyze Python Source ################################# def is_builtin(s): 'Return True if s is the name of a builtin' @@ -60,6 +59,20 @@ def analyze_python(source): line_upto_token, written = combine_range(lines, written, (erow, ecol)) yield line_upto_token, '', '' +#### Raw Output ########################################### + +def raw_highlight(classified_text): + 'Straight text display of text classifications' + result = [] + for line_upto_token, kind, line_thru_token in classified_text: + if line_upto_token: + result.append(' plain: %r\n' % line_upto_token) + if line_thru_token: + result.append('%15s: %r\n' % (kind, line_thru_token)) + return ''.join(result) + +#### ANSI Output ########################################### + default_ansi = { 'comment': ('\033[0;31m', '\033[0m'), 'string': ('\033[0;32m', '\033[0m'), @@ -80,6 +93,8 @@ def ansi_highlight(classified_text, colors=default_ansi): result += [line_upto_token, opener, line_thru_token, closer] return ''.join(result) +#### HTML Output ########################################### + def html_highlight(classified_text,opener='
\n', closer='
\n'): 'Convert classified text to an HTML fragment' result = [opener] @@ -131,6 +146,59 @@ def build_html_page(classified_text, title='python', title = cgi.escape(title) return html.format(title=title, css=css_str, body=result) +#### LaTeX Output ########################################## + +default_latex_colors = { + 'comment': 'red', + 'string': 'green', + 'docstring': 'green', + 'keyword': 'orange', + 'builtin': 'purple', + 'definition': 'orange', + 'defname': 'blue', + 'operator': 'brown', +} + +default_latex_document = r''' +\documentclass{article} +\usepackage{alltt} +\usepackage{color} +\usepackage[usenames,dvipsnames]{xcolor} +\usepackage[cm]{fullpage} +\begin{document} +\center{\LARGE{%(title)s}} +\begin{alltt} +%(body)s +\end{alltt} +\end{document} +''' + +def latex_escape(s): + 'Replace LaTeX special characters with their escaped equivalents' + # http://en.wikibooks.org/wiki/LaTeX/Basics#Special_Characters + xlat = { + '#': r'\#', '$': r'\$', '%': r'\%', '^': r'\textasciicircum{}', + '&': r'\&', '_': r'\_', '{': r'\{', '}': r'\}', '~': r'\~{}', + '\\': r'\textbackslash{}', + } + return re.sub(r'[\\#$%^&_{}~]', lambda mo: xlat[mo.group()], s) + +def latex_highlight(classified_text, title = 'python', + colors = default_latex_colors, + document = default_latex_document): + 'Create a complete LaTeX document with colorized source code' + result = [] + for line_upto_token, kind, line_thru_token in classified_text: + if kind: + result += [latex_escape(line_upto_token), + r'{\color{%s}' % colors[kind], + latex_escape(line_thru_token), + '}'] + else: + result += [latex_escape(line_upto_token), + latex_escape(line_thru_token)] + return default_latex_document % dict(title=title, body=''.join(result)) + if __name__ == '__main__': import sys, argparse, webbrowser, os, textwrap @@ -152,6 +220,10 @@ if __name__ == '__main__': # Create a complete HTML file $ ./highlight.py -c myfile.py > myfile.html + + # Create a PDF using LaTeX + $ ./highlight.py -l myfile.py | pdflatex + ''')) parser.add_argument('sourcefile', metavar = 'SOURCEFILE', help = 'file containing Python sourcecode') @@ -159,10 +231,12 @@ if __name__ == '__main__': help = 'launch a browser to show results') parser.add_argument('-c', '--complete', action = 'store_true', help = 'build a complete html webpage') + parser.add_argument('-l', '--latex', action = 'store_true', + help = 'build a LaTeX document') + parser.add_argument('-r', '--raw', action = 'store_true', + help = 'raw parse of categorized text') parser.add_argument('-s', '--section', action = 'store_true', help = 'show an HTML section rather than a complete webpage') - parser.add_argument('-v', '--verbose', action = 'store_true', - help = 'display categorized text to stderr') args = parser.parse_args() if args.section and (args.browser or args.complete): @@ -174,16 +248,14 @@ if __name__ == '__main__': source = f.read() classified_text = analyze_python(source) - if args.verbose: - classified_text = list(classified_text) - for line_upto_token, kind, line_thru_token in classified_text: - sys.stderr.write('%15s: %r\n' % ('leadin', line_upto_token)) - sys.stderr.write('%15s: %r\n\n' % (kind, line_thru_token)) - - if args.complete or args.browser: + if args.raw: + encoded = raw_highlight(classified_text) + elif args.complete or args.browser: encoded = build_html_page(classified_text, title=sourcefile) elif args.section: encoded = html_highlight(classified_text) + elif args.latex: + encoded = latex_highlight(classified_text, title=sourcefile) else: encoded = ansi_highlight(classified_text)