2003-05-10 00:30:13 -03:00
|
|
|
""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
|
|
|
|
|
|
|
|
Written by Raymond D. Hettinger <python at rcn.com>
|
|
|
|
Copyright (c) 2003 Python Software Foundation. All rights reserved.
|
|
|
|
|
|
|
|
Designed to catch common markup errors including:
|
|
|
|
* Unbalanced or mismatched parentheses, brackets, and braces.
|
2003-05-16 00:06:39 -03:00
|
|
|
* Unbalanced or mismatched \\begin and \\end blocks.
|
2003-05-10 00:30:13 -03:00
|
|
|
* Misspelled or invalid LaTeX commands.
|
|
|
|
* Use of forward slashes instead of backslashes for commands.
|
2003-05-16 00:06:39 -03:00
|
|
|
* Table line size mismatches.
|
2003-05-10 00:30:13 -03:00
|
|
|
|
2003-05-16 00:06:39 -03:00
|
|
|
Sample command line usage:
|
|
|
|
python texcheck.py -k chapterheading -m lib/librandomtex *.tex
|
2003-05-10 00:30:13 -03:00
|
|
|
|
|
|
|
Options:
|
2003-05-12 20:33:28 -03:00
|
|
|
-m: Munge parentheses and brackets. Without it, [0,n) would mismatch.
|
|
|
|
-k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
|
2003-05-10 00:30:13 -03:00
|
|
|
-d: Delimiter check only (useful for non-LaTeX files).
|
|
|
|
-h: Help
|
|
|
|
-s lineno: Start at lineno (useful for skipping complex sections).
|
2003-05-16 00:06:39 -03:00
|
|
|
-v: Verbose. Trace the matching of \\begin and \\end blocks.
|
2003-05-10 00:30:13 -03:00
|
|
|
"""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import sets
|
|
|
|
import sys
|
|
|
|
import getopt
|
|
|
|
from itertools import izip, count, islice
|
2003-05-16 00:06:39 -03:00
|
|
|
import glob
|
2003-05-10 00:30:13 -03:00
|
|
|
|
|
|
|
# Whitespace-separated list of the LaTeX commands treated as valid.
# checkit() splits this string and loads the pieces into a set, so layout
# and ordering are irrelevant; each command is listed exactly once.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e
    \exindex \linev \newsgroup \verbatim \setshortversion
"""
|
|
|
|
|
|
|
|
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Check a closing delimiter against the most recently opened one.

    c_lineno and c_symbol describe the closer that was just read.
    openers is the stack (a list) of pending (lineno, symbol) openers; its
    top entry is removed by this call.  pairmap maps a closing punctuation
    character to the opener characters it may close; a closer that is not a
    key of pairmap must be closed by an opener with the very same symbol.

    Problems are reported on standard output; nothing is returned.
    """
    # print(...) with one parenthesized argument produces the same output
    # whether print is a statement or a function.
    if not openers:
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol))
        return

    o_lineno, o_symbol = openers.pop()
    acceptable = pairmap.get(c_symbol, [c_symbol])
    if o_symbol not in acceptable:
        print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
|
2003-05-10 00:30:13 -03:00
|
|
|
|
|
|
|
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (for example an open file).

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Every finding is printed to standard output.  The return value is
    always 0.
    """

    # print(...) is always called with one parenthesized argument, which
    # produces the same output whether print is a statement or a function.

    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)')  # Mismarked with forward slash

    validcmds = sets.Set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    verbose = '-v' in opts
    delimsonly = '-d' in opts

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = sets.Set('([')              # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The letter class must be A-Za-z; the range A-z would also admit the
    # punctuation characters that sit between 'Z' and 'a'.
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []                            # Stack of pending open delimiters
    bracestack = []                         # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    lineno = 0      # Stays 0 when there is nothing to scan

    for lineno, line in izip(count(startline), islice(source, startline-1, None)):
        line = line.rstrip()

        # Check balancing of open/close parenthesis, brackets, and begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # No newline here: the state of the opener stack is
                # appended to this same trace line further down.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and not delimsonly:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delimsonly:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opener, closer in braces.findall(line):
            if opener == '{':
                bracestack.append(lineno)
            if closer == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if delimsonly:
            continue

        # Warn whenever forward slashes encountered with a LaTeX command
        for cmd in falsetexcmd.findall(line):
            if '822' in line or '.html' in line:
                continue    # Ignore false positives for urls and for /rfc822
            if '\\' + cmd in validcmds:
                print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands
        nc = line.find(r'\newcommand')
        if nc != -1:
            start = line.find('{', nc)
            end = line.find('}', start)
            # Only register the new command when both braces are on this
            # line; otherwise the slice would be an arbitrary piece of text.
            if start != -1 and end != -1:
                validcmds.add(line[start+1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                # cmd already carries its leading backslash.
                print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    # Report whatever is still open once the input is exhausted.
    lastline = lineno
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    for b_lineno in bracestack:
        print("Unmatched { on line %d" % (b_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
|
|
|
|
|
|
|
|
def main(args=None):
    """Command line entry point: parse the options and check each file.

    Args is the list of command line arguments without the program name;
    when it is None, sys.argv[1:] is used.

    Returns 0 after printing the help text, 1 when there is no file to
    check (none given, or no wildcard matched anything), 2 when a file
    cannot be opened, and otherwise the largest status returned by
    checkit() for the files checked.
    """
    # print(...) is always called with one parenthesized argument, which
    # produces the same output whether print is a statement or a function.
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or args == []:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a separate list.  Rewriting arglist in place
    # while iterating over it skips the entry that follows a pattern
    # with no matches.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)
    if not filenames:
        print('No files match the given file specification')
        return 1

    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            # Name the file that actually failed, not the first argument.
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)
|
2003-05-10 00:30:13 -03:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the checker and hand its status code back to the shell.
    exit_status = main()
    sys.exit(exit_status)
|
|
|
|
|