418 lines
16 KiB
Python
Executable File
418 lines
16 KiB
Python
Executable File
#! /usr/bin/env python
|
|
|
|
"""Generate ESIS events based on a LaTeX source document and configuration
|
|
data.
|
|
"""
|
|
__version__ = '$Revision$'
|
|
|
|
import errno
|
|
import re
|
|
import string
|
|
import StringIO
|
|
import sys
|
|
|
|
from esistools import encode
|
|
|
|
|
|
DEBUG = 0
|
|
|
|
|
|
class Error(Exception):
|
|
pass
|
|
|
|
class LaTeXFormatError(Error):
|
|
pass
|
|
|
|
|
|
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
|
|
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
|
|
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
|
|
_comment_rx = re.compile("%+ ?(.*)\n *")
|
|
_text_rx = re.compile(r"[^]%\\{}]+")
|
|
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
|
|
# _parameter_rx is this complicated to allow {...} inside a parameter;
|
|
# this is useful to match tabular layout specifications like {c|p{24pt}}
|
|
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
|
|
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
|
_start_group_rx = re.compile("[ \n]*{")
|
|
_start_optional_rx = re.compile("[ \n]*[[]")
|
|
|
|
|
|
ESCAPED_CHARS = "$%#^ {}&~"
|
|
|
|
|
|
def pushing(name, point, depth):
|
|
if DEBUG:
|
|
sys.stderr.write("%s<%s> at %s\n" % (" "*depth, name, point))
|
|
|
|
def popping(name, point, depth):
|
|
if DEBUG:
|
|
sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point))
|
|
|
|
|
|
def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
|
|
if DEBUG and endchar:
|
|
sys.stderr.write("subconvert(%s, ..., endchar=%s)\n"
|
|
% (`line[:20]`, `endchar`))
|
|
stack = []
|
|
while line:
|
|
if line[0] == endchar and not stack:
|
|
if DEBUG:
|
|
sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`)
|
|
return line[1:]
|
|
m = _comment_rx.match(line)
|
|
if m:
|
|
text = m.group(1)
|
|
if text:
|
|
ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text))
|
|
line = line[m.end():]
|
|
continue
|
|
m = _begin_env_rx.match(line)
|
|
if m:
|
|
# re-write to use the macro handler
|
|
line = r"\%s %s" % (m.group(1), line[m.end():])
|
|
continue
|
|
m = _end_env_rx.match(line)
|
|
if m:
|
|
# end of environment
|
|
envname = m.group(1)
|
|
if envname == "document":
|
|
# special magic
|
|
for n in stack[1:]:
|
|
if n not in autoclosing:
|
|
raise LaTeXFormatError("open element on stack: " + `n`)
|
|
# should be more careful, but this is easier to code:
|
|
stack = []
|
|
ofp.write(")document\n")
|
|
elif envname == stack[-1]:
|
|
ofp.write(")%s\n" % envname)
|
|
del stack[-1]
|
|
popping(envname, "a", len(stack) + depth)
|
|
else:
|
|
sys.stderr.write("stack: %s\n" % `stack`)
|
|
raise LaTeXFormatError(
|
|
"environment close for %s doesn't match" % envname)
|
|
line = line[m.end():]
|
|
continue
|
|
m = _begin_macro_rx.match(line)
|
|
if m:
|
|
# start of macro
|
|
macroname = m.group(1)
|
|
if macroname == "verbatim":
|
|
# really magic case!
|
|
pos = string.find(line, "\\end{verbatim}")
|
|
text = line[m.end(1):pos]
|
|
ofp.write("(verbatim\n")
|
|
ofp.write("-%s\n" % encode(text))
|
|
ofp.write(")verbatim\n")
|
|
line = line[pos + len("\\end{verbatim}"):]
|
|
continue
|
|
numbered = 1
|
|
if macroname[-1] == "*":
|
|
macroname = macroname[:-1]
|
|
numbered = 0
|
|
if macroname in autoclosing and macroname in stack:
|
|
while stack[-1] != macroname:
|
|
if stack[-1] and stack[-1] not in discards:
|
|
ofp.write(")%s\n-\\n\n" % stack[-1])
|
|
popping(stack[-1], "b", len(stack) + depth - 1)
|
|
del stack[-1]
|
|
if macroname not in discards:
|
|
ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
|
|
popping(macroname, "c", len(stack) + depth - 1)
|
|
del stack[-1]
|
|
real_ofp = ofp
|
|
if macroname in discards:
|
|
ofp = StringIO.StringIO()
|
|
#
|
|
conversion = table.get(macroname, ([], 0, 0, 0, 0))
|
|
params, optional, empty, environ, nocontent = conversion
|
|
if empty:
|
|
ofp.write("e\n")
|
|
elif nocontent:
|
|
empty = 1
|
|
if not numbered:
|
|
ofp.write("Anumbered TOKEN no\n")
|
|
opened = 0
|
|
# rip off the macroname
|
|
if params:
|
|
if optional and len(params) == 1:
|
|
line = line = line[m.end():]
|
|
else:
|
|
line = line[m.end(1):]
|
|
elif empty:
|
|
line = line[m.end(1):]
|
|
else:
|
|
line = line[m.end():]
|
|
#
|
|
# Very ugly special case to deal with \item[]. The catch is that
|
|
# this needs to occur outside the for loop that handles attribute
|
|
# parsing so we can 'continue' the outer loop.
|
|
#
|
|
if optional and type(params[0]) is type(()):
|
|
# the attribute name isn't used in this special case
|
|
pushing(macroname, "a", depth + len(stack))
|
|
stack.append(macroname)
|
|
ofp.write("(%s\n" % macroname)
|
|
m = _start_optional_rx.match(line)
|
|
if m:
|
|
line = line[m.end():]
|
|
line = subconvert(line, ofp, table, discards,
|
|
autoclosing, endchar="]",
|
|
depth=depth + len(stack))
|
|
line = "}" + line
|
|
continue
|
|
# handle attribute mappings here:
|
|
for attrname in params:
|
|
if optional:
|
|
optional = 0
|
|
if type(attrname) is type(""):
|
|
m = _optional_rx.match(line)
|
|
if m:
|
|
line = line[m.end():]
|
|
ofp.write("A%s TOKEN %s\n"
|
|
% (attrname, encode(m.group(1))))
|
|
elif type(attrname) is type(()):
|
|
# This is a sub-element; but don't place the
|
|
# element we found on the stack (\section-like)
|
|
pushing(macroname, "b", len(stack) + depth)
|
|
stack.append(macroname)
|
|
ofp.write("(%s\n" % macroname)
|
|
macroname = attrname[0]
|
|
m = _start_group_rx.match(line)
|
|
if m:
|
|
line = line[m.end():]
|
|
elif type(attrname) is type([]):
|
|
# A normal subelement.
|
|
attrname = attrname[0]
|
|
if not opened:
|
|
opened = 1
|
|
ofp.write("(%s\n" % macroname)
|
|
pushing(macroname, "c", len(stack) + depth)
|
|
ofp.write("(%s\n" % attrname)
|
|
pushing(attrname, "sub-elem", len(stack) + depth + 1)
|
|
line = subconvert(skip_white(line)[1:], ofp, table,
|
|
discards, autoclosing, endchar="}",
|
|
depth=depth + len(stack) + 2)
|
|
popping(attrname, "sub-elem", len(stack) + depth + 1)
|
|
ofp.write(")%s\n" % attrname)
|
|
else:
|
|
m = _parameter_rx.match(line)
|
|
if not m:
|
|
raise LaTeXFormatError(
|
|
"could not extract parameter %s for %s: %s"
|
|
% (attrname, macroname, `line[:100]`))
|
|
value = m.group(1)
|
|
if _token_rx.match(value):
|
|
dtype = "TOKEN"
|
|
else:
|
|
dtype = "CDATA"
|
|
ofp.write("A%s %s %s\n"
|
|
% (attrname, dtype, encode(value)))
|
|
line = line[m.end():]
|
|
if params and type(params[-1]) is type('') \
|
|
and (not empty) and not environ:
|
|
# attempt to strip off next '{'
|
|
m = _start_group_rx.match(line)
|
|
if not m:
|
|
raise LaTeXFormatError(
|
|
"non-empty element '%s' has no content: %s"
|
|
% (macroname, line[:12]))
|
|
line = line[m.end():]
|
|
if not opened:
|
|
ofp.write("(%s\n" % macroname)
|
|
pushing(macroname, "d", len(stack) + depth)
|
|
if empty:
|
|
line = "}" + line
|
|
stack.append(macroname)
|
|
ofp = real_ofp
|
|
continue
|
|
if line[0] == endchar and not stack:
|
|
if DEBUG:
|
|
sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`)
|
|
return line[1:]
|
|
if line[0] == "}":
|
|
# end of macro
|
|
macroname = stack[-1]
|
|
conversion = table.get(macroname)
|
|
if macroname \
|
|
and macroname not in discards \
|
|
and type(conversion) is not type(""):
|
|
# otherwise, it was just a bare group
|
|
ofp.write(")%s\n" % stack[-1])
|
|
popping(macroname, "d", len(stack) + depth - 1)
|
|
del stack[-1]
|
|
line = line[1:]
|
|
continue
|
|
if line[0] == "{":
|
|
pushing("", "e", len(stack) + depth)
|
|
stack.append("")
|
|
line = line[1:]
|
|
continue
|
|
if line[0] == "\\" and line[1] in ESCAPED_CHARS:
|
|
ofp.write("-%s\n" % encode(line[1]))
|
|
line = line[2:]
|
|
continue
|
|
if line[:2] == r"\\":
|
|
ofp.write("(BREAK\n)BREAK\n")
|
|
line = line[2:]
|
|
continue
|
|
m = _text_rx.match(line)
|
|
if m:
|
|
text = encode(m.group())
|
|
ofp.write("-%s\n" % text)
|
|
line = line[m.end():]
|
|
continue
|
|
# special case because of \item[]
|
|
if line[0] == "]":
|
|
ofp.write("-]\n")
|
|
line = line[1:]
|
|
continue
|
|
# avoid infinite loops
|
|
extra = ""
|
|
if len(line) > 100:
|
|
extra = "..."
|
|
raise LaTeXFormatError("could not identify markup: %s%s"
|
|
% (`line[:100]`, extra))
|
|
while stack and stack[-1] in autoclosing:
|
|
ofp.write("-\\n\n")
|
|
ofp.write(")%s\n" % stack[-1])
|
|
popping(stack[-1], "e", len(stack) + depth - 1)
|
|
del stack[-1]
|
|
if stack:
|
|
raise LaTeXFormatError("elements remain on stack: "
|
|
+ string.join(stack))
|
|
# otherwise we just ran out of input here...
|
|
|
|
|
|
def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
|
|
lines = string.split(ifp.read(), "\n")
|
|
for i in range(len(lines)):
|
|
lines[i] = string.rstrip(lines[i])
|
|
data = string.join(lines, "\n")
|
|
try:
|
|
subconvert(data, ofp, table, discards, autoclosing)
|
|
except IOError, (err, msg):
|
|
if err != errno.EPIPE:
|
|
raise
|
|
|
|
|
|
def skip_white(line):
|
|
while line and line[0] in " %\n\t":
|
|
line = string.lstrip(line[1:])
|
|
return line
|
|
|
|
|
|
def main():
|
|
if len(sys.argv) == 2:
|
|
ifp = open(sys.argv[1])
|
|
ofp = sys.stdout
|
|
elif len(sys.argv) == 3:
|
|
ifp = open(sys.argv[1])
|
|
ofp = open(sys.argv[2], "w")
|
|
else:
|
|
usage()
|
|
sys.exit(2)
|
|
convert(ifp, ofp, {
|
|
# entries have the form:
|
|
# name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
|
|
# attribute names can be:
|
|
# "string" -- normal attribute
|
|
# ("string",) -- sub-element with content of macro; like for \section
|
|
# ["string"] -- sub-element
|
|
"appendix": ([], 0, 1, 0, 0),
|
|
"bifuncindex": (["name"], 0, 1, 0, 0),
|
|
"catcode": ([], 0, 1, 0, 0),
|
|
"cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
|
|
"chapter": ([("title",)], 0, 0, 0, 0),
|
|
"chapter*": ([("title",)], 0, 0, 0, 0),
|
|
"classdesc": (["name", ("constructor-args",)], 0, 0, 1, 0),
|
|
"ctypedesc": (["name"], 0, 0, 1, 0),
|
|
"cvardesc": (["type", "name"], 0, 0, 1, 0),
|
|
"datadesc": (["name"], 0, 0, 1, 0),
|
|
"declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
|
|
"deprecated": (["release"], 0, 0, 0, 0),
|
|
"documentclass": (["classname"], 0, 1, 0, 0),
|
|
"excdesc": (["name"], 0, 0, 1, 0),
|
|
"funcdesc": (["name", ("args",)], 0, 0, 1, 0),
|
|
"funcdescni": (["name", ("args",)], 0, 0, 1, 0),
|
|
"geq": ([], 0, 1, 0, 0),
|
|
"hline": ([], 0, 1, 0, 0),
|
|
"indexii": (["ie1", "ie2"], 0, 1, 0, 0),
|
|
"indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
|
|
"indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
|
|
"indexname": ([], 0, 0, 0, 0),
|
|
"input": (["source"], 0, 1, 0, 0),
|
|
"item": ([("leader",)], 1, 0, 0, 0),
|
|
"label": (["id"], 0, 1, 0, 0),
|
|
"labelwidth": ([], 0, 1, 0, 0),
|
|
"LaTeX": ([], 0, 1, 0, 0),
|
|
"leftmargin": ([], 0, 1, 0, 0),
|
|
"leq": ([], 0, 1, 0, 0),
|
|
"lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
|
|
"lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
|
|
"lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
|
|
"localmoduletable": ([], 0, 1, 0, 0),
|
|
"makeindex": ([], 0, 1, 0, 0),
|
|
"makemodindex": ([], 0, 1, 0, 0),
|
|
"maketitle": ([], 0, 1, 0, 0),
|
|
"manpage": (["name", "section"], 0, 1, 0, 0),
|
|
"memberdesc": (["class", "name"], 1, 0, 1, 0),
|
|
"methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
|
|
"methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
|
|
"moduleauthor": (["name", "email"], 0, 1, 0, 0),
|
|
"opcodedesc": (["name", "var"], 0, 0, 1, 0),
|
|
"par": ([], 0, 1, 0, 0),
|
|
"paragraph": ([("title",)], 0, 0, 0, 0),
|
|
"renewcommand": (["macro"], 0, 0, 0, 0),
|
|
"rfc": (["number"], 0, 1, 0, 0),
|
|
"section": ([("title",)], 0, 0, 0, 0),
|
|
"sectionauthor": (["name", "email"], 0, 1, 0, 0),
|
|
"seemodule": (["ref", "name"], 1, 0, 0, 0),
|
|
"stindex": (["type"], 0, 1, 0, 0),
|
|
"subparagraph": ([("title",)], 0, 0, 0, 0),
|
|
"subsection": ([("title",)], 0, 0, 0, 0),
|
|
"subsubsection": ([("title",)], 0, 0, 0, 0),
|
|
"list": (["bullet", "init"], 0, 0, 1, 0),
|
|
"tableii": (["colspec", "style",
|
|
["entry"], ["entry"]], 0, 0, 1, 0),
|
|
"tableiii": (["colspec", "style",
|
|
["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
|
|
"tableiv": (["colspec", "style",
|
|
["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
|
|
"version": ([], 0, 1, 0, 0),
|
|
"versionadded": (["version"], 0, 1, 0, 0),
|
|
"versionchanged": (["version"], 0, 1, 0, 0),
|
|
"withsubitem": (["text"], 0, 0, 0, 0),
|
|
#
|
|
"ABC": ([], 0, 1, 0, 0),
|
|
"ASCII": ([], 0, 1, 0, 0),
|
|
"C": ([], 0, 1, 0, 0),
|
|
"Cpp": ([], 0, 1, 0, 0),
|
|
"EOF": ([], 0, 1, 0, 0),
|
|
"e": ([], 0, 1, 0, 0),
|
|
"ldots": ([], 0, 1, 0, 0),
|
|
"NULL": ([], 0, 1, 0, 0),
|
|
"POSIX": ([], 0, 1, 0, 0),
|
|
"UNIX": ([], 0, 1, 0, 0),
|
|
#
|
|
# Things that will actually be going away!
|
|
#
|
|
"fi": ([], 0, 1, 0, 0),
|
|
"ifhtml": ([], 0, 1, 0, 0),
|
|
"makeindex": ([], 0, 1, 0, 0),
|
|
"makemodindex": ([], 0, 1, 0, 0),
|
|
"maketitle": ([], 0, 1, 0, 0),
|
|
"noindent": ([], 0, 1, 0, 0),
|
|
"protect": ([], 0, 1, 0, 0),
|
|
"tableofcontents": ([], 0, 1, 0, 0),
|
|
},
|
|
discards=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
|
|
"noindent", "tableofcontents"],
|
|
autoclosing=["chapter", "section", "subsection", "subsubsection",
|
|
"paragraph", "subparagraph", ])
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|