1998-11-23 12:59:39 -04:00
|
|
|
|
#! /usr/bin/env python
|
|
|
|
|
|
1999-05-19 14:37:37 -03:00
|
|
|
|
"""Generate ESIS events based on a LaTeX source document and
|
|
|
|
|
configuration data.
|
|
|
|
|
|
|
|
|
|
The conversion is not strong enough to work with arbitrary LaTeX
|
|
|
|
|
documents; it has only been designed to work with the highly stylized
|
|
|
|
|
markup used in the standard Python documentation. A lot of
|
|
|
|
|
information about specific markup is encoded in the control table
|
|
|
|
|
passed to the convert() function; changing this table can allow this
|
|
|
|
|
tool to support additional LaTeX markups.
|
|
|
|
|
|
|
|
|
|
The format of the table is largely undocumented; the table itself is
read from the conversion.xml file found in sys.path[0] (see load_table()
and main()).  There is no command-line option to load an alternate table
from a different file.
|
1998-11-23 12:59:39 -04:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import errno
|
1999-07-29 19:22:13 -03:00
|
|
|
|
import getopt
|
|
|
|
|
import os
|
1998-11-23 12:59:39 -04:00
|
|
|
|
import re
|
|
|
|
|
import sys
|
2001-11-30 15:30:03 -04:00
|
|
|
|
import xml.sax
|
2000-11-22 13:56:43 -04:00
|
|
|
|
import xml.sax.saxutils
|
1998-11-23 12:59:39 -04:00
|
|
|
|
|
2001-03-23 12:53:34 -04:00
|
|
|
|
from esistools import encode
|
|
|
|
|
|
|
|
|
|
|
1999-01-14 13:38:12 -04:00
|
|
|
|
# Debug level: main() adds one for every -D/--debug option.  When non-zero,
# dbgmsg() writes to stderr and new_stack() returns a checking _Stack.
DEBUG = 0
|
|
|
|
|
|
|
|
|
|
|
1999-07-29 19:22:13 -03:00
|
|
|
|
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be converted."""
|
|
|
|
|
|
|
|
|
|
|
1999-07-29 19:22:13 -03:00
|
|
|
|
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the element stack.

    The name that was being closed is stored in ``found``; a copy of the
    stack as it was when the error was detected is stored in ``stack``.
    """

    def __init__(self, found, stack):
        self.found = found
        # Keep a copy so later changes to the caller's stack don't show here.
        self.stack = stack[:]
        template = "environment close for %s doesn't match;\n stack = %s"
        LaTeXFormatError.__init__(self, template % (found, stack))
|
1998-11-23 12:59:39 -04:00
|
|
|
|
|
1999-07-29 19:22:13 -03:00
|
|
|
|
|
1998-11-23 12:59:39 -04:00
|
|
|
|
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
|
|
|
|
|
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
|
1999-05-19 14:37:37 -03:00
|
|
|
|
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
|
1999-05-07 16:59:02 -03:00
|
|
|
|
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
|
2000-11-22 13:56:43 -04:00
|
|
|
|
_text_rx = re.compile(r"[^]~%\\{}]+")
|
2001-07-06 18:01:19 -03:00
|
|
|
|
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE)
|
1998-12-01 15:04:12 -04:00
|
|
|
|
# _parameter_rx is this complicated to allow {...} inside a parameter;
|
|
|
|
|
# this is useful to match tabular layout specifications like {c|p{24pt}}
|
|
|
|
|
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
|
1998-11-23 12:59:39 -04:00
|
|
|
|
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
|
|
|
|
|
_start_group_rx = re.compile("[ \n]*{")
|
|
|
|
|
_start_optional_rx = re.compile("[ \n]*[[]")
|
|
|
|
|
|
|
|
|
|
|
1998-11-30 10:45:24 -04:00
|
|
|
|
ESCAPED_CHARS = "$%#^ {}&~"
|
1998-11-23 12:59:39 -04:00
|
|
|
|
|
|
|
|
|
|
1999-05-07 18:12:21 -03:00
|
|
|
|
def dbgmsg(msg):
    """Write *msg* followed by a newline to stderr when DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
|
|
|
|
|
|
|
|
|
|
def pushing(name, point, depth):
    """Emit a debug trace line for *name* being pushed at *point*.

    *depth* is accepted but not used.
    """
    trace = "pushing <%s> at %s" % (name, point)
    dbgmsg(trace)
|
1999-01-14 13:38:12 -04:00
|
|
|
|
|
|
|
|
|
def popping(name, point, depth):
    """Emit a debug trace line for *name* being popped at *point*.

    *depth* is accepted but not used.
    """
    trace = "popping </%s> at %s" % (name, point)
    dbgmsg(trace)
|
|
|
|
|
|
|
|
|
|
|
2002-10-16 13:00:42 -03:00
|
|
|
|
class _Stack(list):
|
1999-07-29 19:22:13 -03:00
|
|
|
|
def append(self, entry):
|
2002-10-16 13:00:42 -03:00
|
|
|
|
if not isinstance(entry, str):
|
2004-02-12 13:35:32 -04:00
|
|
|
|
raise LaTeXFormatError("cannot push non-string on stack: %r"
|
|
|
|
|
% (entry, ))
|
2001-03-23 12:53:34 -04:00
|
|
|
|
#dbgmsg("%s<%s>" % (" "*len(self.data), entry))
|
2002-10-16 13:00:42 -03:00
|
|
|
|
list.append(self, entry)
|
1999-01-14 13:38:12 -04:00
|
|
|
|
|
1999-07-29 19:22:13 -03:00
|
|
|
|
def pop(self, index=-1):
|
2002-10-16 13:00:42 -03:00
|
|
|
|
entry = self[index]
|
|
|
|
|
del self[index]
|
|
|
|
|
#dbgmsg("%s</%s>" % (" " * len(self), entry))
|
1999-07-29 19:22:13 -03:00
|
|
|
|
|
|
|
|
|
def __delitem__(self, index):
|
2002-10-16 13:00:42 -03:00
|
|
|
|
entry = self[index]
|
|
|
|
|
list.__delitem__(self, index)
|
|
|
|
|
#dbgmsg("%s</%s>" % (" " * len(self), entry))
|
1999-07-29 19:22:13 -03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def new_stack():
    """Return a new, empty element stack.

    A plain list is used normally; in debug mode a _Stack is returned
    instead.
    """
    if not DEBUG:
        return []
    return _Stack()
|
1999-01-14 13:38:12 -04:00
|
|
|
|
|
1999-07-29 19:22:13 -03:00
|
|
|
|
|
1999-08-02 11:35:25 -03:00
|
|
|
|
class Conversion:
    """Convert LaTeX source text into ESIS event lines.

    The whole input is read when the object is created; convert() then
    walks the text and writes one output line per event through
    ofp.write().  *table* maps macro and environment names to TableEntry
    objects; names that are not in the table get a default entry added on
    first use.

    Malformed input is reported by raising LaTeXFormatError (or its
    subclass LaTeXStackError).
    """

    def __init__(self, ifp, ofp, table):
        """Read all of *ifp* and remember *ofp* and *table*.

        Trailing whitespace is stripped from every input line and the
        lines are re-joined with "\\n", with a final newline appended.
        """
        self.write = ofp.write
        self.ofp = ofp
        self.table = table
        L = [s.rstrip() for s in ifp.readlines()]
        L.append("")
        self.line = "\n".join(L)
        # Not referenced anywhere else in this file.
        self.preamble = 1

    def convert(self):
        """Convert the complete input."""
        self.subconvert()

    def subconvert(self, endchar=None, depth=0):
        """Convert input starting at self.line.

        Parses content, including sub-structures, until the character
        'endchar' is found (with no open structures), or until the end
        of the input data if endchar is None.

        When 'endchar' is found, self.line is set to the remaining input
        (starting at 'endchar') and that string is returned.  When the
        input runs out, None is returned.  'depth' is only passed along to
        recursive calls.

        Raises LaTeXFormatError for markup that cannot be handled and
        LaTeXStackError for an \\end{...} that does not match the stack.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its side effects only: it registers a default
                # entry for unknown environments and rejects names that
                # the table defines as macros.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "c":
                    # Ugh!  This is a combining character...
                    endpos = m.end()
                    # Slice instead of index so that input ending right
                    # after the macro is reported by combining_char()
                    # rather than failing with IndexError.
                    self.combining_char("c", line[endpos:endpos + 1])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!
                    endmarker = "\\end{%s}" % macroname
                    pos = line.find(endmarker)
                    if pos < 0:
                        # find() returned -1; using it as a slice index
                        # would silently produce garbage output.
                        raise LaTeXFormatError(
                            "missing %s for verbatim environment"
                            % endmarker)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endmarker):]
                    continue
                # Starting this macro implicitly closes the elements
                # listed in its 'closes' list.
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, optional, empty = self.start_macro(macroname)
                # rip off the macroname; what follows the name is kept
                # when there are parameters to parse or the macro is empty
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %r"
                                    % (pentry.name, macroname, line[:100]))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # Drop the character that opens the parameter
                            # and convert its content recursively.
                            self.line = skip_white(line)[1:]
                            line = self.subconvert(
                                "}", len(stack) + depth + 1)
                            if line is None:
                                # Input ran out before the closing "}".
                                raise LaTeXFormatError(
                                    "unterminated %s parameter for %s"
                                    % (pentry.name, macroname))
                            line = line[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            # line[:1] so that end of input is reported as
                            # missing content instead of an IndexError.
                            if line[:1] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            line = self.subconvert("}", len(stack) + depth + 1)
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            # NOTE(review): this repeats the test at the top of the loop
            # with 'line' and 'stack' unchanged, so it looks unreachable;
            # kept as in the original.
            if line[0] == endchar and not stack:
                self.line = line[1:]
                return self.line
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %r" % (line[:100], ))
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                # a bare group is tracked as an empty name on the stack
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and len(line) > 1 and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2:3])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %r%s"
                                   % (line[:100], extra))
        # Out of input: close whatever may be closed implicitly.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...

    # This is a really limited table of combinations, but it will have
    # to do for now.
    _combinations = {
        ("c", "c"): 0x00E7,
        ("'", "e"): 0x00E9,
        ('"', "o"): 0x00F6,
        }

    def combining_char(self, prefix, char):
        """Write the character reference for accent *prefix* applied to *char*.

        Raises LaTeXFormatError if the combination is not listed in
        _combinations.
        """
        try:
            ordinal = self._combinations[(prefix, char)]
        except KeyError:
            raise LaTeXFormatError(
                "unsupported combining character: \\%s%s" % (prefix, char))
        self.write("-\\%%%d;\n" % ordinal)

    def start_macro(self, name):
        """Return (parameters, optional, empty) for the macro *name*.

        'optional' is the optional flag of the first parameter, or the
        (empty) parameter list itself when there are no parameters.
        """
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty

    def get_entry(self, name):
        """Return the table entry for *name*.

        An unknown name gets a default entry with a single "content"
        parameter, which is stored in the table before being returned.
        """
        entry = self.table.get(name)
        if entry is None:
            dbgmsg("get_entry(%r) failing; building default entry!" % (name, ))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for the environment *name*.

        An unknown name gets a default environment entry with implied
        content, which is stored in the table.  Raises LaTeXFormatError
        if *name* is in the table but is not an environment.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute line for parameter *pentry* with *value*.

        Nothing is written when the parameter has no name or the value is
        empty.  The value is declared as TOKEN when it matches _token_rx
        and as CDATA otherwise.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
|
|
|
|
|
|
|
|
|
|
|
1999-07-29 19:42:27 -03:00
|
|
|
|
def convert(ifp, ofp, table):
    """Convert the LaTeX document read from *ifp*, writing events to *ofp*.

    *table* is the conversion table (name -> TableEntry).  An IOError
    whose errno is EPIPE is swallowed; any other IOError propagates.
    """
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError as e:
        # Use the errno attribute rather than unpacking the exception,
        # which only works when its args are exactly (errno, message).
        if e.errno != errno.EPIPE:
            raise
|
|
|
|
|
|
|
|
|
|
|
1999-01-14 13:38:12 -04:00
|
|
|
|
def skip_white(line):
    """Return *line* with leading whitespace and "%" characters removed.

    Whenever the first character is a space, "%", newline, tab or
    carriage return, it is dropped and the rest is left-stripped; this is
    repeated until the text is empty or starts with something else.
    """
    while True:
        if not line or line[0] not in " %\n\t\r":
            return line
        line = line[1:].lstrip()
|
|
|
|
|
|
|
|
|
|
|
1999-07-29 19:22:13 -03:00
|
|
|
|
|
|
|
|
|
class TableEntry:
    """Conversion-table record for one macro or environment.

    Attributes set here:
      name         the LaTeX name
      outputname   name written to the output; starts out equal to name
      environment  true for an environment, false for a macro
      empty        starts out as "not environment"
      has_content, verbatim, auto_close
                   flags, all initially 0
      parameters   list of Parameter objects, initially empty
      closes       names implicitly closed when this macro starts
      endcloses    names implicitly closed when this environment ends
    """

    def __init__(self, name, environment=0):
        self.name = self.outputname = name
        self.environment = environment
        self.empty = not environment
        self.has_content = self.verbatim = self.auto_close = 0
        # Three separate lists; they are filled in independently.
        self.parameters = []
        self.closes = []
        self.endcloses = []
|
|
|
|
|
|
|
|
|
|
class Parameter:
    """Description of one parameter of a macro or environment.

    *type* is the parameter kind; the kinds used in this file are
    "attribute", "child", "content", "text" and "entityref".  *name* and
    *optional* are stored as given; ``text`` starts out empty and
    ``implied`` starts out as 0.
    """

    def __init__(self, type, name=None, optional=0):
        self.type, self.name, self.optional = type, name, optional
        self.text = ''
        self.implied = 0
|
|
|
|
|
|
|
|
|
|
|
2001-11-30 15:30:03 -04:00
|
|
|
|
class TableHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds the conversion table.

    Element events are dispatched to start_<tag>() / end_<tag>() methods
    when such methods exist; other elements are ignored.  The handled
    elements are environment, macro, attribute, entityref, child, content
    and text.  The finished table (name -> TableEntry) is available from
    get_table().
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.__table = {}
        # character data collected since the buffer was last reset
        self.__buffer = ''
        # cache: tag -> (start method or None, end method or None)
        self.__methods = {}
        # entry being built while inside an <environment> or <macro>
        self.__current = None

    def get_table(self):
        """Return the table, giving content-less environments implied content."""
        for entry in self.__table.values():
            if entry.environment and not entry.has_content:
                p = Parameter("content")
                p.implied = 1
                entry.parameters.append(p)
                entry.has_content = 1
        return self.__table

    def startElement(self, tag, attrs):
        """Look up (and cache) the handlers for *tag*; call the start one."""
        try:
            start, end = self.__methods[tag]
        except KeyError:
            start = getattr(self, "start_" + tag, None)
            end = getattr(self, "end_" + tag, None)
            self.__methods[tag] = (start, end)
        if start:
            start(attrs)

    def endElement(self, tag):
        """Call the end handler for *tag*, if there is one."""
        start, end = self.__methods[tag]
        if end:
            end()

    def endDocument(self):
        """Drop the cached handler methods."""
        self.__methods.clear()

    def characters(self, data):
        """Accumulate character data in the buffer."""
        self.__buffer += data

    def start_environment(self, attrs):
        """Begin a TableEntry for an environment."""
        name = attrs["name"]
        self.__current = TableEntry(name, environment=1)
        self.__current.verbatim = attrs.get("verbatim") == "yes"
        # "in" works for both dictionaries and SAX attribute objects;
        # has_key() is not available on Python 3.
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")
        self.__current.endcloses = attrs.get("endcloses", "").split()

    def end_environment(self):
        """Store the finished environment entry."""
        self.end_macro()

    def start_macro(self, attrs):
        """Begin a TableEntry for a macro."""
        name = attrs["name"]
        self.__current = TableEntry(name)
        self.__current.closes = attrs.get("closes", "").split()
        if "outputname" in attrs:
            self.__current.outputname = attrs.get("outputname")

    def end_macro(self):
        """Store the current entry; raise ValueError for a duplicate name."""
        name = self.__current.name
        if name in self.__table:
            raise ValueError("name %r already in use" % (name,))
        self.__table[name] = self.__current
        self.__current = None

    def start_attribute(self, attrs):
        """Add an "attribute" parameter and start collecting its text."""
        name = attrs.get("name")
        optional = attrs.get("optional") == "yes"
        # A missing or empty name is stored as None, Parameter's default.
        p = Parameter("attribute", name or None, optional=optional)
        self.__current.parameters.append(p)
        self.__buffer = ''

    def end_attribute(self):
        """Store the collected text as the attribute's fixed value."""
        self.__current.parameters[-1].text = self.__buffer

    def start_entityref(self, attrs):
        """Add an "entityref" parameter."""
        name = attrs["name"]
        p = Parameter("entityref", name)
        self.__current.parameters.append(p)

    def start_child(self, attrs):
        """Add a "child" parameter; the entry is then no longer empty."""
        name = attrs["name"]
        p = Parameter("child", name, attrs.get("optional") == "yes")
        self.__current.parameters.append(p)
        self.__current.empty = 0

    def start_content(self, attrs):
        """Add a "content" parameter; always implied for environments."""
        p = Parameter("content")
        p.implied = attrs.get("implied") == "yes"
        if self.__current.environment:
            p.implied = 1
        self.__current.parameters.append(p)
        self.__current.has_content = 1
        self.__current.empty = 0

    def start_text(self, attrs):
        """Start collecting literal text for a "text" parameter."""
        self.__current.empty = 0
        self.__buffer = ''

    def end_text(self):
        """Add a "text" parameter holding the collected text."""
        p = Parameter("text")
        p.text = self.__buffer
        self.__current.parameters.append(p)
|
|
|
|
|
|
|
|
|
|
|
2001-11-30 15:30:03 -04:00
|
|
|
|
def load_table(fp):
    """Parse the XML conversion specification in *fp*; return the table."""
    handler = TableHandler()
    xml.sax.parse(fp, handler)
    table = handler.get_table()
    return table
|
1999-07-29 19:22:13 -03:00
|
|
|
|
|
|
|
|
|
|
1998-11-23 12:59:39 -04:00
|
|
|
|
def main():
    """Command-line entry point.

    Usage: latex2esis [-D | --debug] [input-file [output-file]]

    Input defaults to stdin and output to stdout.  Each -D/--debug option
    raises the DEBUG level by one.  The conversion table is loaded from
    conversion.xml in sys.path[0].  With more than two arguments a usage
    message is written to stderr and the exit status is 2.
    """
    global DEBUG
    #
    opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
    for opt, arg in opts:
        if opt in ("-D", "--debug"):
            DEBUG += 1
    if len(args) > 2:
        # No usage() helper is defined in this file, so the message is
        # written here directly.
        sys.stderr.write(
            "usage: %s [-D | --debug] [input-file [output-file]]\n"
            % os.path.basename(sys.argv[0]))
        sys.exit(2)

    ifp = sys.stdin
    ofp = sys.stdout
    # Only the files opened here get closed; stdin and stdout are left alone.
    opened = []
    try:
        if len(args) >= 1:
            ifp = open(args[0])
            opened.append(ifp)
        if len(args) == 2:
            ofp = open(args[1], "w")
            opened.append(ofp)
        table_fp = open(os.path.join(sys.path[0], 'conversion.xml'))
        opened.append(table_fp)
        table = load_table(table_fp)
        convert(ifp, ofp, table)
    finally:
        for fp in opened:
            fp.close()
|
1998-11-23 12:59:39 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|