mirror of https://github.com/python/cpython
Define & use a Conversion object. It's still really ugly, but at
least there's a token object in here now! ;-)
This commit is contained in:
parent
b0bc7f2d6c
commit
96c00b0b5e
|
@ -27,7 +27,7 @@ class LaTeXFormatError(Error):
|
|||
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
|
||||
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
|
||||
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
|
||||
_comment_rx = re.compile("%+ ?(.*)\n *")
|
||||
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
|
||||
_text_rx = re.compile(r"[^]%\\{}]+")
|
||||
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
|
||||
# _parameter_rx is this complicated to allow {...} inside a parameter;
|
||||
|
@ -50,248 +50,276 @@ def popping(name, point, depth):
|
|||
sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point))
|
||||
|
||||
|
||||
def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
|
||||
if DEBUG and endchar:
|
||||
sys.stderr.write("subconvert(%s, ..., endchar=%s)\n"
|
||||
% (`line[:20]`, `endchar`))
|
||||
stack = []
|
||||
while line:
|
||||
if line[0] == endchar and not stack:
|
||||
if DEBUG:
|
||||
sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`)
|
||||
return line[1:]
|
||||
m = _comment_rx.match(line)
|
||||
if m:
|
||||
text = m.group(1)
|
||||
if text:
|
||||
ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text))
|
||||
line = line[m.end():]
|
||||
continue
|
||||
m = _begin_env_rx.match(line)
|
||||
if m:
|
||||
# re-write to use the macro handler
|
||||
line = r"\%s %s" % (m.group(1), line[m.end():])
|
||||
continue
|
||||
m = _end_env_rx.match(line)
|
||||
if m:
|
||||
# end of environment
|
||||
envname = m.group(1)
|
||||
if envname == "document":
|
||||
# special magic
|
||||
for n in stack[1:]:
|
||||
if n not in autoclosing:
|
||||
raise LaTeXFormatError("open element on stack: " + `n`)
|
||||
# should be more careful, but this is easier to code:
|
||||
stack = []
|
||||
ofp.write(")document\n")
|
||||
elif envname == stack[-1]:
|
||||
ofp.write(")%s\n" % envname)
|
||||
del stack[-1]
|
||||
popping(envname, "a", len(stack) + depth)
|
||||
else:
|
||||
sys.stderr.write("stack: %s\n" % `stack`)
|
||||
raise LaTeXFormatError(
|
||||
"environment close for %s doesn't match" % envname)
|
||||
line = line[m.end():]
|
||||
continue
|
||||
m = _begin_macro_rx.match(line)
|
||||
if m:
|
||||
# start of macro
|
||||
macroname = m.group(1)
|
||||
if macroname == "verbatim":
|
||||
# really magic case!
|
||||
pos = string.find(line, "\\end{verbatim}")
|
||||
text = line[m.end(1):pos]
|
||||
ofp.write("(verbatim\n")
|
||||
ofp.write("-%s\n" % encode(text))
|
||||
ofp.write(")verbatim\n")
|
||||
line = line[pos + len("\\end{verbatim}"):]
|
||||
continue
|
||||
numbered = 1
|
||||
if macroname[-1] == "*":
|
||||
macroname = macroname[:-1]
|
||||
numbered = 0
|
||||
if macroname in autoclosing and macroname in stack:
|
||||
while stack[-1] != macroname:
|
||||
if stack[-1] and stack[-1] not in discards:
|
||||
ofp.write(")%s\n-\\n\n" % stack[-1])
|
||||
popping(stack[-1], "b", len(stack) + depth - 1)
|
||||
del stack[-1]
|
||||
if macroname not in discards:
|
||||
ofp.write("-\\n\n)%s\n-\\n\n" % macroname)
|
||||
popping(macroname, "c", len(stack) + depth - 1)
|
||||
del stack[-1]
|
||||
real_ofp = ofp
|
||||
if macroname in discards:
|
||||
ofp = StringIO.StringIO()
|
||||
#
|
||||
conversion = table.get(macroname, ([], 0, 0, 0, 0))
|
||||
params, optional, empty, environ, nocontent = conversion
|
||||
if empty:
|
||||
ofp.write("e\n")
|
||||
elif nocontent:
|
||||
empty = 1
|
||||
if not numbered:
|
||||
ofp.write("Anumbered TOKEN no\n")
|
||||
opened = 0
|
||||
# rip off the macroname
|
||||
if params:
|
||||
if optional and len(params) == 1:
|
||||
line = line = line[m.end():]
|
||||
else:
|
||||
line = line[m.end(1):]
|
||||
elif empty:
|
||||
line = line[m.end(1):]
|
||||
else:
|
||||
class Conversion:
|
||||
def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
|
||||
self.ofp_stack = [ofp]
|
||||
self.pop_output()
|
||||
self.table = table
|
||||
self.discards = discards
|
||||
self.autoclosing = autoclosing
|
||||
self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
|
||||
self.err_write = sys.stderr.write
|
||||
self.preamble = 1
|
||||
|
||||
def push_output(self, ofp):
|
||||
self.ofp_stack.append(self.ofp)
|
||||
self.ofp = ofp
|
||||
self.write = ofp.write
|
||||
|
||||
def pop_output(self):
|
||||
self.ofp = self.ofp_stack.pop()
|
||||
self.write = self.ofp.write
|
||||
|
||||
def subconvert(self, endchar=None, depth=0):
|
||||
if DEBUG and endchar:
|
||||
self.err_write(
|
||||
"subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`))
|
||||
stack = []
|
||||
line = self.line
|
||||
while line:
|
||||
if line[0] == endchar and not stack:
|
||||
if DEBUG:
|
||||
self.err_write("subconvert() --> %s\n" % `line[1:21]`)
|
||||
self.line = line
|
||||
return line
|
||||
m = _comment_rx.match(line)
|
||||
if m:
|
||||
text = m.group(1)
|
||||
if text:
|
||||
self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
|
||||
% encode(text))
|
||||
line = line[m.end():]
|
||||
#
|
||||
# Very ugly special case to deal with \item[]. The catch is that
|
||||
# this needs to occur outside the for loop that handles attribute
|
||||
# parsing so we can 'continue' the outer loop.
|
||||
#
|
||||
if optional and type(params[0]) is type(()):
|
||||
# the attribute name isn't used in this special case
|
||||
pushing(macroname, "a", depth + len(stack))
|
||||
stack.append(macroname)
|
||||
ofp.write("(%s\n" % macroname)
|
||||
m = _start_optional_rx.match(line)
|
||||
if m:
|
||||
line = line[m.end():]
|
||||
line = subconvert(line, ofp, table, discards,
|
||||
autoclosing, endchar="]",
|
||||
depth=depth + len(stack))
|
||||
line = "}" + line
|
||||
continue
|
||||
# handle attribute mappings here:
|
||||
for attrname in params:
|
||||
if optional:
|
||||
optional = 0
|
||||
if type(attrname) is type(""):
|
||||
m = _optional_rx.match(line)
|
||||
m = _begin_env_rx.match(line)
|
||||
if m:
|
||||
# re-write to use the macro handler
|
||||
line = r"\%s %s" % (m.group(1), line[m.end():])
|
||||
continue
|
||||
m = _end_env_rx.match(line)
|
||||
if m:
|
||||
# end of environment
|
||||
envname = m.group(1)
|
||||
if envname == "document":
|
||||
# special magic
|
||||
for n in stack[1:]:
|
||||
if n not in self.autoclosing:
|
||||
raise LaTeXFormatError(
|
||||
"open element on stack: " + `n`)
|
||||
# should be more careful, but this is easier to code:
|
||||
stack = []
|
||||
self.write(")document\n")
|
||||
elif envname == stack[-1]:
|
||||
self.write(")%s\n" % envname)
|
||||
del stack[-1]
|
||||
popping(envname, "a", len(stack) + depth)
|
||||
else:
|
||||
self.err_write("stack: %s\n" % `stack`)
|
||||
raise LaTeXFormatError(
|
||||
"environment close for %s doesn't match" % envname)
|
||||
line = line[m.end():]
|
||||
continue
|
||||
m = _begin_macro_rx.match(line)
|
||||
if m:
|
||||
# start of macro
|
||||
macroname = m.group(1)
|
||||
if macroname == "verbatim":
|
||||
# really magic case!
|
||||
pos = string.find(line, "\\end{verbatim}")
|
||||
text = line[m.end(1):pos]
|
||||
self.write("(verbatim\n")
|
||||
self.write("-%s\n" % encode(text))
|
||||
self.write(")verbatim\n")
|
||||
line = line[pos + len("\\end{verbatim}"):]
|
||||
continue
|
||||
numbered = 1
|
||||
opened = 0
|
||||
if macroname[-1] == "*":
|
||||
macroname = macroname[:-1]
|
||||
numbered = 0
|
||||
if macroname in self.autoclosing and macroname in stack:
|
||||
while stack[-1] != macroname:
|
||||
top = stack.pop()
|
||||
if top and top not in self.discards:
|
||||
self.write(")%s\n-\\n\n" % top)
|
||||
popping(top, "b", len(stack) + depth)
|
||||
if macroname not in self.discards:
|
||||
self.write("-\\n\n)%s\n-\\n\n" % macroname)
|
||||
popping(macroname, "c", len(stack) + depth - 1)
|
||||
del stack[-1]
|
||||
#
|
||||
if macroname in self.discards:
|
||||
self.push_output(StringIO.StringIO())
|
||||
else:
|
||||
self.push_output(self.ofp)
|
||||
#
|
||||
params, optional, empty, environ = self.start_macro(macroname)
|
||||
if not numbered:
|
||||
self.write("Anumbered TOKEN no\n")
|
||||
# rip off the macroname
|
||||
if params:
|
||||
if optional and len(params) == 1:
|
||||
line = line[m.end():]
|
||||
else:
|
||||
line = line[m.end(1):]
|
||||
elif empty:
|
||||
line = line[m.end(1):]
|
||||
else:
|
||||
line = line[m.end():]
|
||||
#
|
||||
# Very ugly special case to deal with \item[]. The catch
|
||||
# is that this needs to occur outside the for loop that
|
||||
# handles attribute parsing so we can 'continue' the outer
|
||||
# loop.
|
||||
#
|
||||
if optional and type(params[0]) is type(()):
|
||||
# the attribute name isn't used in this special case
|
||||
pushing(macroname, "a", depth + len(stack))
|
||||
stack.append(macroname)
|
||||
self.write("(%s\n" % macroname)
|
||||
m = _start_optional_rx.match(line)
|
||||
if m:
|
||||
self.line = line[m.end():]
|
||||
line = self.subconvert("]", depth + len(stack))
|
||||
line = "}" + line
|
||||
continue
|
||||
# handle attribute mappings here:
|
||||
for attrname in params:
|
||||
if optional:
|
||||
optional = 0
|
||||
if type(attrname) is type(""):
|
||||
m = _optional_rx.match(line)
|
||||
if m:
|
||||
line = line[m.end():]
|
||||
self.write("A%s TOKEN %s\n"
|
||||
% (attrname, encode(m.group(1))))
|
||||
elif type(attrname) is type(()):
|
||||
# This is a sub-element; but don't place the
|
||||
# element we found on the stack (\section-like)
|
||||
pushing(macroname, "b", len(stack) + depth)
|
||||
stack.append(macroname)
|
||||
self.write("(%s\n" % macroname)
|
||||
macroname = attrname[0]
|
||||
m = _start_group_rx.match(line)
|
||||
if m:
|
||||
line = line[m.end():]
|
||||
ofp.write("A%s TOKEN %s\n"
|
||||
% (attrname, encode(m.group(1))))
|
||||
elif type(attrname) is type(()):
|
||||
# This is a sub-element; but don't place the
|
||||
# element we found on the stack (\section-like)
|
||||
pushing(macroname, "b", len(stack) + depth)
|
||||
stack.append(macroname)
|
||||
ofp.write("(%s\n" % macroname)
|
||||
macroname = attrname[0]
|
||||
m = _start_group_rx.match(line)
|
||||
if m:
|
||||
elif type(attrname) is type([]):
|
||||
# A normal subelement.
|
||||
attrname = attrname[0]
|
||||
if not opened:
|
||||
opened = 1
|
||||
self.write("(%s\n" % macroname)
|
||||
pushing(macroname, "c", len(stack) + depth)
|
||||
self.write("(%s\n" % attrname)
|
||||
pushing(attrname, "sub-elem", len(stack) + depth + 1)
|
||||
self.line = skip_white(line)[1:]
|
||||
line = subconvert("}", depth + len(stack) + 2)
|
||||
popping(attrname, "sub-elem", len(stack) + depth + 1)
|
||||
self.write(")%s\n" % attrname)
|
||||
else:
|
||||
m = _parameter_rx.match(line)
|
||||
if not m:
|
||||
raise LaTeXFormatError(
|
||||
"could not extract parameter %s for %s: %s"
|
||||
% (attrname, macroname, `line[:100]`))
|
||||
value = m.group(1)
|
||||
if _token_rx.match(value):
|
||||
dtype = "TOKEN"
|
||||
else:
|
||||
dtype = "CDATA"
|
||||
self.write("A%s %s %s\n"
|
||||
% (attrname, dtype, encode(value)))
|
||||
line = line[m.end():]
|
||||
elif type(attrname) is type([]):
|
||||
# A normal subelement.
|
||||
attrname = attrname[0]
|
||||
if not opened:
|
||||
opened = 1
|
||||
ofp.write("(%s\n" % macroname)
|
||||
pushing(macroname, "c", len(stack) + depth)
|
||||
ofp.write("(%s\n" % attrname)
|
||||
pushing(attrname, "sub-elem", len(stack) + depth + 1)
|
||||
line = subconvert(skip_white(line)[1:], ofp, table,
|
||||
discards, autoclosing, endchar="}",
|
||||
depth=depth + len(stack) + 2)
|
||||
popping(attrname, "sub-elem", len(stack) + depth + 1)
|
||||
ofp.write(")%s\n" % attrname)
|
||||
else:
|
||||
m = _parameter_rx.match(line)
|
||||
if params and type(params[-1]) is type('') \
|
||||
and (not empty) and not environ:
|
||||
# attempt to strip off next '{'
|
||||
m = _start_group_rx.match(line)
|
||||
if not m:
|
||||
raise LaTeXFormatError(
|
||||
"could not extract parameter %s for %s: %s"
|
||||
% (attrname, macroname, `line[:100]`))
|
||||
value = m.group(1)
|
||||
if _token_rx.match(value):
|
||||
dtype = "TOKEN"
|
||||
else:
|
||||
dtype = "CDATA"
|
||||
ofp.write("A%s %s %s\n"
|
||||
% (attrname, dtype, encode(value)))
|
||||
"non-empty element '%s' has no content: %s"
|
||||
% (macroname, line[:12]))
|
||||
line = line[m.end():]
|
||||
if params and type(params[-1]) is type('') \
|
||||
and (not empty) and not environ:
|
||||
# attempt to strip off next '{'
|
||||
m = _start_group_rx.match(line)
|
||||
if not m:
|
||||
raise LaTeXFormatError(
|
||||
"non-empty element '%s' has no content: %s"
|
||||
% (macroname, line[:12]))
|
||||
if not opened:
|
||||
self.write("(%s\n" % macroname)
|
||||
pushing(macroname, "d", len(stack) + depth)
|
||||
if empty:
|
||||
line = "}" + line
|
||||
stack.append(macroname)
|
||||
self.pop_output()
|
||||
continue
|
||||
if line[0] == endchar and not stack:
|
||||
if DEBUG:
|
||||
self.err_write("subconvert() --> %s\n" % `line[1:21]`)
|
||||
self.line = line[1:]
|
||||
return self.line
|
||||
if line[0] == "}":
|
||||
# end of macro or group
|
||||
macroname = stack[-1]
|
||||
conversion = self.table.get(macroname)
|
||||
if macroname \
|
||||
and macroname not in self.discards \
|
||||
and type(conversion) is not type(""):
|
||||
# otherwise, it was just a bare group
|
||||
self.write(")%s\n" % stack[-1])
|
||||
popping(macroname, "d", len(stack) + depth - 1)
|
||||
del stack[-1]
|
||||
line = line[1:]
|
||||
continue
|
||||
if line[0] == "{":
|
||||
pushing("", "e", len(stack) + depth)
|
||||
stack.append("")
|
||||
line = line[1:]
|
||||
continue
|
||||
if line[0] == "\\" and line[1] in ESCAPED_CHARS:
|
||||
self.write("-%s\n" % encode(line[1]))
|
||||
line = line[2:]
|
||||
continue
|
||||
if line[:2] == r"\\":
|
||||
self.write("(BREAK\n)BREAK\n")
|
||||
line = line[2:]
|
||||
continue
|
||||
m = _text_rx.match(line)
|
||||
if m:
|
||||
text = encode(m.group())
|
||||
self.write("-%s\n" % text)
|
||||
line = line[m.end():]
|
||||
if not opened:
|
||||
ofp.write("(%s\n" % macroname)
|
||||
pushing(macroname, "d", len(stack) + depth)
|
||||
if empty:
|
||||
line = "}" + line
|
||||
stack.append(macroname)
|
||||
ofp = real_ofp
|
||||
continue
|
||||
if line[0] == endchar and not stack:
|
||||
if DEBUG:
|
||||
sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`)
|
||||
return line[1:]
|
||||
if line[0] == "}":
|
||||
# end of macro
|
||||
macroname = stack[-1]
|
||||
conversion = table.get(macroname)
|
||||
if macroname \
|
||||
and macroname not in discards \
|
||||
and type(conversion) is not type(""):
|
||||
# otherwise, it was just a bare group
|
||||
ofp.write(")%s\n" % stack[-1])
|
||||
popping(macroname, "d", len(stack) + depth - 1)
|
||||
del stack[-1]
|
||||
line = line[1:]
|
||||
continue
|
||||
if line[0] == "{":
|
||||
pushing("", "e", len(stack) + depth)
|
||||
stack.append("")
|
||||
line = line[1:]
|
||||
continue
|
||||
if line[0] == "\\" and line[1] in ESCAPED_CHARS:
|
||||
ofp.write("-%s\n" % encode(line[1]))
|
||||
line = line[2:]
|
||||
continue
|
||||
if line[:2] == r"\\":
|
||||
ofp.write("(BREAK\n)BREAK\n")
|
||||
line = line[2:]
|
||||
continue
|
||||
m = _text_rx.match(line)
|
||||
if m:
|
||||
text = encode(m.group())
|
||||
ofp.write("-%s\n" % text)
|
||||
line = line[m.end():]
|
||||
continue
|
||||
# special case because of \item[]
|
||||
if line[0] == "]":
|
||||
ofp.write("-]\n")
|
||||
line = line[1:]
|
||||
continue
|
||||
# avoid infinite loops
|
||||
extra = ""
|
||||
if len(line) > 100:
|
||||
extra = "..."
|
||||
raise LaTeXFormatError("could not identify markup: %s%s"
|
||||
% (`line[:100]`, extra))
|
||||
while stack and stack[-1] in autoclosing:
|
||||
ofp.write("-\\n\n")
|
||||
ofp.write(")%s\n" % stack[-1])
|
||||
popping(stack[-1], "e", len(stack) + depth - 1)
|
||||
del stack[-1]
|
||||
if stack:
|
||||
raise LaTeXFormatError("elements remain on stack: "
|
||||
+ string.join(stack))
|
||||
# otherwise we just ran out of input here...
|
||||
continue
|
||||
# special case because of \item[]
|
||||
if line[0] == "]":
|
||||
self.write("-]\n")
|
||||
line = line[1:]
|
||||
continue
|
||||
# avoid infinite loops
|
||||
extra = ""
|
||||
if len(line) > 100:
|
||||
extra = "..."
|
||||
raise LaTeXFormatError("could not identify markup: %s%s"
|
||||
% (`line[:100]`, extra))
|
||||
while stack and stack[-1] in self.autoclosing:
|
||||
self.write("-\\n\n")
|
||||
self.write(")%s\n" % stack[-1])
|
||||
popping(stack.pop(), "e", len(stack) + depth - 1)
|
||||
if stack:
|
||||
raise LaTeXFormatError("elements remain on stack: "
|
||||
+ string.join(stack, ", "))
|
||||
# otherwise we just ran out of input here...
|
||||
|
||||
def convert(self):
|
||||
self.subconvert()
|
||||
|
||||
def start_macro(self, name):
|
||||
conversion = self.table.get(name, ([], 0, 0, 0, 0))
|
||||
params, optional, empty, environ, nocontent = conversion
|
||||
if empty:
|
||||
self.write("e\n")
|
||||
elif nocontent:
|
||||
empty = 1
|
||||
return params, optional, empty, environ
|
||||
|
||||
|
||||
def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
|
||||
lines = string.split(ifp.read(), "\n")
|
||||
for i in range(len(lines)):
|
||||
lines[i] = string.rstrip(lines[i])
|
||||
data = string.join(lines, "\n")
|
||||
c = Conversion(ifp, ofp, table, discards, autoclosing)
|
||||
try:
|
||||
subconvert(data, ofp, table, discards, autoclosing)
|
||||
c.convert()
|
||||
except IOError, (err, msg):
|
||||
if err != errno.EPIPE:
|
||||
raise
|
||||
|
|
Loading…
Reference in New Issue