#! /usr/bin/env python
"""Convert ESIS events to SGML or XML markup.

This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""

# This should have an explicit option to indicate whether the *INPUT* was
# generated from an SGML or an XML application.

import errno
import esistools
import os
import re
import string

from xml.sax.saxutils import escape


AUTOCLOSE = ()

EMPTIES_FILENAME = "../sgml/empties.dat"
LIST_EMPTIES = 0


_elem_map = {}
_attr_map = {}
_token_map = {}

_normalize_case = str

def map_gi(sgmlgi, map):
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # dictionary lookups raise KeyError, not IndexError
        map[uncased] = sgmlgi
        return sgmlgi

def null_map_gi(sgmlgi, map):
    return sgmlgi


def format_attrs(attrs, xml=0):
    attrs = attrs.items()
    attrs.sort()
    parts = []
    append = parts.append
    for name, value in attrs:
        if xml:
            append('%s="%s"' % (name, escape(value)))
        else:
            # this is a little bogus, but should do for now
            if name == value and isnmtoken(value):
                append(value)
            elif istoken(value):
                if value == "no" + name:
                    append(value)
                else:
                    append("%s=%s" % (name, value))
            else:
                append('%s="%s"' % (name, escape(value)))
    if parts:
        parts.insert(0, '')
    return string.join(parts)
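
# For example (the attribute name here is hypothetical), format_attrs()
# behaves roughly like this:
#
#     format_attrs({"align": "center"}, xml=1)  ->  ' align="center"'
#     format_attrs({"align": "center"}, xml=0)  ->  ' align=center'
#     format_attrs({})                          ->  ''
#
# so callers can write "<%s%s>" % (gi, format_attrs(attrs)) directly.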

_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
def isnmtoken(s):
    return _nmtoken_rx.match(s) is not None

_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)
def istoken(s):
    return _token_rx.match(s) is not None


def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    if xml:
        autoclose = ()
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    inverbatim = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        type = line[0]
        data = line[1:]
        if data and data[-1] == "\n":
            data = data[:-1]
if type == "-":
|
1998-12-01 15:01:53 -04:00
|
|
|
data = esistools.decode(data)
|
1999-05-18 14:34:51 -03:00
|
|
|
data = escape(data)
|
|
|
|
if not inverbatim:
|
|
|
|
data = string.replace(data, "---", "—")
|
|
|
|
ofp.write(data)
|
1998-11-23 12:59:39 -04:00
|
|
|
if "\n" in data:
|
|
|
|
lastopened = None
|
|
|
|
knownempty = 0
|
|
|
|
lastempty = 0
|
|
|
|
elif type == "(":
|
1998-12-01 15:01:53 -04:00
|
|
|
if data == "COMMENT":
|
|
|
|
ofp.write("<!--")
|
|
|
|
continue
|
1999-02-18 12:30:16 -04:00
|
|
|
data = map_gi(data, _elem_map)
|
1998-11-23 12:59:39 -04:00
|
|
|
if knownempty and xml:
|
1999-01-19 13:10:31 -04:00
|
|
|
ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
|
1998-11-23 12:59:39 -04:00
|
|
|
else:
|
1999-01-19 13:10:31 -04:00
|
|
|
ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
|
1998-11-23 12:59:39 -04:00
|
|
|
if knownempty and data not in knownempties:
|
|
|
|
# accumulate knowledge!
|
|
|
|
knownempties.append(data)
|
|
|
|
attrs = {}
|
|
|
|
lastopened = data
|
|
|
|
lastempty = knownempty
|
|
|
|
knownempty = 0
|
1999-05-18 14:34:51 -03:00
|
|
|
inverbatim = data in verbatims
|
1998-11-23 12:59:39 -04:00
|
|
|
elif type == ")":
|
1998-12-01 15:01:53 -04:00
|
|
|
if data == "COMMENT":
|
|
|
|
ofp.write("-->")
|
|
|
|
continue
|
1999-02-18 12:30:16 -04:00
|
|
|
data = map_gi(data, _elem_map)
|
1998-11-23 12:59:39 -04:00
|
|
|
if xml:
|
|
|
|
if not lastempty:
|
|
|
|
ofp.write("</%s>" % data)
|
|
|
|
elif data not in knownempties:
|
1999-01-20 16:35:05 -04:00
|
|
|
if data in autoclose:
|
|
|
|
pass
|
|
|
|
elif lastopened == data:
|
1998-11-23 12:59:39 -04:00
|
|
|
ofp.write("</>")
|
|
|
|
else:
|
|
|
|
ofp.write("</%s>" % data)
|
|
|
|
lastopened = None
|
|
|
|
lastempty = 0
|
1999-05-18 14:34:51 -03:00
|
|
|
inverbatim = 0
|
1998-11-23 12:59:39 -04:00
|
|
|
elif type == "A":
|
|
|
|
name, type, value = string.split(data, " ", 2)
|
1999-02-18 12:30:16 -04:00
|
|
|
name = map_gi(name, _attr_map)
|
1998-12-01 15:01:53 -04:00
|
|
|
attrs[name] = esistools.decode(value)
|
1998-11-23 12:59:39 -04:00
|
|
|
elif type == "e":
|
|
|
|
knownempty = 1
|
1999-08-26 14:50:26 -03:00
|
|
|
elif type == "&":
|
|
|
|
ofp.write("&%s;" % data)
|
|
|
|
knownempty = 0
|
|
|
|
else:
|
|
|
|
raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
|
1998-11-23 12:59:39 -04:00
|
|
|
|
1999-01-19 19:03:04 -04:00
|
|
|
if LIST_EMPTIES:
|
1999-02-18 12:30:16 -04:00
|
|
|
dump_empty_element_names(knownempties)
|
|
|
|
|
|
|
|
|
|
|
|
def dump_empty_element_names(knownempties):
    d = {}
    for gi in knownempties:
        d[gi] = gi
    knownempties.append("")
    if os.path.isfile(EMPTIES_FILENAME):
        fp = open(EMPTIES_FILENAME)
        while 1:
            line = fp.readline()
            if not line:
                break
            gi = string.strip(line)
            if gi:
                d[gi] = gi
    fp = open(EMPTIES_FILENAME, "w")
    gilist = d.keys()
    gilist.sort()
    fp.write(string.join(gilist, "\n"))
    fp.write("\n")
    fp.close()


def update_gi_map(map, names, fromsgml=1):
    for name in string.split(names, ","):
        if fromsgml:
            uncased = string.lower(name)
        else:
            uncased = name
        map[uncased] = name
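
# For example (hypothetical names), update_gi_map(_elem_map, "seeAlso,methodDesc")
# records {"seealso": "seeAlso", "methoddesc": "methodDesc"}, so map_gi() can
# restore the intended mixed-case spellings when writing tags for ESIS input
# that came from a case-folding SGML parser.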
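
# usage() is called in main() but not defined in this file; a minimal sketch
# (the message wording is assumed) so the error path actually works.
def usage():
    import sys
    sys.stderr.write("usage: %s [options] [infile [outfile]]\n"
                     % os.path.basename(sys.argv[0]))

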
def main():
    import getopt
    import sys
    #
    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_names = ''
    attr_names = ''
    value_names = ''
    verbatims = ('verbatim', 'interactive-session')
    # -a/--autoclose and the --*-map options all take an argument
    opts, args = getopt.getopt(sys.argv[1:], "a:desx",
                               ["autoclose=", "declare", "sgml", "xml",
                                "elements-map=", "attributes-map=",
                                "values-map="])
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            global LIST_EMPTIES
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = string.split(arg, ",")
elif opt == "--elements-map":
|
|
|
|
elem_names = ("%s,%s" % (elem_names, arg))[1:]
|
|
|
|
elif opt == "--attributes-map":
|
|
|
|
attr_names = ("%s,%s" % (attr_names, arg))[1:]
|
|
|
|
elif opt == "--values-map":
|
|
|
|
value_names = ("%s,%s" % (value_names, arg))[1:]
|
|
|
|
    #
    # open input streams:
    #
    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        usage()
        sys.exit(2)
    #
    # setup the name maps:
    #
    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        global _normalize_case
        _normalize_case = string.lower
        # update_gi_map() splits the comma-separated names itself; _token_map
        # is assumed to be the map intended for the --values-map names
        update_gi_map(_elem_map, elem_names)
        update_gi_map(_attr_map, attr_names)
        update_gi_map(_token_map, value_names)
    else:
        global map_gi
        map_gi = null_map_gi
    #
    # run the conversion:
    #
    try:
        if xml and xmldecl:
            ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
        convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
    except IOError, (err, msg):
        if err != errno.EPIPE:
            raise


if __name__ == "__main__":
    main()