212 lines
6.0 KiB
Python
Executable File
212 lines
6.0 KiB
Python
Executable File
#! /usr/bin/env python
|
|
|
|
"""
|
|
"""
|
|
__version__ = '$Revision$'
|
|
|
|
import re
|
|
import string
|
|
import sys
|
|
|
|
|
|
class Node:
|
|
|
|
__rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
|
|
__rmjunk = re.compile("<#\d+#>")
|
|
|
|
def __init__(self, link, str, seqno):
|
|
self.links = [link]
|
|
self.seqno = seqno
|
|
# remove <#\d+#> left in by moving the data out of LaTeX2HTML
|
|
str = self.__rmjunk.sub('', str)
|
|
# now remove <tt>...</tt> markup; contents remain.
|
|
if '<' in str:
|
|
m = self.__rmtt.match(str)
|
|
if m:
|
|
kstr = string.join(m.group(1, 2, 3), '')
|
|
else:
|
|
kstr = str
|
|
else:
|
|
kstr = str
|
|
kstr = string.lower(kstr)
|
|
# build up the text
|
|
self.text = []
|
|
parts = string.split(str, '!')
|
|
parts = map(string.split, parts, ['@'] * len(parts))
|
|
for entry in parts:
|
|
if len(entry) != 1:
|
|
key, text = entry
|
|
else:
|
|
text = entry[0]
|
|
self.text.append(text)
|
|
# Building the key must be separate since any <tt> has been stripped
|
|
# from the key, but can be avoided if both key and text sources are
|
|
# the same.
|
|
if kstr != str:
|
|
self.key = []
|
|
kparts = string.split(kstr, '!')
|
|
kparts = map(string.split, kparts, ['@'] * len(kparts))
|
|
for entry in kparts:
|
|
if len(entry) != 1:
|
|
key, text = entry
|
|
else:
|
|
key = entry[0]
|
|
self.key.append(key)
|
|
else:
|
|
self.key = self.text
|
|
|
|
def __cmp__(self, other):
|
|
"""Comparison operator includes sequence number, for use with
|
|
list.sort()."""
|
|
return self.cmp_entry(other) or cmp(self.seqno, other.seqno)
|
|
|
|
def cmp_entry(self, other):
|
|
"""Comparison 'operator' that ignores sequence number."""
|
|
for i in range(min(len(self.key), len(other.key))):
|
|
c = (cmp(self.key[i], other.key[i])
|
|
or cmp(self.text[i], other.text[i]))
|
|
if c:
|
|
return c
|
|
return cmp(self.key, other.key)
|
|
|
|
def __repr__(self):
|
|
return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno)
|
|
|
|
def __str__(self):
|
|
return string.join(self.key, '!')
|
|
|
|
def dump(self):
|
|
return "%s\0%s###%s\n" \
|
|
% (string.join(self.links, "\0"),
|
|
string.join(self.text, '!'),
|
|
self.seqno)
|
|
|
|
|
|
def load(fp):
|
|
nodes = []
|
|
rx = re.compile(r"(.*)\0(.*)###(.*)$")
|
|
while 1:
|
|
line = fp.readline()
|
|
if not line:
|
|
break
|
|
m = rx.match(line)
|
|
if m:
|
|
link, str, seqno = m.group(1, 2, 3)
|
|
nodes.append(Node(link, str, seqno))
|
|
return nodes
|
|
|
|
|
|
def split_letters(nodes):
|
|
letter_groups = []
|
|
group = []
|
|
append = group.append
|
|
if nodes:
|
|
letter = nodes[0].key[0][0]
|
|
letter_groups.append((letter, group))
|
|
for node in nodes:
|
|
nletter = node.key[0][0]
|
|
if letter != nletter:
|
|
letter = nletter
|
|
group = []
|
|
letter_groups.append((letter, group))
|
|
append = group.append
|
|
append(node)
|
|
return letter_groups
|
|
|
|
|
|
def format_nodes(nodes):
|
|
# Does not create multiple links to multiple targets for the same entry;
|
|
# uses a separate entry for each target. This is a bug.
|
|
level = 0
|
|
strings = ["<dl compact>"]
|
|
append = strings.append
|
|
prev = None
|
|
for node in nodes:
|
|
nlevel = len(node.key) - 1
|
|
if nlevel > level:
|
|
if prev is None or node.key[level] != prev.key[level]:
|
|
append("%s\n<dl compact>" % node.text[level])
|
|
else:
|
|
append("<dl compact>")
|
|
level = nlevel
|
|
elif nlevel < level:
|
|
append("</dl>" * (level - len(node.key) + 1))
|
|
level = nlevel
|
|
if prev is not None and node.key[level] != prev.key[level]:
|
|
append("</dl>")
|
|
else:
|
|
append("<dl compact>")
|
|
elif level:
|
|
if node.key[level-1] != prev.key[level-1]:
|
|
append("</dl>\n%s<dl compact>"
|
|
% node.text[level-1])
|
|
append("%s%s</a><br>" % (node.links[0], node.text[-1]))
|
|
for link in node.links[1:]:
|
|
strings[-1] = strings[-1][:-4] + ","
|
|
append(link + "[Link]</a><br>")
|
|
prev = node
|
|
append("</dl>" * (level + 1))
|
|
append("")
|
|
append("")
|
|
return string.join(strings, "\n")
|
|
|
|
|
|
def format_letter(letter):
|
|
if letter == '.':
|
|
lettername = ". (dot)"
|
|
elif letter == '_':
|
|
lettername = "_ (underscore)"
|
|
else:
|
|
lettername = string.upper(letter)
|
|
return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
|
|
% (letter, lettername)
|
|
|
|
|
|
def format_html(nodes):
|
|
letter_groups = split_letters(nodes)
|
|
items = []
|
|
for letter, nodes in letter_groups:
|
|
s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
|
|
items.append(s)
|
|
s = "<hr><center>\n%s</center>\n" % string.join(items, " |\n")
|
|
for letter, nodes in letter_groups:
|
|
s = s + format_letter(letter) + format_nodes(nodes)
|
|
return s
|
|
|
|
|
|
def collapse(nodes):
|
|
"""Collapse sequences of nodes with matching keys into a single node.
|
|
Destructive."""
|
|
if len(nodes) < 2:
|
|
return
|
|
prev = nodes[0]
|
|
i = 1
|
|
while i < len(nodes):
|
|
node = nodes[i]
|
|
if not node.cmp_entry(prev):
|
|
prev.links.append(node.links[0])
|
|
del nodes[i]
|
|
## sys.stderr.write("collapsing %s\n" % `node`)
|
|
else:
|
|
i = i + 1
|
|
prev = node
|
|
|
|
|
|
def dump(nodes, fp):
|
|
for node in nodes:
|
|
fp.write(node.dump())
|
|
|
|
|
|
def main():
|
|
fn = sys.argv[1]
|
|
nodes = load(open(fn))
|
|
nodes.sort()
|
|
dump(nodes, open(fn + ".dump-1", "w"))
|
|
collapse(nodes)
|
|
dump(nodes, open(fn + ".dump-2", "w"))
|
|
sys.stdout.write(format_html(nodes))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|