1998-03-06 17:29:00 -04:00
|
|
|
#! /usr/bin/env python
|
|
|
|
|
|
|
|
"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
|
|
|
|
|
|
|
|
The output file has an extension of '.bkm' instead of '.out', since hyperref
|
1998-10-07 11:12:20 -03:00
|
|
|
already uses that extension.
|
1998-03-06 17:29:00 -04:00
|
|
|
"""
|
|
|
|
|
1998-03-07 11:34:50 -04:00
|
|
|
import getopt
|
1998-03-06 17:29:00 -04:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import string
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
# Each entry is a tuple of:
|
|
|
|
#
|
|
|
|
# Section Type, Section #, Title String, Page #, List of Sub-entries
|
1998-05-14 17:07:10 -03:00
|
|
|
#
|
|
|
|
# The return value of parse_toc() is a list of such tuples.
|
1998-03-06 17:29:00 -04:00
|
|
|
|
|
|
|
# Verbose-mode pattern for one "\contentsline" record of a LaTeX .toc file.
# Capture groups, in order: section type, optional section number (from
# "\numberline"), raw title text, and page number.
cline_re = r"""^
\\contentsline\ \{([a-z]*)} # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)} # title string
\{(\d+)}$""" # page number

# Compiled once at import time; parse_toc() matches every input line
# against it.
cline_rx = re.compile(cline_re, re.X)
|
|
|
|
|
|
|
|
OUTER_TO_INNER = -1
|
|
|
|
|
|
|
|
_transition_map = {
|
|
|
|
('chapter', 'section'): OUTER_TO_INNER,
|
|
|
|
('section', 'subsection'): OUTER_TO_INNER,
|
|
|
|
('subsection', 'subsubsection'): OUTER_TO_INNER,
|
|
|
|
('subsubsection', 'subsection'): 1,
|
|
|
|
('subsection', 'section'): 1,
|
|
|
|
('section', 'chapter'): 1,
|
|
|
|
('subsection', 'chapter'): 2,
|
|
|
|
('subsubsection', 'section'): 2,
|
|
|
|
('subsubsection', 'chapter'): 3,
|
|
|
|
}
|
|
|
|
|
1998-10-07 11:12:20 -03:00
|
|
|
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
|
|
|
|
|
|
|
|
|
2006-03-31 01:30:19 -04:00
|
|
|
class BadSectionNesting(Exception):
    """Signals a jump between section levels that the parser cannot handle.

    Attributes:
        level:      section type that was current before the bad entry
        newsection: section type of the entry that caused the error
        path:       name of the file being read
        lineno:     line number of the offending entry
    """

    _TEMPLATE = "illegal transition from %s to %s at %s (line %s)"

    def __init__(self, level, newsection, path, lineno):
        self.level, self.newsection = level, newsection
        self.path, self.lineno = path, lineno

    def __str__(self):
        details = (self.level, self.newsection, self.path, self.lineno)
        return self._TEMPLATE % details
|
|
|
|
|
|
|
|
|
1998-03-07 11:34:50 -04:00
|
|
|
def parse_toc(fp, bigpart=None):
    """Parse the lines of a LaTeX .toc file into a nested outline.

    Arguments:
        fp:      open file-like object yielding the lines of the .toc file
        bigpart: section type used as the outermost outline level; when
                 omitted (or empty) 'chapter' is used

    Returns a list of entries for the outermost level.  Each entry is a
    tuple (section type, section number or None, cleaned title, page number
    as an int, list of sub-entries), the sub-entries having the same shape.

    Lines that do not match the "\\contentsline" pattern are echoed to
    standard error, prefixed with their line number.

    Raises BadSectionNesting when an included section type follows a level
    for which _transition_map has no entry.
    """
    toc = top = []
    # stack[0] is always the list currently being filled; enclosing lists
    # follow it, outermost last.
    stack = [toc]
    level = bigpart or 'chapter'
    for lineno, line in enumerate(fp, 1):
        m = cline_rx.match(line)
        if not m:
            # Report the unparsed line together with its line number (the
            # number used to be left as an uninterpolated "%s").
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            # Sibling of the previous entry: same list, same level.
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        try:
            direction = _transition_map[(level, stype)]
        except KeyError:
            # Not every file-like object has a name; don't let a missing
            # attribute mask the real error.
            raise BadSectionNesting(level, stype,
                                    getattr(fp, "name", "<unknown>"), lineno)
        if direction == OUTER_TO_INNER:
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Close 'direction' nesting levels and resume the enclosing list.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top
|
|
|
|
|
|
|
|
|
1998-03-10 10:02:35 -04:00
|
|
|
# Patterns used by clean_title() to strip LaTeX markup from section titles.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")

# Identity translation table, retained as a module-level name.  The
# string.maketrans() function only exists on Python 2; fall back to
# str.maketrans() so that importing this module also works on Python 3.
title_trans = (getattr(string, "maketrans", None) or str.maketrans)("", "")


def clean_title(title):
    """Return *title* with LaTeX markup reduced to plain outline text.

    The following is applied, in order:

    * every "\\raisebox{...}" (macro name plus its first brace group) is
      removed;
    * every "\\hackscore{...}" is replaced by "\\_";
    * every macro name that is followed by whitespace is removed together
      with that whitespace, except "\\textunderscore", which is kept;
    * all remaining "{" and "}" characters are removed.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while 1:
        m = title_rx.search(title, pos)
        if m is None:
            break
        start = m.start()
        if not title.startswith("\\textunderscore", start):
            title = title[:start] + title[m.end():]
        # Always move past the backslash just examined so that a retained
        # \textunderscore cannot be matched again.
        pos = start + 1
    # str.replace() behaves the same on Python 2 and 3, unlike the
    # two-argument str.translate(table, deletechars) form used before.
    return title.replace("{", "").replace("}", "")
|
1998-03-06 17:29:00 -04:00
|
|
|
|
|
|
|
|
|
|
|
def write_toc(toc, fp):
    """Write every top-level entry of *toc*, with its sub-entries, to *fp*.

    *toc* is a list of entry tuples as returned by parse_toc(); *fp* is a
    writable file-like object.  Each entry is handed to write_toc_entry()
    with a starting layer of 0.
    """
    for top_level in toc:
        write_toc_entry(top_level, fp, 0)
|
1998-03-06 17:29:00 -04:00
|
|
|
|
|
|
|
def write_toc_entry(entry, fp, layer):
    """Write the "\\pdfoutline" line for *entry*, then those of its children.

    *entry* is a (section type, section number, title, page number,
    sub-entries) tuple.  The emitted line points at the named destination
    "pageNNN" (page number zero-padded to three digits), carries a negative
    "count" equal to the number of direct sub-entries when there are any,
    and ends with the title, prefixed by the section number if present.
    *layer* is the nesting depth; it is only passed on, incremented, to the
    recursive calls.
    """
    kind, number, heading, page, children = entry
    line = "\\pdfoutline goto name{page%03d}" % page
    if children:
        line = "%s count -%d" % (line, len(children))
    label = "%s %s" % (number, heading) if number else heading
    fp.write("%s {%s}\n" % (line, label))
    deeper = layer + 1
    for child in children:
        write_toc_entry(child, fp, deeper)
|
1998-03-06 17:29:00 -04:00
|
|
|
|
|
|
|
|
1999-03-03 15:25:56 -04:00
|
|
|
def process(ifn, ofn, bigpart=None):
    """Convert the .toc file *ifn* into the outline file *ofn*.

    Arguments:
        ifn:     path of the LaTeX .toc file to read
        ofn:     path of the output file; it is created or overwritten
        bigpart: outermost section type, passed through to parse_toc()

    The input is parsed completely before the output file is opened, so a
    parse error does not leave a truncated output file behind.  Both files
    are closed on exit, including when an exception is raised.
    """
    with open(ifn) as infile:
        toc = parse_toc(infile, bigpart)
    with open(ofn, "w") as outfile:
        write_toc(toc, outfile)
|
|
|
|
|
|
|
|
|
1998-03-06 17:29:00 -04:00
|
|
|
def usage():
    """Write a one-line command synopsis to standard error."""
    # main() called usage() although no such function was defined in this
    # file, so running the script without arguments died with a NameError.
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write("usage: %s [-c bigpart] file ...\n" % prog)


def main():
    """Command-line entry point.

    Options:
        -c bigpart   section type to use as the outermost outline level
                     (handed to parse_toc(); if the option is repeated,
                     the first occurrence wins)

    Every remaining argument names a .toc file; ".toc" is assumed when the
    argument has no extension.  The outline is written next to it with the
    extension ".bkm".  Exits with status 2 when an option is invalid or no
    file is given.
    """
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError as err:
        # Report bad options as a usage error instead of a traceback.
        sys.stderr.write("%s\n" % err)
        usage()
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        usage()
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)
|
1998-03-06 17:29:00 -04:00
|
|
|
|
|
|
|
|
|
|
|
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|