504 lines
16 KiB
Python
504 lines
16 KiB
Python
"""
|
|
Makes the necessary files to convert from plain html of
|
|
Python 1.5 and 1.5.x Documentation to
|
|
Microsoft HTML Help format version 1.1
|
|
Doesn't change the html's docs.
|
|
|
|
by hernan.foffani@iname.com
|
|
no copyright and no responsibilities.
|
|
|
|
modified by Dale Nagata for Python 1.5.2
|
|
|
|
Renamed from make_chm.py to prechm.py, and checked into the Python
|
|
project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
|
|
and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
|
|
Python 2.2 docs, at <http://alldunn.com/python/>.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
from formatter import NullWriter, AbstractFormatter
|
|
from htmllib import HTMLParser
|
|
import getopt
|
|
import cgi
|
|
|
|
usage_mode = '''
|
|
Usage: prechm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
|
|
-c: does not build filename.hhc (Table of Contents)
|
|
-k: does not build filename.hhk (Index)
|
|
-p: does not build filename.hhp (Project File)
|
|
-v 1.5[.x]: makes help for the python 1.5[.x] docs
|
|
(default is python 1.5.2 docs)
|
|
'''
|
|
|
|
# Project file (*.hhp) template. 'arch' is the file basename (like
|
|
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
|
|
# the 2.2 in Python 2.2).
|
|
# The magical numbers in the long line under [WINDOWS] set most of the
|
|
# user-visible features (visible buttons, tabs, etc).
|
|
# About 0x10384e: This defines the buttons in the help viewer. The
|
|
# following defns are taken from htmlhelp.h. Not all possibilities
|
|
# actually work, and not all those that work are available from the Help
|
|
# Workshop GUI. In particular, the Zoom/Font button works and is not
|
|
# available from the GUI. The ones we're using are marked with 'x':
|
|
#
|
|
# 0x000002 Hide/Show x
|
|
# 0x000004 Back x
|
|
# 0x000008 Forward x
|
|
# 0x000010 Stop
|
|
# 0x000020 Refresh
|
|
# 0x000040 Home x
|
|
# 0x000080 Forward
|
|
# 0x000100 Back
|
|
# 0x000200 Notes
|
|
# 0x000400 Contents
|
|
# 0x000800 Locate x
|
|
# 0x001000 Options x
|
|
# 0x002000 Print x
|
|
# 0x004000 Index
|
|
# 0x008000 Search
|
|
# 0x010000 History
|
|
# 0x020000 Favorites
|
|
# 0x040000 Jump 1
|
|
# 0x080000 Jump 2
|
|
# 0x100000 Zoom/Font x
|
|
# 0x200000 TOC Next
|
|
# 0x400000 TOC Prev
|
|
|
|
project_template = '''
|
|
[OPTIONS]
|
|
Compiled file=%(arch)s.chm
|
|
Contents file=%(arch)s.hhc
|
|
Default Window=%(arch)s
|
|
Default topic=index.html
|
|
Display compile progress=No
|
|
Full text search stop list file=%(arch)s.stp
|
|
Full-text search=Yes
|
|
Index file=%(arch)s.hhk
|
|
Language=0x409
|
|
Title=Python %(version)s Documentation
|
|
|
|
[WINDOWS]
|
|
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
|
|
"index.html","index.html",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0
|
|
|
|
[FILES]
|
|
'''
|
|
|
|
contents_header = '''\
|
|
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
|
|
<HTML>
|
|
<HEAD>
|
|
<meta name="GENERATOR" content="Microsoft® HTML Help Workshop 4.1">
|
|
<!-- Sitemap 1.0 -->
|
|
</HEAD><BODY>
|
|
<OBJECT type="text/site properties">
|
|
<param name="Window Styles" value="0x801227">
|
|
<param name="ImageType" value="Folder">
|
|
</OBJECT>
|
|
<UL>
|
|
'''
|
|
|
|
contents_footer = '''\
|
|
</UL></BODY></HTML>
|
|
'''
|
|
|
|
object_sitemap = '''\
|
|
<OBJECT type="text/sitemap">
|
|
<param name="Name" value="%s">
|
|
<param name="Local" value="%s">
|
|
</OBJECT>
|
|
'''
|
|
|
|
# List of words the full text search facility shouldn't index. This
|
|
# becomes file ARCH.stp. Note that this list must be pretty small!
|
|
# Different versions of the MS docs claim the file has a maximum size of
|
|
# 256 or 512 bytes (including \r\n at the end of each line).
|
|
# Note that "and", "or", "not" and "near" are operators in the search
|
|
# language, so no point indexing them even if we wanted to.
|
|
stop_list = '''
|
|
a and are as at
|
|
be but by
|
|
for
|
|
if in into is it
|
|
near no not
|
|
of on or
|
|
such
|
|
that the their then there these they this to
|
|
was will with
|
|
'''
|
|
|
|
# s is a string or None.  If None or empty, return s unchanged.  Else tack '.html'
|
|
# on to the end, unless it's already there.
|
|
def addhtml(s):
    """Return s with a '.html' suffix, adding one only if it is missing.

    A false value (None or the empty string) is handed back unchanged so
    that optional page names stay "not set".
    """
    if not s:
        return s
    if s.endswith('.html'):
        return s
    return s + '.html'
|
|
|
|
# Convenience class to hold info about "a book" in HTMLHelp terms == a doc
|
|
# directory in Python terms.
|
|
class Book:
    """Plain record describing one documentation directory ("book").

    Attributes:
        directory   -- directory holding the book's HTML files
        title       -- human-readable title of the book
        firstpage   -- page name of the book's first page
        contentpage -- page name of the table of contents, or None
        indexpage   -- page name of the index, or None

    The three page names are passed through addhtml(), so they may be
    given with or without the '.html' suffix.
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory, self.title = directory, title
        pages = [addhtml(page)
                 for page in (firstpage, contentpage, indexpage)]
        self.firstpage, self.contentpage, self.indexpage = pages
|
|
|
|
# Library Doc list of books:
|
|
# each 'book' : (Dir, Title, First page, Content page, Index page)
|
|
# Maps a documentation version string (the value given to the -v option)
# to the list of Book objects making up that version's doc set.  The
# order of the books in each list is the order in which they are written
# to the generated contents and index files.
supported_libraries = {
    # NOTE: the '2.4', '2.3' and '2.2' entries are currently identical.
    '2.4':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.3':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.2':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    # Same as 2.2 but without the "What's New" book.
    '2.1.1':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    # No 'Main page' book here; 'inst' and 'dist' use a 'contents' page.
    '2.0.0':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'contents'),
        Book('dist','Distributing Python Modules', 'dist', 'contents'),
    ],

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2':
    [
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents')
    ],

    # library for 1.5.1 version:
    '1.5.1':
    [
        Book('tut','Tutorial','tut','contents'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref-1','ref-2','ref-11'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex')
    ],

    # library for 1.5 version:
    '1.5':
    [
        Book('tut','Tutorial','tut','node1'),
        Book('lib','Library Reference','lib','node1','node268'),
        Book('ref','Language Reference','ref-1','ref-2','ref-11'),
        Book('ext','Extending and Embedding','ext','node1'),
        Book('api','Python/C API','api','node1','node48')
    ]
}
|
|
|
|
# AlmostNullWriter doesn't print anything; it just arranges to save the
|
|
# text sent to send_flowing_data(). This is used to capture the text
|
|
# between an anchor begin/end pair, e.g. for TOC entries.
|
|
|
|
class AlmostNullWriter(NullWriter):
    """NullWriter variant that remembers the flowing text it is sent.

    Nothing is printed.  Each non-blank chunk passed to
    send_flowing_data() is stripped and kept in self.saved until
    saved_clear() is called; saved_get() returns the chunks joined by
    single spaces.
    """

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        """Record data, minus surrounding whitespace, unless it is blank."""
        text = data.strip()
        if not text:
            # don't bother to save runs of whitespace
            return
        self.saved.append(text)

    def saved_clear(self):
        """Forget all saved text."""
        self.saved = []

    def saved_get(self):
        """Return all saved text as a single space-separated string."""
        return ' '.join(self.saved)
|
|
|
|
class HelpHtmlParser(HTMLParser):
    """HTMLParser that turns anchors inside list groups into sitemap objects.

    While at least one group is open (see begin_group/finish_group), the
    text of each outermost <a>...</a> pair is captured through the
    formatter's writer and written to the output file as an
    object_sitemap entry whose target is path + '/' + href.
    """

    def __init__(self, formatter, path, output):
        HTMLParser.__init__(self, formatter)
        self.path = path        # relative path
        self.ft = output        # output file
        self.indent = 0         # number of tabs for pretty printing of files
        self.proc = False       # True when actively processing, else False
                                # (headers, footers, etc)
        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
        # XXX See SF bug <http://www.python.org/sf/546579>.
        self.hrefstack = []     # stack of hrefs from anchor begins

    def begin_group(self):
        """Enter one more nesting level and switch processing on."""
        self.indent += 1
        self.proc = True

    def finish_group(self):
        """Leave one nesting level; processing stops at the top level."""
        self.indent -= 1
        self.proc = self.indent > 0

    def anchor_bgn(self, href, name, type):
        """Remember href and start capturing text for an outermost anchor."""
        if not self.proc:
            return
        # XXX See SF bug <http://www.python.org/sf/546579>.
        # XXX index.html for the 2.2.1 language reference manual contains
        # XXX nested <a></a> tags in the entry for the section on blank
        # XXX lines.  We want to ignore the nested part completely.
        if not self.hrefstack:
            self.saved_clear()
            self.hrefstack.append(href)

    def anchor_end(self):
        """Write the sitemap object for the anchor that was being captured."""
        # XXX See XXX in anchor_bgn: only a pending outermost anchor counts.
        if not (self.proc and self.hrefstack):
            return
        title = cgi.escape(self.saved_get(), True)
        path = self.path + '/' + self.hrefstack.pop()
        self.tab(object_sitemap % (title, path))

    def start_dl(self, atr_val):
        self.begin_group()

    def end_dl(self):
        self.finish_group()

    def do_dt(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")

    def write(self, text):
        """Write text to the output file."""
        self.ft.write(text)

    def tab(self, text=''):
        """Write self.indent tabs to the output file, then text (if any)."""
        self.write('\t' * self.indent)
        if text:
            self.write(text)

    def saved_clear(self):
        """Forget all text saved by the formatter's writer."""
        self.formatter.writer.saved_clear()

    def saved_get(self):
        """Return all text saved by the formatter's writer as a string."""
        return self.formatter.writer.saved_get()
|
|
|
|
class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser applied to a book's index page (see index()).

    Adds no behavior of its own; everything is inherited from
    HelpHtmlParser.
    """
    # nothing special here, seems enough with parent class
    pass
|
|
|
|
class TocHlpHtmlParser(HelpHtmlParser):
    """Parser applied to a book's table-of-contents page (see content()).

    Both <dl> and <ul> open a nested <UL> in the output, and <li> (like
    the inherited <dt> handling) starts an <LI> entry.
    """

    def _open_list(self):
        # Go one level deeper first so the <UL> is indented at the new depth.
        self.begin_group()
        self.tab('<UL>\n')

    def _close_list(self):
        # Drop back a level first so </UL> lines up with the enclosing level.
        self.finish_group()
        self.tab('</UL>\n')

    def start_dl(self, atr_val):
        self._open_list()

    def end_dl(self):
        self._close_list()

    def start_ul(self, atr_val):
        self._open_list()

    def end_ul(self):
        self._close_list()

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")
|
|
|
|
def index(path, indexpage, output):
    """Parse one book's index page and write its entries to output.

    path      -- directory of the book; also used as the prefix of every
                 link target written
    indexpage -- file name of the index page inside path
    output    -- writable file object receiving the generated entries

    Raises IOError if the index page cannot be opened or read.
    """
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        # Release the input file even if parsing blows up.
        f.close()
|
|
|
|
def content(path, contentpage, output):
    """Parse one book's contents page and write its entries to output.

    path        -- directory of the book; also used as the prefix of
                   every link target written
    contentpage -- file name of the table-of-contents page inside path
    output      -- writable file object receiving the generated entries

    Raises IOError if the contents page cannot be opened or read.
    """
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        # Release the input file even if parsing blows up.
        f.close()
|
|
|
|
def do_index(library, output):
|
|
output.write('<UL>\n')
|
|
for book in library:
|
|
print '\t', book.title, '-', book.indexpage
|
|
if book.indexpage:
|
|
index(book.directory, book.indexpage, output)
|
|
output.write('</UL>\n')
|
|
|
|
def do_content(library, version, output):
|
|
output.write(contents_header)
|
|
for book in library:
|
|
print '\t', book.title, '-', book.firstpage
|
|
path = book.directory + "/" + book.firstpage
|
|
output.write('<LI>')
|
|
output.write(object_sitemap % (book.title, path))
|
|
if book.contentpage:
|
|
content(book.directory, book.contentpage, output)
|
|
output.write(contents_footer)
|
|
|
|
# Fill in the [FILES] section of the project (.hhp) file.
|
|
# 'library' is the list of Book objects from
|
|
# supported_libraries for the version of the docs getting generated.
|
|
def do_project(library, output, arch, version):
    """Write the project (.hhp) file contents to output.

    library -- list of Book objects whose directories are scanned
    output  -- writable file object for the project file
    arch    -- file basename substituted into project_template
    version -- doc version string substituted into project_template

    After the filled-in template, every *.html and *.css file found in
    each book's directory is listed as directory\\page, one per line.
    A path is written only once even if several books share a directory.
    """
    # Spell out the template fields instead of passing locals(), so the
    # substitution cannot silently change when a local is added or renamed.
    output.write(project_template % {'arch': arch, 'version': version})
    pathseen = {}
    for book in library:
        directory = book.directory
        # os.listdir() order is arbitrary; sort it so that the generated
        # file is reproducible from run to run.
        pages = os.listdir(directory)
        pages.sort()
        for page in pages:
            if page.endswith('.html') or page.endswith('.css'):
                # Plain concatenation: building a '%s' format string out of
                # the directory name breaks if that name contains a '%'.
                fullpath = directory + '\\' + page + '\n'
                if fullpath not in pathseen:
                    output.write(fullpath)
                    pathseen[fullpath] = True
|
|
|
|
def openfile(file):
|
|
try:
|
|
p = open(file, "w")
|
|
except IOError, msg:
|
|
print file, ":", msg
|
|
sys.exit(1)
|
|
return p
|
|
|
|
def usage():
|
|
print usage_mode
|
|
sys.exit(0)
|
|
|
|
def do_it(args = None):
|
|
if not args:
|
|
args = sys.argv[1:]
|
|
|
|
if not args:
|
|
usage()
|
|
|
|
try:
|
|
optlist, args = getopt.getopt(args, 'ckpv:')
|
|
except getopt.error, msg:
|
|
print msg
|
|
usage()
|
|
|
|
if not args or len(args) > 1:
|
|
usage()
|
|
arch = args[0]
|
|
|
|
version = None
|
|
for opt in optlist:
|
|
if opt[0] == '-v':
|
|
version = opt[1]
|
|
break
|
|
if not version:
|
|
usage()
|
|
|
|
library = supported_libraries[version]
|
|
|
|
if not (('-p','') in optlist):
|
|
fname = arch + '.stp'
|
|
f = openfile(fname)
|
|
print "Building stoplist", fname, "..."
|
|
words = stop_list.split()
|
|
words.sort()
|
|
for word in words:
|
|
print >> f, word
|
|
f.close()
|
|
|
|
f = openfile(arch + '.hhp')
|
|
print "Building Project..."
|
|
do_project(library, f, arch, version)
|
|
if version == '2.0.0':
|
|
for image in os.listdir('icons'):
|
|
f.write('icons'+ '\\' + image + '\n')
|
|
|
|
f.close()
|
|
|
|
if not (('-c','') in optlist):
|
|
f = openfile(arch + '.hhc')
|
|
print "Building Table of Content..."
|
|
do_content(library, version, f)
|
|
f.close()
|
|
|
|
if not (('-k','') in optlist):
|
|
f = openfile(arch + '.hhk')
|
|
print "Building Index..."
|
|
do_index(library, f)
|
|
f.close()
|
|
|
|
if __name__ == '__main__':
|
|
do_it()
|