cpython/Doc/tools/prechm.py

504 lines
16 KiB
Python

"""
Makes the necesary files to convert from plain html of
Python 1.5 and 1.5.x Documentation to
Microsoft HTML Help format version 1.1
Doesn't change the html's docs.
by hernan.foffani@iname.com
no copyright and no responsabilities.
modified by Dale Nagata for Python 1.5.2
Renamed from make_chm.py to prechm.py, and checked into the Python
project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
Python 2.2 docs, at <http://alldunn.com/python/>.
"""
import sys
import os
from formatter import NullWriter, AbstractFormatter
from htmllib import HTMLParser
import getopt
import cgi
usage_mode = '''
Usage: prechm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
-c: does not build filename.hhc (Table of Contents)
-k: does not build filename.hhk (Index)
-p: does not build filename.hhp (Project File)
-v 1.5[.x]: makes help for the python 1.5[.x] docs
(default is python 1.5.2 docs)
'''
# Project file (*.hhp) template. 'arch' is the file basename (like
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
# the 2.2 in Python 2.2).
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).
# About 0x10384e: This defines the buttons in the help viewer. The
# following defns are taken from htmlhelp.h. Not all possibilities
# actually work, and not all those that work are available from the Help
# Workshop GUI. In particular, the Zoom/Font button works and is not
# available from the GUI. The ones we're using are marked with 'x':
#
# 0x000002 Hide/Show x
# 0x000004 Back x
# 0x000008 Forward x
# 0x000010 Stop
# 0x000020 Refresh
# 0x000040 Home x
# 0x000080 Forward
# 0x000100 Back
# 0x000200 Notes
# 0x000400 Contents
# 0x000800 Locate x
# 0x001000 Options x
# 0x002000 Print x
# 0x004000 Index
# 0x008000 Search
# 0x010000 History
# 0x020000 Favorites
# 0x040000 Jump 1
# 0x080000 Jump 2
# 0x100000 Zoom/Font x
# 0x200000 TOC Next
# 0x400000 TOC Prev
project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation
[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0
[FILES]
'''
contents_header = '''\
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
<!-- Sitemap 1.0 -->
</HEAD><BODY>
<OBJECT type="text/site properties">
<param name="Window Styles" value="0x801227">
<param name="ImageType" value="Folder">
</OBJECT>
<UL>
'''
contents_footer = '''\
</UL></BODY></HTML>
'''
object_sitemap = '''\
<OBJECT type="text/sitemap">
<param name="Name" value="%s">
<param name="Local" value="%s">
</OBJECT>
'''
# List of words the full text search facility shouldn't index. This
# becomes file ARCH.stp. Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
stop_list = '''
a and are as at
be but by
for
if in into is it
near no not
of on or
such
that the their then there these they this to
was will with
'''
# s is a string or None. If None or empty, return None. Else tack '.html'
# on to the end, unless it's already there.
def addhtml(s):
if s:
if not s.endswith('.html'):
s += '.html'
return s
# Convenience class to hold info about "a book" in HTMLHelp terms == a doc
# directory in Python terms.
class Book:
def __init__(self, directory, title, firstpage,
contentpage=None, indexpage=None):
self.directory = directory
self.title = title
self.firstpage = addhtml(firstpage)
self.contentpage = addhtml(contentpage)
self.indexpage = addhtml(indexpage)
# Library Doc list of books:
# each 'book' : (Dir, Title, First page, Content page, Index page)
supported_libraries = {
'2.4':
[
Book('.', 'Main page', 'index'),
Book('.', 'Global Module Index', 'modindex'),
Book('whatsnew', "What's New", 'index', 'contents'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'index'),
Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'),
],
'2.3':
[
Book('.', 'Main page', 'index'),
Book('.', 'Global Module Index', 'modindex'),
Book('whatsnew', "What's New", 'index', 'contents'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'index'),
Book('dist','Distributing Python Modules', 'dist', 'index'),
],
'2.2':
[
Book('.', 'Main page', 'index'),
Book('.', 'Global Module Index', 'modindex'),
Book('whatsnew', "What's New", 'index', 'contents'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'index'),
Book('dist','Distributing Python Modules', 'dist', 'index'),
],
'2.1.1':
[
Book('.', 'Main page', 'index'),
Book('.', 'Global Module Index', 'modindex'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'index'),
Book('dist','Distributing Python Modules', 'dist', 'index'),
],
'2.0.0':
[
Book('.', 'Global Module Index', 'modindex'),
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents'),
Book('inst','Installing Python Modules', 'inst', 'contents'),
Book('dist','Distributing Python Modules', 'dist', 'contents'),
],
# <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
# <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
'1.5.2':
[
Book('tut','Tutorial','tut','node2'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref','contents','genindex'),
Book('mac','Macintosh Reference','mac','contents','genindex'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex'),
Book('doc','Documenting Python','doc','contents')
],
# library for 1.5.1 version:
'1.5.1':
[
Book('tut','Tutorial','tut','contents'),
Book('lib','Library Reference','lib','contents','genindex'),
Book('ref','Language Reference','ref-1','ref-2','ref-11'),
Book('ext','Extending and Embedding','ext','contents'),
Book('api','Python/C API','api','contents','genindex')
],
# library for 1.5 version:
'1.5':
[
Book('tut','Tutorial','tut','node1'),
Book('lib','Library Reference','lib','node1','node268'),
Book('ref','Language Reference','ref-1','ref-2','ref-11'),
Book('ext','Extending and Embedding','ext','node1'),
Book('api','Python/C API','api','node1','node48')
]
}
# AlmostNullWriter doesn't print anything; it just arranges to save the
# text sent to send_flowing_data(). This is used to capture the text
# between an anchor begin/end pair, e.g. for TOC entries.
class AlmostNullWriter(NullWriter):
def __init__(self):
NullWriter.__init__(self)
self.saved_clear()
def send_flowing_data(self, data):
stripped = data.strip()
if stripped: # don't bother to save runs of whitespace
self.saved.append(stripped)
# Forget all saved text.
def saved_clear(self):
self.saved = []
# Return all saved text as a string.
def saved_get(self):
return ' '.join(self.saved)
class HelpHtmlParser(HTMLParser):
def __init__(self, formatter, path, output):
HTMLParser.__init__(self, formatter)
self.path = path # relative path
self.ft = output # output file
self.indent = 0 # number of tabs for pretty printing of files
self.proc = False # True when actively processing, else False
# (headers, footers, etc)
# XXX This shouldn't need to be a stack -- anchors shouldn't nest.
# XXX See SF bug <http://www.python.org/sf/546579>.
self.hrefstack = [] # stack of hrefs from anchor begins
def begin_group(self):
self.indent += 1
self.proc = True
def finish_group(self):
self.indent -= 1
# stop processing when back to top level
self.proc = self.indent > 0
def anchor_bgn(self, href, name, type):
if self.proc:
# XXX See SF bug <http://www.python.org/sf/546579>.
# XXX index.html for the 2.2.1 language reference manual contains
# XXX nested <a></a> tags in the entry for the section on blank
# XXX lines. We want to ignore the nested part completely.
if len(self.hrefstack) == 0:
self.saved_clear()
self.hrefstack.append(href)
def anchor_end(self):
if self.proc:
# XXX See XXX above.
if self.hrefstack:
title = cgi.escape(self.saved_get(), True)
path = self.path + '/' + self.hrefstack.pop()
self.tab(object_sitemap % (title, path))
def start_dl(self, atr_val):
self.begin_group()
def end_dl(self):
self.finish_group()
def do_dt(self, atr_val):
# no trailing newline on purpose!
self.tab("<LI>")
# Write text to output file.
def write(self, text):
self.ft.write(text)
# Write text to output file after indenting by self.indent tabs.
def tab(self, text=''):
self.write('\t' * self.indent)
if text:
self.write(text)
# Forget all saved text.
def saved_clear(self):
self.formatter.writer.saved_clear()
# Return all saved text as a string.
def saved_get(self):
return self.formatter.writer.saved_get()
class IdxHlpHtmlParser(HelpHtmlParser):
# nothing special here, seems enough with parent class
pass
class TocHlpHtmlParser(HelpHtmlParser):
def start_dl(self, atr_val):
self.begin_group()
self.tab('<UL>\n')
def end_dl(self):
self.finish_group()
self.tab('</UL>\n')
def start_ul(self, atr_val):
self.begin_group()
self.tab('<UL>\n')
def end_ul(self):
self.finish_group()
self.tab('</UL>\n')
def do_li(self, atr_val):
# no trailing newline on purpose!
self.tab("<LI>")
def index(path, indexpage, output):
parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
path, output)
f = open(path + '/' + indexpage)
parser.feed(f.read())
parser.close()
f.close()
def content(path, contentpage, output):
parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
path, output)
f = open(path + '/' + contentpage)
parser.feed(f.read())
parser.close()
f.close()
def do_index(library, output):
output.write('<UL>\n')
for book in library:
print '\t', book.title, '-', book.indexpage
if book.indexpage:
index(book.directory, book.indexpage, output)
output.write('</UL>\n')
def do_content(library, version, output):
output.write(contents_header)
for book in library:
print '\t', book.title, '-', book.firstpage
path = book.directory + "/" + book.firstpage
output.write('<LI>')
output.write(object_sitemap % (book.title, path))
if book.contentpage:
content(book.directory, book.contentpage, output)
output.write(contents_footer)
# Fill in the [FILES] section of the project (.hhp) file.
# 'library' is the list of directory description tuples from
# supported_libraries for the version of the docs getting generated.
def do_project(library, output, arch, version):
output.write(project_template % locals())
pathseen = {}
for book in library:
directory = book.directory
path = directory + '\\%s\n'
for page in os.listdir(directory):
if page.endswith('.html') or page.endswith('.css'):
fullpath = path % page
if fullpath not in pathseen:
output.write(fullpath)
pathseen[fullpath] = True
def openfile(file):
try:
p = open(file, "w")
except IOError, msg:
print file, ":", msg
sys.exit(1)
return p
def usage():
print usage_mode
sys.exit(0)
def do_it(args = None):
if not args:
args = sys.argv[1:]
if not args:
usage()
try:
optlist, args = getopt.getopt(args, 'ckpv:')
except getopt.error, msg:
print msg
usage()
if not args or len(args) > 1:
usage()
arch = args[0]
version = None
for opt in optlist:
if opt[0] == '-v':
version = opt[1]
break
if not version:
usage()
library = supported_libraries[version]
if not (('-p','') in optlist):
fname = arch + '.stp'
f = openfile(fname)
print "Building stoplist", fname, "..."
words = stop_list.split()
words.sort()
for word in words:
print >> f, word
f.close()
f = openfile(arch + '.hhp')
print "Building Project..."
do_project(library, f, arch, version)
if version == '2.0.0':
for image in os.listdir('icons'):
f.write('icons'+ '\\' + image + '\n')
f.close()
if not (('-c','') in optlist):
f = openfile(arch + '.hhc')
print "Building Table of Content..."
do_content(library, version, f)
f.close()
if not (('-k','') in optlist):
f = openfile(arch + '.hhk')
print "Building Index..."
do_index(library, f)
f.close()
if __name__ == '__main__':
do_it()