cpython/Doc/tools/mkhowto

#! /usr/bin/env python
#  -*- Python -*-
"""usage: %(program)s [options...] file ...

Options specifying formats to build:
    --html		HyperText Markup Language
    --pdf		Portable Document Format (default)
    --ps		PostScript
    --dvi		'DeVice Independent' format from TeX
    --text		ASCII text (requires lynx)

    More than one output format may be specified, or --all.

HTML options:
    --address, -a	Specify an address for page footers.
    --link		Specify the number of levels to include on each page.
    --split, -s		Specify a section level for page splitting, default: %(max_split_depth)s.
    --iconserver, -i	Specify location of icons (default: ../).
    --image-type	Specify the image type to use in HTML output;
                        values: gif (default), png.
    --numeric           Don't rename the HTML files; just keep node#.html for
                        the filenames.

Other options:
    --a4		Format for A4 paper.
    --letter		Format for US letter paper (the default).
    --help, -H		Show this text.
    --logging, -l	Log stdout and stderr to a file (*.how).
    --debugging, -D	Echo commands as they are executed.
    --keep, -k		Keep temporary files around.
    --quiet, -q		Do not print command output to stdout.
			(stderr is also lost,  sorry; see *.how for errors)
"""

import getopt
import glob
import os
import re
import shutil
import string
import sys
import tempfile


# MYDIR is sys.path[0] (normally the directory holding this script) made
# absolute against the current directory; TOPDIR is its parent directory.
MYDIR = os.path.normpath(os.path.join(os.getcwd(), sys.path[0]))
TOPDIR = os.path.normpath(os.path.join(MYDIR, os.pardir))

# Support files located relative to the directories above:
#   ISTFILE            style file passed to makeindex with -s
#   NODE2LABEL_SCRIPT  Perl script run over the generated *.html files
#   L2H_INIT_FILE      base init file copied into the per-document init file
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs; they are used as-is when building shell
# command lines, so they are looked up by the shell.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"


def usage(options):
    """Print the module docstring with values from options filled in.

    options is used as the mapping for the %(name)s fields in the text.
    """
    text = __doc__ % options
    print(text)

def error(options, message, err=2):
    """Report a command-line problem on stderr, show the usage, and exit.

    options -- mapping used to fill in the usage text
    message -- text (or exception instance) describing the problem
    err -- process exit status; defaults to 2
    """
    # Redirect stdout so that the usage text printed below also lands on
    # stderr.
    sys.stdout = sys.stderr
    print(message)
    print("")
    usage(options)
    # Honour the caller's status instead of a hard-coded 2.
    sys.exit(err)


class Options:
    """Command-line settings shared by all jobs.

    The class attributes are the defaults; parse() stores overrides on the
    instance.  Instances support options[name] lookup so they can serve as
    the mapping when the usage message is formatted.
    """
    program = os.path.basename(sys.argv[0])
    #
    address = ''
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = None
    image_type = "gif"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    #
    DEFAULT_FORMATS = ("pdf",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        self.formats = []

    def __getitem__(self, key):
        """Return the attribute named key; raise KeyError if there is none."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line arguments in args.

        Returns the remaining non-option arguments.  getopt.error raised
        for unknown options is left for the caller to handle; --help
        prints the usage text and exits.  The values of --link, --split
        and --runs are converted with int().
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric"]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # -i is accepted by getopt above and documented in the
                # usage text, so it has to be handled here as well.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.join(os.getcwd(), arg))
            elif opt == "--numeric":
                self.numeric = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy it into
        # a list so add_format() keeps working afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories.  Splitting never
        # returns an empty list: an unset or empty TEXINPUTS gives [''].
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        self.base_texinputs = [
            os.path.join(TOPDIR, "paper-" + self.paper),
            os.path.join(TOPDIR, "texinputs"),
            ] + texinputs


class Job:
    """Build the requested output formats for a single LaTeX document.

    All external programs are run through run(), which appends their
    output to the "<doc>.how" log file in the current directory.
    """
    # Set to 1 once build_aux() has performed the initial LaTeX run.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare a build of the document at path.

        options -- an initialized Options instance
        path -- path of the document's .tex file
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.log_filename = self.doc + ".how"
        # Start every build with a fresh log.
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # An existing <doc>.l2h is never overwritten; the generated init
        # file goes to a temporary name in that case.
        if os.path.exists(self.doc + ".l2h"):
            self.l2h_aux_init_file = tempfile.mktemp()
        else:
            self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every format listed in options.formats, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        # PostScript is generated from the DVI file, so "ps" implies a DVI
        # build as well.
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.doc)
            if self.options.icon_server == ".":
                # The pages refer to icons in their own directory, so copy
                # the support images next to them.
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    new_fn = os.path.join(self.doc, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # The text dump is taken from a single HTML page, so append
                # an override to the init file and build into a scratch
                # directory.
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Set TEXINPUTS to the document directory plus the base path."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Do the first LaTeX run, starting from placeholder index files.

        binary -- LaTeX program to run; LATEX_BINARY when None
        """
        if binary is None:
            binary = LATEX_BINARY
        new_index(   "%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build the DVI file with LATEX_BINARY."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF file with PDFLATEX_BINARY."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run the index, bibliography and LaTeX passes using binary.

        Raises RuntimeError if the document class is neither "manual" nor
        "howto".
        """
        self.require_temps(binary=binary)
        if os.path.isfile("mod%s.idx" % self.doc):
            self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
        if os.path.isfile(self.doc + ".idx"):
            # call to Doc/tools/fix_hack omitted; doesn't appear necessary
            self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
            import indfix
            indfix.process(self.doc + ".ind")
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.process_synopsis_files()
        #
        # let the doctype-specific handler do some intermediate work:
        #
        if self.doctype == "manual":
            self.use_latex_manual(binary=binary)
        elif self.doctype == "howto":
            self.use_latex_howto(binary=binary)
        else:
            # %-formatting keeps this a RuntimeError even when no
            # document class was found and self.doctype is None.
            raise RuntimeError(
                "unsupported document type: %s" % (self.doctype,))
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", "section")
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))

    def use_latex_howto(self, binary):
        """Intermediate pass for "howto" documents.

        Runs binary once more, then regenerates the indexes with the
        ISTFILE style and reprocesses the synopsis files.
        """
        self.run("%s %s" % (binary, self.doc))
        if os.path.isfile("mod%s.idx" % self.doc):
            self.run("%s -s %s mod%s.idx"
                     % (MAKEINDEX_BINARY, ISTFILE, self.doc))
        if os.path.isfile(self.doc + ".idx"):
            self.run("%s -s %s %s.idx" % (MAKEINDEX_BINARY, ISTFILE, self.doc))
        self.process_synopsis_files()

    def use_latex_manual(self, binary):
        """Intermediate pass for "manual" documents; nothing to do."""
        pass

    def process_synopsis_files(self):
        """Run uniqify_module_table() over every <doc>*.syn file."""
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        """Convert <doc>.dvi to <doc>.ps with DVIPS_BINARY."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir=None, max_split_depth=None):
        """Run LaTeX2HTML on the document and post-process the result.

        builddir -- output directory; defaults to the document name
        max_split_depth -- split depth the output was configured for;
                           defaults to options.max_split_depth

        Exits with status 1 if <doc>.tex is not found on TEXINPUTS.
        """
        if builddir is None:
            builddir = self.doc
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))     # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1 and not self.options.numeric:
            # The node2label script is run from inside the output
            # directory so that the shell expands *.html there.
            pwd = os.getcwd()
            try:
                os.chdir(builddir)
                self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
            finally:
                os.chdir(pwd)

    def build_text(self, tempdir=None):
        """Dump index.html from tempdir to <doc>.txt using LYNX_BINARY."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the initial LaTeX run has been done exactly once."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def write_l2h_aux_init_file(self):
        """Write the per-document LaTeX2HTML init file.

        The file is a copy of L2H_INIT_FILE followed by settings derived
        from the options.
        """
        source = open(L2H_INIT_FILE)
        try:
            base_settings = source.read()
        finally:
            source.close()
        fp = open(self.l2h_aux_init_file, "w")
        try:
            fp.write(base_settings)
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "push (@INC, '%s');\n"
                     "$NO_AUTO_LINK = 1;\n"
                     % os.path.dirname(L2H_INIT_FILE)
                     )
            options = self.options
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove intermediate files left behind by the build.

        The log file is kept when logging is enabled and the LaTeX2HTML
        init file is kept when debugging is enabled.
        """
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        # leftovers inside the HTML output directory:
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            for path in glob.glob(os.path.join(self.doc, spec)):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run command through the shell, appending its output to the log.

        stdin is /dev/null; stdout and stderr both go to the log file.
        If the command fails, a warning is issued and the process exits
        with the command's exit status (1 if it did not exit normally).
        """
        self.message(command)
        rc = os.system("(%s) </dev/null >>%s 2>&1"
                       % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            # os.system() returns an encoded wait status on Unix; handing
            # it to sys.exit() unchanged can wrap to 0 (256 -> 0), which
            # would report success.  Decode it first.
            status = 1
            if hasattr(os, "WIFEXITED") and os.WIFEXITED(rc):
                status = os.WEXITSTATUS(rc)
            sys.exit(status)

    def message(self, msg):
        """Log a progress message and echo it unless options.quiet is set."""
        msg = "+++ " + msg
        if not self.options.quiet:
            print(msg)
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a warning to stderr and to the log file."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append msg to the log file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()


def safe_unlink(path):
    """Remove the file at path, silently ignoring any OS-level failure."""
    try:
        os.remove(path)
    except OSError:
        # Best effort only: a missing or undeletable file is not an error.
        return


def split_pathname(path):
    """Return (directory, document name) for path.

    The path is made absolute against the current directory and
    normalized; a trailing ".tex" is dropped from the final component.
    """
    absolute = os.path.join(os.getcwd(), path)
    directory, name = os.path.split(os.path.normpath(absolute))
    suffix = ".tex"
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return directory, name


# Matches a \documentclass declaration, with an optional [...] argument, and
# captures the class name; it is applied with match(), i.e. only at the
# start of a line.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")
def get_doctype(path):
    """Return the document class named in the file at path.

    The first line that starts with a document class declaration decides
    the result; None is returned when no line does.
    """
    source = open(path)
    found = None
    line = source.readline()
    while line:
        match = _doctype_rx.match(line)
        if match is not None:
            found = match.group(1)
            break
        line = source.readline()
    source.close()
    return found


def main():
    """Parse the command line and build every document named on it.

    With no file arguments, the single *.tex file in the current
    directory is used; zero or several candidates is reported as an error.
    """
    options = Options()
    try:
        paths = options.parse(sys.argv[1:])
    except getopt.error:
        # error() does not return; it prints the usage text and exits.
        error(options, sys.exc_info()[1])
    if not paths:
        # attempt to locate single .tex file in current directory:
        paths = glob.glob("*.tex")
        if not paths:
            error(options, "No file to process.")
        if len(paths) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in paths:
        job = Job(options, path)
        job.build()


def l2hoption(fp, option, value):
    """Write a Perl assignment of value to the variable named option.

    Nothing is written when value is false (None, '', 0, ...).
    """
    if not value:
        return
    quoted = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, quoted))


# Characters that need a backslash in front of them inside the
# double-quoted Perl strings written by l2hoption(); every other character
# is copied unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
    }

def string_to_perl(s):
    """Return s with '@', '$' and '"' backslash-escaped.

    Characters without an entry in the table (including NUL and anything
    outside the 8-bit range) pass through unchanged instead of causing an
    error.
    """
    return "".join([_to_perl.get(c, c) for c in s])


def check_for_bibtex(filename):
    r"""Return a true value if the file contains the text "\bibdata{"."""
    fp = open(filename)
    try:
        # close the file even if reading fails
        contents = fp.read()
    finally:
        fp.close()
    return contents.find(r"\bibdata{") >= 0

def uniqify_module_table(filename):
    """Drop the last line of the file if it repeats the line before it.

    Only the final two lines are compared.  The file is rewritten whether
    or not a line was dropped.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
    # Close explicitly so the data is flushed without relying on the file
    # object being finalized.
    fp = open(filename, "w")
    try:
        fp.writelines(lines)
    finally:
        fp.close()


def new_index(filename, label="genindex"):
    """Overwrite filename with an empty index environment carrying label."""
    out = open(filename, "w")
    # This is a raw string, so the backslash ending its first line is
    # written to the file literally rather than acting as a continuation.
    stub = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
""" % label
    out.write(stub)
    out.close()


# Run the build only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()