#! /usr/bin/env python
#  -*- Python -*-
"""usage: %(program)s [options...] file ...

Options specifying formats to build:
    --html              HyperText Markup Language (default)
    --pdf               Portable Document Format
    --ps                PostScript
    --dvi               'DeVice Indepentent' format from TeX
    --text              ASCII text (requires lynx)

    More than one output format may be specified, or --all.

HTML options:
    --address, -a       Specify an address for page footers.
    --dir               Specify the directory for HTML output.
    --link              Specify the number of levels to include on each page.
    --split, -s         Specify a section level for page splitting, default: %(max_split_depth)s.
    --iconserver, -i    Specify location of icons (default: ./).
    --image-type        Specify the image type to use in HTML output;
                        values: gif, png (default).
    --numeric           Don't rename the HTML files; just keep node#.html for
                        the filenames.
    --style             Specify the CSS file to use for the output (filename,
                        not a URL).
    --up-link           URL to a parent document.
    --up-title          Title of a parent document.
    --favicon           Icon to display in the browsers location bar.

Other options:
    --a4                Format for A4 paper.
    --letter            Format for US letter paper (the default).
    --help, -H          Show this text.
    --logging, -l       Log stdout and stderr to a file (*.how).
    --debugging, -D     Echo commands as they are executed.
    --keep, -k          Keep temporary files around.
    --quiet, -q         Do not print command output to stdout.
                        (stderr is also lost, sorry; see *.how for errors)
"""

import getopt
import glob
import os
import re
import shutil
import sys


MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.dirname(MYDIR)

ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"


def usage(options, file):
    """Write the usage text (the module docstring) to *file*.

    *options* supplies the %(name)s substitutions through its
    __getitem__() method.
    """
    file.write(__doc__ % options)
    file.write("\n")


def error(options, message, err=2):
    """Report *message* and the usage text on stderr, then exit.

    The process exit status is *err* (default 2).
    """
    # *message* may be an exception instance (see main()), so format it
    # instead of concatenating.
    sys.stderr.write("%s\n\n" % (message,))
    usage(options, sys.stderr)
    sys.exit(err)


class Options:
    """Command-line settings shared by every Job.

    Class attributes hold the defaults; parse() overrides them on the
    instance.  Call initialize() directly when parse() is not used.
    """

    program = os.path.basename(sys.argv[0])
    #
    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = "."
    image_type = "png"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None
    favicon = None
    #
    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's apparently
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0
    #
    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Expose attributes as mapping items.

        This is used when formatting the usage message.
        """
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line *args* and return the non-option arguments.

        Raises getopt.error for unrecognized options; ``--help`` prints
        the usage text and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index=", "dvips-safe",
                                    "favicon="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self, sys.stdout)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is declared in the getopt spec and in the usage
                # text, so it has to be honored here as well.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--favicon":
                self.favicon = arg
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                if os.sep == "\\":
                    arg = re.sub("/", "\\\\", arg)
                self.builddir = os.path.expanduser(arg)
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.

        This is needed if parse() isn't used.
        """
        # add the default format if no formats were specified; copy it
        # into a list so add_format() can still append afterwards:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        if not texinputs:
            texinputs = ['']
        mydirs = [os.path.join(TOPDIR, "paper-" + self.paper),
                  os.path.join(TOPDIR, "texinputs"),
                  ]
        if '' in texinputs:
            # insert our directories in front of the first empty entry
            i = texinputs.index('')
            texinputs[i:i] = mydirs
        else:
            texinputs += mydirs
        self.base_texinputs = texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)


class Job:
    """Build one document in every format requested by an Options object."""

    latex_runs = 0

    def __init__(self, options, path):
        """Prepare to build the document at *path*.

        Creates the HTML build directory when HTML or text output is
        requested, removes a stale log file, backs up an existing
        <doc>.l2h file to <doc>.l2h~ and writes a fresh one.
        """
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if "html" in options.formats or "text" in options.formats:
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        l2hconf = self.doc + ".l2h"
        if os.path.exists(l2hconf):
            if os.path.exists(l2hconf + "~"):
                os.unlink(l2hconf + "~")
            os.rename(l2hconf, l2hconf + "~")
        self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every requested format, then clean up unless told to keep."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                # icons are referenced relative to the pages, so copy
                # them next to the generated HTML
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # the text dump needs a single-page HTML version, so
                # override the split depth and build into a scratch dir
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Export TEXINPUTS with the document's directory first."""
        texinputs = [self.filedir] + self.options.base_texinputs
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run *binary* (default: latex) once over empty index files."""
        if binary is None:
            binary = LATEX_BINARY
        new_index("%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Build the DVI output with latex."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF output with pdflatex."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* together with makeindex/bibtex until output is final."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Drop a duplicated trailing line from each <doc>*.syn file."""
        for path in glob.glob(self.doc + "*.syn"):
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with dvips."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into *builddir* and post-process the result.

        *max_split_depth* defaults to the configured split depth.  Exits
        with status 1 when <doc>.tex cannot be found on TEXINPUTS.
        """
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))     # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            about_node = _find_about_node(os.path.join(builddir, "labels.pl"))
            if about_node:
                shutil.copyfile(os.path.join(builddir, about_node),
                                os.path.join(builddir, "about.html"))
            if not self.options.numeric:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)
        # These files need to be cleaned up here since builddir there
        # can be more than one, so we clean each of them.
        if self.options.discard_temps:
            for fn in ("images.tex", "images.log", "images.aux"):
                safe_unlink(os.path.join(builddir, fn))

    def build_text(self, tempdir=None):
        """Dump <tempdir>/index.html to <doc>.txt using lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def write_l2h_aux_init_file(self):
        """Write the <doc>.l2h init file handed to LaTeX2HTML.

        The file is the stock init file, followed by any --l2h-init
        files, followed by the settings taken from the options.
        """
        options = self.options
        d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
        fp = open(self.l2h_aux_init_file, "w")
        try:
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            fp.write(_read_file(L2H_INIT_FILE))
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                fp.write(_read_file(filename))
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "FAVORITES_ICON", options.favicon)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove the intermediate files left behind by the tools."""
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        # explicit loops: map() is lazy on Python 3 and would delete nothing
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            for path in glob.glob(os.path.join(self.doc, spec)):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run a shell *command*; on failure report it and exit.

        Except on Windows, the command's stdout and stderr are appended
        to the log file.  A non-zero result prints the tail of the
        transcript on stderr and exits with the command's exit status
        (or 1 when it did not exit normally).
        """
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            # Append (">>"): the log already holds the "+++ " marker that
            # message() just wrote and get_run_transcript() looks for;
            # truncating would also discard every earlier command.
            # stdin comes from /dev/null so a tool that prompts for input
            # sees EOF instead of waiting while its output is hidden.
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            result = 1
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    result = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % result)
                elif os.WIFSIGNALED(rc):
                    # WTERMSIG, not WSTOPSIG, names the terminating signal
                    self.warning("Killed by signal %s." % os.WTERMSIG(rc))
                else:
                    self.warning("Killed by signal %s." % os.WSTOPSIG(rc))
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(result)

    def message(self, msg):
        """Log a "+++ "-prefixed line; echo it to stdout unless quiet."""
        msg = "+++ " + msg
        if not self.options.quiet:
            sys.stdout.write(msg + "\n")
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a "*** "-prefixed line to stderr and to the log."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* to the log file."""
        with open(self.log_filename, "a") as fp:
            fp.write(msg)


def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call."""
    with open(filename) as fp:
        lines = fp.readlines()
    # walk backwards to the last "+++ " marker written by Job.message()
    tail = []
    for line in reversed(lines):
        tail.append(line)
        if line[:4] == "+++ ":
            break
    tail.reverse()
    return tail


def safe_unlink(path):
    """Unlink a file without raising an error if it doesn't exist."""
    try:
        os.unlink(path)
    except os.error:
        pass


def split_pathname(path):
    """Return (absolute directory, base name without a ".tex" suffix)."""
    path = os.path.abspath(path)
    dirname, basename = os.path.split(path)
    if basename[-4:] == ".tex":
        basename = basename[:-4]
    return dirname, basename


_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")


def get_doctype(path):
    """Return the class named by the first \\documentclass line in *path*.

    Only lines that start with \\documentclass are recognized; None is
    returned when there is no such line.
    """
    with open(path) as fp:
        for line in fp:
            m = _doctype_rx.match(line)
            if m:
                return m.group(1)
    return None


def main():
    """Parse the command line and build each named document."""
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error as msg:
        error(options, msg)
    if not args:
        # attempt to locate single .tex file in current directory:
        args = glob.glob("*.tex")
        if not args:
            error(options, "No file to process.")
        if len(args) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in args:
        Job(options, path).build()


def l2hoption(fp, option, value):
    """Write a Perl assignment ``$option = "value";`` to *fp*.

    Nothing is written when *value* is false (None, '', 0).
    """
    if value:
        fp.write('$%s = "%s";\n' % (option, string_to_perl(str(value))))


# Translation table for string_to_perl(): identity for most characters,
# with the ones Perl interpolates or terminates a string on escaped.
_to_perl = {}
for c in map(chr, range(1, 256)):
    _to_perl[c] = c
_to_perl["@"] = "\\@"
_to_perl["$"] = "\\$"
_to_perl['"'] = '\\"'


def string_to_perl(s):
    """Return *s* with ``@``, ``$`` and ``"`` backslash-escaped for Perl.

    Characters that are not in the table (e.g. NUL) pass through
    unchanged.
    """
    return ''.join([_to_perl.get(ch, ch) for ch in s])


def check_for_bibtex(filename):
    """Return true if *filename* contains a \\bibdata{ entry."""
    with open(filename) as fp:
        return fp.read().find(r"\bibdata{") >= 0


def uniqify_module_table(filename):
    """Rewrite *filename*, dropping its last line if it repeats the previous one."""
    with open(filename) as fp:
        lines = fp.readlines()
    if len(lines) > 1:
        if lines[-1] == lines[-2]:
            del lines[-1]
    with open(filename, "w") as fp:
        fp.writelines(lines)


def new_index(filename, label="genindex"):
    """Write an empty ``theindex`` environment labelled *label* to *filename*."""
    with open(filename, "w") as fp:
        fp.write(r"""\
\begin{theindex}
\label{%s}
\end{theindex}
""" % label)


def _read_file(path):
    """Return the full contents of *path*, closing the file afterwards."""
    with open(path) as fp:
        return fp.read()


def _find_about_node(label_file):
    """Return the node file name recorded for the "about" label, or None.

    Looks for the first line of *label_file* ending in `` = q/about/;``
    and extracts ``nodeNNN.ext`` from between ``|`` characters on the
    line that follows it.  None is returned when there is no such label
    or the following line does not have that form.
    """
    target = " = q/about/;\n"
    with open(label_file) as fp:
        for line in fp:
            if line.endswith(target):
                m = re.search(r"\|(node\d+\.[a-z]+)\|", next(fp, ""))
                if m:
                    return m.group(1)
                return None
    return None


if __name__ == "__main__":
    main()