#! /usr/local/bin/python # Convert GNU texinfo files into HTML, one file per node. # Based on Texinfo 2.14. # Usage: texi2html [-d] [-d] inputfile outputdirectory # The input file must be a complete texinfo file, e.g. emacs.texi. # This creates many files (one per info node) in the output directory, # overwriting existing files of the same name. All files created have # ".html" as their extension. # XXX To do: # - handle @comment*** correctly # - handle @xref {some words} correctly # - handle @ftable correctly (items aren't indexed?) # - handle @itemx properly # - handle @exdent properly # - add links directly to the proper line from indices # - check against the definitive list of @-cmds; we still miss (among others): # - @set, @clear, @ifset, @ifclear # - @defindex (hard) # - @c(omment) in the middle of a line (rarely used) # - @this* (not really needed, only used in headers anyway) # - @today{} (ever used outside title page?) import os import regex import regsub import string MAGIC = '\\input texinfo' cmprog = regex.compile('^@\([a-z]+\)\([ \t]\|$\)') # Command (line-oriented) blprog = regex.compile('^[ \t]*$') # Blank line kwprog = regex.compile('@[a-z]+') # Keyword (embedded, usually with {} args) spprog = regex.compile('[\n@{}&<>]') # Special characters in running text miprog = regex.compile( \ '^\* \([^:]*\):\(:\|[ \t]*\([^\t,\n.]+\)\([^ \t\n]*\)\)[ \t\n]*') # menu item (Yuck!) class TexinfoParser: # Initialize an instance def __init__(self): self.unknown = {} # statistics about unknown @-commands self.filenames = {} # Check for identical filenames self.debugging = 0 # larger values produce more output self.nodefp = None # open file we're writing to self.nodelineno = 0 # Linenumber relative to node self.links = None # Links from current node self.savetext = None # If not None, save text head instead self.dirname = 'tmp' # directory where files are created self.includedir = '.' # directory to search @include files self.nodename = '' # name of current node self.topname = '' # name of top node (first node seen) self.title = '' # title of this whole Texinfo tree self.resetindex() # Reset all indices self.contents = [] # Reset table of contents self.numbering = [] # Reset section numbering counters self.nofill = 0 # Normal operation: fill paragraphs self.goodset=['html'] # Names that should be parsed in ifset self.stackinfo={} # Keep track of state in the stack # XXX The following should be reset per node?! self.footnotes = [] # Reset list of footnotes self.itemarg = None # Reset command used by @item self.itemnumber = None # Reset number for @item in @enumerate self.itemindex = None # Reset item index name # Set (output) directory name def setdirname(self, dirname): self.dirname = dirname # Set include directory name def setincludedir(self, includedir): self.includedir = includedir # Parse the contents of an entire file def parse(self, fp): line = fp.readline() lineno = 1 while line and (line[0] == '%' or blprog.match(line) >= 0): line = fp.readline() lineno = lineno + 1 if line[:len(MAGIC)] <> MAGIC: raise SyntaxError, 'file does not begin with '+`MAGIC` self.parserest(fp, lineno) # Parse the contents of a file, not expecting a MAGIC header def parserest(self, fp, initial_lineno): lineno = initial_lineno self.done = 0 self.skip = 0 self.stack = [] accu = [] while not self.done: line = fp.readline() self.nodelineno = self.nodelineno + 1 if not line: if accu: if not self.skip: self.process(accu) accu = [] if initial_lineno > 0: print '*** EOF before @bye' break lineno = lineno + 1 if cmprog.match(line) >= 0: a, b = cmprog.regs[1] cmd = line[a:b] if cmd in ('noindent', 'refill'): accu.append(line) else: if accu: if not self.skip: self.process(accu) accu = [] self.command(line) elif blprog.match(line) >= 0 and \ 'format' not in self.stack and \ 'example' not in self.stack: if accu: if not self.skip: self.process(accu) self.write('
\n') accu = [] else: # Append the line including trailing \n! accu.append(line) # if self.skip: print '*** Still skipping at the end' if self.stack: print '*** Stack not empty at the end' print '***', self.stack # Start saving text in a buffer instead of writing it to a file def startsaving(self): if self.savetext <> None: print '*** Recursively saving text, expect trouble' self.savetext = '' # Return the text saved so far and start writing to file again def collectsavings(self): savetext = self.savetext self.savetext = None return savetext or '' # Write text to file, or save it in a buffer, or ignore it def write(self, *args): text = string.joinfields(args, '') if self.savetext <> None: self.savetext = self.savetext + text elif self.nodefp: self.nodefp.write(text) # Complete the current node -- write footnotes and close file def endnode(self): if self.savetext <> None: print '*** Still saving text at end of node' dummy = self.collectsavings() if self.footnotes: self.writefootnotes() if self.nodefp: if self.nodelineno > 20: self.write ('