#! /usr/bin/env python """Perform massive transformations on a document tree created from the LaTeX of the Python documentation, and dump the ESIS data for the transformed tree. """ __version__ = '$Revision$' import errno import esistools import re import string import sys import xml.dom.core from xml.dom.core import \ ELEMENT, \ TEXT class ConversionError(Exception): pass ewrite = sys.stderr.write try: # We can only do this trick on Unix (if tput is on $PATH)! if sys.platform != "posix" or not sys.stderr.isatty(): raise ImportError import curses import commands except ImportError: bwrite = ewrite else: def bwrite(s, BOLDON=commands.getoutput("tput bold"), BOLDOFF=commands.getoutput("tput sgr0")): ewrite("%s%s%s" % (BOLDON, s, BOLDOFF)) PARA_ELEMENT = "para" DEBUG_PARA_FIXER = 0 if DEBUG_PARA_FIXER: def para_msg(s): ewrite("*** %s\n" % s) else: def para_msg(s): pass # Workaround to deal with invalid documents (multiple root elements). This # does not indicate a bug in the DOM implementation. # def get_documentElement(doc): docelem = None for n in doc.childNodes: if n.nodeType == ELEMENT: docelem = n return docelem xml.dom.core.Document.get_documentElement = get_documentElement # Replace get_childNodes for the Document class; without this, children # accessed from the Document object via .childNodes (no matter how many # levels of access are used) will be given an ownerDocument of None. # def get_childNodes(doc): return xml.dom.core.NodeList(doc._node.children, doc._node) xml.dom.core.Document.get_childNodes = get_childNodes def get_first_element(doc, gi): for n in doc.childNodes: if n.nodeType == ELEMENT and n.tagName == gi: return n def extract_first_element(doc, gi): node = get_first_element(doc, gi) if node is not None: doc.removeChild(node) return node def find_all_elements(doc, gi): nodes = [] if doc.nodeType == ELEMENT and doc.tagName == gi: nodes.append(doc) for child in doc.childNodes: if child.nodeType == ELEMENT: if child.tagName == gi: nodes.append(child) for node in child.getElementsByTagName(gi): nodes.append(node) return nodes def find_all_child_elements(doc, gi): nodes = [] for child in doc.childNodes: if child.nodeType == ELEMENT: if child.tagName == gi: nodes.append(child) return nodes def find_all_elements_from_set(doc, gi_set): return __find_all_elements_from_set(doc, gi_set, []) def __find_all_elements_from_set(doc, gi_set, nodes): if doc.nodeType == ELEMENT and doc.tagName in gi_set: nodes.append(doc) for child in doc.childNodes: if child.nodeType == ELEMENT: __find_all_elements_from_set(child, gi_set, nodes) return nodes def simplify(doc, fragment): # Try to rationalize the document a bit, since these things are simply # not valid SGML/XML documents as they stand, and need a little work. documentclass = "document" inputs = [] node = extract_first_element(fragment, "documentclass") if node is not None: documentclass = node.getAttribute("classname") node = extract_first_element(fragment, "title") if node is not None: inputs.append(node) # update the name of the root element node = get_first_element(fragment, "document") if node is not None: node._node.name = documentclass while 1: node = extract_first_element(fragment, "input") if node is None: break inputs.append(node) if inputs: docelem = get_documentElement(fragment) inputs.reverse() for node in inputs: text = doc.createTextNode("\n") docelem.insertBefore(text, docelem.firstChild) docelem.insertBefore(node, text) docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild) while fragment.firstChild and fragment.firstChild.nodeType == TEXT: fragment.removeChild(fragment.firstChild) def cleanup_root_text(doc): discards = [] skip = 0 for n in doc.childNodes: prevskip = skip skip = 0 if n.nodeType == TEXT and not prevskip: discards.append(n) elif n.nodeType == ELEMENT and n.tagName == "COMMENT": skip = 1 for node in discards: doc.removeChild(node) DESCRIPTOR_ELEMENTS = ( "cfuncdesc", "cvardesc", "ctypedesc", "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni", "excdesc", "funcdesc", "funcdescni", "opcodedesc", "datadesc", "datadescni", ) def fixup_descriptors(doc, fragment): sections = find_all_elements(fragment, "section") for section in sections: find_and_fix_descriptors(doc, section) def find_and_fix_descriptors(doc, container): children = container.childNodes for child in children: if child.nodeType == ELEMENT: tagName = child.tagName if tagName in DESCRIPTOR_ELEMENTS: rewrite_descriptor(doc, child) elif tagName == "subsection": find_and_fix_descriptors(doc, child) def rewrite_descriptor(doc, descriptor): # # Do these things: # 1. Add an "index='no'" attribute to the element if the tagName # ends in 'ni', removing the 'ni' from the name. # 2. Create a from the name attribute and . # 3. Create additional s from <*line{,ni}> elements, # if found. # 4. If a is found, move it to an attribute on the # descriptor. # 5. Move remaining child nodes to a element. # 6. Put it back together. # descname = descriptor.tagName index = 1 if descname[-2:] == "ni": descname = descname[:-2] descriptor.setAttribute("index", "no") descriptor._node.name = descname index = 0 desctype = descname[:-4] # remove 'desc' linename = desctype + "line" if not index: linename = linename + "ni" # 2. signature = doc.createElement("signature") name = doc.createElement("name") signature.appendChild(doc.createTextNode("\n ")) signature.appendChild(name) name.appendChild(doc.createTextNode(descriptor.getAttribute("name"))) descriptor.removeAttribute("name") if descriptor.attributes.has_key("var"): variable = descriptor.getAttribute("var") if variable: args = doc.createElement("args") args.appendChild(doc.createTextNode(variable)) signature.appendChild(doc.createTextNode("\n ")) signature.appendChild(args) descriptor.removeAttribute("var") newchildren = [signature] children = descriptor.childNodes pos = skip_leading_nodes(children, 0) if pos < len(children): child = children[pos] if child.nodeType == ELEMENT and child.tagName == "args": # create an in : args = doc.createElement("args") argchildren = [] map(argchildren.append, child.childNodes) for n in argchildren: child.removeChild(n) args.appendChild(n) signature.appendChild(doc.createTextNode("\n ")) signature.appendChild(args) signature.appendChild(doc.createTextNode("\n ")) # 3, 4. pos = skip_leading_nodes(children, pos + 1) while pos < len(children) \ and children[pos].nodeType == ELEMENT \ and children[pos].tagName in (linename, "versionadded"): if children[pos].tagName == linename: # this is really a supplemental signature, create sig = methodline_to_signature(doc, children[pos]) newchildren.append(sig) else: # descriptor.setAttribute( "added", children[pos].getAttribute("version")) pos = skip_leading_nodes(children, pos + 1) # 5. description = doc.createElement("description") description.appendChild(doc.createTextNode("\n")) newchildren.append(description) move_children(descriptor, description, pos) last = description.childNodes[-1] if last.nodeType == TEXT: last.data = string.rstrip(last.data) + "\n " # 6. # should have nothing but whitespace and signature lines in ; # discard them while descriptor.childNodes: descriptor.removeChild(descriptor.childNodes[0]) for node in newchildren: descriptor.appendChild(doc.createTextNode("\n ")) descriptor.appendChild(node) descriptor.appendChild(doc.createTextNode("\n")) def methodline_to_signature(doc, methodline): signature = doc.createElement("signature") signature.appendChild(doc.createTextNode("\n ")) name = doc.createElement("name") name.appendChild(doc.createTextNode(methodline.getAttribute("name"))) methodline.removeAttribute("name") signature.appendChild(name) if len(methodline.childNodes): args = doc.createElement("args") signature.appendChild(doc.createTextNode("\n ")) signature.appendChild(args) move_children(methodline, args) signature.appendChild(doc.createTextNode("\n ")) return signature def move_children(origin, dest, start=0): children = origin.childNodes while start < len(children): node = children[start] origin.removeChild(node) dest.appendChild(node) def handle_appendix(doc, fragment): # must be called after simplfy() if document is multi-rooted to begin with docelem = get_documentElement(fragment) toplevel = docelem.tagName == "manual" and "chapter" or "section" appendices = 0 nodes = [] for node in docelem.childNodes: if appendices: nodes.append(node) elif node.nodeType == ELEMENT: appnodes = node.getElementsByTagName("appendix") if appnodes: appendices = 1 parent = appnodes[0].parentNode parent.removeChild(appnodes[0]) parent.normalize() if nodes: map(docelem.removeChild, nodes) docelem.appendChild(doc.createTextNode("\n\n\n")) back = doc.createElement("back-matter") docelem.appendChild(back) back.appendChild(doc.createTextNode("\n")) while nodes and nodes[0].nodeType == TEXT \ and not string.strip(nodes[0].data): del nodes[0] map(back.appendChild, nodes) docelem.appendChild(doc.createTextNode("\n")) def handle_labels(doc, fragment): for label in find_all_elements(fragment, "label"): id = label.getAttribute("id") if not id: continue parent = label.parentNode if parent.tagName == "title": parent.parentNode.setAttribute("id", id) else: parent.setAttribute("id", id) # now, remove