#! /usr/bin/env python

"""Perform massive transformations on a document tree created from the LaTeX
of the Python documentation, and dump the ESIS data for the transformed tree.
"""
__version__ = '$Revision$'


import errno
import esistools
import re
import string
import sys
import xml.dom.core
import xml.dom.esis_builder


class ConversionError(Exception):
    """Conversion-specific exception type."""
    pass


# Set to a true value to have para_msg() write trace lines to stderr.
DEBUG_PARA_FIXER = 0

if DEBUG_PARA_FIXER:
    def para_msg(s):
        """Write the debugging message `s` to stderr, prefixed with '*** '."""
        sys.stderr.write("*** %s\n" % s)
else:
    def para_msg(s):
        """Discard the debugging message `s` (debugging is disabled)."""
        pass


# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the document's root element.

    The loop does not stop at the first match, so when the document has
    several top-level elements the *last* one is returned; None is returned
    when there is no element child at all.
    """
    docelem = None
    for n in self._node.children:
        if n.type == xml.dom.core.ELEMENT:
            docelem = xml.dom.core.Element(n, self, self)
    return docelem

xml.dom.core.Document.get_documentElement = get_documentElement


# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return the document's children as a NodeList owned by this document."""
    return xml.dom.core.NodeList(self._node.children, self, self)

xml.dom.core.Document.get_childNodes = get_childNodes


def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`, or None."""
    for n in doc.childNodes:
        if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
            return n
    return None


def extract_first_element(doc, gi):
    """Detach and return the first direct child element of `doc` named `gi`.

    Returns None (and leaves `doc` untouched) when there is no such child.
    """
    node = get_first_element(doc, gi)
    if node is not None:
        doc.removeChild(node)
    return node


def find_all_elements(doc, gi):
    """Return a list of every element named `gi` at or below `doc`.

    `doc` itself is included when it is an element with that name; each
    element child is then checked, followed by that child's descendants.
    """
    nodes = []
    if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
        nodes.append(doc)
    for child in doc.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            if child.tagName == gi:
                nodes.append(child)
            for node in child.getElementsByTagName(gi):
                nodes.append(node)
    return nodes


def simplify(doc):
    """Restructure the top level of `doc` in place.

    The top-level "documentclass" element is removed and its "classname"
    attribute becomes the name of the top-level "document" element; the
    top-level "title" and "input" elements are moved to the front of the
    document element; leading text nodes of the document are dropped.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Each node is inserted at the front, so walk the list backwards to
        # keep the collected order in the result.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Guard against running out of children: without the None check a
    # document holding nothing but text would fail on `None.nodeType`.
    while doc.firstChild is not None \
          and doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)


def cleanup_root_text(doc):
    """Remove top-level text nodes from `doc`.

    A text node that immediately follows a top-level "COMMENT" element is
    kept; every other top-level text node is removed.
    """
    discards = []
    skip = 0
    for n in doc.childNodes:
        prevskip = skip
        skip = 0
        if n.nodeType == xml.dom.core.TEXT and not prevskip:
            discards.append(n)
        elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
            skip = 1
    # Removal is deferred so the child list is not modified while iterating.
    for node in discards:
        doc.removeChild(node)


# Element names that rewrite_descriptor() is applied to.
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc", "cvardesc", "ctypedesc",
    "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
    "excdesc", "funcdesc", "funcdescni", "opcodedesc",
    "datadesc", "datadescni",
    )


def fixup_descriptors(doc):
    """Rewrite the descriptor elements found in every "section" of `doc`."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        find_and_fix_descriptors(doc, section)


def find_and_fix_descriptors(doc, container):
    """Rewrite descriptor elements that are direct children of `container`.

    Child "subsection" elements are processed recursively; other children
    are left alone.
    """
    children = container.childNodes
    for child in children:
        if child.nodeType == xml.dom.core.ELEMENT:
            tagName = child.tagName
            if tagName in DESCRIPTOR_ELEMENTS:
                rewrite_descriptor(doc, child)
            elif tagName == "subsection":
                find_and_fix_descriptors(doc, child)


def rewrite_descriptor(doc, descriptor):
    """Rebuild `descriptor` in place as signature(s) plus a description.

    `doc` is used as the factory for the new elements and text nodes.
    """
    #
    # Do these things:
    #   1. Add an "index=noindex" attribute to the element if the tagName
    #      ends in 'ni', removing the 'ni' from the name.
    #   2. Create a <signature> from the name attribute and <args>.
    #   3. Create additional <signature>s from <*line{,ni}> elements,
    #      if found.
    #   4. Move remaining child nodes to a <description> element.
    #   5. Put it back together.
    #
    # NOTE(review): the whitespace-only text nodes below ("\n ") are kept
    # exactly as found; confirm their widths against the intended layout.
    #
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "noindex")
        descriptor._node.name = descname
        index = 0
    desctype = descname[:-4]  # remove 'desc'
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    if descriptor.attributes.has_key("var"):
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    children = descriptor.childNodes
    pos = skip_leading_nodes(children, 0)
    if pos < len(children):
        child = children[pos]
        if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
            # create an <args> in <signature>:
            args = doc.createElement("args")
            # Snapshot the children first; they are removed from `child`
            # while being moved.  An explicit loop is used rather than
            # map() so the copy still happens where map() is lazy.
            argchildren = []
            for n in child.childNodes:
                argchildren.append(n)
            for n in argchildren:
                child.removeChild(n)
                args.appendChild(n)
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
    signature.appendChild(doc.createTextNode("\n "))
    # 3.
    pos = skip_leading_nodes(children, pos + 1)
    while pos < len(children) \
          and children[pos].nodeType == xml.dom.core.ELEMENT \
          and children[pos].tagName == linename:
        # this is really a supplemental signature, create <signature>
        sig = methodline_to_signature(doc, children[pos])
        newchildren.append(sig)
        pos = skip_leading_nodes(children, pos + 1)
    # 4.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    last = description.childNodes[-1]
    if last.nodeType == xml.dom.core.TEXT:
        last.data = string.rstrip(last.data) + "\n "
    # 5.
    # should have nothing but whitespace and signature lines left in the
    # descriptor; discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))


def methodline_to_signature(doc, methodline):
    """Build and return a new "signature" element from `methodline`.

    The "name" attribute of `methodline` is removed and becomes a "name"
    child; any child nodes of `methodline` are moved into an "args" child.
    """
    signature = doc.createElement("signature")
    signature.appendChild(doc.createTextNode("\n "))
    name = doc.createElement("name")
    name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
    methodline.removeAttribute("name")
    signature.appendChild(name)
    if len(methodline.childNodes):
        args = doc.createElement("args")
        signature.appendChild(doc.createTextNode("\n "))
        signature.appendChild(args)
        move_children(methodline, args)
    signature.appendChild(doc.createTextNode("\n "))
    return signature


def move_children(origin, dest, start=0):
    """Move the children of `origin` from index `start` onward to `dest`.

    `start` never advances: the loop only terminates because each removal
    shortens `origin.childNodes`.
    """
    children = origin.childNodes
    while start < len(children):
        node = children[start]
        origin.removeChild(node)
        dest.appendChild(node)


def handle_appendix(doc):
    """Move everything after the appendix marker into a "back-matter" element.

    The first "appendix" element found below a child of the document element
    is removed; all later children of the document element are moved into a
    new "back-matter" element appended to the document element.
    """
    # must be called after simplify() if document is multi-rooted to begin
    # with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops rather than map(): these calls exist only for their
        # side effects and must run even where map() is lazy.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # Drop leading whitespace-only text nodes.
        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))


def handle_labels(doc):
    for label in find_all_elements(doc, "label"):
        id = label.getAttribute("id")
        if not id:
            continue
        parent = label.parentNode
        if parent.tagName == "title":
            parent.parentNode.setAttribute("id", id)
        else:
            parent.setAttribute("id", id)
        # now, remove