Reference cycle fixes
This commit is contained in:
parent
8fcaa92c5f
commit
73678dac48
|
@ -8,6 +8,6 @@ dom -- The W3C Document Object Model. This supports DOM Level 1 +
|
|||
parser -- Python wrappers for XML parsers (currently only supports Expat).
|
||||
|
||||
sax -- The Simple API for XML, developed by XML-Dev, led by David
|
||||
Megginson. This supports the SAX 2 API.
|
||||
|
||||
Megginson and ported to Python by Lars Marius Garsholm. This
|
||||
supports the SAX 2 API.
|
||||
"""
|
||||
|
|
|
@ -29,11 +29,19 @@ class Node:
|
|||
DOCUMENT_FRAGMENT_NODE = 11
|
||||
NOTATION_NODE = 12
|
||||
|
||||
allnodes=[]
|
||||
allnodes={}
|
||||
_debug=0
|
||||
_makeParentNodes=1
|
||||
debug=None
|
||||
|
||||
def __init__( self ):
|
||||
self.childNodes=[]
|
||||
Node.allnodes.append( repr( id( self ))+repr( self.__class__ ))
|
||||
if Node._debug:
|
||||
index=repr( id( self ))+repr( self.__class__ )
|
||||
Node.allnodes[index]=repr( self.__dict__ )
|
||||
if Node.debug==None:
|
||||
Node.debug=open( "debug4.out", "w" )
|
||||
Node.debug.write( "create %s\n"%index )
|
||||
|
||||
def __getattr__( self, key ):
|
||||
if key[0:2]=="__": raise AttributeError
|
||||
|
@ -72,12 +80,39 @@ class Node:
|
|||
if self.childNodes: return 1
|
||||
else: return 0
|
||||
|
||||
def _get_firstChild( self ):
|
||||
return self.childNodes[0]
|
||||
|
||||
def _get_lastChild( self ):
|
||||
return self.childNodes[-1]
|
||||
|
||||
def insertBefore( self, newChild, refChild):
|
||||
index=self.childNodes.index( refChild )
|
||||
self.childNodes.insert( index, newChild )
|
||||
if self._makeParentNodes:
|
||||
newChild.parentNode=self
|
||||
|
||||
def appendChild( self, node ):
|
||||
self.childNodes.append( node )
|
||||
return node
|
||||
|
||||
def replaceChild( self, newChild, oldChild ):
|
||||
index=self.childNodes.index( oldChild )
|
||||
self.childNodes[index]=oldChild
|
||||
|
||||
def removeChild( self, oldChild ):
|
||||
index=self.childNodes.index( oldChild )
|
||||
del self.childNodes[index]
|
||||
|
||||
def cloneNode( self, deep ):
|
||||
import new
|
||||
clone=new.instance( self.__class__, self.__dict__ )
|
||||
clone.attributes=self.attributes.copy()
|
||||
if not deep:
|
||||
clone.childNodes=[]
|
||||
else:
|
||||
clone.childNodes=map( lambda x: x.cloneNode, self.childNodes )
|
||||
return clone
|
||||
|
||||
def unlink( self ):
|
||||
self.parentNode=None
|
||||
|
@ -86,11 +121,14 @@ class Node:
|
|||
del self.childNodes[-1] # probably not most efficient!
|
||||
self.childNodes=None
|
||||
if self.attributes:
|
||||
for attr in self.attributes.values():
|
||||
attr.unlink()
|
||||
self.attributes=None
|
||||
index=Node.allnodes.index( repr( id( self ))+repr( self.__class__ ))
|
||||
del Node.allnodes[index]
|
||||
for attr in self._attrs.values():
|
||||
self.removeAttributeNode( attr )
|
||||
assert not len( self._attrs )
|
||||
assert not len( self._attrsNS )
|
||||
if Node._debug:
|
||||
index=repr( id( self ))+repr( self.__class__ )
|
||||
self.debug.write( "Deleting: %s\n" % index )
|
||||
del Node.allnodes[index]
|
||||
|
||||
def _write_data( writer, data):
|
||||
"Writes datachars to writer."
|
||||
|
@ -100,11 +138,6 @@ def _write_data( writer, data):
|
|||
data=string.replace(data,">",">")
|
||||
writer.write(data)
|
||||
|
||||
def _closeElement( element ):
|
||||
del element.parentNode
|
||||
for node in element.elements:
|
||||
_closeElement( node )
|
||||
|
||||
def _getElementsByTagNameHelper( parent, name, rc ):
|
||||
for node in parent.childNodes:
|
||||
if node.nodeType==Node.ELEMENT_NODE and\
|
||||
|
@ -123,17 +156,16 @@ def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ):
|
|||
|
||||
class Attr(Node):
|
||||
nodeType=Node.ATTRIBUTE_NODE
|
||||
def __init__( self, qName, namespaceURI="", prefix="",
|
||||
localName=None ):
|
||||
Node.__init__( self )
|
||||
assert qName
|
||||
def __init__( self, qName, namespaceURI="", localName=None,
|
||||
prefix=None ):
|
||||
# skip setattr for performance
|
||||
self.__dict__["nodeName"] = self.__dict__["name"] = qName
|
||||
self.__dict__["localName"]=localName or qName
|
||||
self.__dict__["prefix"]=prefix
|
||||
self.__dict__["nodeName"] = self.__dict__["name"] = qName
|
||||
self.__dict__["namespaceURI"]=namespaceURI
|
||||
# nodeValue and value are set elsewhere
|
||||
self.__dict__["prefix"]=prefix
|
||||
self.attributes=None
|
||||
Node.__init__( self )
|
||||
# nodeValue and value are set elsewhere
|
||||
|
||||
def __setattr__( self, name, value ):
|
||||
if name in ("value", "nodeValue" ):
|
||||
|
@ -142,12 +174,13 @@ class Attr(Node):
|
|||
self.__dict__[name]=value
|
||||
|
||||
class AttributeList:
|
||||
# the attribute list is a transient interface to the underlying dictionaries
|
||||
# mutations here will change the underlying element's dictionary
|
||||
"""the attribute list is a transient interface to the underlying
|
||||
dictionaries. mutations here will change the underlying element's
|
||||
dictionary"""
|
||||
def __init__( self, attrs, attrsNS ):
|
||||
self.__attrs=attrs
|
||||
self.__attrsNS=attrs
|
||||
self.length=len( self.__attrs.keys() )
|
||||
self._attrs=attrs
|
||||
self._attrsNS=attrsNS
|
||||
self.length=len( self._attrs.keys() )
|
||||
|
||||
def item( self, index ):
|
||||
try:
|
||||
|
@ -157,40 +190,46 @@ class AttributeList:
|
|||
|
||||
def items( self ):
|
||||
return map( lambda node: (node.tagName, node.value),
|
||||
self.__attrs.values() )
|
||||
self._attrs.values() )
|
||||
|
||||
def itemsNS( self ):
|
||||
return map( lambda node: ((node.URI, node.localName), node.value),
|
||||
self.__attrs.values() )
|
||||
self._attrs.values() )
|
||||
|
||||
def keys( self ):
|
||||
return self.__attrs.keys()
|
||||
return self._attrs.keys()
|
||||
|
||||
def keysNS( self ):
|
||||
return self.__attrsNS.keys()
|
||||
return self._attrsNS.keys()
|
||||
|
||||
def values( self ):
|
||||
return self.__attrs.values()
|
||||
return self._attrs.values()
|
||||
|
||||
def __len__( self ):
|
||||
return self.length
|
||||
|
||||
def __cmp__( self, other ):
|
||||
if self.__attrs is other.__attrs:
|
||||
if self._attrs is getattr( other, "_attrs", None ):
|
||||
return 0
|
||||
else:
|
||||
return cmp( id( self ), id( other ) )
|
||||
|
||||
#FIXME: is it appropriate to return .value?
|
||||
def __getitem__( self, attname_or_tuple ):
|
||||
if type( attname_or_tuple ) == type( (1,2) ):
|
||||
return self.__attrsNS[attname_or_tuple].value
|
||||
if type( attname_or_tuple ) == type( () ):
|
||||
return self._attrsNS[attname_or_tuple]
|
||||
else:
|
||||
return self.__attrs[attname_or_tuple].value
|
||||
return self._attrs[attname_or_tuple]
|
||||
|
||||
def __setitem__( self, attname ):
|
||||
raise TypeError, "object does not support item assignment"
|
||||
|
||||
def __delitem__( self, attname_or_tuple ):
|
||||
node=self[attname_or_tuple]
|
||||
node.unlink()
|
||||
del self._attrs[node.name]
|
||||
del self._attrsNS[(node.namespaceURI, node.localName)]
|
||||
|
||||
class Element( Node ):
|
||||
nodeType=Node.ELEMENT_NODE
|
||||
def __init__( self, tagName, namespaceURI="", prefix="",
|
||||
|
@ -202,18 +241,18 @@ class Element( Node ):
|
|||
self.namespaceURI=namespaceURI
|
||||
self.nodeValue=None
|
||||
|
||||
self.__attrs={} # attributes are double-indexed:
|
||||
self.__attrsNS={}# tagName -> Attribute
|
||||
self._attrs={} # attributes are double-indexed:
|
||||
self._attrsNS={}# tagName -> Attribute
|
||||
# URI,localName -> Attribute
|
||||
# in the future: consider lazy generation of attribute objects
|
||||
# this is too tricky for now because of headaches
|
||||
# with namespaces.
|
||||
|
||||
def getAttribute( self, attname ):
|
||||
return self.__attrs[attname].value
|
||||
return self._attrs[attname].value
|
||||
|
||||
def getAttributeNS( self, namespaceURI, localName ):
|
||||
return self.__attrsNS[(namespaceURI, localName)].value
|
||||
return self._attrsNS[(namespaceURI, localName)].value
|
||||
|
||||
def setAttribute( self, attname, value ):
|
||||
attr=Attr( attname )
|
||||
|
@ -222,26 +261,37 @@ class Element( Node ):
|
|||
self.setAttributeNode( attr )
|
||||
|
||||
def setAttributeNS( self, namespaceURI, qualifiedName, value ):
|
||||
attr=createAttributeNS( namespaceURI, qualifiedName )
|
||||
prefix,localname=_nssplit( qualifiedName )
|
||||
# for performance
|
||||
attr = Attr( qualifiedName, namespaceURI, localname, prefix )
|
||||
attr.__dict__["value"]=attr.__dict__["nodeValue"]=value
|
||||
self.setAttributeNode( attr )
|
||||
|
||||
def getAttributeNode( self, attrname ):
|
||||
return self._attrs.get( attrname )
|
||||
|
||||
def getAttributeNodeNS( self, namespaceURI, localName ):
|
||||
return self._attrsNS[(namespaceURI, localName)]
|
||||
|
||||
def setAttributeNode( self, attr ):
|
||||
self.__attrs[attr.name]=attr
|
||||
self.__attrsNS[(attr.namespaceURI,attr.localName)]=attr
|
||||
old=self._attrs.get( attr.name, None)
|
||||
if old:
|
||||
old.unlink()
|
||||
self._attrs[attr.name]=attr
|
||||
self._attrsNS[(attr.namespaceURI,attr.localName)]=attr
|
||||
|
||||
def removeAttribute( self, name ):
|
||||
attr = self.__attrs[name]
|
||||
attr = self._attrs[name]
|
||||
self.removeAttributeNode( attr )
|
||||
|
||||
def removeAttributeNS( self, namespaceURI, localName ):
|
||||
attr = self.__attrsNS[(uri, localName)]
|
||||
attr = self._attrsNS[(namespaceURI, localName)]
|
||||
self.removeAttributeNode( attr )
|
||||
|
||||
def removeAttributeNode( self, node ):
|
||||
del self.__attrs[node.name]
|
||||
del self.__attrsNS[(node.namespaceURI, node.localName)]
|
||||
node.unlink()
|
||||
del self._attrs[node.name]
|
||||
del self._attrsNS[(node.namespaceURI, node.localName)]
|
||||
|
||||
def getElementsByTagName( self, name ):
|
||||
return _getElementsByTagNameHelper( self, name, [] )
|
||||
|
@ -271,7 +321,7 @@ class Element( Node ):
|
|||
writer.write("/>")
|
||||
|
||||
def _get_attributes( self ):
|
||||
return AttributeList( self.__attrs, self.__attrsNS )
|
||||
return AttributeList( self._attrs, self._attrsNS )
|
||||
|
||||
class Comment( Node ):
|
||||
nodeType=Node.COMMENT_NODE
|
||||
|
@ -313,15 +363,30 @@ class Text( Node ):
|
|||
def writexml( self, writer ):
|
||||
_write_data( writer, self.data )
|
||||
|
||||
def _nssplit( qualifiedName ):
|
||||
fields = string.split(qualifiedName, ':')
|
||||
if len(fields) == 2:
|
||||
return fields
|
||||
elif len(fields) == 1:
|
||||
return( '', fields[0] )
|
||||
|
||||
class Document( Node ):
|
||||
nodeType=Node.DOCUMENT_NODE
|
||||
documentElement=None
|
||||
def __init__( self ):
|
||||
Node.__init__( self )
|
||||
self.documentElement=None
|
||||
self.attributes=None
|
||||
self.nodeName="#document"
|
||||
self.nodeValue=None
|
||||
|
||||
def appendChild( self, node ):
|
||||
if node.nodeType==Node.ELEMENT_NODE and self.documentElement:
|
||||
raise TypeError, "Two document elements disallowed"
|
||||
else:
|
||||
self.documentElement=node
|
||||
Node.appendChild( self, node )
|
||||
return node
|
||||
|
||||
createElement=Element
|
||||
|
||||
createTextNode=Text
|
||||
|
@ -333,32 +398,16 @@ class Document( Node ):
|
|||
createAttribute=Attr
|
||||
|
||||
def createElementNS(self, namespaceURI, qualifiedName):
|
||||
fields = string.split(qualifiedName, ':')
|
||||
if len(fields) == 2:
|
||||
prefix = fields[0]
|
||||
localName = fields[1]
|
||||
elif len(fields) == 1:
|
||||
prefix = ''
|
||||
localName = fields[0]
|
||||
return Element(self, qualifiedName, namespaceURI, prefix, localName)
|
||||
prefix,localName=_nssplit( qualifiedName )
|
||||
return Element(qualifiedName, namespaceURI, prefix, localName)
|
||||
|
||||
def createAttributeNS(self, namespaceURI, qualifiedName):
|
||||
fields = string.split(qualifiedName,':')
|
||||
if len(fields) == 2:
|
||||
localName = fields[1]
|
||||
prefix = fields[0]
|
||||
elif len(fields) == 1:
|
||||
localName = fields[0]
|
||||
prefix = None
|
||||
return Attr(qualifiedName, namespaceURI, prefix, localName)
|
||||
prefix,localName=_nssplit( qualifiedName )
|
||||
return Attr(namespaceURI, qualifiedName, localName, prefix)
|
||||
|
||||
def getElementsByTagNameNS(self,namespaceURI,localName):
|
||||
_getElementsByTagNameNSHelper( self, namespaceURI, localName )
|
||||
|
||||
def close( self ):
|
||||
for node in self.elements:
|
||||
_closeElement( node )
|
||||
|
||||
def unlink( self ):
|
||||
self.documentElement=None
|
||||
Node.unlink( self )
|
||||
|
|
|
@ -2,7 +2,6 @@ import minidom
|
|||
import types
|
||||
import string
|
||||
import sys
|
||||
import pyexpat
|
||||
from xml.sax import ExpatParser
|
||||
|
||||
#todo: SAX2/namespace handling
|
||||
|
@ -140,12 +139,8 @@ class DOMEventStream:
|
|||
if cur_node is node: return
|
||||
|
||||
if token !=END_ELEMENT:
|
||||
cur_node.parentNode.childNodes.append( cur_node )
|
||||
cur_node.parentNode.appendChild( cur_node )
|
||||
event=self.getEvent()
|
||||
if node.nodeType==minidom.Node.DOCUMENT_NODE:
|
||||
for child in node.childNodes:
|
||||
if child.nodeType==minidom.Node.ELEMENT_NODE:
|
||||
node.documentElement=child
|
||||
|
||||
def getEvent( self ):
|
||||
if not self.pulldom.firstEvent[1]:
|
||||
|
@ -193,75 +188,7 @@ def parseString( string, parser=None ):
|
|||
stringio=StringIO.StringIO
|
||||
|
||||
bufsize=len( string )
|
||||
stringio( string )
|
||||
buf=stringio( string )
|
||||
parser=_getParser()
|
||||
return DOMEventStream( buf, parser, bufsize )
|
||||
|
||||
#FIXME: Use Lars' instead!!!
|
||||
class SAX_expat:
|
||||
"SAX driver for the Pyexpat C module."
|
||||
|
||||
def __init__(self):
|
||||
self.parser=pyexpat.ParserCreate()
|
||||
self.started=0
|
||||
|
||||
def setDocumentHandler( self, handler ):
|
||||
self.parser.StartElementHandler = handler.startElement
|
||||
self.parser.EndElementHandler = handler.endElement
|
||||
self.parser.CharacterDataHandler = handler.datachars
|
||||
self.parser.ProcessingInstructionHandler = handler.processingInstruction
|
||||
self.doc_handler=handler
|
||||
|
||||
def setErrorHandler( self, handler ):
|
||||
self.err_handler=handler
|
||||
|
||||
# --- Locator methods. Only usable after errors.
|
||||
|
||||
def getLineNumber(self):
|
||||
return self.parser.ErrorLineNumber
|
||||
|
||||
def getColumnNumber(self):
|
||||
return self.parser.ErrorColumnNumber
|
||||
|
||||
# --- Internal
|
||||
|
||||
def __report_error(self):
|
||||
msg=pyexpat.ErrorString(self.parser.ErrorCode)
|
||||
self.err_handler.fatalError(msg)
|
||||
|
||||
# --- EXPERIMENTAL PYTHON SAX EXTENSIONS
|
||||
|
||||
def get_parser_name(self):
|
||||
return "pyexpat"
|
||||
|
||||
def get_parser_version(self):
|
||||
return "Unknown"
|
||||
|
||||
def get_driver_version(self):
|
||||
return version
|
||||
|
||||
def is_validating(self):
|
||||
return 0
|
||||
|
||||
def is_dtd_reading(self):
|
||||
return 0
|
||||
|
||||
def reset(self):
|
||||
self.parser=pyexpat.ParserCreate()
|
||||
self.parser.StartElementHandler = self.startElement
|
||||
self.parser.EndElementHandler = self.endElement
|
||||
self.parser.CharacterDataHandler = self.characters
|
||||
self.parser.ProcessingInstructionHandler = self.processingInstruction
|
||||
|
||||
def feed(self,data):
|
||||
if not self.started:
|
||||
self.doc_handler.startDocument()
|
||||
self.started=1
|
||||
if not self.parser.Parse(data):
|
||||
self.__report_error()
|
||||
|
||||
def close(self):
|
||||
if not self.parser.Parse("",1):
|
||||
self.__report_error()
|
||||
self.doc_handler.endDocument()
|
||||
self.parser = None
|
||||
|
|
|
@ -23,3 +23,27 @@ from _exceptions import *
|
|||
from saxutils import *
|
||||
from _exceptions import SAXParseException
|
||||
import xmlreader
|
||||
|
||||
def parse( filename_or_stream, handler, errorHandler=ErrorHandler() ):
|
||||
parser=ExpatParser()
|
||||
parser.setContentHandler( handler )
|
||||
parse.setErrorHandler( errorHandler )
|
||||
parser.parse( filename_or_stream )
|
||||
|
||||
# this may not work yet...Expat doesn't handle buffer inputs
|
||||
def parseString( string, handler, errorHandler=ErrorHandler() ):
|
||||
try:
|
||||
import cStringIO
|
||||
stringio=cStringIO.StringIO
|
||||
except ImportError:
|
||||
import StringIO
|
||||
stringio=StringIO.StringIO
|
||||
|
||||
bufsize=len( string )
|
||||
buf=stringio( string )
|
||||
|
||||
parser=ExpatParser()
|
||||
parser.setContentHandler( handler )
|
||||
parse.setErrorHandler( errorHandler )
|
||||
parser.parse( buf )
|
||||
|
||||
|
|
Loading…
Reference in New Issue