Replace the iter/itertext methods of Element in _elementtree with true C implementations, instead of the bootstrapped Python code. In addition to being cleaner (removing the last remains of the bootstrapping code in _elementtree), this gives a 10x performance boost for iter() on large documents.
Also reorganized the tests a bit to be more robust.
This commit is contained in:
parent
fedb04a37a
commit
64d11e60f2
|
@ -23,7 +23,8 @@ import weakref
|
|||
from test import support
|
||||
from test.support import findfile, import_fresh_module, gc_collect
|
||||
|
||||
pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree'])
|
||||
pyET = None
|
||||
ET = None
|
||||
|
||||
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
|
||||
try:
|
||||
|
@ -209,10 +210,8 @@ def interface():
|
|||
|
||||
These methods return an iterable. See bug 6472.
|
||||
|
||||
>>> check_method(element.iter("tag").__next__)
|
||||
>>> check_method(element.iterfind("tag").__next__)
|
||||
>>> check_method(element.iterfind("*").__next__)
|
||||
>>> check_method(tree.iter("tag").__next__)
|
||||
>>> check_method(tree.iterfind("tag").__next__)
|
||||
>>> check_method(tree.iterfind("*").__next__)
|
||||
|
||||
|
@ -291,42 +290,6 @@ def cdata():
|
|||
'<tag>hello</tag>'
|
||||
"""
|
||||
|
||||
# Only with Python implementation
|
||||
def simplefind():
|
||||
"""
|
||||
Test find methods using the elementpath fallback.
|
||||
|
||||
>>> ElementTree = pyET
|
||||
|
||||
>>> CurrentElementPath = ElementTree.ElementPath
|
||||
>>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
|
||||
>>> elem = ElementTree.XML(SAMPLE_XML)
|
||||
>>> elem.find("tag").tag
|
||||
'tag'
|
||||
>>> ElementTree.ElementTree(elem).find("tag").tag
|
||||
'tag'
|
||||
>>> elem.findtext("tag")
|
||||
'text'
|
||||
>>> elem.findtext("tog")
|
||||
>>> elem.findtext("tog", "default")
|
||||
'default'
|
||||
>>> ElementTree.ElementTree(elem).findtext("tag")
|
||||
'text'
|
||||
>>> summarize_list(elem.findall("tag"))
|
||||
['tag', 'tag']
|
||||
>>> summarize_list(elem.findall(".//tag"))
|
||||
['tag', 'tag', 'tag']
|
||||
|
||||
Path syntax doesn't work in this case.
|
||||
|
||||
>>> elem.find("section/tag")
|
||||
>>> elem.findtext("section/tag")
|
||||
>>> summarize_list(elem.findall("section/tag"))
|
||||
[]
|
||||
|
||||
>>> ElementTree.ElementPath = CurrentElementPath
|
||||
"""
|
||||
|
||||
def find():
|
||||
"""
|
||||
Test find methods (including xpath syntax).
|
||||
|
@ -1002,36 +965,6 @@ def methods():
|
|||
'1 < 2\n'
|
||||
"""
|
||||
|
||||
def iterators():
|
||||
"""
|
||||
Test iterators.
|
||||
|
||||
>>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
|
||||
>>> summarize_list(e.iter())
|
||||
['html', 'body', 'i']
|
||||
>>> summarize_list(e.find("body").iter())
|
||||
['body', 'i']
|
||||
>>> summarize(next(e.iter()))
|
||||
'html'
|
||||
>>> "".join(e.itertext())
|
||||
'this is a paragraph...'
|
||||
>>> "".join(e.find("body").itertext())
|
||||
'this is a paragraph.'
|
||||
>>> next(e.itertext())
|
||||
'this is a '
|
||||
|
||||
Method iterparse should return an iterator. See bug 6472.
|
||||
|
||||
>>> sourcefile = serialize(e, to_string=False)
|
||||
>>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
|
||||
('end', <Element 'i' at 0x...>)
|
||||
|
||||
>>> tree = ET.ElementTree(None)
|
||||
>>> tree.iter()
|
||||
Traceback (most recent call last):
|
||||
AttributeError: 'NoneType' object has no attribute 'iter'
|
||||
"""
|
||||
|
||||
ENTITY_XML = """\
|
||||
<!DOCTYPE points [
|
||||
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
|
||||
|
@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\
|
|||
</document>
|
||||
""".format(html.escape(SIMPLE_XMLFILE, True))
|
||||
|
||||
|
||||
def xinclude_loader(href, parse="xml", encoding=None):
|
||||
try:
|
||||
data = XINCLUDE[href]
|
||||
|
@ -1411,22 +1345,6 @@ def xinclude():
|
|||
>>> # print(serialize(document)) # C5
|
||||
"""
|
||||
|
||||
def xinclude_default():
|
||||
"""
|
||||
>>> from xml.etree import ElementInclude
|
||||
|
||||
>>> document = xinclude_loader("default.xml")
|
||||
>>> ElementInclude.include(document)
|
||||
>>> print(serialize(document)) # default
|
||||
<document>
|
||||
<p>Example.</p>
|
||||
<root>
|
||||
<element key="value">text</element>
|
||||
<element>text</element>tail
|
||||
<empty-element />
|
||||
</root>
|
||||
</document>
|
||||
"""
|
||||
|
||||
#
|
||||
# badly formatted xi:include tags
|
||||
|
@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase):
|
|||
self.assertIsInstance(ET.QName, type)
|
||||
self.assertIsInstance(ET.ElementTree, type)
|
||||
self.assertIsInstance(ET.Element, type)
|
||||
# XXX issue 14128 with C ElementTree
|
||||
# self.assertIsInstance(ET.TreeBuilder, type)
|
||||
# self.assertIsInstance(ET.XMLParser, type)
|
||||
self.assertIsInstance(ET.TreeBuilder, type)
|
||||
self.assertIsInstance(ET.XMLParser, type)
|
||||
|
||||
def test_Element_subclass_trivial(self):
|
||||
class MyElement(ET.Element):
|
||||
|
@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase):
|
|||
self.assertEqual(mye.newmethod(), 'joe')
|
||||
|
||||
|
||||
class ElementIterTest(unittest.TestCase):
|
||||
def _ilist(self, elem, tag=None):
|
||||
return summarize_list(elem.iter(tag))
|
||||
|
||||
def test_basic(self):
|
||||
doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
|
||||
self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
|
||||
self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
|
||||
self.assertEqual(next(doc.iter()).tag, 'html')
|
||||
self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
|
||||
self.assertEqual(''.join(doc.find('body').itertext()),
|
||||
'this is a paragraph.')
|
||||
self.assertEqual(next(doc.itertext()), 'this is a ')
|
||||
|
||||
# iterparse should return an iterator
|
||||
sourcefile = serialize(doc, to_string=False)
|
||||
self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
|
||||
|
||||
tree = ET.ElementTree(None)
|
||||
self.assertRaises(AttributeError, tree.iter)
|
||||
|
||||
def test_corners(self):
|
||||
# single root, no subelements
|
||||
a = ET.Element('a')
|
||||
self.assertEqual(self._ilist(a), ['a'])
|
||||
|
||||
# one child
|
||||
b = ET.SubElement(a, 'b')
|
||||
self.assertEqual(self._ilist(a), ['a', 'b'])
|
||||
|
||||
# one child and one grandchild
|
||||
c = ET.SubElement(b, 'c')
|
||||
self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
|
||||
|
||||
# two children, only first with grandchild
|
||||
d = ET.SubElement(a, 'd')
|
||||
self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
|
||||
|
||||
# replace first child by second
|
||||
a[0] = a[1]
|
||||
del a[1]
|
||||
self.assertEqual(self._ilist(a), ['a', 'd'])
|
||||
|
||||
def test_iter_by_tag(self):
|
||||
doc = ET.XML('''
|
||||
<document>
|
||||
<house>
|
||||
<room>bedroom1</room>
|
||||
<room>bedroom2</room>
|
||||
</house>
|
||||
<shed>nothing here
|
||||
</shed>
|
||||
<house>
|
||||
<room>bedroom8</room>
|
||||
</house>
|
||||
</document>''')
|
||||
|
||||
self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
|
||||
self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
|
||||
|
||||
# make sure both tag=None and tag='*' return all tags
|
||||
all_tags = ['document', 'house', 'room', 'room',
|
||||
'shed', 'house', 'room']
|
||||
self.assertEqual(self._ilist(doc), all_tags)
|
||||
self.assertEqual(self._ilist(doc, '*'), all_tags)
|
||||
|
||||
|
||||
class TreeBuilderTest(unittest.TestCase):
|
||||
sample1 = ('<!DOCTYPE html PUBLIC'
|
||||
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
||||
|
@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase):
|
|||
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
|
||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||
|
||||
|
||||
class XincludeTest(unittest.TestCase):
|
||||
def test_xinclude_default(self):
|
||||
from xml.etree import ElementInclude
|
||||
doc = xinclude_loader('default.xml')
|
||||
ElementInclude.include(doc)
|
||||
s = serialize(doc)
|
||||
self.assertEqual(s.strip(), '''<document>
|
||||
<p>Example.</p>
|
||||
<root>
|
||||
<element key="value">text</element>
|
||||
<element>text</element>tail
|
||||
<empty-element />
|
||||
</root>
|
||||
</document>''')
|
||||
class XMLParserTest(unittest.TestCase):
|
||||
sample1 = '<file><line>22</line></file>'
|
||||
sample2 = ('<!DOCTYPE html PUBLIC'
|
||||
|
@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase):
|
|||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||
|
||||
|
||||
class NoAcceleratorTest(unittest.TestCase):
|
||||
# Test that the C accelerator was not imported for pyET
|
||||
def test_correct_import_pyET(self):
|
||||
self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
|
||||
self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
|
||||
|
||||
|
||||
class NamespaceParseTest(unittest.TestCase):
|
||||
def test_find_with_namespace(self):
|
||||
nsmap = {'h': 'hello', 'f': 'foo'}
|
||||
|
@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase):
|
|||
self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
|
||||
|
||||
|
||||
|
||||
class ElementSlicingTest(unittest.TestCase):
|
||||
def _elem_tags(self, elemlist):
|
||||
return [e.tag for e in elemlist]
|
||||
|
@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase):
|
|||
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
|
||||
ET.Element('a', attrib="I'm not a dict")
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
@unittest.skipUnless(pyET, 'only for the Python version')
|
||||
class NoAcceleratorTest(unittest.TestCase):
|
||||
# Test that the C accelerator was not imported for pyET
|
||||
def test_correct_import_pyET(self):
|
||||
self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
|
||||
self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
|
||||
|
||||
|
||||
class ElementPathFallbackTest(unittest.TestCase):
|
||||
def test_fallback(self):
|
||||
current_ElementPath = ET.ElementPath
|
||||
ET.ElementPath = ET._SimpleElementPath()
|
||||
elem = ET.XML(SAMPLE_XML)
|
||||
self.assertEqual(elem.find('tag').tag, 'tag')
|
||||
self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag')
|
||||
self.assertEqual(elem.findtext('tag'), 'text')
|
||||
self.assertIsNone(elem.findtext('tog'))
|
||||
self.assertEqual(elem.findtext('tog', 'default'), 'default')
|
||||
self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text')
|
||||
self.assertEqual(summarize_list(elem.findall('tag')), ['tag', 'tag'])
|
||||
self.assertEqual(summarize_list(elem.findall('.//tag')),
|
||||
['tag', 'tag', 'tag'])
|
||||
|
||||
self.assertIsNone(elem.find('section/tag'))
|
||||
self.assertIsNone(elem.findtext('section/tag'))
|
||||
self.assertEqual(summarize_list(elem.findall('section/tag')), [])
|
||||
|
||||
ET.ElementPath = current_ElementPath
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
|
@ -2276,31 +2295,43 @@ class CleanContext(object):
|
|||
self.checkwarnings.__exit__(*args)
|
||||
|
||||
|
||||
def test_main(module=pyET):
|
||||
from test import test_xml_etree
|
||||
def test_main(module=None):
|
||||
# When invoked without a module, runs the Python ET tests by loading pyET.
|
||||
# Otherwise, uses the given module as the ET.
|
||||
if module is None:
|
||||
global pyET
|
||||
pyET = import_fresh_module('xml.etree.ElementTree',
|
||||
blocked=['_elementtree'])
|
||||
module = pyET
|
||||
|
||||
# The same doctests are used for both the Python and the C implementations
|
||||
test_xml_etree.ET = module
|
||||
global ET
|
||||
ET = module
|
||||
|
||||
test_classes = [
|
||||
ElementSlicingTest,
|
||||
BasicElementTest,
|
||||
StringIOTest,
|
||||
ParseErrorTest,
|
||||
XincludeTest,
|
||||
ElementTreeTest,
|
||||
NamespaceParseTest,
|
||||
ElementIterTest,
|
||||
TreeBuilderTest,
|
||||
XMLParserTest,
|
||||
KeywordArgsTest]
|
||||
if module is pyET:
|
||||
# Run the tests specific to the Python implementation
|
||||
test_classes += [NoAcceleratorTest]
|
||||
]
|
||||
|
||||
# These tests will only run for the pure-Python version that doesn't import
|
||||
# _elementtree. We can't use skipUnless here, because pyET is filled in only
|
||||
# after the module is loaded.
|
||||
if pyET:
|
||||
test_classes.extend([
|
||||
NoAcceleratorTest,
|
||||
ElementPathFallbackTest,
|
||||
])
|
||||
|
||||
support.run_unittest(*test_classes)
|
||||
|
||||
# XXX the C module should give the same warnings as the Python module
|
||||
with CleanContext(quiet=(module is not pyET)):
|
||||
support.run_doctest(test_xml_etree, verbosity=True)
|
||||
support.run_doctest(sys.modules[__name__], verbosity=True)
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_main()
|
||||
|
|
|
@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree'])
|
|||
cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree'])
|
||||
|
||||
|
||||
# cElementTree specific tests
|
||||
|
||||
def sanity():
|
||||
r"""
|
||||
Import sanity.
|
||||
|
||||
Issue #6697.
|
||||
|
||||
>>> cElementTree = cET
|
||||
>>> e = cElementTree.Element('a')
|
||||
>>> getattr(e, '\uD800') # doctest: +ELLIPSIS
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
UnicodeEncodeError: ...
|
||||
|
||||
>>> p = cElementTree.XMLParser()
|
||||
>>> p.version.split()[0]
|
||||
'Expat'
|
||||
>>> getattr(p, '\uD800')
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
AttributeError: 'XMLParser' object has no attribute '\ud800'
|
||||
"""
|
||||
|
||||
|
||||
class MiscTests(unittest.TestCase):
|
||||
# Issue #8651.
|
||||
@support.bigmemtest(size=support._2G + 100, memuse=1)
|
||||
|
@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase):
|
|||
finally:
|
||||
data = None
|
||||
|
||||
|
||||
@unittest.skipUnless(cET, 'requires _elementtree')
|
||||
class TestAliasWorking(unittest.TestCase):
|
||||
# Test that the cET alias module is alive
|
||||
|
@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase):
|
|||
e = cET_alias.Element('foo')
|
||||
self.assertEqual(e.tag, 'foo')
|
||||
|
||||
|
||||
@unittest.skipUnless(cET, 'requires _elementtree')
|
||||
class TestAcceleratorImported(unittest.TestCase):
|
||||
# Test that the C accelerator was imported, as expected
|
||||
|
@ -67,7 +44,6 @@ def test_main():
|
|||
from test import test_xml_etree, test_xml_etree_c
|
||||
|
||||
# Run the tests specific to the C implementation
|
||||
support.run_doctest(test_xml_etree_c, verbosity=True)
|
||||
support.run_unittest(
|
||||
MiscTests,
|
||||
TestAliasWorking,
|
||||
|
|
|
@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None):
|
|||
_raise_serialization_error(qname)
|
||||
|
||||
# populate qname and namespaces table
|
||||
try:
|
||||
iterate = elem.iter
|
||||
except AttributeError:
|
||||
iterate = elem.getiterator # cET compatibility
|
||||
for elem in iterate():
|
||||
for elem in elem.iter():
|
||||
tag = elem.tag
|
||||
if isinstance(tag, QName):
|
||||
if tag.text not in qnames:
|
||||
|
|
|
@ -103,8 +103,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
|
|||
/* glue functions (see the init function for details) */
|
||||
static PyObject* elementtree_parseerror_obj;
|
||||
static PyObject* elementtree_deepcopy_obj;
|
||||
static PyObject* elementtree_iter_obj;
|
||||
static PyObject* elementtree_itertext_obj;
|
||||
static PyObject* elementpath_obj;
|
||||
|
||||
/* helpers */
|
||||
|
@ -1109,67 +1107,32 @@ element_getchildren(ElementObject* self, PyObject* args)
|
|||
return list;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
element_iter(ElementObject* self, PyObject* args)
|
||||
{
|
||||
PyObject* result;
|
||||
|
||||
static PyObject *
|
||||
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
|
||||
|
||||
|
||||
static PyObject *
|
||||
element_iter(ElementObject *self, PyObject *args)
|
||||
{
|
||||
PyObject* tag = Py_None;
|
||||
if (!PyArg_ParseTuple(args, "|O:iter", &tag))
|
||||
return NULL;
|
||||
|
||||
if (!elementtree_iter_obj) {
|
||||
PyErr_SetString(
|
||||
PyExc_RuntimeError,
|
||||
"iter helper not found"
|
||||
);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
args = PyTuple_New(2);
|
||||
if (!args)
|
||||
return NULL;
|
||||
|
||||
Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
|
||||
Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
|
||||
|
||||
result = PyObject_CallObject(elementtree_iter_obj, args);
|
||||
|
||||
Py_DECREF(args);
|
||||
|
||||
return result;
|
||||
return create_elementiter(self, tag, 0);
|
||||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
element_itertext(ElementObject* self, PyObject* args)
|
||||
{
|
||||
PyObject* result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, ":itertext"))
|
||||
return NULL;
|
||||
|
||||
if (!elementtree_itertext_obj) {
|
||||
PyErr_SetString(
|
||||
PyExc_RuntimeError,
|
||||
"itertext helper not found"
|
||||
);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
args = PyTuple_New(1);
|
||||
if (!args)
|
||||
return NULL;
|
||||
|
||||
Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
|
||||
|
||||
result = PyObject_CallObject(elementtree_itertext_obj, args);
|
||||
|
||||
Py_DECREF(args);
|
||||
|
||||
return result;
|
||||
return create_elementiter(self, Py_None, 1);
|
||||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
element_getitem(PyObject* self_, Py_ssize_t index)
|
||||
{
|
||||
|
@ -1790,6 +1753,267 @@ static PyTypeObject Element_Type = {
|
|||
0, /* tp_free */
|
||||
};
|
||||
|
||||
/******************************* Element iterator ****************************/
|
||||
|
||||
/* ElementIterObject represents the iteration state over an XML element in
|
||||
* pre-order traversal. To keep track of which sub-element should be returned
|
||||
* next, a stack of parents is maintained. This is a standard stack-based
|
||||
* iterative pre-order traversal of a tree.
|
||||
* The stack is managed using a single-linked list starting at parent_stack.
|
||||
* Each stack node contains the saved parent to which we should return after
|
||||
* the current one is exhausted, and the next child to examine in that parent.
|
||||
*/
|
||||
typedef struct ParentLocator_t {
|
||||
ElementObject *parent;
|
||||
Py_ssize_t child_index;
|
||||
struct ParentLocator_t *next;
|
||||
} ParentLocator;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
ParentLocator *parent_stack;
|
||||
ElementObject *root_element;
|
||||
PyObject *sought_tag;
|
||||
int root_done;
|
||||
int gettext;
|
||||
} ElementIterObject;
|
||||
|
||||
|
||||
static void
|
||||
elementiter_dealloc(ElementIterObject *it)
|
||||
{
|
||||
ParentLocator *p = it->parent_stack;
|
||||
while (p) {
|
||||
ParentLocator *temp = p;
|
||||
Py_XDECREF(p->parent);
|
||||
p = p->next;
|
||||
PyObject_Free(temp);
|
||||
}
|
||||
|
||||
Py_XDECREF(it->sought_tag);
|
||||
Py_XDECREF(it->root_element);
|
||||
|
||||
PyObject_GC_UnTrack(it);
|
||||
PyObject_GC_Del(it);
|
||||
}
|
||||
|
||||
static int
|
||||
elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
|
||||
{
|
||||
ParentLocator *p = it->parent_stack;
|
||||
while (p) {
|
||||
Py_VISIT(p->parent);
|
||||
p = p->next;
|
||||
}
|
||||
|
||||
Py_VISIT(it->root_element);
|
||||
Py_VISIT(it->sought_tag);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper function for elementiter_next. Add a new parent to the parent stack.
|
||||
*/
|
||||
static ParentLocator *
|
||||
parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
|
||||
{
|
||||
ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
|
||||
if (new_node) {
|
||||
new_node->parent = parent;
|
||||
Py_INCREF(parent);
|
||||
new_node->child_index = 0;
|
||||
new_node->next = stack;
|
||||
}
|
||||
return new_node;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
elementiter_next(ElementIterObject *it)
|
||||
{
|
||||
/* Sub-element iterator.
|
||||
*
|
||||
* A short note on gettext: this function serves both the iter() and
|
||||
* itertext() methods to avoid code duplication. However, there are a few
|
||||
* small differences in the way these iterations work. Namely:
|
||||
* - itertext() only yields text from nodes that have it, and continues
|
||||
* iterating when a node doesn't have text (so it doesn't return any
|
||||
* node like iter())
|
||||
* - itertext() also has to handle tail, after finishing with all the
|
||||
* children of a node.
|
||||
*/
|
||||
|
||||
while (1) {
|
||||
/* Handle the case reached in the beginning and end of iteration, where
|
||||
* the parent stack is empty. The root_done flag gives us indication
|
||||
* whether we've just started iterating (so root_done is 0), in which
|
||||
* case the root is returned. If root_done is 1 and we're here, the
|
||||
* iterator is exhausted.
|
||||
*/
|
||||
if (!it->parent_stack->parent) {
|
||||
if (it->root_done) {
|
||||
PyErr_SetNone(PyExc_StopIteration);
|
||||
return NULL;
|
||||
} else {
|
||||
it->parent_stack = parent_stack_push_new(it->parent_stack,
|
||||
it->root_element);
|
||||
if (!it->parent_stack) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
it->root_done = 1;
|
||||
if (it->sought_tag == Py_None ||
|
||||
PyObject_RichCompareBool(it->root_element->tag,
|
||||
it->sought_tag, Py_EQ) == 1) {
|
||||
if (it->gettext) {
|
||||
PyObject *text = JOIN_OBJ(it->root_element->text);
|
||||
if (PyObject_IsTrue(text)) {
|
||||
Py_INCREF(text);
|
||||
return text;
|
||||
}
|
||||
} else {
|
||||
Py_INCREF(it->root_element);
|
||||
return (PyObject *)it->root_element;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* See if there are children left to traverse in the current parent. If
|
||||
* yes, visit the next child. If not, pop the stack and try again.
|
||||
*/
|
||||
ElementObject *cur_parent = it->parent_stack->parent;
|
||||
Py_ssize_t child_index = it->parent_stack->child_index;
|
||||
if (cur_parent->extra && child_index < cur_parent->extra->length) {
|
||||
ElementObject *child = (ElementObject *)
|
||||
cur_parent->extra->children[child_index];
|
||||
it->parent_stack->child_index++;
|
||||
it->parent_stack = parent_stack_push_new(it->parent_stack,
|
||||
child);
|
||||
if (!it->parent_stack) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (it->gettext) {
|
||||
PyObject *text = JOIN_OBJ(child->text);
|
||||
if (PyObject_IsTrue(text)) {
|
||||
Py_INCREF(text);
|
||||
return text;
|
||||
}
|
||||
} else if (it->sought_tag == Py_None ||
|
||||
PyObject_RichCompareBool(child->tag,
|
||||
it->sought_tag, Py_EQ) == 1) {
|
||||
Py_INCREF(child);
|
||||
return (PyObject *)child;
|
||||
}
|
||||
else
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
|
||||
ParentLocator *next = it->parent_stack->next;
|
||||
Py_XDECREF(it->parent_stack->parent);
|
||||
PyObject_Free(it->parent_stack);
|
||||
it->parent_stack = next;
|
||||
|
||||
/* Note that extra condition on it->parent_stack->parent here;
|
||||
* this is because itertext() is supposed to only return *inner*
|
||||
* text, not text following the element it began iteration with.
|
||||
*/
|
||||
if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
|
||||
Py_INCREF(tail);
|
||||
return tail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static PyTypeObject ElementIter_Type = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
"_elementtree._element_iterator", /* tp_name */
|
||||
sizeof(ElementIterObject), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
/* methods */
|
||||
(destructor)elementiter_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_reserved */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
||||
0, /* tp_doc */
|
||||
(traverseproc)elementiter_traverse, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
PyObject_SelfIter, /* tp_iter */
|
||||
(iternextfunc)elementiter_next, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
0, /* tp_new */
|
||||
};
|
||||
|
||||
|
||||
static PyObject *
|
||||
create_elementiter(ElementObject *self, PyObject *tag, int gettext)
|
||||
{
|
||||
ElementIterObject *it;
|
||||
PyObject *star = NULL;
|
||||
|
||||
it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
|
||||
if (!it)
|
||||
return NULL;
|
||||
if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
|
||||
PyObject_GC_Del(it);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
it->parent_stack->parent = NULL;
|
||||
it->parent_stack->child_index = 0;
|
||||
it->parent_stack->next = NULL;
|
||||
|
||||
if (PyUnicode_Check(tag))
|
||||
star = PyUnicode_FromString("*");
|
||||
else if (PyBytes_Check(tag))
|
||||
star = PyBytes_FromString("*");
|
||||
|
||||
if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
|
||||
tag = Py_None;
|
||||
|
||||
Py_XDECREF(star);
|
||||
it->sought_tag = tag;
|
||||
it->root_done = 0;
|
||||
it->gettext = gettext;
|
||||
it->root_element = self;
|
||||
|
||||
Py_INCREF(self);
|
||||
Py_INCREF(tag);
|
||||
|
||||
PyObject_GC_Track(it);
|
||||
return (PyObject *)it;
|
||||
}
|
||||
|
||||
|
||||
/* ==================================================================== */
|
||||
/* the tree builder type */
|
||||
|
||||
|
@ -3238,8 +3462,7 @@ static struct PyModuleDef _elementtreemodule = {
|
|||
PyMODINIT_FUNC
|
||||
PyInit__elementtree(void)
|
||||
{
|
||||
PyObject *m, *g, *temp;
|
||||
char* bootstrap;
|
||||
PyObject *m, *temp;
|
||||
|
||||
/* Initialize object types */
|
||||
if (PyType_Ready(&TreeBuilder_Type) < 0)
|
||||
|
@ -3255,44 +3478,6 @@ PyInit__elementtree(void)
|
|||
if (!m)
|
||||
return NULL;
|
||||
|
||||
/* The code below requires that the module gets already added
|
||||
to sys.modules. */
|
||||
PyDict_SetItemString(PyImport_GetModuleDict(),
|
||||
_elementtreemodule.m_name,
|
||||
m);
|
||||
|
||||
/* python glue code */
|
||||
|
||||
g = PyDict_New();
|
||||
if (!g)
|
||||
return NULL;
|
||||
|
||||
PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
|
||||
|
||||
bootstrap = (
|
||||
"def iter(node, tag=None):\n" /* helper */
|
||||
" if tag == '*':\n"
|
||||
" tag = None\n"
|
||||
" if tag is None or node.tag == tag:\n"
|
||||
" yield node\n"
|
||||
" for node in node:\n"
|
||||
" for node in iter(node, tag):\n"
|
||||
" yield node\n"
|
||||
|
||||
"def itertext(node):\n" /* helper */
|
||||
" if node.text:\n"
|
||||
" yield node.text\n"
|
||||
" for e in node:\n"
|
||||
" for s in e.itertext():\n"
|
||||
" yield s\n"
|
||||
" if e.tail:\n"
|
||||
" yield e.tail\n"
|
||||
|
||||
);
|
||||
|
||||
if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
|
||||
return NULL;
|
||||
|
||||
if (!(temp = PyImport_ImportModule("copy")))
|
||||
return NULL;
|
||||
elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
|
||||
|
@ -3301,9 +3486,6 @@ PyInit__elementtree(void)
|
|||
if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
|
||||
return NULL;
|
||||
|
||||
elementtree_iter_obj = PyDict_GetItemString(g, "iter");
|
||||
elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
|
||||
|
||||
/* link against pyexpat */
|
||||
expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
|
||||
if (expat_capi) {
|
||||
|
|
Loading…
Reference in New Issue