bpo-37399: Correctly attach tail text to the last element/comment/pi (GH-14856)
* bpo-37399: Correctly attach tail text to the last element/comment/pi, even when comments or pis are discarded. Also fixes the insertion of PIs when "insert_pis=True" is configured for a TreeBuilder.
This commit is contained in:
parent
151b91dfd2
commit
c6cb4cdd21
|
@ -2954,6 +2954,66 @@ class TreeBuilderTest(unittest.TestCase):
|
|||
self.assertEqual(b.pi('target'), (len('target'), None))
|
||||
self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
|
||||
|
||||
def test_late_tail(self):
|
||||
# Issue #37399: The tail of an ignored comment could overwrite the text before it.
|
||||
class TreeBuilderSubclass(ET.TreeBuilder):
|
||||
pass
|
||||
|
||||
xml = "<a>text<!-- comment -->tail</a>"
|
||||
a = ET.fromstring(xml)
|
||||
self.assertEqual(a.text, "texttail")
|
||||
|
||||
parser = ET.XMLParser(target=TreeBuilderSubclass())
|
||||
parser.feed(xml)
|
||||
a = parser.close()
|
||||
self.assertEqual(a.text, "texttail")
|
||||
|
||||
xml = "<a>text<?pi data?>tail</a>"
|
||||
a = ET.fromstring(xml)
|
||||
self.assertEqual(a.text, "texttail")
|
||||
|
||||
xml = "<a>text<?pi data?>tail</a>"
|
||||
parser = ET.XMLParser(target=TreeBuilderSubclass())
|
||||
parser.feed(xml)
|
||||
a = parser.close()
|
||||
self.assertEqual(a.text, "texttail")
|
||||
|
||||
def test_late_tail_mix_pi_comments(self):
|
||||
# Issue #37399: The tail of an ignored comment could overwrite the text before it.
|
||||
# Test appending tails to comments/pis.
|
||||
class TreeBuilderSubclass(ET.TreeBuilder):
|
||||
pass
|
||||
|
||||
xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
|
||||
parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
|
||||
parser.feed(xml)
|
||||
a = parser.close()
|
||||
self.assertEqual(a[0].text, ' comment ')
|
||||
self.assertEqual(a[0].tail, '\ntail')
|
||||
self.assertEqual(a.text, "text ")
|
||||
|
||||
parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
|
||||
parser.feed(xml)
|
||||
a = parser.close()
|
||||
self.assertEqual(a[0].text, ' comment ')
|
||||
self.assertEqual(a[0].tail, '\ntail')
|
||||
self.assertEqual(a.text, "text ")
|
||||
|
||||
xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
|
||||
parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
|
||||
parser.feed(xml)
|
||||
a = parser.close()
|
||||
self.assertEqual(a[0].text, 'pi data')
|
||||
self.assertEqual(a[0].tail, 'tail')
|
||||
self.assertEqual(a.text, "text\n")
|
||||
|
||||
parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
|
||||
parser.feed(xml)
|
||||
a = parser.close()
|
||||
self.assertEqual(a[0].text, 'pi data')
|
||||
self.assertEqual(a[0].tail, 'tail')
|
||||
self.assertEqual(a.text, "text\n")
|
||||
|
||||
def test_treebuilder_elementfactory_none(self):
|
||||
parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
|
||||
parser.feed(self.sample1)
|
||||
|
|
|
@ -2399,6 +2399,7 @@ typedef struct {
|
|||
|
||||
PyObject *this; /* current node */
|
||||
PyObject *last; /* most recently created node */
|
||||
PyObject *last_for_tail; /* most recently created node that takes a tail */
|
||||
|
||||
PyObject *data; /* data collector (string or list), or NULL */
|
||||
|
||||
|
@ -2530,6 +2531,7 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
|
|||
Py_VISIT(self->root);
|
||||
Py_VISIT(self->this);
|
||||
Py_VISIT(self->last);
|
||||
Py_VISIT(self->last_for_tail);
|
||||
Py_VISIT(self->data);
|
||||
Py_VISIT(self->stack);
|
||||
Py_VISIT(self->pi_factory);
|
||||
|
@ -2551,6 +2553,7 @@ treebuilder_gc_clear(TreeBuilderObject *self)
|
|||
Py_CLEAR(self->stack);
|
||||
Py_CLEAR(self->data);
|
||||
Py_CLEAR(self->last);
|
||||
Py_CLEAR(self->last_for_tail);
|
||||
Py_CLEAR(self->this);
|
||||
Py_CLEAR(self->pi_factory);
|
||||
Py_CLEAR(self->comment_factory);
|
||||
|
@ -2622,21 +2625,50 @@ _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
|
|||
}
|
||||
|
||||
static int
|
||||
treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
|
||||
PyObject **dest, _Py_Identifier *name)
|
||||
treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
|
||||
PyObject **dest, _Py_Identifier *name)
|
||||
{
|
||||
/* Fast paths for the "almost always" cases. */
|
||||
if (Element_CheckExact(element)) {
|
||||
PyObject *tmp = JOIN_OBJ(*dest);
|
||||
*dest = JOIN_SET(*data, PyList_CheckExact(*data));
|
||||
*data = NULL;
|
||||
Py_DECREF(tmp);
|
||||
return 0;
|
||||
PyObject *dest_obj = JOIN_OBJ(*dest);
|
||||
if (dest_obj == Py_None) {
|
||||
*dest = JOIN_SET(*data, PyList_CheckExact(*data));
|
||||
*data = NULL;
|
||||
Py_DECREF(dest_obj);
|
||||
return 0;
|
||||
}
|
||||
else if (JOIN_GET(*dest)) {
|
||||
if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
|
||||
return -1;
|
||||
}
|
||||
Py_CLEAR(*data);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
PyObject *joined = list_join(*data);
|
||||
|
||||
/* Fallback for the non-Element / non-trivial cases. */
|
||||
{
|
||||
int r;
|
||||
if (joined == NULL)
|
||||
PyObject* joined;
|
||||
PyObject* previous = _PyObject_GetAttrId(element, name);
|
||||
if (!previous)
|
||||
return -1;
|
||||
joined = list_join(*data);
|
||||
if (!joined) {
|
||||
Py_DECREF(previous);
|
||||
return -1;
|
||||
}
|
||||
if (previous != Py_None) {
|
||||
PyObject *tmp = PyNumber_Add(previous, joined);
|
||||
Py_DECREF(joined);
|
||||
Py_DECREF(previous);
|
||||
if (!tmp)
|
||||
return -1;
|
||||
joined = tmp;
|
||||
} else {
|
||||
Py_DECREF(previous);
|
||||
}
|
||||
|
||||
r = _PyObject_SetAttrId(element, name, joined);
|
||||
Py_DECREF(joined);
|
||||
if (r < 0)
|
||||
|
@ -2649,21 +2681,21 @@ treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
|
|||
LOCAL(int)
|
||||
treebuilder_flush_data(TreeBuilderObject* self)
|
||||
{
|
||||
PyObject *element = self->last;
|
||||
|
||||
if (!self->data) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (self->this == element) {
|
||||
if (!self->last_for_tail) {
|
||||
PyObject *element = self->last;
|
||||
_Py_IDENTIFIER(text);
|
||||
return treebuilder_set_element_text_or_tail(
|
||||
return treebuilder_extend_element_text_or_tail(
|
||||
element, &self->data,
|
||||
&((ElementObject *) element)->text, &PyId_text);
|
||||
}
|
||||
else {
|
||||
PyObject *element = self->last_for_tail;
|
||||
_Py_IDENTIFIER(tail);
|
||||
return treebuilder_set_element_text_or_tail(
|
||||
return treebuilder_extend_element_text_or_tail(
|
||||
element, &self->data,
|
||||
&((ElementObject *) element)->tail, &PyId_tail);
|
||||
}
|
||||
|
@ -2739,6 +2771,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
|
|||
}
|
||||
|
||||
this = self->this;
|
||||
Py_CLEAR(self->last_for_tail);
|
||||
|
||||
if (this != Py_None) {
|
||||
if (treebuilder_add_subelement(this, node) < 0)
|
||||
|
@ -2836,6 +2869,8 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
|
|||
|
||||
item = self->last;
|
||||
self->last = self->this;
|
||||
Py_INCREF(self->last);
|
||||
Py_XSETREF(self->last_for_tail, self->last);
|
||||
self->index--;
|
||||
self->this = PyList_GET_ITEM(self->stack, self->index);
|
||||
Py_INCREF(self->this);
|
||||
|
@ -2851,7 +2886,7 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
|
|||
LOCAL(PyObject*)
|
||||
treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
|
||||
{
|
||||
PyObject* comment = NULL;
|
||||
PyObject* comment;
|
||||
PyObject* this;
|
||||
|
||||
if (treebuilder_flush_data(self) < 0) {
|
||||
|
@ -2867,6 +2902,8 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
|
|||
if (self->insert_comments && this != Py_None) {
|
||||
if (treebuilder_add_subelement(this, comment) < 0)
|
||||
goto error;
|
||||
Py_INCREF(comment);
|
||||
Py_XSETREF(self->last_for_tail, comment);
|
||||
}
|
||||
} else {
|
||||
Py_INCREF(text);
|
||||
|
@ -2888,7 +2925,7 @@ treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
|
|||
LOCAL(PyObject*)
|
||||
treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
|
||||
{
|
||||
PyObject* pi = NULL;
|
||||
PyObject* pi;
|
||||
PyObject* this;
|
||||
PyObject* stack[2] = {target, text};
|
||||
|
||||
|
@ -2906,6 +2943,8 @@ treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
|
|||
if (self->insert_pis && this != Py_None) {
|
||||
if (treebuilder_add_subelement(this, pi) < 0)
|
||||
goto error;
|
||||
Py_INCREF(pi);
|
||||
Py_XSETREF(self->last_for_tail, pi);
|
||||
}
|
||||
} else {
|
||||
pi = PyTuple_Pack(2, target, text);
|
||||
|
@ -3495,8 +3534,8 @@ expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
|
|||
static void
|
||||
expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
|
||||
{
|
||||
PyObject* comment = NULL;
|
||||
PyObject* res = NULL;
|
||||
PyObject* comment;
|
||||
PyObject* res;
|
||||
|
||||
if (PyErr_Occurred())
|
||||
return;
|
||||
|
@ -3510,16 +3549,17 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
|
|||
return; /* parser will look for errors */
|
||||
|
||||
res = treebuilder_handle_comment(target, comment);
|
||||
Py_XDECREF(res);
|
||||
Py_DECREF(comment);
|
||||
} else if (self->handle_comment) {
|
||||
comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
|
||||
if (!comment)
|
||||
return;
|
||||
|
||||
res = _PyObject_CallOneArg(self->handle_comment, comment);
|
||||
Py_XDECREF(res);
|
||||
Py_DECREF(comment);
|
||||
}
|
||||
|
||||
Py_XDECREF(res);
|
||||
Py_DECREF(comment);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3587,7 +3627,7 @@ static void
|
|||
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
|
||||
const XML_Char* data_in)
|
||||
{
|
||||
PyObject* pi_target = NULL;
|
||||
PyObject* pi_target;
|
||||
PyObject* data;
|
||||
PyObject* res;
|
||||
PyObject* stack[2];
|
||||
|
@ -3599,7 +3639,7 @@ expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
|
|||
/* shortcut */
|
||||
TreeBuilderObject *target = (TreeBuilderObject*) self->target;
|
||||
|
||||
if (target->events_append && target->pi_event_obj) {
|
||||
if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
|
||||
pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
|
||||
if (!pi_target)
|
||||
goto error;
|
||||
|
|
Loading…
Reference in New Issue