From 05744ac6e0948cbd6a50fc03a239a5402abceb14 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 29 Jun 2015 22:35:58 +0300 Subject: [PATCH] Issue #19176: Fixed doctype() related bugs in C implementation of ElementTree. A deprecation warning no longer issued by XMLParser subclass with default doctype() method. Direct call of doctype() now issues a warning. Parser's doctype() now is not called if target's doctype() is called. Based on patch by Martin Panter. --- Lib/test/test_xml_etree.py | 41 ++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 6 ++++++ Modules/_elementtree.c | 35 +++++++++++++++++++------------- 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index d4c7c6e5056..1c6a939c29a 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -12,6 +12,7 @@ import pickle import sys import types import unittest +import warnings import weakref from itertools import product @@ -2237,6 +2238,20 @@ class XMLParserTest(unittest.TestCase): parser.feed(self.sample1) self._check_sample_element(parser.close()) + def test_doctype_warning(self): + parser = ET.XMLParser() + with self.assertWarns(DeprecationWarning): + parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd') + parser.feed('<html/>') + parser.close() + + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + parser = ET.XMLParser() + parser.feed(self.sample2) + parser.close() + def test_subclass_doctype(self): _doctype = None class MyParserWithDoctype(ET.XMLParser): @@ -2252,6 +2267,32 @@ class XMLParserTest(unittest.TestCase): ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) + _doctype = _doctype2 = None + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + class DoctypeParser: + def doctype(self, name, pubid, system): + 
nonlocal _doctype2 + _doctype2 = (name, pubid, system) + + parser = MyParserWithDoctype(target=DoctypeParser()) + parser.feed(self.sample2) + parser.close() + self.assertIsNone(_doctype) + self.assertEqual(_doctype2, + ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) + + def test_inherited_doctype(self): + '''Ensure that ordinary usage is not deprecated (Issue 19176)''' + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + class MyParserWithoutDoctype(ET.XMLParser): + pass + parser = MyParserWithoutDoctype() + parser.feed(self.sample2) + parser.close() + def test_parse_string(self): parser = ET.XMLParser(target=ET.TreeBuilder()) parser.feed(self.sample3) diff --git a/Misc/NEWS b/Misc/NEWS index 9d8f1e8b817..19451c4c737 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -64,6 +64,12 @@ Core and Builtins Library ------- +- Issue #19176: Fixed doctype() related bugs in C implementation of ElementTree. + A deprecation warning no longer issued by XMLParser subclass with default + doctype() method. Direct call of doctype() now issues a warning. Parser's + doctype() now is not called if target's doctype() is called. Based on patch + by Martin Panter. + - Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize for tab-indented blocks. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 136f19cae64..826342aa91b 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2791,7 +2791,7 @@ typedef struct { } XMLParserObject; -#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type) +static PyObject* xmlparser_doctype(XMLParserObject* self, PyObject* args); /* helpers */ @@ -3190,20 +3190,21 @@ expat_start_doctype_handler(XMLParserObject *self, doctype_name_obj, pubid_obj, sysid_obj); Py_CLEAR(res); } - - /* Now see if the parser itself has a doctype method. If yes and it's - * a subclass, call it but warn about deprecation. 
If it's not a subclass - * (i.e. vanilla XMLParser), do nothing. - */ - parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); - if (parser_doctype) { - if (!XMLParser_CheckExact(self_pyobj)) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "This method of XMLParser is deprecated. Define" - " doctype() method on the TreeBuilder target.", - 1) < 0) { + else { + /* Now see if the parser itself has a doctype method. If yes and it's + * a custom method, call it but warn about deprecation. If it's only + * the vanilla XMLParser method, do nothing. + */ + parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); + if (parser_doctype && + !(PyCFunction_Check(parser_doctype) && + PyCFunction_GET_SELF(parser_doctype) == self_pyobj && + PyCFunction_GET_FUNCTION(parser_doctype) == + (PyCFunction) xmlparser_doctype)) { + res = xmlparser_doctype(self, NULL); + if (!res) goto clear; - } + Py_DECREF(res); res = PyObject_CallFunction(parser_doctype, "OOO", doctype_name_obj, pubid_obj, sysid_obj); Py_CLEAR(res); @@ -3556,6 +3557,12 @@ xmlparser_parse_whole(XMLParserObject* self, PyObject* args) static PyObject* xmlparser_doctype(XMLParserObject *self, PyObject *args) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "This method of XMLParser is deprecated. Define" + " doctype() method on the TreeBuilder target.", + 1) < 0) { + return NULL; + } Py_RETURN_NONE; }