Issue #16986: ElementTree now correctly parses string input even when the
encoding declared in the XML document is not UTF-8 or US-ASCII.
This commit is contained in:
parent
9e62d35e65
commit
66d53fa9ad
|
@ -45,6 +45,7 @@ struct PyExpat_CAPI
|
||||||
void (*SetUserData)(XML_Parser parser, void *userData);
|
void (*SetUserData)(XML_Parser parser, void *userData);
|
||||||
void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
|
void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
|
||||||
XML_StartDoctypeDeclHandler start);
|
XML_StartDoctypeDeclHandler start);
|
||||||
|
enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding);
|
||||||
/* always add new stuff to the end! */
|
/* always add new stuff to the end! */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -677,15 +677,18 @@ class ElementTreeTest(unittest.TestCase):
|
||||||
elem = ET.fromstring("<html><body>text</body></html>")
|
elem = ET.fromstring("<html><body>text</body></html>")
|
||||||
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
|
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
|
||||||
|
|
||||||
def test_encoding(encoding):
|
def test_encoding(self):
|
||||||
def check(encoding):
|
def check(encoding, body=''):
|
||||||
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
|
xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
|
||||||
check("ascii")
|
(encoding, body))
|
||||||
check("us-ascii")
|
self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
|
||||||
check("iso-8859-1")
|
self.assertEqual(ET.XML(xml).text, body)
|
||||||
check("iso-8859-15")
|
check("ascii", 'a')
|
||||||
check("cp437")
|
check("us-ascii", 'a')
|
||||||
check("mac-roman")
|
check("iso-8859-1", '\xbd')
|
||||||
|
check("iso-8859-15", '\u20ac')
|
||||||
|
check("cp437", '\u221a')
|
||||||
|
check("mac-roman", '\u02da')
|
||||||
|
|
||||||
def test_methods(self):
|
def test_methods(self):
|
||||||
# Test serialization methods.
|
# Test serialization methods.
|
||||||
|
@ -1842,11 +1845,13 @@ class TreeBuilderTest(unittest.TestCase):
|
||||||
|
|
||||||
|
|
||||||
class XMLParserTest(unittest.TestCase):
|
class XMLParserTest(unittest.TestCase):
|
||||||
sample1 = '<file><line>22</line></file>'
|
sample1 = b'<file><line>22</line></file>'
|
||||||
sample2 = ('<!DOCTYPE html PUBLIC'
|
sample2 = (b'<!DOCTYPE html PUBLIC'
|
||||||
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
||||||
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
|
b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
|
||||||
'<html>text</html>')
|
b'<html>text</html>')
|
||||||
|
sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
|
||||||
|
'<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')
|
||||||
|
|
||||||
def _check_sample_element(self, e):
|
def _check_sample_element(self, e):
|
||||||
self.assertEqual(e.tag, 'file')
|
self.assertEqual(e.tag, 'file')
|
||||||
|
@ -1882,12 +1887,21 @@ class XMLParserTest(unittest.TestCase):
|
||||||
_doctype = (name, pubid, system)
|
_doctype = (name, pubid, system)
|
||||||
|
|
||||||
parser = MyParserWithDoctype()
|
parser = MyParserWithDoctype()
|
||||||
parser.feed(self.sample2)
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
parser.feed(self.sample2)
|
||||||
parser.close()
|
parser.close()
|
||||||
self.assertEqual(_doctype,
|
self.assertEqual(_doctype,
|
||||||
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
|
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
|
||||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||||
|
|
||||||
|
def test_parse_string(self):
|
||||||
|
parser = ET.XMLParser(target=ET.TreeBuilder())
|
||||||
|
parser.feed(self.sample3)
|
||||||
|
e = parser.close()
|
||||||
|
self.assertEqual(e.tag, 'money')
|
||||||
|
self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')
|
||||||
|
self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')
|
||||||
|
|
||||||
|
|
||||||
class NamespaceParseTest(unittest.TestCase):
|
class NamespaceParseTest(unittest.TestCase):
|
||||||
def test_find_with_namespace(self):
|
def test_find_with_namespace(self):
|
||||||
|
@ -2297,6 +2311,7 @@ def test_main(module=None):
|
||||||
ElementFindTest,
|
ElementFindTest,
|
||||||
ElementIterTest,
|
ElementIterTest,
|
||||||
TreeBuilderTest,
|
TreeBuilderTest,
|
||||||
|
XMLParserTest,
|
||||||
BugsTest,
|
BugsTest,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #16986: ElementTree now correctly parses string input even when the
|
||||||
|
encoding declared in the XML document is not UTF-8 or US-ASCII.
|
||||||
|
|
||||||
- Issue #17812: Fixed quadratic complexity of base64.b32encode().
|
- Issue #17812: Fixed quadratic complexity of base64.b32encode().
|
||||||
|
|
||||||
- Issue #17980: Fix possible abuse of ssl.match_hostname() for denial of
|
- Issue #17980: Fix possible abuse of ssl.match_hostname() for denial of
|
||||||
|
|
|
@ -3330,7 +3330,7 @@ xmlparser_dealloc(XMLParserObject* self)
|
||||||
}
|
}
|
||||||
|
|
||||||
LOCAL(PyObject*)
|
LOCAL(PyObject*)
|
||||||
expat_parse(XMLParserObject* self, char* data, int data_len, int final)
|
expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
|
||||||
{
|
{
|
||||||
int ok;
|
int ok;
|
||||||
|
|
||||||
|
@ -3376,16 +3376,37 @@ xmlparser_close(XMLParserObject* self, PyObject* args)
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
xmlparser_feed(XMLParserObject* self, PyObject* args)
|
xmlparser_feed(XMLParserObject* self, PyObject* arg)
|
||||||
{
|
{
|
||||||
/* feed data to parser */
|
/* feed data to parser */
|
||||||
|
|
||||||
char* data;
|
if (PyUnicode_Check(arg)) {
|
||||||
int data_len;
|
Py_ssize_t data_len;
|
||||||
if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
|
const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
|
||||||
return NULL;
|
if (data == NULL)
|
||||||
|
return NULL;
|
||||||
return expat_parse(self, data, data_len, 0);
|
if (data_len > INT_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
/* Explicitly set UTF-8 encoding. Return code ignored. */
|
||||||
|
(void)EXPAT(SetEncoding)(self->parser, "utf-8");
|
||||||
|
return expat_parse(self, data, (int)data_len, 0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Py_buffer view;
|
||||||
|
PyObject *res;
|
||||||
|
if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
|
||||||
|
return NULL;
|
||||||
|
if (view.len > INT_MAX) {
|
||||||
|
PyBuffer_Release(&view);
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
res = expat_parse(self, view.buf, (int)view.len, 0);
|
||||||
|
PyBuffer_Release(&view);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
|
@ -3570,7 +3591,7 @@ xmlparser_setevents(XMLParserObject *self, PyObject* args)
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyMethodDef xmlparser_methods[] = {
|
static PyMethodDef xmlparser_methods[] = {
|
||||||
{"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
|
{"feed", (PyCFunction) xmlparser_feed, METH_O},
|
||||||
{"close", (PyCFunction) xmlparser_close, METH_VARARGS},
|
{"close", (PyCFunction) xmlparser_close, METH_VARARGS},
|
||||||
{"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
|
{"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
|
||||||
{"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
|
{"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
|
||||||
|
|
|
@ -1937,6 +1937,7 @@ MODULE_INITFUNC(void)
|
||||||
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
|
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
|
||||||
capi.SetUserData = XML_SetUserData;
|
capi.SetUserData = XML_SetUserData;
|
||||||
capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
|
capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
|
||||||
|
capi.SetEncoding = XML_SetEncoding;
|
||||||
|
|
||||||
/* export using capsule */
|
/* export using capsule */
|
||||||
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
|
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
|
||||||
|
|
Loading…
Reference in New Issue