Patch #1309009: Fix segfault in pyexpat when the XML document is in latin_1,
but Python incorrectly assumes it is in UTF-8 format.

Will backport.
This commit is contained in:
Neal Norwitz 2005-09-30 04:46:49 +00:00
parent aa93517de8
commit 484d9a409a
4 changed files with 19 additions and 1 deletions

View File

@ -889,6 +889,15 @@ def testEncodings():
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
"testEncodings - encoding EURO SIGN")
# Verify that character decoding errors throw exceptions instead of crashing
try:
doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
except UnicodeDecodeError:
pass
else:
print 'parsing with bad encoding should raise a UnicodeDecodeError'
doc.unlink()
class UserDataHandler:

View File

@ -305,6 +305,7 @@ Flemming Kjær Jensen
Jiba
Orjan Johansen
Simon Johnston
Evan Jones
Richard Jones
Irmen de Jong
Lucas de Jonge

View File

@ -153,6 +153,9 @@ present).
Extension Modules
-----------------
- Patch #1309009: Fix segfault in pyexpat when the XML document is in latin_1,
but Python incorrectly assumes it is in UTF-8 format.
- Fix parse errors in the readline module when compiling without threads.
- Patch #1288833: Removed thread lock from socket.getaddrinfo on

View File

@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str)
{
PyObject *result = STRING_CONV_FUNC(str);
PyObject *value;
/* result can be NULL if the unicode conversion failed. */
if (!result)
return result;
if (!self->intern)
return result;
value = PyDict_GetItem(self->intern, result);
@ -572,7 +575,9 @@ my_StartElementHandler(void *userData,
Py_DECREF(v);
}
}
args = Py_BuildValue("(NN)", string_intern(self, name), container);
args = string_intern(self, name);
if (args != NULL)
args = Py_BuildValue("(NN)", args, container);
if (args == NULL) {
Py_DECREF(container);
return;