From 0facd77015465ac7bd486f5cb98d6265f61428a9 Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Wed, 24 Feb 2010 15:42:29 +0000 Subject: [PATCH] Merged revisions 78418 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r78418 | eric.smith | 2010-02-24 09:15:36 -0500 (Wed, 24 Feb 2010) | 1 line Issue #7309: Unchecked pointer access when converting UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to strings. ........ --- Lib/test/test_exceptions.py | 36 ++++++++++++ Misc/NEWS | 4 ++ Objects/exceptions.c | 109 +++++++++++++++++++++++++----------- 3 files changed, 117 insertions(+), 32 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index b2dcd8df618..2838d476896 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -568,6 +568,42 @@ class ExceptionTests(unittest.TestCase): pass self.assertEquals(e, (None, None, None)) + def testUnicodeChangeAttributes(self): + # See issue 7309. This was a crasher. 
+ + u = UnicodeEncodeError('baz', 'xxxxx', 1, 5, 'foo') + self.assertEqual(str(u), "'baz' codec can't encode characters in position 1-4: foo") + u.end = 2 + self.assertEqual(str(u), "'baz' codec can't encode character '\\x78' in position 1: foo") + u.end = 5 + u.reason = 0x345345345345345345 + self.assertEqual(str(u), "'baz' codec can't encode characters in position 1-4: 965230951443685724997") + u.encoding = 4000 + self.assertEqual(str(u), "'4000' codec can't encode characters in position 1-4: 965230951443685724997") + u.start = 1000 + self.assertEqual(str(u), "'4000' codec can't encode characters in position 1000-4: 965230951443685724997") + + u = UnicodeDecodeError('baz', b'xxxxx', 1, 5, 'foo') + self.assertEqual(str(u), "'baz' codec can't decode bytes in position 1-4: foo") + u.end = 2 + self.assertEqual(str(u), "'baz' codec can't decode byte 0x78 in position 1: foo") + u.end = 5 + u.reason = 0x345345345345345345 + self.assertEqual(str(u), "'baz' codec can't decode bytes in position 1-4: 965230951443685724997") + u.encoding = 4000 + self.assertEqual(str(u), "'4000' codec can't decode bytes in position 1-4: 965230951443685724997") + u.start = 1000 + self.assertEqual(str(u), "'4000' codec can't decode bytes in position 1000-4: 965230951443685724997") + + u = UnicodeTranslateError('xxxx', 1, 5, 'foo') + self.assertEqual(str(u), "can't translate characters in position 1-4: foo") + u.end = 2 + self.assertEqual(str(u), "can't translate character '\\x78' in position 1: foo") + u.end = 5 + u.reason = 0x345345345345345345 + self.assertEqual(str(u), "can't translate characters in position 1-4: 965230951443685724997") + u.start = 1000 + self.assertEqual(str(u), "can't translate characters in position 1000-4: 965230951443685724997") def test_badisinstance(self): # Bug #2542: if issubclass(e, MyException) raises an exception, diff --git a/Misc/NEWS b/Misc/NEWS index 5467809b806..d777a95b301 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.2 
Alpha 1? Core and Builtins ----------------- +- Issue #7309: Fix unchecked attribute access when converting + UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to + strings. + - Issue #6902: Fix problem with built-in types format incorrectly with 0 padding. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 0e28f0fb70c..7025b6f8aa0 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -1434,8 +1434,20 @@ static PyObject * UnicodeEncodeError_str(PyObject *self) { PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; + PyObject *result = NULL; + PyObject *reason_str = NULL; + PyObject *encoding_str = NULL; - if (uself->end==uself->start+1) { + /* Get reason and encoding as strings, which they might not be if + they've been modified after we were constructed. */ + reason_str = PyObject_Str(uself->reason); + if (reason_str == NULL) + goto done; + encoding_str = PyObject_Str(uself->encoding); + if (encoding_str == NULL) + goto done; + + if (uself->start < PyUnicode_GET_SIZE(uself->object) && uself->end == uself->start+1) { int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start]; const char *fmt; if (badchar <= 0xff) @@ -1444,21 +1456,25 @@ UnicodeEncodeError_str(PyObject *self) fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U"; else fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U"; - return PyUnicode_FromFormat( + result = PyUnicode_FromFormat( fmt, - ((PyUnicodeErrorObject *)self)->encoding, + encoding_str, badchar, uself->start, - ((PyUnicodeErrorObject *)self)->reason - ); + reason_str); } - return PyUnicode_FromFormat( - "'%U' codec can't encode characters in position %zd-%zd: %U", - ((PyUnicodeErrorObject *)self)->encoding, - uself->start, - uself->end-1, - ((PyUnicodeErrorObject *)self)->reason - ); + else { + result = PyUnicode_FromFormat( + "'%U' codec can't encode characters in position %zd-%zd: %U", + encoding_str, + uself->start, + uself->end-1, + reason_str); 
+ } +done: + Py_XDECREF(reason_str); + Py_XDECREF(encoding_str); + return result; } static PyTypeObject _PyExc_UnicodeEncodeError = { @@ -1536,24 +1552,41 @@ static PyObject * UnicodeDecodeError_str(PyObject *self) { PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; + PyObject *result = NULL; + PyObject *reason_str = NULL; + PyObject *encoding_str = NULL; - if (uself->end==uself->start+1) { + /* Get reason and encoding as strings, which they might not be if + they've been modified after we were constructed. */ + reason_str = PyObject_Str(uself->reason); + if (reason_str == NULL) + goto done; + encoding_str = PyObject_Str(uself->encoding); + if (encoding_str == NULL) + goto done; + + if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) { int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff); - return PyUnicode_FromFormat( + result = PyUnicode_FromFormat( "'%U' codec can't decode byte 0x%02x in position %zd: %U", - ((PyUnicodeErrorObject *)self)->encoding, + encoding_str, byte, uself->start, - ((PyUnicodeErrorObject *)self)->reason - ); + reason_str); } - return PyUnicode_FromFormat( - "'%U' codec can't decode bytes in position %zd-%zd: %U", - ((PyUnicodeErrorObject *)self)->encoding, - uself->start, - uself->end-1, - ((PyUnicodeErrorObject *)self)->reason - ); + else { + result = PyUnicode_FromFormat( + "'%U' codec can't decode bytes in position %zd-%zd: %U", + encoding_str, + uself->start, + uself->end-1, + reason_str + ); + } +done: + Py_XDECREF(reason_str); + Py_XDECREF(encoding_str); + return result; } static PyTypeObject _PyExc_UnicodeDecodeError = { @@ -1617,8 +1650,16 @@ static PyObject * UnicodeTranslateError_str(PyObject *self) { PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; + PyObject *result = NULL; + PyObject *reason_str = NULL; - if (uself->end==uself->start+1) { + /* Get reason as a string, which it might not be if it's been + modified after we were 
constructed. */ + reason_str = PyObject_Str(uself->reason); + if (reason_str == NULL) + goto done; + + if (uself->start < PyUnicode_GET_SIZE(uself->object) && uself->end == uself->start+1) { int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start]; const char *fmt; if (badchar <= 0xff) @@ -1631,15 +1672,19 @@ UnicodeTranslateError_str(PyObject *self) fmt, badchar, uself->start, - uself->reason + reason_str ); + } else { + result = PyUnicode_FromFormat( + "can't translate characters in position %zd-%zd: %U", + uself->start, + uself->end-1, + reason_str + ); } - return PyUnicode_FromFormat( - "can't translate characters in position %zd-%zd: %U", - uself->start, - uself->end-1, - uself->reason - ); +done: + Py_XDECREF(reason_str); + return result; } static PyTypeObject _PyExc_UnicodeTranslateError = {