Merged revisions 78418 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk ........ r78418 | eric.smith | 2010-02-24 09:15:36 -0500 (Wed, 24 Feb 2010) | 1 line Issue #7309: Unchecked pointer access when converting UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to strings. ........
2010-02-24 15:42:29 +00:00 · 2010-02-24 15:42:29 +00:00 · 0facd77015
parent 33091a8321
commit 0facd77015
3 changed files with 117 additions and 32 deletions
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@ -568,6 +568,42 @@ class ExceptionTests(unittest.TestCase):
            pass
        self.assertEquals(e, (None, None, None))

+    def testUnicodeChangeAttributes(self):
+        # See issue 7309. This was a crasher.
+
+        u = UnicodeEncodeError('baz', 'xxxxx', 1, 5, 'foo')
+        self.assertEqual(str(u), "'baz' codec can't encode characters in position 1-4: foo")
+        u.end = 2
+        self.assertEqual(str(u), "'baz' codec can't encode character '\\x78' in position 1: foo")
+        u.end = 5
+        u.reason = 0x345345345345345345
+        self.assertEqual(str(u), "'baz' codec can't encode characters in position 1-4: 965230951443685724997")
+        u.encoding = 4000
+        self.assertEqual(str(u), "'4000' codec can't encode characters in position 1-4: 965230951443685724997")
+        u.start = 1000
+        self.assertEqual(str(u), "'4000' codec can't encode characters in position 1000-4: 965230951443685724997")
+
+        u = UnicodeDecodeError('baz', b'xxxxx', 1, 5, 'foo')
+        self.assertEqual(str(u), "'baz' codec can't decode bytes in position 1-4: foo")
+        u.end = 2
+        self.assertEqual(str(u), "'baz' codec can't decode byte 0x78 in position 1: foo")
+        u.end = 5
+        u.reason = 0x345345345345345345
+        self.assertEqual(str(u), "'baz' codec can't decode bytes in position 1-4: 965230951443685724997")
+        u.encoding = 4000
+        self.assertEqual(str(u), "'4000' codec can't decode bytes in position 1-4: 965230951443685724997")
+        u.start = 1000
+        self.assertEqual(str(u), "'4000' codec can't decode bytes in position 1000-4: 965230951443685724997")
+
+        u = UnicodeTranslateError('xxxx', 1, 5, 'foo')
+        self.assertEqual(str(u), "can't translate characters in position 1-4: foo")
+        u.end = 2
+        self.assertEqual(str(u), "can't translate character '\\x78' in position 1: foo")
+        u.end = 5
+        u.reason = 0x345345345345345345
+        self.assertEqual(str(u), "can't translate characters in position 1-4: 965230951443685724997")
+        u.start = 1000
+        self.assertEqual(str(u), "can't translate characters in position 1000-4: 965230951443685724997")

    def test_badisinstance(self):
        # Bug #2542: if issubclass(e, MyException) raises an exception,
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
 Core and Builtins
 -----------------

+- Issue #7309: Fix unchecked attribute access when converting
+  UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to
+  strings.
+
 - Issue #6902: Fix problem with built-in types format incorrectly with
  0 padding.

--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@ -1434,8 +1434,20 @@ static PyObject *
 UnicodeEncodeError_str(PyObject *self)
 {
    PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+    PyObject *result = NULL;
+    PyObject *reason_str = NULL;
+    PyObject *encoding_str = NULL;

-    if (uself->end==uself->start+1) {
+    /* Get reason and encoding as strings, which they might not be if
+       they've been modified after we were constructed. */
+    reason_str = PyObject_Str(uself->reason);
+    if (reason_str == NULL)
+        goto done;
+    encoding_str = PyObject_Str(uself->encoding);
+    if (encoding_str == NULL)
+        goto done;
+
+    if (uself->start < PyUnicode_GET_SIZE(uself->object) && uself->end == uself->start+1) {
        int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start];
        const char *fmt;
        if (badchar <= 0xff)
@ -1444,21 +1456,25 @@ UnicodeEncodeError_str(PyObject *self)
            fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
        else
            fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
-        return PyUnicode_FromFormat(
+        result = PyUnicode_FromFormat(
            fmt,
-            ((PyUnicodeErrorObject *)self)->encoding,
+            encoding_str,
            badchar,
            uself->start,
-            ((PyUnicodeErrorObject *)self)->reason
-        );
+            reason_str);
    }
-    return PyUnicode_FromFormat(
+    else {
+        result = PyUnicode_FromFormat(
            "'%U' codec can't encode characters in position %zd-%zd: %U",
-        ((PyUnicodeErrorObject *)self)->encoding,
+            encoding_str,
            uself->start,
            uself->end-1,
-        ((PyUnicodeErrorObject *)self)->reason
-    );
+            reason_str);
+    }
+done:
+    Py_XDECREF(reason_str);
+    Py_XDECREF(encoding_str);
+    return result;
 }

 static PyTypeObject _PyExc_UnicodeEncodeError = {
@ -1536,25 +1552,42 @@ static PyObject *
 UnicodeDecodeError_str(PyObject *self)
 {
    PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+    PyObject *result = NULL;
+    PyObject *reason_str = NULL;
+    PyObject *encoding_str = NULL;

-    if (uself->end==uself->start+1) {
+    /* Get reason and encoding as strings, which they might not be if
+       they've been modified after we were constructed. */
+    reason_str = PyObject_Str(uself->reason);
+    if (reason_str == NULL)
+        goto done;
+    encoding_str = PyObject_Str(uself->encoding);
+    if (encoding_str == NULL)
+        goto done;
+
+    if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
        int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
-        return PyUnicode_FromFormat(
+        result = PyUnicode_FromFormat(
            "'%U' codec can't decode byte 0x%02x in position %zd: %U",
-            ((PyUnicodeErrorObject *)self)->encoding,
+            encoding_str,
            byte,
            uself->start,
-            ((PyUnicodeErrorObject *)self)->reason
-        );
+            reason_str);
    }
-    return PyUnicode_FromFormat(
+    else {
+        result = PyUnicode_FromFormat(
            "'%U' codec can't decode bytes in position %zd-%zd: %U",
-        ((PyUnicodeErrorObject *)self)->encoding,
+            encoding_str,
            uself->start,
            uself->end-1,
-        ((PyUnicodeErrorObject *)self)->reason
+            reason_str
            );
    }
+done:
+    Py_XDECREF(reason_str);
+    Py_XDECREF(encoding_str);
+    return result;
+}

 static PyTypeObject _PyExc_UnicodeDecodeError = {
    PyVarObject_HEAD_INIT(NULL, 0)
@ -1617,8 +1650,16 @@ static PyObject *
 UnicodeTranslateError_str(PyObject *self)
 {
    PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+    PyObject *result = NULL;
+    PyObject *reason_str = NULL;

-    if (uself->end==uself->start+1) {
+    /* Get reason as a string, which it might not be if it's been
+       modified after we were constructed. */
+    reason_str = PyObject_Str(uself->reason);
+    if (reason_str == NULL)
+        goto done;
+
+    if (uself->start < PyUnicode_GET_SIZE(uself->object) && uself->end == uself->start+1) {
        int badchar = (int)PyUnicode_AS_UNICODE(uself->object)[uself->start];
        const char *fmt;
        if (badchar <= 0xff)
@ -1631,16 +1672,20 @@ UnicodeTranslateError_str(PyObject *self)
            fmt,
            badchar,
            uself->start,
-            uself->reason
+            reason_str
        );
-    }
-    return PyUnicode_FromFormat(
+    } else {
+        result = PyUnicode_FromFormat(
            "can't translate characters in position %zd-%zd: %U",
            uself->start,
            uself->end-1,
-        uself->reason
+            reason_str
            );
    }
+done:
+    Py_XDECREF(reason_str);
+    return result;
+}

 static PyTypeObject _PyExc_UnicodeTranslateError = {
    PyVarObject_HEAD_INIT(NULL, 0)