From 6b56a7fd3d3ef1ceb6d97d3dad5409c40723011b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Oct 2011 20:04:52 +0200 Subject: [PATCH 1/4] Add assertion to _Py_ReleaseInternedUnicodeStrings() if READY fails --- Objects/unicodeobject.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a958951c896..3299901bb0b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13131,7 +13131,7 @@ PyUnicode_InternInPlace(PyObject **p) if (PyUnicode_CHECK_INTERNED(s)) return; if (_PyUnicode_READY_REPLACE(p)) { - assert(0 && "PyUnicode_READY fail in PyUnicode_InternInPlace"); + assert(0 && "_PyUnicode_READY_REPLACE fail in PyUnicode_InternInPlace"); return; } s = (PyUnicodeObject *)(*p); @@ -13217,8 +13217,10 @@ _Py_ReleaseInternedUnicodeStrings(void) n); for (i = 0; i < n; i++) { s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i); - if (PyUnicode_READY(s) == -1) + if (PyUnicode_READY(s) == -1) { + assert(0 && "could not ready string"); fprintf(stderr, "could not ready string\n"); + } switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_NOT_INTERNED: /* XXX Shouldn't happen */ From 17efeed28469c5fe15a61aa927b17cbde5a1160d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Oct 2011 20:05:46 +0200 Subject: [PATCH 2/4] Add DONT_MAKE_RESULT_READY to unicodeobject.c to help detecting bugs Use also _PyUnicode_READY_REPLACE() when it's applicable. 
--- Objects/unicodeobject.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3299901bb0b..3e2b8a2eaf1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2625,10 +2625,12 @@ PyUnicode_Decode(const char *s, goto onError; } Py_DECREF(buffer); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&unicode)) { Py_DECREF(unicode); return NULL; } +#endif return unicode; onError: @@ -3674,10 +3676,12 @@ utf7Error: Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&unicode)) { Py_DECREF(unicode); return NULL; } +#endif return (PyObject *)unicode; onError: @@ -4244,10 +4248,12 @@ PyUnicode_DecodeUTF8Stateful(const char *s, Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&unicode)) { Py_DECREF(unicode); return NULL; } +#endif return (PyObject *)unicode; onError: @@ -4747,10 +4753,12 @@ PyUnicode_DecodeUTF32Stateful(const char *s, Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&unicode)) { Py_DECREF(unicode); return NULL; } +#endif return (PyObject *)unicode; onError: @@ -5145,10 +5153,12 @@ PyUnicode_DecodeUTF16Stateful(const char *s, Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&unicode)) { Py_DECREF(unicode); return NULL; } +#endif return (PyObject *)unicode; onError: @@ -5604,10 +5614,12 @@ PyUnicode_DecodeUnicodeEscape(const char *s, } Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { Py_DECREF(v); return NULL; } +#endif return (PyObject *)v; ucnhashError: @@ -5905,10 +5917,12 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s, goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { 
Py_DECREF(v); return NULL; } +#endif return (PyObject *)v; onError: @@ -6093,10 +6107,12 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { Py_DECREF(v); return NULL; } +#endif return (PyObject *)v; onError: @@ -6519,10 +6535,12 @@ PyUnicode_DecodeASCII(const char *s, goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { Py_DECREF(v); return NULL; } +#endif return (PyObject *)v; onError: @@ -6713,10 +6731,12 @@ PyUnicode_DecodeMBCSStateful(const char *s, goto retry; } #endif +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { Py_DECREF(v); return NULL; } +#endif return (PyObject *)v; } @@ -7012,10 +7032,12 @@ PyUnicode_DecodeCharmap(const char *s, goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); +#ifndef DONT_MAKE_RESULT_READY if (_PyUnicode_READY_REPLACE(&v)) { Py_DECREF(v); return NULL; } +#endif return (PyObject *)v; onError: @@ -8057,10 +8079,12 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, p[i] = '0' + decimal; } } - if (PyUnicode_READY((PyUnicodeObject*)result) == -1) { +#ifndef DONT_MAKE_RESULT_READY + if (_PyUnicode_READY_REPLACE(&result)) { Py_DECREF(result); return NULL; } +#endif return result; } /* --- Decimal Encoder ---------------------------------------------------- */ @@ -10265,10 +10289,12 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args) } } assert (j == PyUnicode_GET_LENGTH(u)); - if (PyUnicode_READY(u)) { +#ifndef DONT_MAKE_RESULT_READY + if (_PyUnicode_READY_REPLACE(&u)) { Py_DECREF(u); return NULL; } +#endif return (PyObject*) u; overflow: From e06e145943854d398adfe053cec757e269af0631 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Oct 2011 20:52:31 +0200 Subject: [PATCH 3/4] _PyUnicode_READY_REPLACE() cannot be used in unicode_subtype_new() --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3e2b8a2eaf1..b628eeb93a8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12949,7 +12949,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (unicode == NULL) return NULL; assert(_PyUnicode_CHECK(unicode)); - if (_PyUnicode_READY_REPLACE(&unicode)) + if (PyUnicode_READY(unicode)) return NULL; self = (PyUnicodeObject *) type->tp_alloc(type, 0); From e1335c711c555567a9951dd5c1dfde85545445d4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Oct 2011 20:53:03 +0200 Subject: [PATCH 4/4] Fix usage of PyUnicode_READY() --- Modules/_io/stringio.c | 4 ++++ Objects/unicodeobject.c | 14 +++++++++----- Python/getargs.c | 21 ++++++++++++++------- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index c40163f4def..a4536b1b07c 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -131,6 +131,10 @@ write_str(stringio *self, PyObject *obj) return -1; assert(PyUnicode_Check(decoded)); + if (PyUnicode_READY(decoded)) { + Py_DECREF(decoded); + return -1; + } len = PyUnicode_GET_LENGTH(decoded); assert(len >= 0); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b628eeb93a8..cd67f60906b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2120,6 +2120,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace"); if (!str_obj) goto fail; + if (PyUnicode_READY(str_obj)) { + Py_DECREF(str_obj); + goto fail; + } argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj); maxchar = Py_MAX(maxchar, argmaxchar); n += PyUnicode_GET_LENGTH(str_obj); @@ -10062,17 +10066,17 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) goto error; } + if (PyUnicode_READY(left)) + goto error; + if (PyUnicode_READY(right)) + goto error; + if (PyUnicode_CheckExact(left) && left != 
unicode_empty && PyUnicode_CheckExact(right) && right != unicode_empty && unicode_resizable(left) && (_PyUnicode_KIND(right) <= _PyUnicode_KIND(left) || _PyUnicode_WSTR(left) != NULL)) { - if (PyUnicode_READY(left)) - goto error; - if (PyUnicode_READY(right)) - goto error; - /* Don't resize for ascii += latin1. Convert ascii to latin1 requires to change the structure size, but characters are stored just after the structure, and so it requires to move all charactres which is diff --git a/Python/getargs.c b/Python/getargs.c index 0e7d9c43506..2c2db36193c 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -834,14 +834,21 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); - if (PyUnicode_Check(arg) && - PyUnicode_GET_LENGTH(arg) == 1) { - int kind = PyUnicode_KIND(arg); - void *data = PyUnicode_DATA(arg); - *p = PyUnicode_READ(kind, data, 0); - } - else + int kind; + void *data; + + if (!PyUnicode_Check(arg)) return converterr("a unicode character", arg, msgbuf, bufsize); + + if (PyUnicode_READY(arg)) + RETURN_ERR_OCCURRED; + + if (PyUnicode_GET_LENGTH(arg) != 1) + return converterr("a unicode character", arg, msgbuf, bufsize); + + kind = PyUnicode_KIND(arg); + data = PyUnicode_DATA(arg); + *p = PyUnicode_READ(kind, data, 0); break; }