From a2983c6734f06a0ccb32d03877e3f90e46fbdabb Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 1 Sep 2010 15:16:41 +0000 Subject: [PATCH] Merged revisions 84394 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r84394 | antoine.pitrou | 2010-09-01 17:10:12 +0200 (mer., 01 sept. 2010) | 4 lines Issue #7415: PyUnicode_FromEncodedObject() now uses the new buffer API properly. Patch by Stefan Behnel. ........ --- Misc/ACKS | 1 + Misc/NEWS | 3 +++ Objects/unicodeobject.c | 53 ++++++++++++++++++++--------------------- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/Misc/ACKS b/Misc/ACKS index dfbbc81e133..b4ac3a70f81 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -58,6 +58,7 @@ David Beazley Robin Becker Neal Becker Bill Bedford +Stefan Behnel Reimer Behrends Ben Bell Thomas Bellman diff --git a/Misc/NEWS b/Misc/NEWS index 8fc8d083842..4086378017c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.1.3? Core and Builtins ----------------- +- Issue #7415: PyUnicode_FromEncodedObject() now uses the new buffer API + properly. Patch by Stefan Behnel. + - Restore GIL in nis_cat in case of error. - Issue #9712: Fix tokenize on identifiers that start with non-ascii names. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 028b42d8ebf..b50293c89c6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1162,8 +1162,7 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, const char *encoding, const char *errors) { - const char *s = NULL; - Py_ssize_t len; + Py_buffer buffer; PyObject *v; if (obj == NULL) { @@ -1171,44 +1170,44 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, return NULL; } + /* Decoding bytes objects is the most common case and should be fast */ + if (PyBytes_Check(obj)) { + if (PyBytes_GET_SIZE(obj) == 0) { + Py_INCREF(unicode_empty); + v = (PyObject *) unicode_empty; + } + else { + v = PyUnicode_Decode( + PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), + encoding, errors); + } + return v; + } + if (PyUnicode_Check(obj)) { PyErr_SetString(PyExc_TypeError, "decoding str is not supported"); return NULL; } - /* Coerce object */ - if (PyBytes_Check(obj)) { - s = PyBytes_AS_STRING(obj); - len = PyBytes_GET_SIZE(obj); - } - else if (PyByteArray_Check(obj)) { - s = PyByteArray_AS_STRING(obj); - len = PyByteArray_GET_SIZE(obj); - } - else if (PyObject_AsCharBuffer(obj, &s, &len)) { - /* Overwrite the error message with something more useful in - case of a TypeError. */ - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, - "coercing to str: need string or buffer, " - "%.80s found", - Py_TYPE(obj)->tp_name); - goto onError; + /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */ + if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) { + PyErr_Format(PyExc_TypeError, + "coercing to str: need bytes, bytearray " + "or buffer-like object, %.80s found", + Py_TYPE(obj)->tp_name); + return NULL; } - /* Convert to Unicode */ - if (len == 0) { + if (buffer.len == 0) { Py_INCREF(unicode_empty); - v = (PyObject *)unicode_empty; + v = (PyObject *) unicode_empty; } else - v = PyUnicode_Decode(s, len, encoding, errors); + v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); + PyBuffer_Release(&buffer); return v; - - onError: - return NULL; } PyObject *PyUnicode_Decode(const char *s,