From 684d5fd42067109f46e94ea3ddab72ebd4e130ee Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 3 May 2012 02:32:34 +0200 Subject: [PATCH] Fix PyUnicode_Substring() for start >= length and start > end. Remove the fast-path for 1-character strings: unicode_fromascii() and _PyUnicode_FromUCS*() now have their own fast-path for 1-character strings. --- Objects/unicodeobject.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f427fd3d11c..09b57338a0d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12067,20 +12067,22 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) if (PyUnicode_READY(self) == -1) return NULL; - end = Py_MIN(end, PyUnicode_GET_LENGTH(self)); + length = PyUnicode_GET_LENGTH(self); + end = Py_MIN(end, length); - if (start == 0 && end == PyUnicode_GET_LENGTH(self)) + if (start == 0 && end == length) return unicode_result_unchanged(self); - length = end - start; - if (length == 1) - return unicode_getitem(self, start); - if (start < 0 || end < 0) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } + if (start >= length || end < start) { + assert(end == length); + return PyUnicode_New(0, 0); + } + length = end - start; if (PyUnicode_IS_ASCII(self)) { data = PyUnicode_1BYTE_DATA(self); return unicode_fromascii(data + start, length);