From d9d769fcdd573ab12b628798288c02dceba53505 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 24 Mar 2015 21:55:47 +0200 Subject: [PATCH] Issue #23573: Increased performance of string search operations (str.find, str.index, str.count, the in operator, str.split, str.partition) with arguments of different kinds (UCS1, UCS2, UCS4). --- Misc/NEWS | 4 + Objects/bytearrayobject.c | 33 ++- Objects/bytesobject.c | 33 ++- Objects/stringlib/fastsearch.h | 4 +- Objects/stringlib/find.h | 23 +-- Objects/unicodeobject.c | 354 ++++++++++++++++++--------------- 6 files changed, 253 insertions(+), 198 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 6a40fba7491..946335de8e4 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ Release date: 2015-03-28 Core and Builtins ----------------- +- Issue #23573: Increased performance of string search operations (str.find, + str.index, str.count, the in operator, str.split, str.partition) with + arguments of different kinds (UCS1, UCS2, UCS4). + - Issue #23753: Python doesn't support anymore platforms without stat() or fstat(), these functions are always required. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 8b3267e7450..195c79f4016 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) char byte; Py_buffer subbuf; const char *sub; - Py_ssize_t sub_len; + Py_ssize_t len, sub_len; Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t res; @@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) sub = &byte; sub_len = 1; } + len = PyByteArray_GET_SIZE(self); - if (dir > 0) - res = stringlib_find_slice( - PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), - sub, sub_len, start, end); - else - res = stringlib_rfind_slice( - PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), - sub, sub_len, start, end); + ADJUST_INDICES(start, end, len); + if (end - start < sub_len) + res = -1; + else if (sub_len == 1) { + unsigned char needle = *sub; + int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH; + res = stringlib_fastsearch_memchr_1char( + PyByteArray_AS_STRING(self) + start, end - start, + needle, needle, mode); + if (res >= 0) + res += start; + } + else { + if (dir > 0) + res = stringlib_find_slice( + PyByteArray_AS_STRING(self), len, + sub, sub_len, start, end); + else + res = stringlib_rfind_slice( + PyByteArray_AS_STRING(self), len, + sub, sub_len, start, end); + } if (subobj) PyBuffer_Release(&subbuf); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 5a9dfbd5dbb..ae3c2895a01 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) char byte; Py_buffer subbuf; const char *sub; - Py_ssize_t sub_len; + Py_ssize_t len, sub_len; Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t res; @@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) sub = &byte; sub_len = 1; } + len = PyBytes_GET_SIZE(self); - if (dir > 0) - res = stringlib_find_slice( - PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sub, sub_len, start, end); - else - res = stringlib_rfind_slice( - PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), - sub, sub_len, start, end); + ADJUST_INDICES(start, end, len); + if (end - start < sub_len) + res = -1; + else if (sub_len == 1) { + unsigned char needle = *sub; + int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH; + res = stringlib_fastsearch_memchr_1char( + PyBytes_AS_STRING(self) + start, end - start, + needle, needle, mode); + if (res >= 0) + res += start; + } + else { + if (dir > 0) + res = stringlib_find_slice( + PyBytes_AS_STRING(self), len, + sub, sub_len, start, end); + else + res = stringlib_rfind_slice( + PyBytes_AS_STRING(self), len, + sub, sub_len, start, end); + } if (subobj) PyBuffer_Release(&subbuf); diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index cd7cac40fa4..cda68e77c8d 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -36,7 +36,7 @@ Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch, unsigned char needle, - Py_ssize_t maxcount, int mode) + int mode) { if (mode == FAST_SEARCH) { const STRINGLIB_CHAR *ptr = s; @@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, if (needle != 0) #endif return STRINGLIB(fastsearch_memchr_1char) - (s, n, p[0], needle, maxcount, mode); + (s, n, p[0], needle, mode); } if (mode == FAST_COUNT) { for (i = 0; i < n; i++) diff --git a/Objects/stringlib/find.h b/Objects/stringlib/find.h index 518e012b759..14815f6e62b 100644 --- a/Objects/stringlib/find.h +++ b/Objects/stringlib/find.h @@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, { Py_ssize_t pos; - if (str_len < 0) - return -1; + assert(str_len >= 0); if (sub_len == 0) return offset; @@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, { Py_ssize_t pos; - if (str_len < 0) - return -1; + assert(str_len >= 0); if (sub_len == 0) return str_len + offset; @@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, return pos; } -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, Py_ssize_t start, Py_ssize_t end) { - ADJUST_INDICES(start, end, str_len); return STRINGLIB(find)(str + start, end - start, sub, sub_len, start); } @@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, Py_ssize_t start, Py_ssize_t end) { - ADJUST_INDICES(start, end, str_len); return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 56d9a023ddc..5343b2106fb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -641,7 +641,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len) static PyObject * fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s)); -Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind, +Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind, Py_ssize_t size, Py_UCS4 ch, int direction) { @@ -8959,35 +8959,61 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s, /* --- Helpers ------------------------------------------------------------ */ +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len) \ + if (end > len) \ + end = len; \ + else if (end < 0) { \ + end += len; \ + if (end < 0) \ + end = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } + static Py_ssize_t any_find_slice(int direction, PyObject* s1, PyObject* s2, Py_ssize_t start, Py_ssize_t end) { - int kind1, kind2, kind; + int kind1, kind2; void *buf1, *buf2; Py_ssize_t len1, len2, result; kind1 = PyUnicode_KIND(s1); kind2 = PyUnicode_KIND(s2); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(s1); - buf2 = PyUnicode_DATA(s2); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(s1, kind); - if (!buf1) - return -2; - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(s2, kind); - if (!buf2) { - if (kind1 != kind) PyMem_Free(buf1); - return -2; - } + if (kind1 < kind2) + return -1; + len1 = PyUnicode_GET_LENGTH(s1); len2 = PyUnicode_GET_LENGTH(s2); + ADJUST_INDICES(start, end, len1); + if (end - start < len2) + return -1; + + buf1 = PyUnicode_DATA(s1); + buf2 = PyUnicode_DATA(s2); + if (len2 == 1) { + Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0); + result = findchar((const char *)buf1 + kind1*start, + kind1, end - start, ch, direction); + if (result == -1) + return -1; + else + return start + result; + } + + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(s2, kind1); + if (!buf2) + return -2; + } if (direction > 0) { - switch (kind) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) result = asciilib_find_slice(buf1, len1, buf2, len2, start, end); @@ -9005,7 +9031,7 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2, } } else { - switch (kind) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end); @@ -9023,9 +9049,7 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2, } } - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return result; @@ -9115,21 +9139,6 @@ _PyUnicode_InsertThousandsGrouping( } -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, @@ -9139,7 +9148,7 @@ PyUnicode_Count(PyObject *str, Py_ssize_t result; PyObject* str_obj; PyObject* sub_obj; - int kind1, kind2, kind; + int kind1, kind2; void *buf1 = NULL, *buf2 = NULL; Py_ssize_t len1, len2; @@ -9159,24 +9168,30 @@ PyUnicode_Count(PyObject *str, kind1 = PyUnicode_KIND(str_obj); kind2 = PyUnicode_KIND(sub_obj); - kind = kind1; - buf1 = PyUnicode_DATA(str_obj); - buf2 = PyUnicode_DATA(sub_obj); - if (kind2 != kind) { - if (kind2 > kind) { - Py_DECREF(sub_obj); - Py_DECREF(str_obj); - return 0; - } - buf2 = _PyUnicode_AsKind(sub_obj, kind); + if (kind1 < kind2) { + Py_DECREF(sub_obj); + Py_DECREF(str_obj); + return 0; } - if (!buf2) - goto onError; + len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sub_obj); - ADJUST_INDICES(start, end, len1); - switch (kind) { + if (end - start < len2) { + Py_DECREF(sub_obj); + Py_DECREF(str_obj); + return 0; + } + + buf1 = PyUnicode_DATA(str_obj); + buf2 = PyUnicode_DATA(sub_obj); + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(sub_obj, kind1); + if (!buf2) + goto onError; + } + + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj)) result = asciilib_count( @@ -9208,14 +9223,14 @@ PyUnicode_Count(PyObject *str, Py_DECREF(sub_obj); Py_DECREF(str_obj); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return result; onError: Py_DECREF(sub_obj); Py_DECREF(str_obj); - if (kind2 != kind && buf2) + if (kind2 != kind1 && buf2) PyMem_Free(buf2); return -1; } @@ -9268,6 +9283,8 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, } if (end > PyUnicode_GET_LENGTH(str)) end = PyUnicode_GET_LENGTH(str); + if (start >= end) + return -1; kind = PyUnicode_KIND(str); result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, kind, end-start, ch, direction); @@ -10014,7 +10031,7 @@ split(PyObject *self, PyObject *substring, Py_ssize_t maxcount) { - int kind1, kind2, kind; + int kind1, kind2; void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; @@ -10058,23 +10075,25 @@ split(PyObject *self, kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(self, kind); - if (!buf1) - return NULL; - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(substring, kind); - if (!buf2) { - if (kind1 != kind) PyMem_Free(buf1); - return NULL; - } len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); + if (kind1 < kind2 || len1 < len2) { + out = PyList_New(1); + if (out == NULL) + return NULL; + Py_INCREF(self); + PyList_SET_ITEM(out, 0, self); + return out; + } + buf1 = PyUnicode_DATA(self); + buf2 = PyUnicode_DATA(substring); + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(substring, kind1); + if (!buf2) + return NULL; + } - switch (kind) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) out = asciilib_split( @@ -10094,9 +10113,7 @@ split(PyObject *self, default: out = NULL; } - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return out; } @@ -10106,7 +10123,7 @@ rsplit(PyObject *self, PyObject *substring, Py_ssize_t maxcount) { - int kind1, kind2, kind; + int kind1, kind2; void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; @@ -10150,23 +10167,25 @@ rsplit(PyObject *self, kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(self, kind); - if (!buf1) - return NULL; - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(substring, kind); - if (!buf2) { - if (kind1 != kind) PyMem_Free(buf1); - return NULL; - } len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); + if (kind1 < kind2 || len1 < len2) { + out = PyList_New(1); + if (out == NULL) + return NULL; + Py_INCREF(self); + PyList_SET_ITEM(out, 0, self); + return out; + } + buf1 = PyUnicode_DATA(self); + buf2 = PyUnicode_DATA(substring); + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(substring, kind1); + if (!buf2) + return NULL; + } - switch (kind) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) out = asciilib_rsplit( @@ -10186,9 +10205,7 @@ rsplit(PyObject *self, default: out = NULL; } - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return out; } @@ -10407,7 +10424,7 @@ replace(PyObject *self, PyObject *str1, } /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) - PyUnicode_GET_LENGTH(str1))); */ - if (len2 > len1 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) { + if (len1 < len2 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); goto error; @@ -10816,7 +10833,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) } if (len1 > len2) return 1; /* uni is longer */ - if (len2 > len1) + if (len1 < len2) return -1; /* str is longer */ return 0; } @@ -10928,23 +10945,35 @@ PyUnicode_Contains(PyObject *container, PyObject *element) kind1 = PyUnicode_KIND(str); kind2 = PyUnicode_KIND(sub); - buf1 = PyUnicode_DATA(str); - buf2 = PyUnicode_DATA(sub); - if (kind2 != kind1) { - if (kind2 > kind1) { - Py_DECREF(sub); - Py_DECREF(str); - return 0; - } - buf2 = _PyUnicode_AsKind(sub, kind1); - } - if (!buf2) { + if (kind1 < kind2) { Py_DECREF(sub); Py_DECREF(str); - return -1; + return 0; } len1 = PyUnicode_GET_LENGTH(str); len2 = PyUnicode_GET_LENGTH(sub); + if (len1 < len2) { + Py_DECREF(sub); + Py_DECREF(str); + return 0; + } + buf1 = PyUnicode_DATA(str); + buf2 = PyUnicode_DATA(sub); + if (len2 == 1) { + Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0); + result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1; + Py_DECREF(sub); + Py_DECREF(str); + return result; + } + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(sub, kind1); + if (!buf2) { + Py_DECREF(sub); + Py_DECREF(str); + return -1; + } + } switch (kind1) { case PyUnicode_1BYTE_KIND: @@ -11129,7 +11158,7 @@ unicode_count(PyObject *self, PyObject *args) Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *result; - int kind1, kind2, kind; + int kind1, kind2; void *buf1, *buf2; Py_ssize_t len1, len2, iresult; @@ -11139,24 +11168,27 @@ unicode_count(PyObject *self, PyObject *args) kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - if (kind2 > kind1) { + if (kind1 < kind2) { Py_DECREF(substring); return PyLong_FromLong(0); } - kind = kind1; - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(substring, kind); - if (!buf2) { - Py_DECREF(substring); - return NULL; - } len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); - ADJUST_INDICES(start, end, len1); - switch (kind) { + if (end - start < len2) { + Py_DECREF(substring); + return PyLong_FromLong(0); + } + buf1 = PyUnicode_DATA(self); + buf2 = PyUnicode_DATA(substring); + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(substring, kind1); + if (!buf2) { + Py_DECREF(substring); + return NULL; + } + } + switch (kind1) { case PyUnicode_1BYTE_KIND: iresult = ucs1lib_count( ((Py_UCS1*)buf1) + start, end - start, @@ -11181,7 +11213,7 @@ unicode_count(PyObject *self, PyObject *args) result = PyLong_FromSsize_t(iresult); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); Py_DECREF(substring); @@ -12632,8 +12664,8 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) PyObject* str_obj; PyObject* sep_obj; PyObject* out; - int kind1, kind2, kind; - void *buf1 = NULL, *buf2 = NULL; + int kind1, kind2; + void *buf1, *buf2; Py_ssize_t len1, len2; str_obj = PyUnicode_FromObject(str_in); @@ -12652,21 +12684,29 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) kind1 = PyUnicode_KIND(str_obj); kind2 = PyUnicode_KIND(sep_obj); - kind = Py_MAX(kind1, kind2); - buf1 = PyUnicode_DATA(str_obj); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(str_obj, kind); - if (!buf1) - goto onError; - buf2 = PyUnicode_DATA(sep_obj); - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(sep_obj, kind); - if (!buf2) - goto onError; len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sep_obj); + if (kind1 < kind2 || len1 < len2) { + _Py_INCREF_UNICODE_EMPTY(); + if (!unicode_empty) + out = NULL; + else { + out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty); + Py_DECREF(unicode_empty); + } + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + return out; + } + buf1 = PyUnicode_DATA(str_obj); + buf2 = PyUnicode_DATA(sep_obj); + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(sep_obj, kind1); + if (!buf2) + goto onError; + } - switch (PyUnicode_KIND(str_obj)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); @@ -12686,18 +12726,14 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) Py_DECREF(sep_obj); Py_DECREF(str_obj); - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return out; onError: Py_DECREF(sep_obj); Py_DECREF(str_obj); - if (kind1 != kind && buf1) - PyMem_Free(buf1); - if (kind2 != kind && buf2) + if (kind2 != kind1 && buf2) PyMem_Free(buf2); return NULL; } @@ -12709,8 +12745,8 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) PyObject* str_obj; PyObject* sep_obj; PyObject* out; - int kind1, kind2, kind; - void *buf1 = NULL, *buf2 = NULL; + int kind1, kind2; + void *buf1, *buf2; Py_ssize_t len1, len2; str_obj = PyUnicode_FromObject(str_in); @@ -12722,23 +12758,31 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) return NULL; } - kind1 = PyUnicode_KIND(str_in); + kind1 = PyUnicode_KIND(str_obj); kind2 = PyUnicode_KIND(sep_obj); - kind = Py_MAX(kind1, kind2); - buf1 = PyUnicode_DATA(str_in); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(str_in, kind); - if (!buf1) - goto onError; - buf2 = PyUnicode_DATA(sep_obj); - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(sep_obj, kind); - if (!buf2) - goto onError; len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sep_obj); + if (kind1 < kind2 || len1 < len2) { + _Py_INCREF_UNICODE_EMPTY(); + if (!unicode_empty) + out = NULL; + else { + out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj); + Py_DECREF(unicode_empty); + } + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + return out; + } + buf1 = PyUnicode_DATA(str_obj); + buf2 = PyUnicode_DATA(sep_obj); + if (kind2 != kind1) { + buf2 = _PyUnicode_AsKind(sep_obj, kind1); + if (!buf2) + goto onError; + } - switch (PyUnicode_KIND(str_in)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); @@ -12758,18 +12802,14 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) Py_DECREF(sep_obj); Py_DECREF(str_obj); - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) + if (kind2 != kind1) PyMem_Free(buf2); return out; onError: Py_DECREF(sep_obj); Py_DECREF(str_obj); - if (kind1 != kind && buf1) - PyMem_Free(buf1); - if (kind2 != kind && buf2) + if (kind2 != kind1 && buf2) PyMem_Free(buf2); return NULL; }