Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).
This commit is contained in:
Serhiy Storchaka 2015-03-24 21:55:47 +02:00
parent f7ef47583e
commit d9d769fcdd
6 changed files with 253 additions and 198 deletions

View File

@ -10,6 +10,10 @@ Release date: 2015-03-28
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #23573: Increased performance of string search operations (str.find,
str.index, str.count, the in operator, str.split, str.partition) with
arguments of different kinds (UCS1, UCS2, UCS4).
- Issue #23753: Python doesn't support anymore platforms without stat() or - Issue #23753: Python doesn't support anymore platforms without stat() or
fstat(), these functions are always required. fstat(), these functions are always required.

View File

@ -1142,7 +1142,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
char byte; char byte;
Py_buffer subbuf; Py_buffer subbuf;
const char *sub; const char *sub;
Py_ssize_t sub_len; Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res; Py_ssize_t res;
@ -1161,15 +1161,30 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
sub = &byte; sub = &byte;
sub_len = 1; sub_len = 1;
} }
len = PyByteArray_GET_SIZE(self);
if (dir > 0) ADJUST_INDICES(start, end, len);
res = stringlib_find_slice( if (end - start < sub_len)
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), res = -1;
sub, sub_len, start, end); else if (sub_len == 1) {
else unsigned char needle = *sub;
res = stringlib_rfind_slice( int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), res = stringlib_fastsearch_memchr_1char(
sub, sub_len, start, end); PyByteArray_AS_STRING(self) + start, end - start,
needle, needle, mode);
if (res >= 0)
res += start;
}
else {
if (dir > 0)
res = stringlib_find_slice(
PyByteArray_AS_STRING(self), len,
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyByteArray_AS_STRING(self), len,
sub, sub_len, start, end);
}
if (subobj) if (subobj)
PyBuffer_Release(&subbuf); PyBuffer_Release(&subbuf);

View File

@ -1914,7 +1914,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
char byte; char byte;
Py_buffer subbuf; Py_buffer subbuf;
const char *sub; const char *sub;
Py_ssize_t sub_len; Py_ssize_t len, sub_len;
Py_ssize_t start=0, end=PY_SSIZE_T_MAX; Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Py_ssize_t res; Py_ssize_t res;
@ -1933,15 +1933,30 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub = &byte; sub = &byte;
sub_len = 1; sub_len = 1;
} }
len = PyBytes_GET_SIZE(self);
if (dir > 0) ADJUST_INDICES(start, end, len);
res = stringlib_find_slice( if (end - start < sub_len)
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), res = -1;
sub, sub_len, start, end); else if (sub_len == 1) {
else unsigned char needle = *sub;
res = stringlib_rfind_slice( int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), res = stringlib_fastsearch_memchr_1char(
sub, sub_len, start, end); PyBytes_AS_STRING(self) + start, end - start,
needle, needle, mode);
if (res >= 0)
res += start;
}
else {
if (dir > 0)
res = stringlib_find_slice(
PyBytes_AS_STRING(self), len,
sub, sub_len, start, end);
else
res = stringlib_rfind_slice(
PyBytes_AS_STRING(self), len,
sub, sub_len, start, end);
}
if (subobj) if (subobj)
PyBuffer_Release(&subbuf); PyBuffer_Release(&subbuf);

View File

@ -36,7 +36,7 @@
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
STRINGLIB_CHAR ch, unsigned char needle, STRINGLIB_CHAR ch, unsigned char needle,
Py_ssize_t maxcount, int mode) int mode)
{ {
if (mode == FAST_SEARCH) { if (mode == FAST_SEARCH) {
const STRINGLIB_CHAR *ptr = s; const STRINGLIB_CHAR *ptr = s;
@ -115,7 +115,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (needle != 0) if (needle != 0)
#endif #endif
return STRINGLIB(fastsearch_memchr_1char) return STRINGLIB(fastsearch_memchr_1char)
(s, n, p[0], needle, maxcount, mode); (s, n, p[0], needle, mode);
} }
if (mode == FAST_COUNT) { if (mode == FAST_COUNT) {
for (i = 0; i < n; i++) for (i = 0; i < n; i++)

View File

@ -11,8 +11,7 @@ STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{ {
Py_ssize_t pos; Py_ssize_t pos;
if (str_len < 0) assert(str_len >= 0);
return -1;
if (sub_len == 0) if (sub_len == 0)
return offset; return offset;
@ -31,8 +30,7 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
{ {
Py_ssize_t pos; Py_ssize_t pos;
if (str_len < 0) assert(str_len >= 0);
return -1;
if (sub_len == 0) if (sub_len == 0)
return str_len + offset; return str_len + offset;
@ -44,27 +42,11 @@ STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos; return pos;
} }
/* helper macro to fixup start/end slice values */
#define ADJUST_INDICES(start, end, len) \
if (end > len) \
end = len; \
else if (end < 0) { \
end += len; \
if (end < 0) \
end = 0; \
} \
if (start < 0) { \
start += len; \
if (start < 0) \
start = 0; \
}
Py_LOCAL_INLINE(Py_ssize_t) Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
ADJUST_INDICES(start, end, str_len);
return STRINGLIB(find)(str + start, end - start, sub, sub_len, start); return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);
} }
@ -73,7 +55,6 @@ STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
ADJUST_INDICES(start, end, str_len);
return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start); return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);
} }

View File

@ -641,7 +641,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
static PyObject * static PyObject *
fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s)); fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s));
Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind, Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
Py_ssize_t size, Py_UCS4 ch, Py_ssize_t size, Py_UCS4 ch,
int direction) int direction)
{ {
@ -8959,35 +8959,61 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */ /* --- Helpers ------------------------------------------------------------ */
/* Helper macro to fix up start/end slice values against a sequence of
   length `len`, modifying `start` and `end` in place:
     - an `end` greater than `len` is clamped to `len`;
     - a negative `end` or `start` has `len` added to it and, if it is
       still negative, is set to 0.
   `start` and `end` must be modifiable lvalues; every argument is
   evaluated more than once, so pass only side-effect-free expressions.
   The body is wrapped in do { } while (0) so that an invocation followed
   by a semicolon is exactly one statement and can be used safely as the
   unbraced body of an if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
static Py_ssize_t static Py_ssize_t
any_find_slice(int direction, PyObject* s1, PyObject* s2, any_find_slice(int direction, PyObject* s1, PyObject* s2,
Py_ssize_t start, Py_ssize_t start,
Py_ssize_t end) Py_ssize_t end)
{ {
int kind1, kind2, kind; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2, result; Py_ssize_t len1, len2, result;
kind1 = PyUnicode_KIND(s1); kind1 = PyUnicode_KIND(s1);
kind2 = PyUnicode_KIND(s2); kind2 = PyUnicode_KIND(s2);
kind = kind1 > kind2 ? kind1 : kind2; if (kind1 < kind2)
buf1 = PyUnicode_DATA(s1); return -1;
buf2 = PyUnicode_DATA(s2);
if (kind1 != kind)
buf1 = _PyUnicode_AsKind(s1, kind);
if (!buf1)
return -2;
if (kind2 != kind)
buf2 = _PyUnicode_AsKind(s2, kind);
if (!buf2) {
if (kind1 != kind) PyMem_Free(buf1);
return -2;
}
len1 = PyUnicode_GET_LENGTH(s1); len1 = PyUnicode_GET_LENGTH(s1);
len2 = PyUnicode_GET_LENGTH(s2); len2 = PyUnicode_GET_LENGTH(s2);
ADJUST_INDICES(start, end, len1);
if (end - start < len2)
return -1;
buf1 = PyUnicode_DATA(s1);
buf2 = PyUnicode_DATA(s2);
if (len2 == 1) {
Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
result = findchar((const char *)buf1 + kind1*start,
kind1, end - start, ch, direction);
if (result == -1)
return -1;
else
return start + result;
}
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(s2, kind1);
if (!buf2)
return -2;
}
if (direction > 0) { if (direction > 0) {
switch (kind) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
result = asciilib_find_slice(buf1, len1, buf2, len2, start, end); result = asciilib_find_slice(buf1, len1, buf2, len2, start, end);
@ -9005,7 +9031,7 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2,
} }
} }
else { else {
switch (kind) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end); result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end);
@ -9023,9 +9049,7 @@ any_find_slice(int direction, PyObject* s1, PyObject* s2,
} }
} }
if (kind1 != kind) if (kind2 != kind1)
PyMem_Free(buf1);
if (kind2 != kind)
PyMem_Free(buf2); PyMem_Free(buf2);
return result; return result;
@ -9115,21 +9139,6 @@ _PyUnicode_InsertThousandsGrouping(
} }
/* helper macro to fixup start/end slice values */
#define ADJUST_INDICES(start, end, len) \
if (end > len) \
end = len; \
else if (end < 0) { \
end += len; \
if (end < 0) \
end = 0; \
} \
if (start < 0) { \
start += len; \
if (start < 0) \
start = 0; \
}
Py_ssize_t Py_ssize_t
PyUnicode_Count(PyObject *str, PyUnicode_Count(PyObject *str,
PyObject *substr, PyObject *substr,
@ -9139,7 +9148,7 @@ PyUnicode_Count(PyObject *str,
Py_ssize_t result; Py_ssize_t result;
PyObject* str_obj; PyObject* str_obj;
PyObject* sub_obj; PyObject* sub_obj;
int kind1, kind2, kind; int kind1, kind2;
void *buf1 = NULL, *buf2 = NULL; void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
@ -9159,24 +9168,30 @@ PyUnicode_Count(PyObject *str,
kind1 = PyUnicode_KIND(str_obj); kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sub_obj); kind2 = PyUnicode_KIND(sub_obj);
kind = kind1; if (kind1 < kind2) {
buf1 = PyUnicode_DATA(str_obj); Py_DECREF(sub_obj);
buf2 = PyUnicode_DATA(sub_obj); Py_DECREF(str_obj);
if (kind2 != kind) { return 0;
if (kind2 > kind) {
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
return 0;
}
buf2 = _PyUnicode_AsKind(sub_obj, kind);
} }
if (!buf2)
goto onError;
len1 = PyUnicode_GET_LENGTH(str_obj); len1 = PyUnicode_GET_LENGTH(str_obj);
len2 = PyUnicode_GET_LENGTH(sub_obj); len2 = PyUnicode_GET_LENGTH(sub_obj);
ADJUST_INDICES(start, end, len1); ADJUST_INDICES(start, end, len1);
switch (kind) { if (end - start < len2) {
Py_DECREF(sub_obj);
Py_DECREF(str_obj);
return 0;
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sub_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sub_obj, kind1);
if (!buf2)
goto onError;
}
switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj)) if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj))
result = asciilib_count( result = asciilib_count(
@ -9208,14 +9223,14 @@ PyUnicode_Count(PyObject *str,
Py_DECREF(sub_obj); Py_DECREF(sub_obj);
Py_DECREF(str_obj); Py_DECREF(str_obj);
if (kind2 != kind) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
return result; return result;
onError: onError:
Py_DECREF(sub_obj); Py_DECREF(sub_obj);
Py_DECREF(str_obj); Py_DECREF(str_obj);
if (kind2 != kind && buf2) if (kind2 != kind1 && buf2)
PyMem_Free(buf2); PyMem_Free(buf2);
return -1; return -1;
} }
@ -9268,6 +9283,8 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
} }
if (end > PyUnicode_GET_LENGTH(str)) if (end > PyUnicode_GET_LENGTH(str))
end = PyUnicode_GET_LENGTH(str); end = PyUnicode_GET_LENGTH(str);
if (start >= end)
return -1;
kind = PyUnicode_KIND(str); kind = PyUnicode_KIND(str);
result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
kind, end-start, ch, direction); kind, end-start, ch, direction);
@ -10014,7 +10031,7 @@ split(PyObject *self,
PyObject *substring, PyObject *substring,
Py_ssize_t maxcount) Py_ssize_t maxcount)
{ {
int kind1, kind2, kind; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
PyObject* out; PyObject* out;
@ -10058,23 +10075,25 @@ split(PyObject *self,
kind1 = PyUnicode_KIND(self); kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring); kind2 = PyUnicode_KIND(substring);
kind = kind1 > kind2 ? kind1 : kind2;
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind1 != kind)
buf1 = _PyUnicode_AsKind(self, kind);
if (!buf1)
return NULL;
if (kind2 != kind)
buf2 = _PyUnicode_AsKind(substring, kind);
if (!buf2) {
if (kind1 != kind) PyMem_Free(buf1);
return NULL;
}
len1 = PyUnicode_GET_LENGTH(self); len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring); len2 = PyUnicode_GET_LENGTH(substring);
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
if (out == NULL)
return NULL;
Py_INCREF(self);
PyList_SET_ITEM(out, 0, self);
return out;
}
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
if (!buf2)
return NULL;
}
switch (kind) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
out = asciilib_split( out = asciilib_split(
@ -10094,9 +10113,7 @@ split(PyObject *self,
default: default:
out = NULL; out = NULL;
} }
if (kind1 != kind) if (kind2 != kind1)
PyMem_Free(buf1);
if (kind2 != kind)
PyMem_Free(buf2); PyMem_Free(buf2);
return out; return out;
} }
@ -10106,7 +10123,7 @@ rsplit(PyObject *self,
PyObject *substring, PyObject *substring,
Py_ssize_t maxcount) Py_ssize_t maxcount)
{ {
int kind1, kind2, kind; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
PyObject* out; PyObject* out;
@ -10150,23 +10167,25 @@ rsplit(PyObject *self,
kind1 = PyUnicode_KIND(self); kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring); kind2 = PyUnicode_KIND(substring);
kind = kind1 > kind2 ? kind1 : kind2;
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind1 != kind)
buf1 = _PyUnicode_AsKind(self, kind);
if (!buf1)
return NULL;
if (kind2 != kind)
buf2 = _PyUnicode_AsKind(substring, kind);
if (!buf2) {
if (kind1 != kind) PyMem_Free(buf1);
return NULL;
}
len1 = PyUnicode_GET_LENGTH(self); len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring); len2 = PyUnicode_GET_LENGTH(substring);
if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1);
if (out == NULL)
return NULL;
Py_INCREF(self);
PyList_SET_ITEM(out, 0, self);
return out;
}
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
if (!buf2)
return NULL;
}
switch (kind) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
out = asciilib_rsplit( out = asciilib_rsplit(
@ -10186,9 +10205,7 @@ rsplit(PyObject *self,
default: default:
out = NULL; out = NULL;
} }
if (kind1 != kind) if (kind2 != kind1)
PyMem_Free(buf1);
if (kind2 != kind)
PyMem_Free(buf2); PyMem_Free(buf2);
return out; return out;
} }
@ -10407,7 +10424,7 @@ replace(PyObject *self, PyObject *str1,
} }
/* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) - /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
PyUnicode_GET_LENGTH(str1))); */ PyUnicode_GET_LENGTH(str1))); */
if (len2 > len1 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) { if (len1 < len2 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"replace string is too long"); "replace string is too long");
goto error; goto error;
@ -10816,7 +10833,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
} }
if (len1 > len2) if (len1 > len2)
return 1; /* uni is longer */ return 1; /* uni is longer */
if (len2 > len1) if (len1 < len2)
return -1; /* str is longer */ return -1; /* str is longer */
return 0; return 0;
} }
@ -10928,23 +10945,35 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
kind1 = PyUnicode_KIND(str); kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(sub); kind2 = PyUnicode_KIND(sub);
buf1 = PyUnicode_DATA(str); if (kind1 < kind2) {
buf2 = PyUnicode_DATA(sub);
if (kind2 != kind1) {
if (kind2 > kind1) {
Py_DECREF(sub);
Py_DECREF(str);
return 0;
}
buf2 = _PyUnicode_AsKind(sub, kind1);
}
if (!buf2) {
Py_DECREF(sub); Py_DECREF(sub);
Py_DECREF(str); Py_DECREF(str);
return -1; return 0;
} }
len1 = PyUnicode_GET_LENGTH(str); len1 = PyUnicode_GET_LENGTH(str);
len2 = PyUnicode_GET_LENGTH(sub); len2 = PyUnicode_GET_LENGTH(sub);
if (len1 < len2) {
Py_DECREF(sub);
Py_DECREF(str);
return 0;
}
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(sub);
if (len2 == 1) {
Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0);
result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1;
Py_DECREF(sub);
Py_DECREF(str);
return result;
}
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sub, kind1);
if (!buf2) {
Py_DECREF(sub);
Py_DECREF(str);
return -1;
}
}
switch (kind1) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
@ -11129,7 +11158,7 @@ unicode_count(PyObject *self, PyObject *args)
Py_ssize_t start = 0; Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX; Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result; PyObject *result;
int kind1, kind2, kind; int kind1, kind2;
void *buf1, *buf2; void *buf1, *buf2;
Py_ssize_t len1, len2, iresult; Py_ssize_t len1, len2, iresult;
@ -11139,24 +11168,27 @@ unicode_count(PyObject *self, PyObject *args)
kind1 = PyUnicode_KIND(self); kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring); kind2 = PyUnicode_KIND(substring);
if (kind2 > kind1) { if (kind1 < kind2) {
Py_DECREF(substring); Py_DECREF(substring);
return PyLong_FromLong(0); return PyLong_FromLong(0);
} }
kind = kind1;
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind)
buf2 = _PyUnicode_AsKind(substring, kind);
if (!buf2) {
Py_DECREF(substring);
return NULL;
}
len1 = PyUnicode_GET_LENGTH(self); len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring); len2 = PyUnicode_GET_LENGTH(substring);
ADJUST_INDICES(start, end, len1); ADJUST_INDICES(start, end, len1);
switch (kind) { if (end - start < len2) {
Py_DECREF(substring);
return PyLong_FromLong(0);
}
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
if (!buf2) {
Py_DECREF(substring);
return NULL;
}
}
switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
iresult = ucs1lib_count( iresult = ucs1lib_count(
((Py_UCS1*)buf1) + start, end - start, ((Py_UCS1*)buf1) + start, end - start,
@ -11181,7 +11213,7 @@ unicode_count(PyObject *self, PyObject *args)
result = PyLong_FromSsize_t(iresult); result = PyLong_FromSsize_t(iresult);
if (kind2 != kind) if (kind2 != kind1)
PyMem_Free(buf2); PyMem_Free(buf2);
Py_DECREF(substring); Py_DECREF(substring);
@ -12632,8 +12664,8 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
PyObject* str_obj; PyObject* str_obj;
PyObject* sep_obj; PyObject* sep_obj;
PyObject* out; PyObject* out;
int kind1, kind2, kind; int kind1, kind2;
void *buf1 = NULL, *buf2 = NULL; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str_in); str_obj = PyUnicode_FromObject(str_in);
@ -12652,21 +12684,29 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
kind1 = PyUnicode_KIND(str_obj); kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj); kind2 = PyUnicode_KIND(sep_obj);
kind = Py_MAX(kind1, kind2);
buf1 = PyUnicode_DATA(str_obj);
if (kind1 != kind)
buf1 = _PyUnicode_AsKind(str_obj, kind);
if (!buf1)
goto onError;
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind)
buf2 = _PyUnicode_AsKind(sep_obj, kind);
if (!buf2)
goto onError;
len1 = PyUnicode_GET_LENGTH(str_obj); len1 = PyUnicode_GET_LENGTH(str_obj);
len2 = PyUnicode_GET_LENGTH(sep_obj); len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
_Py_INCREF_UNICODE_EMPTY();
if (!unicode_empty)
out = NULL;
else {
out = PyTuple_Pack(3, str_obj, unicode_empty, unicode_empty);
Py_DECREF(unicode_empty);
}
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
return out;
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
if (!buf2)
goto onError;
}
switch (PyUnicode_KIND(str_obj)) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
@ -12686,18 +12726,14 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
Py_DECREF(sep_obj); Py_DECREF(sep_obj);
Py_DECREF(str_obj); Py_DECREF(str_obj);
if (kind1 != kind) if (kind2 != kind1)
PyMem_Free(buf1);
if (kind2 != kind)
PyMem_Free(buf2); PyMem_Free(buf2);
return out; return out;
onError: onError:
Py_DECREF(sep_obj); Py_DECREF(sep_obj);
Py_DECREF(str_obj); Py_DECREF(str_obj);
if (kind1 != kind && buf1) if (kind2 != kind1 && buf2)
PyMem_Free(buf1);
if (kind2 != kind && buf2)
PyMem_Free(buf2); PyMem_Free(buf2);
return NULL; return NULL;
} }
@ -12709,8 +12745,8 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
PyObject* str_obj; PyObject* str_obj;
PyObject* sep_obj; PyObject* sep_obj;
PyObject* out; PyObject* out;
int kind1, kind2, kind; int kind1, kind2;
void *buf1 = NULL, *buf2 = NULL; void *buf1, *buf2;
Py_ssize_t len1, len2; Py_ssize_t len1, len2;
str_obj = PyUnicode_FromObject(str_in); str_obj = PyUnicode_FromObject(str_in);
@ -12722,23 +12758,31 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
return NULL; return NULL;
} }
kind1 = PyUnicode_KIND(str_in); kind1 = PyUnicode_KIND(str_obj);
kind2 = PyUnicode_KIND(sep_obj); kind2 = PyUnicode_KIND(sep_obj);
kind = Py_MAX(kind1, kind2);
buf1 = PyUnicode_DATA(str_in);
if (kind1 != kind)
buf1 = _PyUnicode_AsKind(str_in, kind);
if (!buf1)
goto onError;
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind)
buf2 = _PyUnicode_AsKind(sep_obj, kind);
if (!buf2)
goto onError;
len1 = PyUnicode_GET_LENGTH(str_obj); len1 = PyUnicode_GET_LENGTH(str_obj);
len2 = PyUnicode_GET_LENGTH(sep_obj); len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
_Py_INCREF_UNICODE_EMPTY();
if (!unicode_empty)
out = NULL;
else {
out = PyTuple_Pack(3, unicode_empty, unicode_empty, str_obj);
Py_DECREF(unicode_empty);
}
Py_DECREF(sep_obj);
Py_DECREF(str_obj);
return out;
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
if (!buf2)
goto onError;
}
switch (PyUnicode_KIND(str_in)) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
@ -12758,18 +12802,14 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
Py_DECREF(sep_obj); Py_DECREF(sep_obj);
Py_DECREF(str_obj); Py_DECREF(str_obj);
if (kind1 != kind) if (kind2 != kind1)
PyMem_Free(buf1);
if (kind2 != kind)
PyMem_Free(buf2); PyMem_Free(buf2);
return out; return out;
onError: onError:
Py_DECREF(sep_obj); Py_DECREF(sep_obj);
Py_DECREF(str_obj); Py_DECREF(str_obj);
if (kind1 != kind && buf1) if (kind2 != kind1 && buf2)
PyMem_Free(buf1);
if (kind2 != kind && buf2)
PyMem_Free(buf2); PyMem_Free(buf2);
return NULL; return NULL;
} }