Merged revisions 77461 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r77461 | antoine.pitrou | 2010-01-13 08:55:48 +0100 (mer., 13 janv. 2010) | 5 lines

  Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
  methods of bytes, bytearray and unicode objects by using a common
  implementation based on stringlib's fast search.  Patch by Florent Xicluna.
........
This commit is contained in:
Antoine Pitrou 2010-01-13 08:07:53 +00:00
parent 5efea0430c
commit f2c5484f9e
16 changed files with 1123 additions and 1418 deletions

View File

@ -582,6 +582,7 @@ BYTESTR_DEPS = \
$(srcdir)/Objects/stringlib/fastsearch.h \
$(srcdir)/Objects/stringlib/find.h \
$(srcdir)/Objects/stringlib/partition.h \
$(srcdir)/Objects/stringlib/split.h \
$(srcdir)/Objects/stringlib/stringdefs.h \
$(srcdir)/Objects/stringlib/string_format.h \
$(srcdir)/Objects/stringlib/transmogrify.h \

View File

@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins
-----------------
- Issue #7622: Improve the split(), rsplit(), splitlines() and replace()
methods of bytes, bytearray and unicode objects by using a common
implementation based on stringlib's fast search. Patch by Florent Xicluna.
- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds
when parsing certain long numeric strings corresponding to subnormal
values. Also fix a number of bugs in dtoa.c that could lead to

View File

@ -1039,14 +1039,16 @@ bytearray_dealloc(PyByteArrayObject *self)
#define STRINGLIB_STR PyByteArray_AS_STRING
#define STRINGLIB_NEW PyByteArray_FromStringAndSize
#define STRINGLIB_EMPTY nullbytes
#define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_MUTABLE 1
#define FROM_BYTEARRAY 1
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/ctype.h"
#include "stringlib/transmogrify.h"
@ -1054,21 +1056,20 @@ bytearray_dealloc(PyByteArrayObject *self)
/* The following Py_LOCAL_INLINE and Py_LOCAL functions
were copied from the old char* style string object. */
Py_LOCAL_INLINE(void)
_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
{
    /* Normalise a (start, end) pair in place against a sequence of
       length len.  An end beyond len is cut back to len; a negative
       end or start has len added once and is then floored at zero.
       The end index is settled before the start index is looked at. */
    Py_ssize_t stop = *end;
    Py_ssize_t first;

    if (stop > len) {
        stop = len;
    }
    else if (stop < 0) {
        stop += len;
    }
    if (stop < 0) {
        stop = 0;
    }
    *end = stop;

    first = *start;
    if (first < 0) {
        first += len;
    }
    if (first < 0) {
        first = 0;
    }
    *start = first;
}
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of
   the sequence being sliced and is evaluated more than once, so it must
   be free of side effects.  An `end` larger than `len` is cut back to
   `len`; a negative `end` or `start` has `len` added and is then
   floored at zero.

   The body is wrapped in do { ... } while (0) so that the expansion is a
   single statement: without the wrapper, `if (cond) ADJUST_INDICES(...);`
   would only guard the `end` fix-up and always run the `start` fix-up.
   Invoke it with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
@ -1136,10 +1137,10 @@ bytearray_count(PyByteArrayObject *self, PyObject *args)
if (_getbuffer(sub_obj, &vsub) < 0)
return NULL;
_adjust_indices(&start, &end, PyByteArray_GET_SIZE(self));
ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
count_obj = PyLong_FromSsize_t(
stringlib_count(str + start, end - start, vsub.buf, vsub.len)
stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
);
PyBuffer_Release(&vsub);
return count_obj;
@ -1247,7 +1248,7 @@ _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start
if (_getbuffer(substr, &vsubstr) < 0)
return -1;
_adjust_indices(&start, &end, len);
ADJUST_INDICES(start, end, len);
if (direction < 0) {
/* startswith */
@ -1459,20 +1460,11 @@ bytearray_maketrans(PyObject *null, PyObject *args)
}
/* Search directions. */
#define FORWARD 1
#define REVERSE (-1)
/* find and count characters and substrings */
/* findchar: pointer to the first byte equal to c within the first
   target_len bytes of target, or NULL if there is none (memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
/* Py_STRING_MATCH: true when the `length` bytes of `pattern` occur in
   `target` starting at `offset`.  The first and last bytes are compared
   directly before falling back to memcmp for the interior.

   `length` must be at least 1.  For lengths 1 and 2 the two direct
   comparisons already cover every byte, so memcmp is skipped; this also
   keeps `length - 2` from going negative and turning into a huge size.
   Every argument is parenthesised so that expressions such as `buf + 1`
   can be passed safely; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     ((length) <= 2 ||                                                  \
      !memcmp((target)+(offset)+1, (pattern)+1, (length)-2)))
/* Bytes ops must return a string, create a copy */
Py_LOCAL(PyByteArrayObject *)
@ -1500,93 +1492,6 @@ countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount
return count;
}
Py_LOCAL(Py_ssize_t)
findstring(const char *target, Py_ssize_t target_len,
           const char *pattern, Py_ssize_t pattern_len,
           Py_ssize_t start,
           Py_ssize_t end,
           int direction)
{
    /* Locate pattern inside target[start:end].  A positive direction
       scans from the left and yields the lowest matching offset; any
       other direction scans from the right and yields the highest.
       Returns -1 when there is no match.  start and end follow slice
       conventions (negative values count from the end). */
    Py_ssize_t pos;
    Py_ssize_t last;

    /* bring the slice bounds into range */
    if (start < 0) {
        start += target_len;
        if (start < 0)
            start = 0;
    }
    if (end > target_len)
        end = target_len;
    else if (end < 0) {
        end += target_len;
        if (end < 0)
            end = 0;
    }

    /* an empty pattern matches immediately at the scan origin */
    if (pattern_len == 0) {
        if (direction > 0)
            return start;
        return end;
    }

    /* last offset at which a full pattern still fits */
    last = end - pattern_len;

    if (direction < 0) {
        pos = last;
        while (pos >= start) {
            if (Py_STRING_MATCH(target, pos, pattern, pattern_len))
                return pos;
            pos--;
        }
    }
    else {
        pos = start;
        while (pos <= last) {
            if (Py_STRING_MATCH(target, pos, pattern, pattern_len))
                return pos;
            pos++;
        }
    }
    return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
countstring(const char *target, Py_ssize_t target_len,
            const char *pattern, Py_ssize_t pattern_len,
            Py_ssize_t start,
            Py_ssize_t end,
            int direction, Py_ssize_t maxcount)
{
    /* Count non-overlapping occurrences of pattern in target[start:end],
       stopping once maxcount matches have been seen.  A negative
       direction scans right-to-left, anything else left-to-right.
       start and end follow slice conventions. */
    Py_ssize_t hits = 0;
    Py_ssize_t pos;

    /* bring the slice bounds into range */
    if (start < 0) {
        start += target_len;
        if (start < 0)
            start = 0;
    }
    if (end > target_len)
        end = target_len;
    else if (end < 0) {
        end += target_len;
        if (end < 0)
            end = 0;
    }

    /* An empty pattern matches at every position; the answer is then the
       smaller of target_len+1 and maxcount.  The same path is taken when
       maxcount is zero. */
    if (pattern_len == 0 || maxcount == 0)
        return (target_len+1 < maxcount) ? target_len+1 : maxcount;

    /* from here on, end is the last offset where the pattern still fits */
    end -= pattern_len;

    if (direction < 0) {
        pos = end;
        while (pos >= start) {
            if (!Py_STRING_MATCH(target, pos, pattern, pattern_len)) {
                pos--;
                continue;
            }
            hits++;
            maxcount--;
            if (maxcount <= 0)
                break;
            /* jump over the match so occurrences never overlap */
            pos -= pattern_len;
        }
    }
    else {
        pos = start;
        while (pos <= end) {
            if (!Py_STRING_MATCH(target, pos, pattern, pattern_len)) {
                pos++;
                continue;
            }
            hits++;
            maxcount--;
            if (maxcount <= 0)
                break;
            /* jump over the match so occurrences never overlap */
            pos += pattern_len;
        }
    }
    return hits;
}
/* Algorithms for different cases of string replacement */
@ -1708,10 +1613,9 @@ replace_delete_substring(PyByteArrayObject *self,
self_len = PyByteArray_GET_SIZE(self);
self_s = PyByteArray_AS_STRING(self);
count = countstring(self_s, self_len,
from_s, from_len,
0, self_len, 1,
maxcount);
count = stringlib_count(self_s, self_len,
from_s, from_len,
maxcount);
if (count == 0) {
/* no matches */
@ -1730,9 +1634,9 @@ replace_delete_substring(PyByteArrayObject *self,
start = self_s;
end = self_s + self_len;
while (count-- > 0) {
offset = findstring(start, end-start,
from_s, from_len,
0, end-start, FORWARD);
offset = stringlib_find(start, end-start,
from_s, from_len,
0);
if (offset == -1)
break;
next = start + offset;
@ -1808,9 +1712,9 @@ replace_substring_in_place(PyByteArrayObject *self,
self_s = PyByteArray_AS_STRING(self);
self_len = PyByteArray_GET_SIZE(self);
offset = findstring(self_s, self_len,
from_s, from_len,
0, self_len, FORWARD);
offset = stringlib_find(self_s, self_len,
from_s, from_len,
0);
if (offset == -1) {
/* No matches; return the original bytes */
return return_self(self);
@ -1830,9 +1734,9 @@ replace_substring_in_place(PyByteArrayObject *self,
end = result_s + self_len;
while ( --maxcount > 0) {
offset = findstring(start, end-start,
from_s, from_len,
0, end-start, FORWARD);
offset = stringlib_find(start, end-start,
from_s, from_len,
0);
if (offset==-1)
break;
Py_MEMCPY(start+offset, to_s, from_len);
@ -1925,9 +1829,10 @@ replace_substring(PyByteArrayObject *self,
self_s = PyByteArray_AS_STRING(self);
self_len = PyByteArray_GET_SIZE(self);
count = countstring(self_s, self_len,
from_s, from_len,
0, self_len, FORWARD, maxcount);
count = stringlib_count(self_s, self_len,
from_s, from_len,
maxcount);
if (count == 0) {
/* no matches, return unchanged */
return return_self(self);
@ -1954,9 +1859,9 @@ replace_substring(PyByteArrayObject *self,
start = self_s;
end = self_s + self_len;
while (count-- > 0) {
offset = findstring(start, end-start,
from_s, from_len,
0, end-start, FORWARD);
offset = stringlib_find(start, end-start,
from_s, from_len,
0);
if (offset == -1)
break;
next = start+offset;
@ -2085,123 +1990,6 @@ bytearray_replace(PyByteArrayObject *self, PyObject *args)
return res;
}
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */
#define MAX_PREALLOC 12
/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The cap is tested first, so a very large maxsplit is never incremented.
   The argument is parenthesised so that an expression may be passed; it
   is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
/* SPLIT_APPEND(data, left, right): create a bytearray from
   data[left:right] and append it to `list`.
   Relies on the invoking function providing the locals `str` and `list`
   and an `onError` label; control jumps there if the new object cannot
   be created or the append fails.  The temporary reference held in
   `str` is released in both the success and the failure case.
   Note: the expansion is several statements, not a single one. */
#define SPLIT_APPEND(data, left, right) \
str = PyByteArray_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
/* SPLIT_ADD(data, left, right): create a bytearray from data[left:right]
   and store it as item number `count` of `list`, then increment `count`.
   While `count` is below MAX_PREALLOC the item is written straight into
   the list with PyList_SET_ITEM; after that it is added with
   PyList_Append.  Relies on the invoking function providing the locals
   `str`, `list` and `count` and an `onError` label, which is the jump
   target when object creation or the append fails. */
#define SPLIT_ADD(data, left, right) { \
str = PyByteArray_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (count < MAX_PREALLOC) { \
PyList_SET_ITEM(list, count, str); \
} else { \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str); \
} \
count++; }
/* Always force the list to the expected size: sets the size field of
   `list` to the invoking function's `count` local, i.e. the number of
   items added through SPLIT_ADD. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
/* Split the buffer s[0:len] at occurrences of the single byte ch,
   performing at most maxcount splits.
   Returns a new list whose items are created by SPLIT_ADD (bytearray
   objects), or NULL if the list cannot be allocated or SPLIT_ADD jumps
   to onError. */
Py_LOCAL_INLINE(PyObject *)
split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
/* i: start of the current piece; j: scan position; count: number of
   items stored so far (maintained by SPLIT_ADD) */
register Py_ssize_t i, j, count = 0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = 0;
/* each pass of the outer loop consumes one split from the budget */
while ((j < len) && (maxcount-- > 0)) {
for(; j < len; j++) {
/* I found that using memchr makes no difference */
if (s[j] == ch) {
SPLIT_ADD(s, i, j);
/* resume right after the separator */
i = j = j + 1;
break;
}
}
}
/* add whatever follows the last separator found (possibly empty) */
if (i <= len) {
SPLIT_ADD(s, i, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
/* Split the buffer s[0:len] on runs of whitespace (as classified by
   Py_ISSPACE), performing at most maxcount splits.
   Returns a new list whose items are created by SPLIT_ADD (bytearray
   objects), or NULL if the list cannot be allocated or SPLIT_ADD jumps
   to onError. */
Py_LOCAL_INLINE(PyObject *)
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
/* i: scan position; j: start of the current token; count: number of
   items stored so far (maintained by SPLIT_ADD) */
register Py_ssize_t i, j, count = 0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
for (i = j = 0; i < len; ) {
/* find a token */
while (i < len && Py_ISSPACE(s[i]))
i++;
j = i;
while (i < len && !Py_ISSPACE(s[i]))
i++;
if (j < i) {
/* split budget used up: leave j at the start of this token so the
   code after the loop adds s[j:len] as the final item */
if (maxcount-- <= 0)
break;
SPLIT_ADD(s, j, i);
/* skip the whitespace after the token */
while (i < len && Py_ISSPACE(s[i]))
i++;
j = i;
}
}
/* remainder left over when the split budget ran out */
if (j < len) {
SPLIT_ADD(s, j, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
PyDoc_STRVAR(split__doc__,
"B.split([sep[, maxsplit]]) -> list of bytearrays\n\
\n\
@ -2213,10 +2001,10 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
bytearray_split(PyByteArrayObject *self, PyObject *args)
{
Py_ssize_t len = PyByteArray_GET_SIZE(self), n, i, j, pos;
Py_ssize_t maxsplit = -1, count = 0;
Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1;
const char *s = PyByteArray_AS_STRING(self), *sub;
PyObject *list, *str, *subobj = Py_None;
PyObject *list, *subobj = Py_None;
Py_buffer vsub;
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
@ -2225,73 +2013,18 @@ bytearray_split(PyByteArrayObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return split_whitespace(s, len, maxsplit);
return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
PyBuffer_Release(&vsub);
return NULL;
}
if (n == 1) {
list = split_char(s, len, sub[0], maxsplit);
PyBuffer_Release(&vsub);
return list;
}
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) {
PyBuffer_Release(&vsub);
return NULL;
}
i = j = 0;
while (maxsplit-- > 0) {
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
if (pos < 0)
break;
j = i+pos;
SPLIT_ADD(s, i, j);
i = j + n;
}
SPLIT_ADD(s, i, len);
FIX_PREALLOC_SIZE(list);
list = stringlib_split(
(PyObject*) self, s, len, sub, n, maxsplit
);
PyBuffer_Release(&vsub);
return list;
onError:
Py_DECREF(list);
PyBuffer_Release(&vsub);
return NULL;
}
/* stringlib's partition may place the shared `nullbytes` object into
   the 3-tuple it returns.  Swap every such entry for a freshly created
   empty bytearray so that callers never receive the shared object.
   A NULL result is passed through untouched; if a replacement cannot be
   created the tuple is released and NULL is returned. */
static PyObject *
make_nullbytes_unique(PyObject *result)
{
    int slot;

    if (result == NULL)
        return result;

    assert(PyTuple_Check(result));
    assert(PyTuple_GET_SIZE(result) == 3);

    for (slot = 0; slot < 3; slot++) {
        PyObject *fresh;

        if (PyTuple_GET_ITEM(result, slot) != (PyObject *)nullbytes)
            continue;

        fresh = PyByteArray_FromStringAndSize(NULL, 0);
        if (fresh == NULL) {
            Py_DECREF(result);
            return NULL;
        }
        /* give up the tuple's reference to nullbytes before the slot
           is overwritten with the private copy */
        Py_DECREF(nullbytes);
        PyTuple_SET_ITEM(result, slot, fresh);
    }
    return result;
}
PyDoc_STRVAR(partition__doc__,
@ -2318,7 +2051,7 @@ bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
);
Py_DECREF(bytesep);
return make_nullbytes_unique(result);
return result;
}
PyDoc_STRVAR(rpartition__doc__,
@ -2346,81 +2079,7 @@ bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
);
Py_DECREF(bytesep);
return make_nullbytes_unique(result);
}
/* Split the buffer s[0:len] at occurrences of the single byte ch,
   scanning from the right and performing at most maxcount splits.
   Pieces are collected right-to-left and the list is reversed before it
   is returned.  Returns a new list whose items are created by SPLIT_ADD
   (bytearray objects), or NULL on failure. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
/* i: scan position; j: index of the last byte of the current piece;
   count: number of items stored so far (maintained by SPLIT_ADD) */
register Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = len - 1;
/* each pass of the outer loop consumes one split from the budget */
while ((i >= 0) && (maxcount-- > 0)) {
for (; i >= 0; i--) {
if (s[i] == ch) {
/* piece between this separator and the previous one */
SPLIT_ADD(s, i + 1, j + 1);
j = i = i - 1;
break;
}
}
}
/* add whatever precedes the leftmost separator found (possibly empty) */
if (j >= -1) {
SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
/* pieces were gathered right-to-left; restore left-to-right order */
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount)
{
register Py_ssize_t i, j, count = 0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
for (i = j = len - 1; i >= 0; ) {
/* find a token */
while (i >= 0 && Py_ISSPACE(s[i]))
i--;
j = i;
while (i >= 0 && !Py_ISSPACE(s[i]))
i--;
if (j > i) {
if (maxcount-- <= 0)
break;
SPLIT_ADD(s, i + 1, j + 1);
while (i >= 0 && Py_ISSPACE(s[i]))
i--;
j = i;
}
}
if (j >= 0) {
SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
Py_DECREF(list);
return NULL;
return result;
}
PyDoc_STRVAR(rsplit__doc__,
@ -2435,10 +2094,10 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
{
Py_ssize_t len = PyByteArray_GET_SIZE(self), n, j, pos;
Py_ssize_t maxsplit = -1, count = 0;
Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1;
const char *s = PyByteArray_AS_STRING(self), *sub;
PyObject *list, *str, *subobj = Py_None;
PyObject *list, *subobj = Py_None;
Py_buffer vsub;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
@ -2447,50 +2106,18 @@ bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return rsplit_whitespace(s, len, maxsplit);
return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
PyBuffer_Release(&vsub);
return NULL;
}
else if (n == 1) {
list = rsplit_char(s, len, sub[0], maxsplit);
PyBuffer_Release(&vsub);
return list;
}
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) {
PyBuffer_Release(&vsub);
return NULL;
}
j = len;
while (maxsplit-- > 0) {
pos = fastsearch(s, j, sub, n, FAST_RSEARCH);
if (pos < 0)
break;
SPLIT_ADD(s, pos + n, j);
j = pos;
}
SPLIT_ADD(s, 0, j);
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
list = stringlib_rsplit(
(PyObject*) self, s, len, sub, n, maxsplit
);
PyBuffer_Release(&vsub);
return list;
onError:
Py_DECREF(list);
PyBuffer_Release(&vsub);
return NULL;
}
PyDoc_STRVAR(reverse__doc__,
@ -2956,6 +2583,27 @@ bytearray_join(PyByteArrayObject *self, PyObject *it)
return NULL;
}
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
static PyObject*
bytearray_splitlines(PyObject *self, PyObject *args)
{
    /* Method wrapper for B.splitlines([keepends]): parse the optional
       integer flag, then hand the object's buffer and size to
       stringlib_splitlines.  Returns NULL if argument parsing fails. */
    int keepends = 0;

    if (PyArg_ParseTuple(args, "|i:splitlines", &keepends)) {
        /* the buffer is only looked at after the arguments are parsed */
        return stringlib_splitlines(
            self,
            PyByteArray_AS_STRING(self),
            PyByteArray_GET_SIZE(self),
            keepends
            );
    }
    return NULL;
}
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray (static method)\n\
\n\
@ -3134,7 +2782,7 @@ bytearray_methods[] = {
{"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
{"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
{"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
{"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
{"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
splitlines__doc__},
{"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
startswith__doc__},

View File

@ -56,7 +56,7 @@ static PyBytesObject *nullstring;
If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
bytes (setting the last byte to the null terminating character) and you can
fill in the data yourself. If `str' is non-NULL then the resulting
PyString object must be treated as immutable and you must not fill in nor
PyBytes object must be treated as immutable and you must not fill in nor
alter the data yourself, since the strings may be shared.
The PyObject member `op->ob_size', which denotes the number of "extra
@ -568,9 +568,9 @@ PyBytes_AsStringAndSize(register PyObject *obj,
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#include "stringlib/ctype.h"
#define STRINGLIB_MUTABLE 0
#include "stringlib/transmogrify.h"
PyObject *
@ -1000,133 +1000,6 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
/* Py_STRING_MATCH: true when the `length` bytes of `pattern` occur in
   `target` starting at `offset`.  The first and last bytes are compared
   directly before falling back to memcmp for the interior.

   `length` must be at least 1.  For lengths 1 and 2 the two direct
   comparisons already cover every byte, so memcmp is skipped; this also
   keeps `length - 2` from going negative and turning into a huge size.
   Every argument is parenthesised so that expressions such as `buf + 1`
   can be passed safely; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     ((length) <= 2 ||                                                  \
      !memcmp((target)+(offset)+1, (pattern)+1, (length)-2)))
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */
#define MAX_PREALLOC 12
/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The cap is tested first, so a very large maxsplit is never incremented.
   The argument is parenthesised so that an expression may be passed; it
   is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
/* SPLIT_ADD(data, left, right): create a bytes object from
   data[left:right] and store it as item number `count` of `list`, then
   increment `count`.  While `count` is below MAX_PREALLOC the item is
   written straight into the list with PyList_SET_ITEM; after that it is
   added with PyList_Append.  Relies on the invoking function providing
   the locals `str`, `list` and `count` and an `onError` label, which is
   the jump target when object creation or the append fails. */
#define SPLIT_ADD(data, left, right) { \
str = PyBytes_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (count < MAX_PREALLOC) { \
PyList_SET_ITEM(list, count, str); \
} else { \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str); \
} \
count++; }
/* Always force the list to the expected size: sets the size field of
   `list` to the invoking function's `count` local, i.e. the number of
   items added through SPLIT_ADD. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
/* Cursor helpers; each one modifies the index variable `i` in place and
   classifies bytes with ISSPACE.
   SKIP_SPACE / SKIP_NONSPACE move i forward (bounded by len) past a run
   of whitespace / non-whitespace bytes.
   RSKIP_SPACE / RSKIP_NONSPACE move i backward (stopping once i drops
   below 0) past a run of whitespace / non-whitespace bytes. */
#define SKIP_SPACE(s, i, len) { while (i<len && ISSPACE(s[i])) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !ISSPACE(s[i])) i++; }
#define RSKIP_SPACE(s, i) { while (i>=0 && ISSPACE(s[i])) i--; }
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !ISSPACE(s[i])) i--; }
/* Split the contents of self (len bytes) on runs of whitespace,
   performing at most maxsplit splits.
   Returns a new list whose items are bytes objects created by SPLIT_ADD
   (or self itself, see below), or NULL if the list cannot be allocated
   or SPLIT_ADD jumps to onError. */
Py_LOCAL_INLINE(PyObject *)
split_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
const char *s = PyBytes_AS_STRING(self);
/* i: scan position; j: start of the current token; count: number of
   items stored so far (maintained by SPLIT_ADD) */
Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
i = j = 0;
while (maxsplit-- > 0) {
/* skip leading whitespace; stop when nothing else is left */
SKIP_SPACE(s, i, len);
if (i==len) break;
/* the token is s[j:i] once i has moved past its last byte */
j = i; i++;
SKIP_NONSPACE(s, i, len);
if (j == 0 && i == len && PyBytes_CheckExact(self)) {
/* No whitespace in self, so just use it as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
break;
}
SPLIT_ADD(s, j, i);
}
if (i < len) {
/* Only occurs when maxsplit was reached */
/* Skip any remaining whitespace and copy to end of string */
SKIP_SPACE(s, i, len);
if (i != len)
SPLIT_ADD(s, i, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
/* Split the contents of self (len bytes) at occurrences of the single
   byte ch, performing at most maxcount splits.
   Returns a new list whose items are bytes objects created by SPLIT_ADD
   (or self itself, see below), or NULL if the list cannot be allocated
   or SPLIT_ADD jumps to onError. */
Py_LOCAL_INLINE(PyObject *)
split_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
const char *s = PyBytes_AS_STRING(self);
/* i: start of the current piece; j: scan position; count: number of
   items stored so far (maintained by SPLIT_ADD) */
register Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = 0;
/* each pass of the outer loop consumes one split from the budget */
while ((j < len) && (maxcount-- > 0)) {
for(; j<len; j++) {
/* I found that using memchr makes no difference */
if (s[j] == ch) {
SPLIT_ADD(s, i, j);
/* resume right after the separator */
i = j = j + 1;
break;
}
}
}
if (i == 0 && count == 0 && PyBytes_CheckExact(self)) {
/* ch not in self, so just use self as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
}
/* otherwise add whatever follows the last separator found (possibly
   empty) */
else if (i <= len) {
SPLIT_ADD(s, i, len);
}
FIX_PREALLOC_SIZE(list);
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
PyDoc_STRVAR(split__doc__,
"B.split([sep[, maxsplit]]) -> list of bytes\n\
\n\
@ -1138,74 +1011,26 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
bytes_split(PyBytesObject *self, PyObject *args)
{
Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0;
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1;
const char *s = PyBytes_AS_STRING(self), *sub;
Py_buffer vsub;
PyObject *list, *str, *subobj = Py_None;
#ifdef USE_FAST
Py_ssize_t pos;
#endif
PyObject *list, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
return NULL;
if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return split_whitespace(self, len, maxsplit);
return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
PyBuffer_Release(&vsub);
return NULL;
}
else if (n == 1) {
list = split_char(self, len, sub[0], maxsplit);
PyBuffer_Release(&vsub);
return list;
}
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) {
PyBuffer_Release(&vsub);
return NULL;
}
#ifdef USE_FAST
i = j = 0;
while (maxsplit-- > 0) {
pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
if (pos < 0)
break;
j = i+pos;
SPLIT_ADD(s, i, j);
i = j + n;
}
#else
i = j = 0;
while ((j+n <= len) && (maxsplit-- > 0)) {
for (; j+n <= len; j++) {
if (Py_STRING_MATCH(s, j, sub, n)) {
SPLIT_ADD(s, i, j);
i = j = j + n;
break;
}
}
}
#endif
SPLIT_ADD(s, i, len);
FIX_PREALLOC_SIZE(list);
list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
PyBuffer_Release(&vsub);
return list;
onError:
Py_DECREF(list);
PyBuffer_Release(&vsub);
return NULL;
}
PyDoc_STRVAR(partition__doc__,
@ -1263,90 +1088,6 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
);
}
/* Split the contents of self (len bytes) on runs of whitespace, scanning
   from the right and performing at most maxsplit splits.
   Tokens are collected right-to-left and the list is reversed before it
   is returned.  Returns a new list whose items are bytes objects created
   by SPLIT_ADD (or self itself, see below), or NULL on failure. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(PyBytesObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
const char *s = PyBytes_AS_STRING(self);
/* i: scan position; j: index of the last byte of the current token;
   count: number of items stored so far (maintained by SPLIT_ADD) */
Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL)
return NULL;
i = j = len-1;
while (maxsplit-- > 0) {
/* skip trailing whitespace; stop when nothing else is left */
RSKIP_SPACE(s, i);
if (i<0) break;
/* the token is s[i+1:j+1] once i has moved before its first byte */
j = i; i--;
RSKIP_NONSPACE(s, i);
if (j == len-1 && i < 0 && PyBytes_CheckExact(self)) {
/* No whitespace in self, so just use it as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
break;
}
SPLIT_ADD(s, i + 1, j + 1);
}
if (i >= 0) {
/* Only occurs when maxsplit was reached. Skip any remaining
whitespace and copy to beginning of string. */
RSKIP_SPACE(s, i);
if (i >= 0)
SPLIT_ADD(s, 0, i + 1);
}
FIX_PREALLOC_SIZE(list);
/* tokens were gathered right-to-left; restore left-to-right order */
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
/* Split the contents of self (len bytes) at occurrences of the single
   byte ch, scanning from the right and performing at most maxcount
   splits.  Pieces are collected right-to-left and the list is reversed
   before it is returned.  Returns a new list whose items are bytes
   objects created by SPLIT_ADD (or self itself, see below), or NULL on
   failure. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(PyBytesObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
const char *s = PyBytes_AS_STRING(self);
/* i: scan position; j: index of the last byte of the current piece;
   count: number of items stored so far (maintained by SPLIT_ADD) */
register Py_ssize_t i, j, count=0;
PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
if (list == NULL)
return NULL;
i = j = len - 1;
/* each pass of the outer loop consumes one split from the budget */
while ((i >= 0) && (maxcount-- > 0)) {
for (; i >= 0; i--) {
if (s[i] == ch) {
/* piece between this separator and the previous one */
SPLIT_ADD(s, i + 1, j + 1);
j = i = i - 1;
break;
}
}
}
if (i < 0 && count == 0 && PyBytes_CheckExact(self)) {
/* ch not in self, so just use self as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
}
/* otherwise add whatever precedes the leftmost separator found
   (possibly empty) */
else if (j >= -1) {
SPLIT_ADD(s, 0, j + 1);
}
FIX_PREALLOC_SIZE(list);
/* pieces were gathered right-to-left; restore left-to-right order */
if (PyList_Reverse(list) < 0)
goto onError;
return list;
onError:
/* release the partially built list */
Py_DECREF(list);
return NULL;
}
PyDoc_STRVAR(rsplit__doc__,
"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
\n\
@ -1360,71 +1101,28 @@ If maxsplit is given, at most maxsplit splits are done.");
static PyObject *
bytes_rsplit(PyBytesObject *self, PyObject *args)
{
Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0;
const char *s, *sub;
Py_ssize_t len = PyBytes_GET_SIZE(self), n;
Py_ssize_t maxsplit = -1;
const char *s = PyBytes_AS_STRING(self), *sub;
Py_buffer vsub;
PyObject *list, *str, *subobj = Py_None;
PyObject *list, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
return NULL;
if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None)
return rsplit_whitespace(self, len, maxsplit);
return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
if (_getbuffer(subobj, &vsub) < 0)
return NULL;
sub = vsub.buf;
n = vsub.len;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
PyBuffer_Release(&vsub);
return NULL;
}
else if (n == 1) {
list = rsplit_char(self, len, sub[0], maxsplit);
PyBuffer_Release(&vsub);
return list;
}
list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) {
PyBuffer_Release(&vsub);
return NULL;
}
j = len;
i = j - n;
s = PyBytes_AS_STRING(self);
while ( (i >= 0) && (maxsplit-- > 0) ) {
for (; i>=0; i--) {
if (Py_STRING_MATCH(s, i, sub, n)) {
SPLIT_ADD(s, i + n, j);
j = i;
i -= n;
break;
}
}
}
SPLIT_ADD(s, 0, j);
FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0)
goto onError;
list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
PyBuffer_Release(&vsub);
return list;
onError:
Py_DECREF(list);
PyBuffer_Release(&vsub);
return NULL;
}
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
PyDoc_STRVAR(join__doc__,
"B.join(iterable_of_bytes) -> bytes\n\
@ -1531,20 +1229,20 @@ _PyBytes_Join(PyObject *sep, PyObject *x)
return bytes_join(sep, x);
}
Py_LOCAL_INLINE(void)
bytes_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
{
    /* Normalise a (start, end) pair in place against a sequence of
       length len.  An end beyond len is cut back to len; a negative
       end or start has len added once and is then floored at zero.
       The end index is settled before the start index is looked at. */
    Py_ssize_t stop = *end;
    Py_ssize_t first;

    if (stop > len) {
        stop = len;
    }
    else if (stop < 0) {
        stop += len;
    }
    if (stop < 0) {
        stop = 0;
    }
    *end = stop;

    first = *start;
    if (first < 0) {
        first += len;
    }
    if (first < 0) {
        first = 0;
    }
    *start = first;
}
/* Helper macro to fix up start/end slice values in place.

   `start` and `end` must be modifiable lvalues; `len` is the length of
   the sequence being sliced and is evaluated more than once, so it must
   be free of side effects.  An `end` larger than `len` is cut back to
   `len`; a negative `end` or `start` has `len` added and is then
   floored at zero.

   The body is wrapped in do { ... } while (0) so that the expansion is a
   single statement: without the wrapper, `if (cond) ADJUST_INDICES(...);`
   would only guard the `end` fix-up and always run the `start` fix-up.
   Invoke it with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Py_LOCAL_INLINE(Py_ssize_t)
bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
@ -1591,7 +1289,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
PyDoc_STRVAR(find__doc__,
"B.find(sub[, start[, end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
Return the lowest index in B where substring sub is found,\n\
such that sub is contained within s[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
@ -1801,7 +1499,7 @@ PyDoc_STRVAR(count__doc__,
"B.count(sub[, start[, end]]) -> int\n\
\n\
Return the number of non-overlapping occurrences of substring sub in\n\
string S[start:end]. Optional arguments start and end are interpreted\n\
string B[start:end]. Optional arguments start and end are interpreted\n\
as in slice notation.");
static PyObject *
@ -1823,10 +1521,10 @@ bytes_count(PyBytesObject *self, PyObject *args)
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL;
bytes_adjust_indices(&start, &end, PyBytes_GET_SIZE(self));
ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
return PyLong_FromSsize_t(
stringlib_count(str + start, end - start, sub, sub_len)
stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
);
}
@ -1943,9 +1641,6 @@ bytes_maketrans(PyObject *null, PyObject *args)
return _Py_bytes_maketrans(args);
}
#define FORWARD 1
#define REVERSE -1
/* find and count characters and substrings */
#define findchar(target, target_len, c) \
@ -1981,94 +1676,6 @@ countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
return count;
}
Py_LOCAL(Py_ssize_t)
findstring(const char *target, Py_ssize_t target_len,
           const char *pattern, Py_ssize_t pattern_len,
           Py_ssize_t start,
           Py_ssize_t end,
           int direction)
{
    /* Locate pattern inside target[start:end].  A positive direction
       scans from the left and yields the lowest matching offset; any
       other direction scans from the right and yields the highest.
       Returns -1 when there is no match.  start and end follow slice
       conventions (negative values count from the end). */
    Py_ssize_t pos;
    Py_ssize_t last;

    /* bring the slice bounds into range */
    if (start < 0) {
        start += target_len;
        if (start < 0)
            start = 0;
    }
    if (end > target_len)
        end = target_len;
    else if (end < 0) {
        end += target_len;
        if (end < 0)
            end = 0;
    }

    /* an empty pattern matches immediately at the scan origin */
    if (pattern_len == 0) {
        if (direction > 0)
            return start;
        return end;
    }

    /* last offset at which a full pattern still fits */
    last = end - pattern_len;

    if (direction < 0) {
        pos = last;
        while (pos >= start) {
            if (Py_STRING_MATCH(target, pos, pattern, pattern_len))
                return pos;
            pos--;
        }
    }
    else {
        pos = start;
        while (pos <= last) {
            if (Py_STRING_MATCH(target, pos, pattern, pattern_len))
                return pos;
            pos++;
        }
    }
    return -1;
}
Py_LOCAL_INLINE(Py_ssize_t)
countstring(const char *target, Py_ssize_t target_len,
            const char *pattern, Py_ssize_t pattern_len,
            Py_ssize_t start,
            Py_ssize_t end,
            int direction, Py_ssize_t maxcount)
{
    /* Count non-overlapping occurrences of pattern in target[start:end],
       stopping once maxcount matches have been seen.  A negative
       direction scans right-to-left, anything else left-to-right.
       start and end follow slice conventions. */
    Py_ssize_t hits = 0;
    Py_ssize_t pos;

    /* bring the slice bounds into range */
    if (start < 0) {
        start += target_len;
        if (start < 0)
            start = 0;
    }
    if (end > target_len)
        end = target_len;
    else if (end < 0) {
        end += target_len;
        if (end < 0)
            end = 0;
    }

    /* An empty pattern matches at every position; the answer is then the
       smaller of target_len+1 and maxcount.  The same path is taken when
       maxcount is zero. */
    if (pattern_len == 0 || maxcount == 0)
        return (target_len+1 < maxcount) ? target_len+1 : maxcount;

    /* from here on, end is the last offset where the pattern still fits */
    end -= pattern_len;

    if (direction < 0) {
        pos = end;
        while (pos >= start) {
            if (!Py_STRING_MATCH(target, pos, pattern, pattern_len)) {
                pos--;
                continue;
            }
            hits++;
            maxcount--;
            if (maxcount <= 0)
                break;
            /* jump over the match so occurrences never overlap */
            pos -= pattern_len;
        }
    }
    else {
        pos = start;
        while (pos <= end) {
            if (!Py_STRING_MATCH(target, pos, pattern, pattern_len)) {
                pos++;
                continue;
            }
            hits++;
            maxcount--;
            if (maxcount <= 0)
                break;
            /* jump over the match so occurrences never overlap */
            pos += pattern_len;
        }
    }
    return hits;
}
/* Algorithms for different cases of string replacement */
@ -2189,10 +1796,9 @@ replace_delete_substring(PyBytesObject *self,
self_len = PyBytes_GET_SIZE(self);
self_s = PyBytes_AS_STRING(self);
count = countstring(self_s, self_len,
from_s, from_len,
0, self_len, 1,
maxcount);
count = stringlib_count(self_s, self_len,
from_s, from_len,
maxcount);
if (count == 0) {
/* no matches */
@ -2211,9 +1817,9 @@ replace_delete_substring(PyBytesObject *self,
start = self_s;
end = self_s + self_len;
while (count-- > 0) {
offset = findstring(start, end-start,
from_s, from_len,
0, end-start, FORWARD);
offset = stringlib_find(start, end-start,
from_s, from_len,
0);
if (offset == -1)
break;
next = start + offset;
@ -2289,9 +1895,9 @@ replace_substring_in_place(PyBytesObject *self,
self_s = PyBytes_AS_STRING(self);
self_len = PyBytes_GET_SIZE(self);
offset = findstring(self_s, self_len,
from_s, from_len,
0, self_len, FORWARD);
offset = stringlib_find(self_s, self_len,
from_s, from_len,
0);
if (offset == -1) {
/* No matches; return the original string */
return return_self(self);
@ -2311,9 +1917,9 @@ replace_substring_in_place(PyBytesObject *self,
end = result_s + self_len;
while ( --maxcount > 0) {
offset = findstring(start, end-start,
from_s, from_len,
0, end-start, FORWARD);
offset = stringlib_find(start, end-start,
from_s, from_len,
0);
if (offset==-1)
break;
Py_MEMCPY(start+offset, to_s, from_len);
@ -2407,9 +2013,10 @@ replace_substring(PyBytesObject *self,
self_s = PyBytes_AS_STRING(self);
self_len = PyBytes_GET_SIZE(self);
count = countstring(self_s, self_len,
from_s, from_len,
0, self_len, FORWARD, maxcount);
count = stringlib_count(self_s, self_len,
from_s, from_len,
maxcount);
if (count == 0) {
/* no matches, return unchanged */
return return_self(self);
@ -2438,9 +2045,9 @@ replace_substring(PyBytesObject *self,
start = self_s;
end = self_s + self_len;
while (count-- > 0) {
offset = findstring(start, end-start,
from_s, from_len,
0, end-start, FORWARD);
offset = stringlib_find(start, end-start,
from_s, from_len,
0);
if (offset == -1)
break;
next = start+offset;
@ -2598,7 +2205,7 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
return -1;
str = PyBytes_AS_STRING(self);
bytes_adjust_indices(&start, &end, len);
ADJUST_INDICES(start, end, len);
if (direction < 0) {
/* startswith */
@ -2703,7 +2310,7 @@ bytes_endswith(PyBytesObject *self, PyObject *args)
PyDoc_STRVAR(decode__doc__,
"B.decode([encoding[, errors]]) -> str\n\
\n\
Decode S using the codec registered for encoding. encoding defaults\n\
Decode B using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
@ -2725,6 +2332,28 @@ bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
}
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
/* bytes.splitlines([keepends]): parse the optional integer flag and hand
   the object's buffer and size to the shared stringlib implementation.
   Returns NULL if argument parsing fails. */
static PyObject*
bytes_splitlines(PyObject *self, PyObject *args)
{
    int keepends = 0;
    const char *data;
    Py_ssize_t size;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = PyBytes_AS_STRING(self);
    size = PyBytes_GET_SIZE(self);
    return stringlib_splitlines(self, data, size, keepends);
}
PyDoc_STRVAR(fromhex_doc,
"bytes.fromhex(string) -> bytes\n\
\n\
@ -2857,7 +2486,7 @@ bytes_methods[] = {
{"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
{"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
{"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
{"splitlines", (PyCFunction)stringlib_splitlines, METH_VARARGS,
{"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
splitlines__doc__},
{"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
startswith__doc__},
@ -3239,7 +2868,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
* Python's regular ints.
* Return value: a new PyString*, or NULL if error.
* Return value: a new PyBytes*, or NULL if error.
* . *pbuf is set to point into it,
* *plen set to the # of chars following that.
* Caller must decref it when done using pbuf.

View File

@ -9,28 +9,22 @@
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len)
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{
Py_ssize_t count;
if (str_len < 0)
return 0; /* start > len(str) */
if (sub_len == 0)
return str_len + 1;
return (str_len < maxcount) ? str_len + 1 : maxcount;
count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT);
count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
if (count < 0)
count = 0; /* no match */
return 0; /* no match */
return count;
}
#endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View File

@ -107,4 +107,3 @@ stringlib_swapcase(PyObject *self)
STRINGLIB_LEN(self));
return newobj;
}

View File

@ -18,10 +18,13 @@
#define FAST_SEARCH 1
#define FAST_RSEARCH 2
/* Bloom-filter helpers: a character selects one bit of a long-sized mask
   through the low bits of its value.  The shifted constant is unsigned
   long so that bit positions at or beyond the width of int are well
   defined; shifting a plain int 1 that far is undefined behaviour. */
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (LONG_BIT - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (LONG_BIT - 1)))))
Py_LOCAL_INLINE(Py_ssize_t)
fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
const STRINGLIB_CHAR* p, Py_ssize_t m,
int mode)
Py_ssize_t maxcount, int mode)
{
long mask;
Py_ssize_t skip, count = 0;
@ -29,7 +32,7 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
w = n - m;
if (w < 0)
if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
return -1;
/* look for special cases */
@ -39,8 +42,11 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* use special case for 1-character strings */
if (mode == FAST_COUNT) {
for (i = 0; i < n; i++)
if (s[i] == p[0])
if (s[i] == p[0]) {
count++;
if (count == maxcount)
return maxcount;
}
return count;
} else if (mode == FAST_SEARCH) {
for (i = 0; i < n; i++)
@ -56,19 +62,20 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
mlast = m - 1;
skip = mlast - 1;
mask = 0;
if (mode != FAST_RSEARCH) {
/* create compressed boyer-moore delta 1 table */
/* process pattern[:-1] */
for (mask = i = 0; i < mlast; i++) {
mask |= (1 << (p[i] & 0x1F));
for (i = 0; i < mlast; i++) {
BLOOM_ADD(mask, p[i]);
if (p[i] == p[mlast])
skip = mlast - i - 1;
}
/* process pattern[-1] outside the loop */
mask |= (1 << (p[mlast] & 0x1F));
BLOOM_ADD(mask, p[mlast]);
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
@ -82,17 +89,19 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
if (mode != FAST_COUNT)
return i;
count++;
if (count == maxcount)
return maxcount;
i = i + mlast;
continue;
}
/* miss: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F))))
if (!BLOOM(mask, s[i+m]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
if (!(mask & (1 << (s[i+m] & 0x1F))))
if (!BLOOM(mask, s[i+m]))
i = i + m;
}
}
@ -101,10 +110,10 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* create compressed boyer-moore delta 1 table */
/* process pattern[0] outside the loop */
mask = (1 << (p[0] & 0x1F));
BLOOM_ADD(mask, p[0]);
/* process pattern[:0:-1] */
for (i = mlast; i > 0; i--) {
mask |= (1 << (p[i] & 0x1F));
BLOOM_ADD(mask, p[i]);
if (p[i] == p[0])
skip = i - 1;
}
@ -119,13 +128,13 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
/* got a match! */
return i;
/* miss: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F))))
if (!BLOOM(mask, s[i-1]))
i = i - m;
else
i = i - skip;
} else {
/* skip: check if previous character is part of pattern */
if (!(mask & (1 << (s[i-1] & 0x1F))))
if (!BLOOM(mask, s[i-1]))
i = i - m;
}
}
@ -137,10 +146,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
}
#endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View File

@ -19,7 +19,7 @@ stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0)
return offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH);
pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
if (pos >= 0)
pos += offset;
@ -39,7 +39,7 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
if (sub_len == 0)
return str_len + offset;
pos = fastsearch(str, str_len, sub, sub_len, FAST_RSEARCH);
pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
if (pos >= 0)
pos += offset;
@ -47,22 +47,27 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
/* Helper macro to fix up start/end slice values against a length `len`.

   - An end greater than len is clamped to len; a negative end has len
     added to it and is then floored at 0.
   - A negative start has len added to it and is then floored at 0; start
     is not clamped from above.

   The macro assigns to `start` and `end`, evaluates its arguments more
   than once, and expands to two bare `if` statements (it is not wrapped
   in do { } while (0)), so use it only as a standalone statement with
   plain variables as arguments. */
#define ADJUST_INDICES(start, end, len) \
if (end > len) \
end = len; \
else if (end < 0) { \
end += len; \
if (end < 0) \
end = 0; \
} \
if (start < 0) { \
start += len; \
if (start < 0) \
start = 0; \
}
Py_LOCAL_INLINE(Py_ssize_t)
stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
if (start < 0)
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
ADJUST_INDICES(start, end, str_len);
return stringlib_find(str + start, end - start, sub, sub_len, start);
}
@ -71,17 +76,7 @@ stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
Py_ssize_t start, Py_ssize_t end)
{
if (start < 0)
start += str_len;
if (start < 0)
start = 0;
if (end > str_len)
end = str_len;
if (end < 0)
end += str_len;
if (end < 0)
end = 0;
ADJUST_INDICES(start, end, str_len);
return stringlib_rfind(str + start, end - start, sub, sub_len, start);
}
@ -96,9 +91,9 @@ stringlib_contains_obj(PyObject* str, PyObject* sub)
) != -1;
}
#endif /* STRINGLIB_STR */
#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
#ifdef FROM_UNICODE
#if STRINGLIB_IS_UNICODE
/*
This function is a helper for the "find" family (find, rfind, index,
@ -146,13 +141,6 @@ _ParseTupleFinds (PyObject *args, PyObject **substring,
return 1;
}
#endif /* FROM_UNICODE */
#endif /* STRINGLIB_IS_UNICODE */
#endif /* STRINGLIB_FIND_H */
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View File

@ -8,33 +8,39 @@
#endif
Py_LOCAL_INLINE(PyObject*)
stringlib_partition(
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
)
stringlib_partition(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj,
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{
PyObject* out;
Py_ssize_t pos;
if (sep_len == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
return NULL;
}
out = PyTuple_New(3);
if (!out)
return NULL;
return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH);
pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
if (pos < 0) {
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
return out;
#if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
#else
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
#endif
return out;
}
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
@ -44,41 +50,47 @@ stringlib_partition(
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
if (PyErr_Occurred()) {
Py_DECREF(out);
return NULL;
Py_DECREF(out);
return NULL;
}
return out;
}
Py_LOCAL_INLINE(PyObject*)
stringlib_rpartition(
PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
)
stringlib_rpartition(PyObject* str_obj,
const STRINGLIB_CHAR* str, Py_ssize_t str_len,
PyObject* sep_obj,
const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
{
PyObject* out;
Py_ssize_t pos;
if (sep_len == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
return NULL;
}
out = PyTuple_New(3);
if (!out)
return NULL;
return NULL;
pos = fastsearch(str, str_len, sep, sep_len, FAST_RSEARCH);
pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
if (pos < 0) {
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
return out;
#if STRINGLIB_MUTABLE
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
#else
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif
return out;
}
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));
@ -88,18 +100,11 @@ stringlib_rpartition(
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));
if (PyErr_Occurred()) {
Py_DECREF(out);
return NULL;
Py_DECREF(out);
return NULL;
}
return out;
}
#endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

788
Objects/stringlib/split.h Normal file
View File

@ -0,0 +1,788 @@
/* stringlib: split implementation */
#ifndef STRINGLIB_SPLIT_H
#define STRINGLIB_SPLIT_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 items (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that any expression may be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Create the substring data[left:right] and append it to `list`.
   Relies on the caller's locals `sub` and `list` and jumps to the caller's
   `onError` label on failure.  Wrapped in do/while(0) so that it behaves
   as a single statement, including under an unbraced if/else. */
#define SPLIT_APPEND(data, left, right)                 \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (PyList_Append(list, sub)) {                 \
            Py_DECREF(sub);                             \
            goto onError;                               \
        }                                               \
        Py_DECREF(sub);                                 \
    } while (0)

/* Create the substring data[left:right] and store it as item number
   `count` of `list`, then increment `count`.  The first MAX_PREALLOC items
   are written straight into the slots of the list; later ones are
   appended.  Relies on the caller's locals `sub`, `list` and `count` and
   on the caller's `onError` label. */
#define SPLIT_ADD(data, left, right)                    \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (count < MAX_PREALLOC) {                     \
            PyList_SET_ITEM(list, count, sub);          \
        }                                               \
        else {                                          \
            if (PyList_Append(list, sub)) {             \
                Py_DECREF(sub);                         \
                goto onError;                           \
            }                                           \
            Py_DECREF(sub);                             \
        }                                               \
        count++;                                        \
    } while (0)

/* Always force the list to the expected size (the caller's `count`). */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
/* Split str on runs of whitespace, taking at most maxcount words from the
   left; whatever follows the last taken word (minus its leading
   whitespace) becomes one final item.  Returns a new list, or NULL after
   releasing the list if a substring cannot be created or stored.
   When STRINGLIB_MUTABLE is not defined and an exact-type str_obj holds
   no whitespace at all, str_obj itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_whitespace(PyObject* str_obj,
                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                           Py_ssize_t maxcount)
{
    Py_ssize_t cursor = 0;
    Py_ssize_t word_start = 0;
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        /* skip the whitespace in front of the next word */
        while (cursor < str_len && STRINGLIB_ISSPACE(str[cursor]))
            cursor++;
        if (cursor == str_len)
            break;

        /* str[cursor] is not whitespace: find where the word stops */
        word_start = cursor;
        for (cursor++; cursor < str_len; cursor++) {
            if (STRINGLIB_ISSPACE(str[cursor]))
                break;
        }
#ifndef STRINGLIB_MUTABLE
        if (word_start == 0 && cursor == str_len &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, word_start, cursor);
    }

    if (cursor < str_len) {
        /* Only reached when maxcount ran out: drop the whitespace that
           follows the last word and keep the remainder as one item. */
        while (cursor < str_len && STRINGLIB_ISSPACE(str[cursor]))
            cursor++;
        if (cursor != str_len) {
            SPLIT_ADD(str, cursor, str_len);
        }
    }

    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of the single character ch, scanning from the
   left and performing at most maxcount splits.  The text after the last
   split always becomes the final item.  Returns a new list, or NULL after
   releasing the list on failure.  When STRINGLIB_MUTABLE is not defined
   and no split happened on an exact-type str_obj, str_obj itself is
   stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_char(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR ch,
                     Py_ssize_t maxcount)
{
    Py_ssize_t field_start = 0;
    Py_ssize_t scan = 0;
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    while (scan < str_len && maxcount > 0) {
        maxcount--;
        /* advance to the next separator, if there is one */
        while (scan < str_len && str[scan] != ch)
            scan++;
        if (scan < str_len) {
            SPLIT_ADD(str, field_start, scan);
            scan++;
            field_start = scan;
        }
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* ch not in str_obj, so just use str_obj as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
        FIX_PREALLOC_SIZE(list);
        return list;
    }
#endif
    if (field_start <= str_len) {
        SPLIT_ADD(str, field_start, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of sep, searching from the left with
   fastsearch and performing at most maxcount splits.  An empty separator
   raises ValueError; a one-character separator is delegated to
   stringlib_split_char.  Returns a new list, or NULL on failure.  When
   STRINGLIB_MUTABLE is not defined and no split happened on an exact-type
   str_obj, str_obj itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split(PyObject* str_obj,
                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                Py_ssize_t maxcount)
{
    Py_ssize_t start = 0;
    Py_ssize_t hit;
    Py_ssize_t count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1)
        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        hit = fastsearch(str + start, str_len - start,
                         sep, sep_len, -1, FAST_SEARCH);
        if (hit < 0)
            break;
        /* fastsearch reports an offset relative to str + start */
        hit += start;
        SPLIT_ADD(str, start, hit);
        start = hit + sep_len;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
        FIX_PREALLOC_SIZE(list);
        return list;
    }
#endif
    SPLIT_ADD(str, start, str_len);
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str on runs of whitespace, taking at most maxcount words from the
   right; whatever precedes the last taken word (minus its trailing
   whitespace) becomes one more item.  Items are collected right-to-left
   and the list is reversed before it is returned.  Returns NULL after
   releasing the list on failure.  When STRINGLIB_MUTABLE is not defined
   and an exact-type str_obj holds no whitespace at all, str_obj itself is
   stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_whitespace(PyObject* str_obj,
                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                            Py_ssize_t maxcount)
{
    Py_ssize_t cursor = str_len - 1;
    Py_ssize_t word_last = str_len - 1;
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        /* skip the whitespace behind the next word */
        while (cursor >= 0 && STRINGLIB_ISSPACE(str[cursor]))
            cursor--;
        if (cursor < 0)
            break;

        /* str[cursor] is not whitespace: walk back to the word's start */
        word_last = cursor;
        for (cursor--; cursor >= 0; cursor--) {
            if (STRINGLIB_ISSPACE(str[cursor]))
                break;
        }
#ifndef STRINGLIB_MUTABLE
        if (word_last == str_len - 1 && cursor < 0 &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, cursor + 1, word_last + 1);
    }

    if (cursor >= 0) {
        /* Only reached when maxcount ran out: drop the whitespace that
           precedes the last word and keep the head of the string. */
        while (cursor >= 0 && STRINGLIB_ISSPACE(str[cursor]))
            cursor--;
        if (cursor >= 0) {
            SPLIT_ADD(str, 0, cursor + 1);
        }
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of the single character ch, scanning from the
   right and performing at most maxcount splits.  Items are collected
   right-to-left and the list is reversed before it is returned.  Returns
   NULL after releasing the list on failure.  When STRINGLIB_MUTABLE is
   not defined and no split happened on an exact-type str_obj, str_obj
   itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_char(PyObject* str_obj,
                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR ch,
                      Py_ssize_t maxcount)
{
    Py_ssize_t scan = str_len - 1;
    Py_ssize_t field_end = str_len;     /* exclusive end of current field */
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    while (scan >= 0 && maxcount > 0) {
        maxcount--;
        /* walk back to the previous separator, if there is one */
        while (scan >= 0 && str[scan] != ch)
            scan--;
        if (scan >= 0) {
            SPLIT_ADD(str, scan + 1, field_end);
            field_end = scan;
            scan--;
        }
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* ch not in str_obj, so just use str_obj as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    if (field_end >= 0) {
        SPLIT_ADD(str, 0, field_end);
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of sep, searching from the right with
   fastsearch and performing at most maxcount splits.  An empty separator
   raises ValueError; a one-character separator is delegated to
   stringlib_rsplit_char.  Items are collected right-to-left and the list
   is reversed before it is returned.  Returns NULL on failure.  When
   STRINGLIB_MUTABLE is not defined and no split happened on an exact-type
   str_obj, str_obj itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit(PyObject* str_obj,
                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                 Py_ssize_t maxcount)
{
    Py_ssize_t tail = str_len;  /* exclusive end of the unsplit prefix */
    Py_ssize_t hit;
    Py_ssize_t count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1)
        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        hit = fastsearch(str, tail, sep, sep_len, -1, FAST_RSEARCH);
        if (hit < 0)
            break;
        SPLIT_ADD(str, hit + sep_len, tail);
        tail = hit;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        SPLIT_ADD(str, 0, tail);
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Break str into lines at the characters accepted by
   STRINGLIB_ISLINEBREAK, treating "\r\n" as a single line break.  The
   line break is kept in each item only when keepends is true.  Returns a
   new list, or NULL after releasing the list on failure.  When
   STRINGLIB_MUTABLE is not defined and the first line of an exact-type
   str_obj already spans the whole string, str_obj itself is appended
   rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_splitlines(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     int keepends)
{
    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines.  The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case.  A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain. */
    Py_ssize_t cursor = 0;
    Py_ssize_t line_start = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    while (cursor < str_len) {
        Py_ssize_t line_end;

        /* advance to the end of the line's content */
        while (cursor < str_len && !STRINGLIB_ISLINEBREAK(str[cursor]))
            cursor++;
        line_end = cursor;

        if (cursor < str_len) {
            /* step over the line break, reading CRLF as one break */
            int is_crlf = (str[cursor] == '\r' &&
                           cursor + 1 < str_len &&
                           str[cursor + 1] == '\n');
            cursor += is_crlf ? 2 : 1;
            if (keepends)
                line_end = cursor;
        }
#ifndef STRINGLIB_MUTABLE
        if (line_start == 0 && line_end == str_len &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No linebreak in str_obj, so just use it as list[0] */
            if (PyList_Append(list, str_obj))
                goto onError;
            break;
        }
#endif
        SPLIT_APPEND(str, line_start, line_end);
        line_start = cursor;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
#endif
/* stringlib: split implementation */
#ifndef STRINGLIB_SPLIT_H
#define STRINGLIB_SPLIT_H
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g., "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */
#define MAX_PREALLOC 12

/* Number of list slots to preallocate for at most `maxsplit` splits:
   maxsplit+1 items (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that any expression may be passed. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Create the substring data[left:right] and append it to `list`.
   Relies on the caller's locals `sub` and `list` and jumps to the caller's
   `onError` label on failure.  Wrapped in do/while(0) so that it behaves
   as a single statement, including under an unbraced if/else. */
#define SPLIT_APPEND(data, left, right)                 \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (PyList_Append(list, sub)) {                 \
            Py_DECREF(sub);                             \
            goto onError;                               \
        }                                               \
        Py_DECREF(sub);                                 \
    } while (0)

/* Create the substring data[left:right] and store it as item number
   `count` of `list`, then increment `count`.  The first MAX_PREALLOC items
   are written straight into the slots of the list; later ones are
   appended.  Relies on the caller's locals `sub`, `list` and `count` and
   on the caller's `onError` label. */
#define SPLIT_ADD(data, left, right)                    \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (count < MAX_PREALLOC) {                     \
            PyList_SET_ITEM(list, count, sub);          \
        }                                               \
        else {                                          \
            if (PyList_Append(list, sub)) {             \
                Py_DECREF(sub);                         \
                goto onError;                           \
            }                                           \
            Py_DECREF(sub);                             \
        }                                               \
        count++;                                        \
    } while (0)

/* Always force the list to the expected size (the caller's `count`). */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
/* Split str on runs of whitespace, taking at most maxcount words from the
   left; whatever follows the last taken word (minus its leading
   whitespace) becomes one final item.  Returns a new list, or NULL after
   releasing the list if a substring cannot be created or stored.
   When STRINGLIB_MUTABLE is not defined and an exact-type str_obj holds
   no whitespace at all, str_obj itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_whitespace(PyObject* str_obj,
                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                           Py_ssize_t maxcount)
{
    Py_ssize_t cursor = 0;
    Py_ssize_t word_start = 0;
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        /* skip the whitespace in front of the next word */
        while (cursor < str_len && STRINGLIB_ISSPACE(str[cursor]))
            cursor++;
        if (cursor == str_len)
            break;

        /* str[cursor] is not whitespace: find where the word stops */
        word_start = cursor;
        for (cursor++; cursor < str_len; cursor++) {
            if (STRINGLIB_ISSPACE(str[cursor]))
                break;
        }
#ifndef STRINGLIB_MUTABLE
        if (word_start == 0 && cursor == str_len &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, word_start, cursor);
    }

    if (cursor < str_len) {
        /* Only reached when maxcount ran out: drop the whitespace that
           follows the last word and keep the remainder as one item. */
        while (cursor < str_len && STRINGLIB_ISSPACE(str[cursor]))
            cursor++;
        if (cursor != str_len) {
            SPLIT_ADD(str, cursor, str_len);
        }
    }

    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of the single character ch, scanning from the
   left and performing at most maxcount splits.  The text after the last
   split always becomes the final item.  Returns a new list, or NULL after
   releasing the list on failure.  When STRINGLIB_MUTABLE is not defined
   and no split happened on an exact-type str_obj, str_obj itself is
   stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_char(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR ch,
                     Py_ssize_t maxcount)
{
    Py_ssize_t field_start = 0;
    Py_ssize_t scan = 0;
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    while (scan < str_len && maxcount > 0) {
        maxcount--;
        /* advance to the next separator, if there is one */
        while (scan < str_len && str[scan] != ch)
            scan++;
        if (scan < str_len) {
            SPLIT_ADD(str, field_start, scan);
            scan++;
            field_start = scan;
        }
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* ch not in str_obj, so just use str_obj as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
        FIX_PREALLOC_SIZE(list);
        return list;
    }
#endif
    if (field_start <= str_len) {
        SPLIT_ADD(str, field_start, str_len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of sep, searching from the left with
   fastsearch and performing at most maxcount splits.  An empty separator
   raises ValueError; a one-character separator is delegated to
   stringlib_split_char.  Returns a new list, or NULL on failure.  When
   STRINGLIB_MUTABLE is not defined and no split happened on an exact-type
   str_obj, str_obj itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split(PyObject* str_obj,
                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                Py_ssize_t maxcount)
{
    Py_ssize_t start = 0;
    Py_ssize_t hit;
    Py_ssize_t count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1)
        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        hit = fastsearch(str + start, str_len - start,
                         sep, sep_len, -1, FAST_SEARCH);
        if (hit < 0)
            break;
        /* fastsearch reports an offset relative to str + start */
        hit += start;
        SPLIT_ADD(str, start, hit);
        start = hit + sep_len;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
        FIX_PREALLOC_SIZE(list);
        return list;
    }
#endif
    SPLIT_ADD(str, start, str_len);
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str on runs of whitespace, taking at most maxcount words from the
   right; whatever precedes the last taken word (minus its trailing
   whitespace) becomes one more item.  Items are collected right-to-left
   and the list is reversed before it is returned.  Returns NULL after
   releasing the list on failure.  When STRINGLIB_MUTABLE is not defined
   and an exact-type str_obj holds no whitespace at all, str_obj itself is
   stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_whitespace(PyObject* str_obj,
                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                            Py_ssize_t maxcount)
{
    Py_ssize_t cursor = str_len - 1;
    Py_ssize_t word_last = str_len - 1;
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        /* skip the whitespace behind the next word */
        while (cursor >= 0 && STRINGLIB_ISSPACE(str[cursor]))
            cursor--;
        if (cursor < 0)
            break;

        /* str[cursor] is not whitespace: walk back to the word's start */
        word_last = cursor;
        for (cursor--; cursor >= 0; cursor--) {
            if (STRINGLIB_ISSPACE(str[cursor]))
                break;
        }
#ifndef STRINGLIB_MUTABLE
        if (word_last == str_len - 1 && cursor < 0 &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No whitespace in str_obj, so just use it as list[0] */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, cursor + 1, word_last + 1);
    }

    if (cursor >= 0) {
        /* Only reached when maxcount ran out: drop the whitespace that
           precedes the last word and keep the head of the string. */
        while (cursor >= 0 && STRINGLIB_ISSPACE(str[cursor]))
            cursor--;
        if (cursor >= 0) {
            SPLIT_ADD(str, 0, cursor + 1);
        }
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of the single character ch, scanning from the
   right and performing at most maxcount splits.  Items are collected
   right-to-left and the list is reversed before it is returned.  Returns
   NULL after releasing the list on failure.  When STRINGLIB_MUTABLE is
   not defined and no split happened on an exact-type str_obj, str_obj
   itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_char(PyObject* str_obj,
                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR ch,
                      Py_ssize_t maxcount)
{
    Py_ssize_t scan = str_len - 1;
    Py_ssize_t field_end = str_len;     /* exclusive end of current field */
    Py_ssize_t count = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    while (scan >= 0 && maxcount > 0) {
        maxcount--;
        /* walk back to the previous separator, if there is one */
        while (scan >= 0 && str[scan] != ch)
            scan--;
        if (scan >= 0) {
            SPLIT_ADD(str, scan + 1, field_end);
            field_end = scan;
            scan--;
        }
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* ch not in str_obj, so just use str_obj as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    if (field_end >= 0) {
        SPLIT_ADD(str, 0, field_end);
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split str at occurrences of sep, searching from the right with
   fastsearch and performing at most maxcount splits.  An empty separator
   raises ValueError; a one-character separator is delegated to
   stringlib_rsplit_char.  Items are collected right-to-left and the list
   is reversed before it is returned.  Returns NULL on failure.  When
   STRINGLIB_MUTABLE is not defined and no split happened on an exact-type
   str_obj, str_obj itself is stored rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit(PyObject* str_obj,
                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                 Py_ssize_t maxcount)
{
    Py_ssize_t tail = str_len;  /* exclusive end of the unsplit prefix */
    Py_ssize_t hit;
    Py_ssize_t count = 0;
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1)
        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL)
        return NULL;

    for (; maxcount > 0; maxcount--) {
        hit = fastsearch(str, tail, sep, sep_len, -1, FAST_RSEARCH);
        if (hit < 0)
            break;
        SPLIT_ADD(str, hit + sep_len, tail);
        tail = hit;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No match in str_obj, so just use it as list[0] */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    } else
#endif
    {
        SPLIT_ADD(str, 0, tail);
    }

    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Break str into lines at the characters accepted by
   STRINGLIB_ISLINEBREAK, treating "\r\n" as a single line break.  The
   line break is kept in each item only when keepends is true.  Returns a
   new list, or NULL after releasing the list on failure.  When
   STRINGLIB_MUTABLE is not defined and the first line of an exact-type
   str_obj already spans the whole string, str_obj itself is appended
   rather than a copy. */
Py_LOCAL_INLINE(PyObject *)
stringlib_splitlines(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     int keepends)
{
    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines.  The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case.  A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain. */
    Py_ssize_t cursor = 0;
    Py_ssize_t line_start = 0;
    PyObject *sub;
    PyObject *list;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    while (cursor < str_len) {
        Py_ssize_t line_end;

        /* advance to the end of the line's content */
        while (cursor < str_len && !STRINGLIB_ISLINEBREAK(str[cursor]))
            cursor++;
        line_end = cursor;

        if (cursor < str_len) {
            /* step over the line break, reading CRLF as one break */
            int is_crlf = (str[cursor] == '\r' &&
                           cursor + 1 < str_len &&
                           str[cursor + 1] == '\n');
            cursor += is_crlf ? 2 : 1;
            if (keepends)
                line_end = cursor;
        }
#ifndef STRINGLIB_MUTABLE
        if (line_start == 0 && line_end == str_len &&
            STRINGLIB_CHECK_EXACT(str_obj)) {
            /* No linebreak in str_obj, so just use it as list[0] */
            if (PyList_Append(list, str_obj))
                goto onError;
            break;
        }
#endif
        SPLIT_APPEND(str, line_start, line_end);
        line_start = cursor;
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
#endif

View File

@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S"
#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
#define STRINGLIB_TOUPPER Py_TOUPPER

View File

@ -1,13 +1,6 @@
/* NOTE: this API is -ONLY- for use with single byte character strings. */
/* Do not use it with Unicode. */
#include "bytes_methods.h"
#ifndef STRINGLIB_MUTABLE
#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#define STRINGLIB_MUTABLE 0
#endif
/* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */
@ -269,87 +262,3 @@ stringlib_zfill(PyObject *self, PyObject *args)
return (PyObject*) s;
}
#define _STRINGLIB_SPLIT_APPEND(data, left, right) \
str = STRINGLIB_NEW((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
\n\
Return a list of the lines in B, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");
/* B.splitlines([keepends]): return a list of the lines in self, breaking
   at '\n', '\r' and "\r\n" (read as one line break).  Line breaks are kept
   in the items only when keepends is given and true.  Returns NULL if the
   arguments cannot be parsed or if building the list fails. */
static PyObject*
stringlib_splitlines(PyObject *self, PyObject *args)
{
    register Py_ssize_t i;
    register Py_ssize_t j;
    Py_ssize_t len;
    int keepends = 0;
    PyObject *list;
    PyObject *str;      /* scratch variable used by _STRINGLIB_SPLIT_APPEND */
    char *data;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = STRINGLIB_STR(self);
    len = STRINGLIB_LEN(self);

    /* This does not use the preallocated list because splitlines is
       usually run with hundreds of newlines.  The overhead of
       switching between PyList_SET_ITEM and append causes about a
       2-3% slowdown for that common case.  A smarter implementation
       could move the if check out, so the SET_ITEMs are done first
       and the appends only done when the prealloc buffer is full.
       That's too much work for little gain.*/
    list = PyList_New(0);
    if (!list)
        return NULL;    /* nothing to release yet */

    for (i = j = 0; i < len; ) {
        Py_ssize_t eol;

        /* Find a line and append it */
        while (i < len && data[i] != '\n' && data[i] != '\r')
            i++;

        /* Skip the line break reading CRLF as one line break */
        eol = i;
        if (i < len) {
            if (data[i] == '\r' && i + 1 < len &&
                data[i+1] == '\n')
                i += 2;
            else
                i++;
            if (keepends)
                eol = i;
        }
        _STRINGLIB_SPLIT_APPEND(data, j, eol);
        j = i;
    }
    /* The loop only exits once i >= len, and j == i after every
       iteration, so there is never an unterminated tail left to append
       here. */
    return list;

 onError:
    Py_DECREF(list);
    return NULL;
}
#undef _STRINGLIB_SPLIT_APPEND

View File

@ -11,6 +11,8 @@
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER

View File

@ -210,7 +210,8 @@ PyUnicode_GetMax(void)
static BLOOM_MASK bloom_linebreak;
#define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F))))
/* One-bit-per-character mask helpers.  The bit index is the character
   value reduced modulo LONG_BIT, so it can be as large as LONG_BIT - 1.
   The shifted constant is therefore 1UL, not 1: shifting a plain int by
   that many positions is undefined wherever long is wider than int.

   BLOOM_ADD(mask, ch) sets the bit for ch in mask (mask must be an lvalue).
   BLOOM(mask, ch) is non-zero when the bit for ch is set in mask. */
#define BLOOM_ADD(mask, ch) ((mask) |= (1UL << ((ch) & (LONG_BIT - 1))))
#define BLOOM(mask, ch)     ((mask) & (1UL << ((ch) & (LONG_BIT - 1))))
#define BLOOM_LINEBREAK(ch) \
((ch) < 128U ? ascii_linebreak[(ch)] : \
@ -225,7 +226,7 @@ Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
mask = 0;
for (i = 0; i < len; i++)
mask |= (1 << (ptr[i] & 0x1F));
BLOOM_ADD(mask, ptr[i]);
return mask;
}
@ -5873,28 +5874,30 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
#include "stringlib/unicodedefs.h"
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
/* Include _ParseTupleFinds from find.h */
#define FROM_UNICODE
#include "stringlib/find.h"
#include "stringlib/partition.h"
#include "stringlib/split.h"
#define _Py_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
#define _Py_InsertThousandsGroupingLocale _PyUnicode_InsertThousandsGroupingLocale
#include "stringlib/localeutil.h"
/* helper macro to fixup start/end slice values

   Works on the variables `start` and `end` of the expanding scope and
   reads the sequence length from (obj)->length:
     - a negative start has the length added and is then clamped to 0;
     - an end beyond the length is clamped to the length;
     - a negative end has the length added and is then clamped to 0.

   The body is wrapped in do { } while (0) so the macro expands to exactly
   one statement and stays correct under an unbraced if/else. */
#define FIX_START_END(obj)                      \
    do {                                        \
        if (start < 0)                          \
            start += (obj)->length;             \
        if (start < 0)                          \
            start = 0;                          \
        if (end > (obj)->length)                \
            end = (obj)->length;                \
        if (end < 0)                            \
            end += (obj)->length;               \
        if (end < 0)                            \
            end = 0;                            \
    } while (0)
/* Normalise a (start, end) slice pair against a sequence of length len.

   end:   values above len are clamped to len; negative values have len
          added and are then clamped to 0.
   start: negative values have len added and are then clamped to 0.

   start and end must be lvalues; len is evaluated more than once.  Every
   argument is parenthesised and the body is wrapped in do { } while (0),
   so the macro is a single statement that is safe under an unbraced
   if/else and with expression arguments. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Py_ssize_t PyUnicode_Count(PyObject *str,
PyObject *substr,
@ -5914,10 +5917,10 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
return -1;
}
FIX_START_END(str_obj);
ADJUST_INDICES(start, end, str_obj->length);
result = stringlib_count(
str_obj->str + start, end - start, sub_obj->str, sub_obj->length
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
PY_SSIZE_T_MAX
);
Py_DECREF(sub_obj);
@ -5972,8 +5975,7 @@ int tailmatch(PyUnicodeObject *self,
if (substring->length == 0)
return 1;
FIX_START_END(self);
ADJUST_INDICES(start, end, self->length);
end -= substring->length;
if (end < start)
return 0;
@ -6314,305 +6316,40 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
return u;
}
/* Append the slice data[left:right] to `list` as a new unicode object.

   The slice is built with PyUnicode_FromUnicode and passed to
   PyList_Append(); the reference held in `str` is released afterwards on
   both the success and the failure path.  If either call fails, control
   jumps to the `onError` label.

   The expanding function must provide the variables `str` and `list` and
   an `onError` label.  The body is wrapped in do { } while (0) so that one
   invocation is exactly one statement, even under an unbraced if/else. */
#define SPLIT_APPEND(data, left, right)                                 \
    do {                                                                \
        str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \
        if (!str)                                                       \
            goto onError;                                               \
        if (PyList_Append(list, str)) {                                 \
            Py_DECREF(str);                                             \
            goto onError;                                               \
        }                                                               \
        Py_DECREF(str);                                                 \
    } while (0)
/* Split self on runs of whitespace (as classified by Py_UNICODE_ISSPACE)
   and append every non-empty token to list.  At most maxcount tokens are
   split off; once that budget is used up, everything from the start of
   the next token to the end of the string is appended as one final item.
   Returns list on success.  On failure the reference to list is dropped
   and NULL is returned. */
static
PyObject *split_whitespace(PyUnicodeObject *self,
PyObject *list,
Py_ssize_t maxcount)
/* NOTE(review): the next line appears to be the signature of
   PyUnicode_Splitlines from another revision, interleaved into this
   definition by the diff rendering -- confirm against the real file. */
PyObject *PyUnicode_Splitlines(PyObject *string, int keepends)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len = self->length;
PyObject *str;
register const Py_UNICODE *buf = self->str;
for (i = j = 0; i < len; ) {
/* find a token */
while (i < len && Py_UNICODE_ISSPACE(buf[i]))
i++;
/* j marks the first character of the token, i scans to its end */
j = i;
while (i < len && !Py_UNICODE_ISSPACE(buf[i]))
i++;
if (j < i) {
/* split budget exhausted: leave j at the token start so the tail
   append below picks up the remainder */
if (maxcount-- <= 0)
break;
SPLIT_APPEND(buf, j, i);
/* skip the separator run so trailing whitespace yields no item */
while (i < len && Py_UNICODE_ISSPACE(buf[i]))
i++;
j = i;
}
}
/* append the unsplit remainder, if any */
if (j < len) {
SPLIT_APPEND(buf, j, len);
}
return list;
onError:
Py_DECREF(list);
return NULL;
}
/* Split string into a list of lines.

   The argument is first converted with PyUnicode_FromObject(); NULL is
   returned if that fails, and the converted reference is released again
   before returning on every later path.  A "\r\n" pair is treated as a
   single line break, and line breaks are kept in the items only when
   keepends is non-zero.

   NOTE(review): this span contains both a hand-written scanning loop and
   a call to stringlib_splitlines() that overwrites `list`; they look like
   two revisions of the same body interleaved by the diff rendering --
   confirm which one the real file contains. */
PyObject *PyUnicode_Splitlines(PyObject *string,
int keepends)
{
register Py_ssize_t i;
register Py_ssize_t j;
Py_ssize_t len;
PyObject *list;
PyObject *str;
Py_UNICODE *data;
string = PyUnicode_FromObject(string);
if (string == NULL)
return NULL;
data = PyUnicode_AS_UNICODE(string);
len = PyUnicode_GET_SIZE(string);
list = PyList_New(0);
if (!list)
goto onError;
/* i scans forward; j is the start of the line currently being collected */
for (i = j = 0; i < len; ) {
Py_ssize_t eol;
/* Find a line and append it */
while (i < len && !BLOOM_LINEBREAK(data[i]))
i++;
/* Skip the line break reading CRLF as one line break */
eol = i;
if (i < len) {
if (data[i] == '\r' && i + 1 < len &&
data[i+1] == '\n')
i += 2;
else
i++;
/* with keepends the terminator stays part of the appended line */
if (keepends)
eol = i;
}
SPLIT_APPEND(data, j, eol);
j = i;
}
if (j < len) {
SPLIT_APPEND(data, j, len);
}
list = stringlib_splitlines(
(PyObject*) string, PyUnicode_AS_UNICODE(string),
PyUnicode_GET_SIZE(string), keepends);
Py_DECREF(string);
return list;
onError:
/* list may still be NULL here, hence Py_XDECREF */
Py_XDECREF(list);
Py_DECREF(string);
return NULL;
}
/* Split self at every occurrence of the single character ch, appending
   the pieces (which may be empty) to list from left to right.  At most
   maxcount splits are made; the text after the last split is appended as
   the final piece.  Returns list, or NULL after releasing list when an
   append fails. */
static
PyObject *split_char(PyUnicodeObject *self,
                     PyObject *list,
                     Py_UNICODE ch,
                     Py_ssize_t maxcount)
{
    const Py_UNICODE *text = self->str;
    const Py_ssize_t size = self->length;
    Py_ssize_t piece_start = 0;     /* first character of the pending piece */
    Py_ssize_t pos = 0;             /* scan cursor */
    PyObject *str;                  /* scratch slot used by SPLIT_APPEND */

    while (pos < size) {
        if (text[pos] != ch) {
            pos++;
            continue;
        }
        /* Separator found; stop splitting once the budget is spent. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(text, piece_start, pos);
        pos++;
        piece_start = pos;
    }

    /* Emit the tail; it is empty when the text ends with a separator. */
    if (piece_start <= size) {
        SPLIT_APPEND(text, piece_start, size);
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split self at every non-overlapping occurrence of substring, scanning
   left to right, and append the pieces (which may be empty) to list.  At
   most maxcount splits are made; the text after the last split is
   appended as the final piece.  Returns list, or NULL after releasing
   list when an append fails. */
static
PyObject *split_substring(PyUnicodeObject *self,
                          PyObject *list,
                          PyUnicodeObject *substring,
                          Py_ssize_t maxcount)
{
    const Py_ssize_t size = self->length;
    const Py_ssize_t sep_size = substring->length;
    const Py_ssize_t last_candidate = size - sep_size;  /* last index a match can start at */
    Py_ssize_t piece_start = 0;     /* first character of the pending piece */
    Py_ssize_t pos = 0;             /* scan cursor */
    PyObject *str;                  /* scratch slot used by SPLIT_APPEND */

    while (pos <= last_candidate) {
        if (!Py_UNICODE_MATCH(self, pos, substring)) {
            pos++;
            continue;
        }
        /* Separator found; stop splitting once the budget is spent. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(self->str, piece_start, pos);
        pos += sep_size;
        piece_start = pos;
    }

    /* Emit the tail; it is empty when the text ends with the separator. */
    if (piece_start <= size) {
        SPLIT_APPEND(self->str, piece_start, size);
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split self on runs of whitespace (Py_UNICODE_ISSPACE), working from the
   right end of the string.  Tokens are appended to list as they are found
   (right to left) and the list is reversed at the end so the result reads
   left to right.  At most maxcount tokens are split off; whatever remains
   to the left is appended as one item.  Returns list, or NULL after
   releasing list when an append or the reversal fails. */
static
PyObject *rsplit_whitespace(PyUnicodeObject *self,
                            PyObject *list,
                            Py_ssize_t maxcount)
{
    const Py_UNICODE *text = self->str;
    Py_ssize_t pos = self->length - 1;  /* scan cursor, moves right to left */
    Py_ssize_t token_last = pos;        /* last character of the pending token */
    PyObject *str;                      /* scratch slot used by SPLIT_APPEND */

    while (pos >= 0) {
        /* Drop whitespace to the right of the token. */
        while (pos >= 0 && Py_UNICODE_ISSPACE(text[pos]))
            pos--;
        token_last = pos;

        /* Walk left across the token itself. */
        while (pos >= 0 && !Py_UNICODE_ISSPACE(text[pos]))
            pos--;

        if (token_last <= pos)
            continue;           /* no token was found in this round */

        /* Budget spent: leave token_last in place for the tail append. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(text, pos + 1, token_last + 1);

        /* Skip the separator run so leading whitespace yields no item. */
        while (pos >= 0 && Py_UNICODE_ISSPACE(text[pos]))
            pos--;
        token_last = pos;
    }

    /* Append the unsplit remainder on the left, if any. */
    if (token_last >= 0) {
        SPLIT_APPEND(text, 0, token_last + 1);
    }

    /* Pieces were collected right to left; restore natural order. */
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split self at occurrences of the single character ch, scanning from the
   right.  Pieces (which may be empty) are appended to list right to left
   and the list is reversed at the end.  At most maxcount splits are made;
   the text left of the last split is appended as one piece.  Returns
   list, or NULL after releasing list when an append or the reversal
   fails. */
static
PyObject *rsplit_char(PyUnicodeObject *self,
                      PyObject *list,
                      Py_UNICODE ch,
                      Py_ssize_t maxcount)
{
    const Py_UNICODE *text = self->str;
    Py_ssize_t pos = self->length - 1;  /* scan cursor, moves right to left */
    Py_ssize_t piece_last = pos;        /* last character of the pending piece */
    PyObject *str;                      /* scratch slot used by SPLIT_APPEND */

    while (pos >= 0) {
        if (text[pos] != ch) {
            pos--;
            continue;
        }
        /* Separator found; stop splitting once the budget is spent. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(text, pos + 1, piece_last + 1);
        pos--;
        piece_last = pos;
    }

    /* Emit the head; it is empty when the text starts with a separator
       (piece_last is then -1). */
    if (piece_last >= -1) {
        SPLIT_APPEND(text, 0, piece_last + 1);
    }

    /* Pieces were collected right to left; restore natural order. */
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
/* Split self at non-overlapping occurrences of substring, scanning from
   the right.  Pieces (which may be empty) are appended to list right to
   left and the list is reversed at the end.  At most maxcount splits are
   made; the text left of the last split is appended as one piece.
   Returns list, or NULL after releasing list when an append or the
   reversal fails. */
static
PyObject *rsplit_substring(PyUnicodeObject *self,
                           PyObject *list,
                           PyUnicodeObject *substring,
                           Py_ssize_t maxcount)
{
    const Py_ssize_t sep_size = substring->length;
    Py_ssize_t piece_end = self->length;        /* one past the pending piece */
    Py_ssize_t pos = self->length - sep_size;   /* candidate match position */
    PyObject *str;                              /* scratch slot used by SPLIT_APPEND */

    while (pos >= 0) {
        if (!Py_UNICODE_MATCH(self, pos, substring)) {
            pos--;
            continue;
        }
        /* Separator found; stop splitting once the budget is spent. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(self->str, pos + sep_size, piece_end);
        piece_end = pos;
        pos -= sep_size;
    }

    /* Emit the head; it is empty when the text starts with the separator. */
    if (piece_end >= 0) {
        SPLIT_APPEND(self->str, 0, piece_end);
    }

    /* Pieces were collected right to left; restore natural order. */
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
#undef SPLIT_APPEND
/* Split self around substring and return the pieces as a list.

   A negative maxcount is replaced by PY_SSIZE_T_MAX.  A NULL substring
   selects whitespace splitting; otherwise the string is split on the
   given separator.

   NOTE(review): this span contains two dispatch schemes -- calls to
   split_whitespace/split_char/split_substring on a locally created list,
   and calls to stringlib_split_whitespace/stringlib_split -- which look
   like two revisions interleaved by the diff rendering.  Confirm which
   one the real file contains. */
static
PyObject *split(PyUnicodeObject *self,
PyUnicodeObject *substring,
Py_ssize_t maxcount)
{
PyObject *list;
/* a negative count is turned into the largest possible split budget */
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
list = PyList_New(0);
if (!list)
return NULL;
if (substring == NULL)
return split_whitespace(self,list,maxcount);
return stringlib_split_whitespace(
(PyObject*) self, self->str, self->length, maxcount
);
else if (substring->length == 1)
return split_char(self,list,substring->str[0],maxcount);
else if (substring->length == 0) {
/* this branch rejects an empty separator with ValueError and
   releases the list created above */
Py_DECREF(list);
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
}
else
return split_substring(self,list,substring,maxcount);
return stringlib_split(
(PyObject*) self, self->str, self->length,
substring->str, substring->length,
maxcount
);
}
static
@ -6620,28 +6357,19 @@ PyObject *rsplit(PyUnicodeObject *self,
PyUnicodeObject *substring,
Py_ssize_t maxcount)
{
PyObject *list;
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
list = PyList_New(0);
if (!list)
return NULL;
if (substring == NULL)
return rsplit_whitespace(self,list,maxcount);
return stringlib_rsplit_whitespace(
(PyObject*) self, self->str, self->length, maxcount
);
else if (substring->length == 1)
return rsplit_char(self,list,substring->str[0],maxcount);
else if (substring->length == 0) {
Py_DECREF(list);
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
}
else
return rsplit_substring(self,list,substring,maxcount);
return stringlib_rsplit(
(PyObject*) self, self->str, self->length,
substring->str, substring->length,
maxcount
);
}
static
@ -6654,9 +6382,13 @@ PyObject *replace(PyUnicodeObject *self,
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
else if (maxcount == 0 || self->length == 0)
goto nothing;
if (str1->length == str2->length) {
/* same length */
if (str1->length == 0)
goto nothing;
Py_ssize_t i;
if (str1->length == 1) {
/* replace characters */
@ -6676,8 +6408,8 @@ PyObject *replace(PyUnicodeObject *self,
u->str[i] = u2;
}
} else {
i = fastsearch(
self->str, self->length, str1->str, str1->length, FAST_SEARCH
i = stringlib_find(
self->str, self->length, str1->str, str1->length, 0
);
if (i < 0)
goto nothing;
@ -6685,14 +6417,20 @@ PyObject *replace(PyUnicodeObject *self,
if (!u)
return NULL;
Py_UNICODE_COPY(u->str, self->str, self->length);
while (i <= self->length - str1->length)
if (Py_UNICODE_MATCH(self, i, str1)) {
if (--maxcount < 0)
break;
Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
i += str1->length;
} else
i++;
/* change everything in-place, starting with this one */
Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
i += str1->length;
while ( --maxcount > 0) {
i = stringlib_find(self->str+i, self->length-i,
str1->str, str1->length,
i);
if (i == -1)
break;
Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
i += str1->length;
}
}
} else {
@ -6701,9 +6439,8 @@ PyObject *replace(PyUnicodeObject *self,
Py_UNICODE *p;
/* replace strings */
n = stringlib_count(self->str, self->length, str1->str, str1->length);
if (n > maxcount)
n = maxcount;
n = stringlib_count(self->str, self->length, str1->str, str1->length,
maxcount);
if (n == 0)
goto nothing;
/* new_size = self->length + n * (str2->length - str1->length)); */
@ -6733,15 +6470,12 @@ PyObject *replace(PyUnicodeObject *self,
if (str1->length > 0) {
while (n-- > 0) {
/* look for next match */
j = i;
while (j <= e) {
if (Py_UNICODE_MATCH(self, j, str1))
break;
j++;
}
if (j > i) {
if (j > e)
break;
j = stringlib_find(self->str+i, self->length-i,
str1->str, str1->length,
i);
if (j == -1)
break;
else if (j > i) {
/* copy unchanged part [i:j] */
Py_UNICODE_COPY(p, self->str+i, j-i);
p += j - i;
@ -7192,11 +6926,11 @@ unicode_count(PyUnicodeObject *self, PyObject *args)
if (substring == NULL)
return NULL;
FIX_START_END(self);
ADJUST_INDICES(start, end, self->length);
result = PyLong_FromSsize_t(
stringlib_count(self->str + start, end - start,
substring->str, substring->length)
substring->str, substring->length,
PY_SSIZE_T_MAX)
);
Py_DECREF(substring);
@ -10066,11 +9800,3 @@ Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
#ifdef __cplusplus
}
#endif
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View File

@ -1490,6 +1490,10 @@
RelativePath="..\..\Objects\sliceobject.c"
>
</File>
<File
RelativePath="..\..\Objects\stringlib\split.h"
>
</File>
<File
RelativePath="..\..\Objects\structseq.c"
>

View File

@ -1495,6 +1495,10 @@
RelativePath="..\Objects\sliceobject.c"
>
</File>
<File
RelativePath="..\Objects\stringlib\split.h"
>
</File>
<File
RelativePath="..\Objects\structseq.c"
>