mirror of https://github.com/python/cpython
needforspeed: more stringlib refactoring
This commit is contained in:
parent
d532ba0746
commit
2d23d5bf2e
|
@ -3,3 +3,32 @@ possibly other modules, in a not too distant future).
|
|||
|
||||
the stuff in here is included into relevant places; see the individual
|
||||
source files for details.
|
||||
|
||||
--------------------------------------------------------------------
|
||||
the following defines are used by the different modules:
|
||||
|
||||
STRINGLIB_CHAR
|
||||
|
||||
the type used to hold a character (char or Py_UNICODE)
|
||||
|
||||
STRINGLIB_EMPTY
|
||||
|
||||
a PyObject representing the empty string
|
||||
|
||||
int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
|
||||
|
||||
compares two strings. returns 0 if they match, and non-zero if not.
|
||||
|
||||
Py_ssize_t STRINGLIB_LEN(PyObject*)
|
||||
|
||||
returns the length of the given string object (which must be of the
|
||||
right type)
|
||||
|
||||
PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
|
||||
|
||||
creates a new string object
|
||||
|
||||
STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
|
||||
|
||||
returns the pointer to the character data for the given string
|
||||
object (which must be of the right type)
|
||||
|
|
|
@ -48,6 +48,39 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
|
|||
return pos;
|
||||
}
|
||||
|
||||
#ifdef STRINGLIB_STR
|
||||
|
||||
/* Object-level wrapper around stringlib_find(): searches for `sub` in the
   slice str[start:end].

   Character data is obtained with STRINGLIB_STR and the length of `sub`
   with STRINGLIB_LEN, so both objects must be of the type those macros
   expect.  `start` and `end` are used exactly as given -- no clamping or
   negative-index handling is done here.

   Returns whatever stringlib_find() returns. */
Py_LOCAL(Py_ssize_t)
|
||||
stringlib_find_obj(PyObject* str, PyObject* sub,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
return stringlib_find(
|
||||
/* search window: begins `start` characters in, spans end - start */
STRINGLIB_STR(str) + start, end - start,
|
||||
/* `start` is also forwarded as the last argument; presumably an offset
   that makes the result relative to the beginning of `str` -- confirm
   against stringlib_find() */
STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start
|
||||
);
|
||||
}
|
||||
|
||||
/* Containment test on string objects: runs stringlib_find() for `sub` over
   the whole of `str` (full STRINGLIB_LEN(str), offset 0).

   Evaluates to 1 when the search result is anything other than -1, and to
   0 otherwise.  Both objects must be of the type STRINGLIB_STR and
   STRINGLIB_LEN expect. */
Py_LOCAL(int)
|
||||
stringlib_contains_obj(PyObject* str, PyObject* sub)
|
||||
{
|
||||
return stringlib_find(
|
||||
STRINGLIB_STR(str), STRINGLIB_LEN(str),
|
||||
STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0
|
||||
/* -1 is treated as the "not found" result */
) != -1;
|
||||
}
|
||||
|
||||
/* Object-level wrapper around stringlib_rfind(): searches for `sub` in the
   slice str[start:end].

   Character data is obtained with STRINGLIB_STR and the length of `sub`
   with STRINGLIB_LEN, so both objects must be of the type those macros
   expect.  `start` and `end` are used exactly as given -- no clamping or
   negative-index handling is done here.

   Returns whatever stringlib_rfind() returns. */
Py_LOCAL(Py_ssize_t)
|
||||
stringlib_rfind_obj(PyObject* str, PyObject* sub,
|
||||
Py_ssize_t start, Py_ssize_t end)
|
||||
{
|
||||
return stringlib_rfind(
|
||||
/* search window: begins `start` characters in, spans end - start */
STRINGLIB_STR(str) + start, end - start,
|
||||
/* `start` is also forwarded as the last argument; presumably an offset
   that makes the result relative to the beginning of `str` -- confirm
   against stringlib_rfind() */
STRINGLIB_STR(sub), STRINGLIB_LEN(sub), start
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
|
|
@ -690,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/* object api */
|
||||
|
||||
static Py_ssize_t
|
||||
string_getsize(register PyObject *op)
|
||||
{
|
||||
|
@ -765,22 +768,23 @@ PyString_AsStringAndSize(register PyObject *obj,
|
|||
}
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/* stringlib components */
|
||||
/* Methods */
|
||||
|
||||
#define STRINGLIB_CHAR char
|
||||
|
||||
#define STRINGLIB_NEW PyString_FromStringAndSize
|
||||
#define STRINGLIB_CMP memcmp
|
||||
#define STRINGLIB_LEN PyString_GET_SIZE
|
||||
#define STRINGLIB_NEW PyString_FromStringAndSize
|
||||
#define STRINGLIB_STR PyString_AS_STRING
|
||||
|
||||
#define STRINGLIB_EMPTY nullstring
|
||||
|
||||
#include "stringlib/fastsearch.h"
|
||||
|
||||
#include "stringlib/count.h"
|
||||
#include "stringlib/find.h"
|
||||
#include "stringlib/partition.h"
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/* Methods */
|
||||
|
||||
static int
|
||||
string_print(PyStringObject *op, FILE *fp, int flags)
|
||||
|
@ -1048,49 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i,
|
|||
}
|
||||
|
||||
static int
|
||||
string_contains(PyObject *a, PyObject *el)
|
||||
string_contains(PyObject *str_obj, PyObject *sub_obj)
|
||||
{
|
||||
char *s = PyString_AS_STRING(a);
|
||||
const char *sub = PyString_AS_STRING(el);
|
||||
Py_ssize_t len_sub = PyString_GET_SIZE(el);
|
||||
Py_ssize_t pos;
|
||||
|
||||
if (!PyString_CheckExact(el)) {
|
||||
if (!PyString_CheckExact(sub_obj)) {
|
||||
#ifdef Py_USING_UNICODE
|
||||
if (PyUnicode_Check(el))
|
||||
return PyUnicode_Contains(a, el);
|
||||
if (PyUnicode_Check(sub_obj))
|
||||
return PyUnicode_Contains(str_obj, sub_obj);
|
||||
#endif
|
||||
if (!PyString_Check(el)) {
|
||||
if (!PyString_Check(sub_obj)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"'in <string>' requires string as left operand");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (len_sub == 0)
|
||||
return 1;
|
||||
|
||||
pos = fastsearch(
|
||||
s, PyString_GET_SIZE(a),
|
||||
sub, len_sub, FAST_SEARCH
|
||||
);
|
||||
|
||||
return (pos != -1);
|
||||
return stringlib_contains_obj(str_obj, sub_obj);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
string_item(PyStringObject *a, register Py_ssize_t i)
|
||||
{
|
||||
char pchar;
|
||||
PyObject *v;
|
||||
char *pchar;
|
||||
if (i < 0 || i >= a->ob_size) {
|
||||
PyErr_SetString(PyExc_IndexError, "string index out of range");
|
||||
return NULL;
|
||||
}
|
||||
pchar = a->ob_sval + i;
|
||||
v = (PyObject *)characters[*pchar & UCHAR_MAX];
|
||||
pchar = a->ob_sval[i];
|
||||
v = (PyObject *)characters[pchar & UCHAR_MAX];
|
||||
if (v == NULL)
|
||||
v = PyString_FromStringAndSize(pchar, 1);
|
||||
v = PyString_FromStringAndSize(&pchar, 1);
|
||||
else {
|
||||
#ifdef COUNT_ALLOCS
|
||||
one_strings++;
|
||||
|
@ -1166,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
|
|||
int
|
||||
_PyString_Eq(PyObject *o1, PyObject *o2)
|
||||
{
|
||||
PyStringObject *a, *b;
|
||||
a = (PyStringObject*)o1;
|
||||
b = (PyStringObject*)o2;
|
||||
PyStringObject *a = (PyStringObject*) o1;
|
||||
PyStringObject *b = (PyStringObject*) o2;
|
||||
return a->ob_size == b->ob_size
|
||||
&& *a->ob_sval == *b->ob_sval
|
||||
&& memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
|
||||
|
@ -2264,43 +2254,37 @@ as in slice notation.");
|
|||
static PyObject *
|
||||
string_count(PyStringObject *self, PyObject *args)
|
||||
{
|
||||
const char *s = PyString_AS_STRING(self), *sub;
|
||||
Py_ssize_t len = PyString_GET_SIZE(self), n;
|
||||
Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
|
||||
Py_ssize_t m, r;
|
||||
PyObject *subobj;
|
||||
PyObject *sub_obj;
|
||||
const char *str = PyString_AS_STRING(self), *sub;
|
||||
Py_ssize_t sub_len;
|
||||
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
|
||||
_PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
|
||||
if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
|
||||
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
|
||||
return NULL;
|
||||
|
||||
if (PyString_Check(subobj)) {
|
||||
sub = PyString_AS_STRING(subobj);
|
||||
n = PyString_GET_SIZE(subobj);
|
||||
if (PyString_Check(sub_obj)) {
|
||||
sub = PyString_AS_STRING(sub_obj);
|
||||
sub_len = PyString_GET_SIZE(sub_obj);
|
||||
}
|
||||
#ifdef Py_USING_UNICODE
|
||||
else if (PyUnicode_Check(subobj)) {
|
||||
else if (PyUnicode_Check(sub_obj)) {
|
||||
Py_ssize_t count;
|
||||
count = PyUnicode_Count((PyObject *)self, subobj, i, last);
|
||||
count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
|
||||
if (count == -1)
|
||||
return NULL;
|
||||
else
|
||||
return PyInt_FromLong((long) count);
|
||||
return PyInt_FromSsize_t(count);
|
||||
}
|
||||
#endif
|
||||
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
|
||||
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
|
||||
return NULL;
|
||||
|
||||
string_adjust_indices(&i, &last, len);
|
||||
string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
|
||||
|
||||
m = last + 1 - n;
|
||||
if (n == 0)
|
||||
return PyInt_FromSsize_t(m-i);
|
||||
|
||||
r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
|
||||
if (r < 0)
|
||||
r = 0; /* no match */
|
||||
return PyInt_FromSsize_t(r);
|
||||
return PyInt_FromSsize_t(
|
||||
stringlib_count(str + start, end - start, sub, sub_len)
|
||||
);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(swapcase__doc__,
|
||||
|
@ -2477,7 +2461,7 @@ return_self(PyStringObject *self)
|
|||
}
|
||||
|
||||
Py_LOCAL(Py_ssize_t)
|
||||
countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
|
||||
countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
|
||||
{
|
||||
Py_ssize_t count=0;
|
||||
char *start=target;
|
||||
|
@ -2580,7 +2564,7 @@ countstring(char *target, Py_ssize_t target_len,
|
|||
}
|
||||
|
||||
|
||||
/* Algorithms for difference cases of string replacement */
|
||||
/* Algorithms for different cases of string replacement */
|
||||
|
||||
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
|
||||
Py_LOCAL(PyStringObject *)
|
||||
|
|
|
@ -3857,7 +3857,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
|
|||
|
||||
#define STRINGLIB_CHAR Py_UNICODE
|
||||
|
||||
#define STRINGLIB_LEN PyUnicode_GET_SIZE
|
||||
#define STRINGLIB_NEW PyUnicode_FromUnicode
|
||||
#define STRINGLIB_STR PyUnicode_AS_UNICODE
|
||||
|
||||
Py_LOCAL(int)
|
||||
STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
|
||||
|
@ -3918,67 +3920,33 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Locate `substring` inside self[start:end].

   direction > 0 searches forward with fastsearch(); any other value scans
   backward using Py_UNICODE_MATCH.

   Returns the index of the match measured from the beginning of `self`,
   or -1 when there is no match.  An empty substring matches immediately:
   the result is `start` for a forward search and `end` for a backward
   one. */
static Py_ssize_t findstring(PyUnicodeObject *self,
|
||||
PyUnicodeObject *substring,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end,
|
||||
int direction)
|
||||
{
|
||||
/* NOTE(review): FIX_START_END is defined outside this view; presumably it
   normalizes start/end against the length of `self` -- confirm */
FIX_START_END(self);
|
||||
|
||||
if (substring->length == 0)
|
||||
return (direction > 0) ? start : end;
|
||||
|
||||
if (direction > 0) {
|
||||
Py_ssize_t pos = fastsearch(
|
||||
PyUnicode_AS_UNICODE(self) + start, end - start,
|
||||
substring->str, substring->length, FAST_SEARCH
|
||||
);
|
||||
if (pos >= 0)
|
||||
/* pos is relative to the searched window; shift it back by start */
return pos + start;
|
||||
} else {
|
||||
/* last position at which a full-length match could still begin */
end -= substring->length;
|
||||
for (; end >= start; end--)
|
||||
if (Py_UNICODE_MATCH(self, end, substring))
|
||||
return end;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t PyUnicode_Find(PyObject *str,
|
||||
PyObject *substr,
|
||||
PyObject *sub,
|
||||
Py_ssize_t start,
|
||||
Py_ssize_t end,
|
||||
int direction)
|
||||
{
|
||||
Py_ssize_t result;
|
||||
PyUnicodeObject* str_obj;
|
||||
PyUnicodeObject* sub_obj;
|
||||
|
||||
str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
|
||||
if (!str_obj)
|
||||
str = PyUnicode_FromObject(str);
|
||||
if (!str)
|
||||
return -2;
|
||||
sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr);
|
||||
if (!sub_obj) {
|
||||
Py_DECREF(str_obj);
|
||||
sub = PyUnicode_FromObject(sub);
|
||||
if (!sub) {
|
||||
Py_DECREF(str);
|
||||
return -2;
|
||||
}
|
||||
|
||||
FIX_START_END(str_obj);
|
||||
FIX_START_END((PyUnicodeObject*) str);
|
||||
|
||||
if (direction > 0)
|
||||
result = stringlib_find(
|
||||
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
|
||||
start
|
||||
);
|
||||
result = stringlib_find_obj(str, sub, start, end);
|
||||
else
|
||||
result = stringlib_rfind(
|
||||
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
|
||||
start
|
||||
);
|
||||
result = stringlib_rfind_obj(str, sub, start, end);
|
||||
|
||||
Py_DECREF(str);
|
||||
Py_DECREF(sub);
|
||||
|
||||
Py_DECREF(str_obj);
|
||||
Py_DECREF(sub_obj);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -5046,39 +5014,29 @@ onError:
|
|||
int PyUnicode_Contains(PyObject *container,
|
||||
PyObject *element)
|
||||
{
|
||||
PyUnicodeObject *u, *v;
|
||||
Py_ssize_t size;
|
||||
Py_ssize_t pos;
|
||||
PyObject *str, *sub;
|
||||
int result;
|
||||
|
||||
/* Coerce the two arguments */
|
||||
v = (PyUnicodeObject *) PyUnicode_FromObject(element);
|
||||
if (!v) {
|
||||
sub = PyUnicode_FromObject(element);
|
||||
if (!sub) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"'in <string>' requires string as left operand");
|
||||
return -1;
|
||||
}
|
||||
|
||||
u = (PyUnicodeObject *) PyUnicode_FromObject(container);
|
||||
if (!u) {
|
||||
Py_DECREF(v);
|
||||
str = PyUnicode_FromObject(container);
|
||||
if (!str) {
|
||||
Py_DECREF(sub);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size = PyUnicode_GET_SIZE(v);
|
||||
if (!size) {
|
||||
pos = 0;
|
||||
goto done;
|
||||
}
|
||||
result = stringlib_contains_obj(str, sub);
|
||||
|
||||
pos = fastsearch(
|
||||
PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u),
|
||||
PyUnicode_AS_UNICODE(v), size, FAST_SEARCH
|
||||
);
|
||||
Py_DECREF(str);
|
||||
Py_DECREF(sub);
|
||||
|
||||
done:
|
||||
Py_DECREF(u);
|
||||
Py_DECREF(v);
|
||||
return (pos != -1);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Concat to string or Unicode object giving a new Unicode object. */
|
||||
|
@ -5305,23 +5263,26 @@ Return -1 on failure.");
|
|||
static PyObject *
|
||||
unicode_find(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
PyUnicodeObject *substring;
|
||||
PyObject *substring;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||
PyObject *result;
|
||||
Py_ssize_t result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
|
||||
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
|
||||
return NULL;
|
||||
substring = (PyUnicodeObject *)PyUnicode_FromObject(
|
||||
(PyObject *)substring);
|
||||
if (substring == NULL)
|
||||
|
||||
substring = PyUnicode_FromObject(substring);
|
||||
if (!substring)
|
||||
return NULL;
|
||||
|
||||
result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1));
|
||||
FIX_START_END(self);
|
||||
|
||||
result = stringlib_find_obj((PyObject*) self, substring, start, end);
|
||||
|
||||
Py_DECREF(substring);
|
||||
return result;
|
||||
|
||||
return PyInt_FromSsize_t(result);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -5371,7 +5332,7 @@ static PyObject *
|
|||
unicode_index(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
Py_ssize_t result;
|
||||
PyUnicodeObject *substring;
|
||||
PyObject *substring;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||
|
||||
|
@ -5379,18 +5340,21 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
|
|||
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
|
||||
return NULL;
|
||||
|
||||
substring = (PyUnicodeObject *)PyUnicode_FromObject(
|
||||
(PyObject *)substring);
|
||||
if (substring == NULL)
|
||||
substring = PyUnicode_FromObject(substring);
|
||||
if (!substring)
|
||||
return NULL;
|
||||
|
||||
result = findstring(self, substring, start, end, 1);
|
||||
FIX_START_END(self);
|
||||
|
||||
result = stringlib_find_obj((PyObject*) self, substring, start, end);
|
||||
|
||||
Py_DECREF(substring);
|
||||
|
||||
if (result < 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "substring not found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyInt_FromSsize_t(result);
|
||||
}
|
||||
|
||||
|
@ -6038,23 +6002,25 @@ Return -1 on failure.");
|
|||
static PyObject *
|
||||
unicode_rfind(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
PyUnicodeObject *substring;
|
||||
PyObject *substring;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||
PyObject *result;
|
||||
Py_ssize_t result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
|
||||
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
|
||||
return NULL;
|
||||
substring = (PyUnicodeObject *)PyUnicode_FromObject(
|
||||
(PyObject *)substring);
|
||||
if (substring == NULL)
|
||||
substring = PyUnicode_FromObject(substring);
|
||||
if (!substring)
|
||||
return NULL;
|
||||
|
||||
result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1));
|
||||
FIX_START_END(self);
|
||||
|
||||
result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
|
||||
|
||||
Py_DECREF(substring);
|
||||
return result;
|
||||
|
||||
return PyInt_FromSsize_t(result);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(rindex__doc__,
|
||||
|
@ -6065,22 +6031,24 @@ Like S.rfind() but raise ValueError when the substring is not found.");
|
|||
static PyObject *
|
||||
unicode_rindex(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
Py_ssize_t result;
|
||||
PyUnicodeObject *substring;
|
||||
PyObject *substring;
|
||||
Py_ssize_t start = 0;
|
||||
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||
Py_ssize_t result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
|
||||
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
|
||||
return NULL;
|
||||
substring = (PyUnicodeObject *)PyUnicode_FromObject(
|
||||
(PyObject *)substring);
|
||||
if (substring == NULL)
|
||||
substring = PyUnicode_FromObject(substring);
|
||||
if (!substring)
|
||||
return NULL;
|
||||
|
||||
result = findstring(self, substring, start, end, -1);
|
||||
FIX_START_END(self);
|
||||
|
||||
result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
|
||||
|
||||
Py_DECREF(substring);
|
||||
|
||||
if (result < 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "substring not found");
|
||||
return NULL;
|
||||
|
|
Loading…
Reference in New Issue