needforspeed: more stringlib refactoring

This commit is contained in:
Fredrik Lundh 2006-05-27 10:05:10 +00:00
parent d532ba0746
commit 2d23d5bf2e
4 changed files with 161 additions and 147 deletions

View File

@ -3,3 +3,32 @@ possibly other modules, in a not too distant future).
the stuff in here is included into relevant places; see the individual the stuff in here is included into relevant places; see the individual
source files for details. source files for details.
--------------------------------------------------------------------
the following defines are used by the different modules:
STRINGLIB_CHAR
the type used to hold a character (char or Py_UNICODE)
STRINGLIB_EMPTY
a PyObject representing the empty string
int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
compares two strings. returns 0 if they match, and non-zero if not.
Py_ssize_t STRINGLIB_LEN(PyObject*)
returns the length of the given string object (which must be of the
right type)
PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
creates a new string object
STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
returns the pointer to the character data for the given string
object (which must be of the right type)

View File

@ -48,6 +48,39 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos; return pos;
} }
#ifdef STRINGLIB_STR
/* Object-level wrapper around stringlib_find(): searches the slice
   str[start:end] for sub.  Both objects must already be of the string
   type selected by the STRINGLIB_* macros.  start and end are used
   exactly as given; no clamping or negative-index handling is done
   here.  start is also forwarded as the final argument of
   stringlib_find() (presumably the offset applied to a hit so that the
   result indexes into the whole string -- confirm against
   stringlib_find). */
Py_LOCAL(Py_ssize_t)
stringlib_find_obj(PyObject* str, PyObject* sub,
                   Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t window_len = end - start;
    Py_ssize_t sub_len = STRINGLIB_LEN(sub);

    return stringlib_find(STRINGLIB_STR(str) + start, window_len,
                          STRINGLIB_STR(sub), sub_len, start);
}
/* Containment test on string objects: runs stringlib_find() over the
   whole of str (offset 0) and reports whether the result differs from
   -1.  Returns 1 if it does, 0 otherwise.  Both objects must already be
   of the string type selected by the STRINGLIB_* macros. */
Py_LOCAL(int)
stringlib_contains_obj(PyObject* str, PyObject* sub)
{
    if (stringlib_find(STRINGLIB_STR(str), STRINGLIB_LEN(str),
                       STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0) == -1)
        return 0;

    return 1;
}
/* Object-level wrapper around stringlib_rfind(): same calling
   convention as stringlib_find_obj, but delegates to the reverse
   search.  The slice str[start:end] is passed as the text to search,
   start and end are used exactly as given (no clamping here), and
   start is forwarded as the final argument of stringlib_rfind()
   (presumably a result offset -- confirm against stringlib_rfind). */
Py_LOCAL(Py_ssize_t)
stringlib_rfind_obj(PyObject* str, PyObject* sub,
                    Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t slice_len = end - start;
    Py_ssize_t needle_len = STRINGLIB_LEN(sub);

    return stringlib_rfind(STRINGLIB_STR(str) + start, slice_len,
                           STRINGLIB_STR(sub), needle_len, start);
}
#endif
#endif #endif
/* /*

View File

@ -690,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s,
return NULL; return NULL;
} }
/* -------------------------------------------------------------------- */
/* object api */
static Py_ssize_t static Py_ssize_t
string_getsize(register PyObject *op) string_getsize(register PyObject *op)
{ {
@ -765,22 +768,23 @@ PyString_AsStringAndSize(register PyObject *obj,
} }
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
/* stringlib components */ /* Methods */
#define STRINGLIB_CHAR char #define STRINGLIB_CHAR char
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_CMP memcmp #define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING
#define STRINGLIB_EMPTY nullstring #define STRINGLIB_EMPTY nullstring
#include "stringlib/fastsearch.h" #include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/partition.h" #include "stringlib/partition.h"
/* -------------------------------------------------------------------- */
/* Methods */
static int static int
string_print(PyStringObject *op, FILE *fp, int flags) string_print(PyStringObject *op, FILE *fp, int flags)
@ -1048,49 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i,
} }
static int static int
string_contains(PyObject *a, PyObject *el) string_contains(PyObject *str_obj, PyObject *sub_obj)
{ {
char *s = PyString_AS_STRING(a); if (!PyString_CheckExact(sub_obj)) {
const char *sub = PyString_AS_STRING(el);
Py_ssize_t len_sub = PyString_GET_SIZE(el);
Py_ssize_t pos;
if (!PyString_CheckExact(el)) {
#ifdef Py_USING_UNICODE #ifdef Py_USING_UNICODE
if (PyUnicode_Check(el)) if (PyUnicode_Check(sub_obj))
return PyUnicode_Contains(a, el); return PyUnicode_Contains(str_obj, sub_obj);
#endif #endif
if (!PyString_Check(el)) { if (!PyString_Check(sub_obj)) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand"); "'in <string>' requires string as left operand");
return -1; return -1;
} }
} }
if (len_sub == 0) return stringlib_contains_obj(str_obj, sub_obj);
return 1;
pos = fastsearch(
s, PyString_GET_SIZE(a),
sub, len_sub, FAST_SEARCH
);
return (pos != -1);
} }
static PyObject * static PyObject *
string_item(PyStringObject *a, register Py_ssize_t i) string_item(PyStringObject *a, register Py_ssize_t i)
{ {
char pchar;
PyObject *v; PyObject *v;
char *pchar;
if (i < 0 || i >= a->ob_size) { if (i < 0 || i >= a->ob_size) {
PyErr_SetString(PyExc_IndexError, "string index out of range"); PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL; return NULL;
} }
pchar = a->ob_sval + i; pchar = a->ob_sval[i];
v = (PyObject *)characters[*pchar & UCHAR_MAX]; v = (PyObject *)characters[pchar & UCHAR_MAX];
if (v == NULL) if (v == NULL)
v = PyString_FromStringAndSize(pchar, 1); v = PyString_FromStringAndSize(&pchar, 1);
else { else {
#ifdef COUNT_ALLOCS #ifdef COUNT_ALLOCS
one_strings++; one_strings++;
@ -1166,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
int int
_PyString_Eq(PyObject *o1, PyObject *o2) _PyString_Eq(PyObject *o1, PyObject *o2)
{ {
PyStringObject *a, *b; PyStringObject *a = (PyStringObject*) o1;
a = (PyStringObject*)o1; PyStringObject *b = (PyStringObject*) o2;
b = (PyStringObject*)o2;
return a->ob_size == b->ob_size return a->ob_size == b->ob_size
&& *a->ob_sval == *b->ob_sval && *a->ob_sval == *b->ob_sval
&& memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0; && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
@ -2264,43 +2254,37 @@ as in slice notation.");
static PyObject * static PyObject *
string_count(PyStringObject *self, PyObject *args) string_count(PyStringObject *self, PyObject *args)
{ {
const char *s = PyString_AS_STRING(self), *sub; PyObject *sub_obj;
Py_ssize_t len = PyString_GET_SIZE(self), n; const char *str = PyString_AS_STRING(self), *sub;
Py_ssize_t i = 0, last = PY_SSIZE_T_MAX; Py_ssize_t sub_len;
Py_ssize_t m, r; Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
PyObject *subobj;
if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj, if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
_PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last)) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL; return NULL;
if (PyString_Check(subobj)) { if (PyString_Check(sub_obj)) {
sub = PyString_AS_STRING(subobj); sub = PyString_AS_STRING(sub_obj);
n = PyString_GET_SIZE(subobj); sub_len = PyString_GET_SIZE(sub_obj);
} }
#ifdef Py_USING_UNICODE #ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj)) { else if (PyUnicode_Check(sub_obj)) {
Py_ssize_t count; Py_ssize_t count;
count = PyUnicode_Count((PyObject *)self, subobj, i, last); count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
if (count == -1) if (count == -1)
return NULL; return NULL;
else else
return PyInt_FromLong((long) count); return PyInt_FromSsize_t(count);
} }
#endif #endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n)) else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL; return NULL;
string_adjust_indices(&i, &last, len); string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
m = last + 1 - n; return PyInt_FromSsize_t(
if (n == 0) stringlib_count(str + start, end - start, sub, sub_len)
return PyInt_FromSsize_t(m-i); );
r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
if (r < 0)
r = 0; /* no match */
return PyInt_FromSsize_t(r);
} }
PyDoc_STRVAR(swapcase__doc__, PyDoc_STRVAR(swapcase__doc__,
@ -2477,7 +2461,7 @@ return_self(PyStringObject *self)
} }
Py_LOCAL(Py_ssize_t) Py_LOCAL(Py_ssize_t)
countchar(char *target, int target_len, char c, Py_ssize_t maxcount) countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
{ {
Py_ssize_t count=0; Py_ssize_t count=0;
char *start=target; char *start=target;
@ -2580,7 +2564,7 @@ countstring(char *target, Py_ssize_t target_len,
} }
/* Algorithms for difference cases of string replacement */ /* Algorithms for different cases of string replacement */
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Py_LOCAL(PyStringObject *) Py_LOCAL(PyStringObject *)

View File

@ -3857,7 +3857,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
#define STRINGLIB_CHAR Py_UNICODE #define STRINGLIB_CHAR Py_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode #define STRINGLIB_NEW PyUnicode_FromUnicode
#define STRINGLIB_STR PyUnicode_AS_UNICODE
Py_LOCAL(int) Py_LOCAL(int)
STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len) STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
@ -3918,67 +3920,33 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
return result; return result;
} }
static Py_ssize_t findstring(PyUnicodeObject *self,
PyUnicodeObject *substring,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{
FIX_START_END(self);
if (substring->length == 0)
return (direction > 0) ? start : end;
if (direction > 0) {
Py_ssize_t pos = fastsearch(
PyUnicode_AS_UNICODE(self) + start, end - start,
substring->str, substring->length, FAST_SEARCH
);
if (pos >= 0)
return pos + start;
} else {
end -= substring->length;
for (; end >= start; end--)
if (Py_UNICODE_MATCH(self, end, substring))
return end;
}
return -1;
}
Py_ssize_t PyUnicode_Find(PyObject *str, Py_ssize_t PyUnicode_Find(PyObject *str,
PyObject *substr, PyObject *sub,
Py_ssize_t start, Py_ssize_t start,
Py_ssize_t end, Py_ssize_t end,
int direction) int direction)
{ {
Py_ssize_t result; Py_ssize_t result;
PyUnicodeObject* str_obj;
PyUnicodeObject* sub_obj;
str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str); str = PyUnicode_FromObject(str);
if (!str_obj) if (!str)
return -2; return -2;
sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr); sub = PyUnicode_FromObject(sub);
if (!sub_obj) { if (!sub) {
Py_DECREF(str_obj); Py_DECREF(str);
return -2; return -2;
} }
FIX_START_END(str_obj); FIX_START_END((PyUnicodeObject*) str);
if (direction > 0) if (direction > 0)
result = stringlib_find( result = stringlib_find_obj(str, sub, start, end);
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
start
);
else else
result = stringlib_rfind( result = stringlib_rfind_obj(str, sub, start, end);
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
start Py_DECREF(str);
); Py_DECREF(sub);
Py_DECREF(str_obj);
Py_DECREF(sub_obj);
return result; return result;
} }
@ -5046,39 +5014,29 @@ onError:
int PyUnicode_Contains(PyObject *container, int PyUnicode_Contains(PyObject *container,
PyObject *element) PyObject *element)
{ {
PyUnicodeObject *u, *v; PyObject *str, *sub;
Py_ssize_t size; int result;
Py_ssize_t pos;
/* Coerce the two arguments */ /* Coerce the two arguments */
v = (PyUnicodeObject *) PyUnicode_FromObject(element); sub = PyUnicode_FromObject(element);
if (!v) { if (!sub) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand"); "'in <string>' requires string as left operand");
return -1; return -1;
} }
u = (PyUnicodeObject *) PyUnicode_FromObject(container); str = PyUnicode_FromObject(container);
if (!u) { if (!str) {
Py_DECREF(v); Py_DECREF(sub);
return -1; return -1;
} }
size = PyUnicode_GET_SIZE(v); result = stringlib_contains_obj(str, sub);
if (!size) {
pos = 0;
goto done;
}
pos = fastsearch( Py_DECREF(str);
PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u), Py_DECREF(sub);
PyUnicode_AS_UNICODE(v), size, FAST_SEARCH
);
done: return result;
Py_DECREF(u);
Py_DECREF(v);
return (pos != -1);
} }
/* Concat to string or Unicode object giving a new Unicode object. */ /* Concat to string or Unicode object giving a new Unicode object. */
@ -5305,23 +5263,26 @@ Return -1 on failure.");
static PyObject * static PyObject *
unicode_find(PyUnicodeObject *self, PyObject *args) unicode_find(PyUnicodeObject *self, PyObject *args)
{ {
PyUnicodeObject *substring; PyObject *substring;
Py_ssize_t start = 0; Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX; Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result; Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring, if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL; return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring); substring = PyUnicode_FromObject(substring);
if (substring == NULL) if (!substring)
return NULL; return NULL;
result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1)); FIX_START_END(self);
result = stringlib_find_obj((PyObject*) self, substring, start, end);
Py_DECREF(substring); Py_DECREF(substring);
return result;
return PyInt_FromSsize_t(result);
} }
static PyObject * static PyObject *
@ -5371,7 +5332,7 @@ static PyObject *
unicode_index(PyUnicodeObject *self, PyObject *args) unicode_index(PyUnicodeObject *self, PyObject *args)
{ {
Py_ssize_t result; Py_ssize_t result;
PyUnicodeObject *substring; PyObject *substring;
Py_ssize_t start = 0; Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX; Py_ssize_t end = PY_SSIZE_T_MAX;
@ -5379,18 +5340,21 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL; return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject( substring = PyUnicode_FromObject(substring);
(PyObject *)substring); if (!substring)
if (substring == NULL)
return NULL; return NULL;
result = findstring(self, substring, start, end, 1); FIX_START_END(self);
result = stringlib_find_obj((PyObject*) self, substring, start, end);
Py_DECREF(substring); Py_DECREF(substring);
if (result < 0) { if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found"); PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL; return NULL;
} }
return PyInt_FromSsize_t(result); return PyInt_FromSsize_t(result);
} }
@ -6038,23 +6002,25 @@ Return -1 on failure.");
static PyObject * static PyObject *
unicode_rfind(PyUnicodeObject *self, PyObject *args) unicode_rfind(PyUnicodeObject *self, PyObject *args)
{ {
PyUnicodeObject *substring; PyObject *substring;
Py_ssize_t start = 0; Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX; Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result; Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring, if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL; return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject( substring = PyUnicode_FromObject(substring);
(PyObject *)substring); if (!substring)
if (substring == NULL)
return NULL; return NULL;
result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1)); FIX_START_END(self);
result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
Py_DECREF(substring); Py_DECREF(substring);
return result;
return PyInt_FromSsize_t(result);
} }
PyDoc_STRVAR(rindex__doc__, PyDoc_STRVAR(rindex__doc__,
@ -6065,22 +6031,24 @@ Like S.rfind() but raise ValueError when the substring is not found.");
static PyObject * static PyObject *
unicode_rindex(PyUnicodeObject *self, PyObject *args) unicode_rindex(PyUnicodeObject *self, PyObject *args)
{ {
Py_ssize_t result; PyObject *substring;
PyUnicodeObject *substring;
Py_ssize_t start = 0; Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX; Py_ssize_t end = PY_SSIZE_T_MAX;
Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring, if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL; return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject( substring = PyUnicode_FromObject(substring);
(PyObject *)substring); if (!substring)
if (substring == NULL)
return NULL; return NULL;
result = findstring(self, substring, start, end, -1); FIX_START_END(self);
result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
Py_DECREF(substring); Py_DECREF(substring);
if (result < 0) { if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found"); PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL; return NULL;