needforspeed: more stringlib refactoring

This commit is contained in:
Fredrik Lundh 2006-05-27 10:05:10 +00:00
parent d532ba0746
commit 2d23d5bf2e
4 changed files with 161 additions and 147 deletions

View File

@ -3,3 +3,32 @@ possibly other modules, in a not too distant future).
the stuff in here is included into relevant places; see the individual
source files for details.
--------------------------------------------------------------------
the following defines are used by the different modules:
STRINGLIB_CHAR
the type used to hold a character (char or Py_UNICODE)
STRINGLIB_EMPTY
a PyObject representing the empty string
int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
compares two strings. returns 0 if they match, and non-zero if not.
Py_ssize_t STRINGLIB_LEN(PyObject*)
returns the length of the given string object (which must be of the
right type)
PyObject* STRINGLIB_NEW(STRINGLIB_CHAR*, Py_ssize_t)
creates a new string object
STRINGLIB_CHAR* STRINGLIB_STR(PyObject*)
returns the pointer to the character data for the given string
object (which must be of the right type)

View File

@ -48,6 +48,39 @@ stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return pos;
}
#ifdef STRINGLIB_STR
/* Object-level front end for stringlib_find(): looks for sub inside the
   window of str that begins at index start and spans (end - start)
   characters.  start is also handed to stringlib_find() as its final
   argument.  No clamping of start/end is performed in this wrapper. */
Py_LOCAL(Py_ssize_t)
stringlib_find_obj(PyObject* str, PyObject* sub,
                   Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t window_len = end - start;
    Py_ssize_t sub_len = STRINGLIB_LEN(sub);

    return stringlib_find(STRINGLIB_STR(str) + start, window_len,
                          STRINGLIB_STR(sub), sub_len,
                          start);
}
/* Containment test on string objects: runs stringlib_find() over the
   whole of str (final argument 0) and yields 1 when the result is
   anything other than -1, 0 otherwise. */
Py_LOCAL(int)
stringlib_contains_obj(PyObject* str, PyObject* sub)
{
    if (stringlib_find(STRINGLIB_STR(str), STRINGLIB_LEN(str),
                       STRINGLIB_STR(sub), STRINGLIB_LEN(sub),
                       0) == -1)
        return 0;

    return 1;
}
/* Object-level front end for stringlib_rfind(): passes the window of str
   that begins at index start and spans (end - start) characters, together
   with the data and length of sub.  start is also handed to
   stringlib_rfind() as its final argument.  No clamping of start/end is
   performed in this wrapper. */
Py_LOCAL(Py_ssize_t)
stringlib_rfind_obj(PyObject* str, PyObject* sub,
                    Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t window_len = end - start;
    Py_ssize_t sub_len = STRINGLIB_LEN(sub);

    return stringlib_rfind(STRINGLIB_STR(str) + start, window_len,
                           STRINGLIB_STR(sub), sub_len,
                           start);
}
#endif
#endif
/*

View File

@ -690,6 +690,9 @@ PyObject *PyString_DecodeEscape(const char *s,
return NULL;
}
/* -------------------------------------------------------------------- */
/* object api */
static Py_ssize_t
string_getsize(register PyObject *op)
{
@ -765,22 +768,23 @@ PyString_AsStringAndSize(register PyObject *obj,
}
/* -------------------------------------------------------------------- */
/* stringlib components */
/* Methods */
#define STRINGLIB_CHAR char
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING
#define STRINGLIB_EMPTY nullstring
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
/* -------------------------------------------------------------------- */
/* Methods */
static int
string_print(PyStringObject *op, FILE *fp, int flags)
@ -1048,49 +1052,36 @@ string_slice(register PyStringObject *a, register Py_ssize_t i,
}
static int
string_contains(PyObject *a, PyObject *el)
string_contains(PyObject *str_obj, PyObject *sub_obj)
{
char *s = PyString_AS_STRING(a);
const char *sub = PyString_AS_STRING(el);
Py_ssize_t len_sub = PyString_GET_SIZE(el);
Py_ssize_t pos;
if (!PyString_CheckExact(el)) {
if (!PyString_CheckExact(sub_obj)) {
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(el))
return PyUnicode_Contains(a, el);
if (PyUnicode_Check(sub_obj))
return PyUnicode_Contains(str_obj, sub_obj);
#endif
if (!PyString_Check(el)) {
if (!PyString_Check(sub_obj)) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand");
return -1;
}
}
if (len_sub == 0)
return 1;
pos = fastsearch(
s, PyString_GET_SIZE(a),
sub, len_sub, FAST_SEARCH
);
return (pos != -1);
return stringlib_contains_obj(str_obj, sub_obj);
}
static PyObject *
string_item(PyStringObject *a, register Py_ssize_t i)
{
char pchar;
PyObject *v;
char *pchar;
if (i < 0 || i >= a->ob_size) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL;
}
pchar = a->ob_sval + i;
v = (PyObject *)characters[*pchar & UCHAR_MAX];
pchar = a->ob_sval[i];
v = (PyObject *)characters[pchar & UCHAR_MAX];
if (v == NULL)
v = PyString_FromStringAndSize(pchar, 1);
v = PyString_FromStringAndSize(&pchar, 1);
else {
#ifdef COUNT_ALLOCS
one_strings++;
@ -1166,9 +1157,8 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
int
_PyString_Eq(PyObject *o1, PyObject *o2)
{
PyStringObject *a, *b;
a = (PyStringObject*)o1;
b = (PyStringObject*)o2;
PyStringObject *a = (PyStringObject*) o1;
PyStringObject *b = (PyStringObject*) o2;
return a->ob_size == b->ob_size
&& *a->ob_sval == *b->ob_sval
&& memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
@ -2264,43 +2254,37 @@ as in slice notation.");
static PyObject *
string_count(PyStringObject *self, PyObject *args)
{
const char *s = PyString_AS_STRING(self), *sub;
Py_ssize_t len = PyString_GET_SIZE(self), n;
Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Py_ssize_t m, r;
PyObject *subobj;
PyObject *sub_obj;
const char *str = PyString_AS_STRING(self), *sub;
Py_ssize_t sub_len;
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
_PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj);
if (PyString_Check(sub_obj)) {
sub = PyString_AS_STRING(sub_obj);
sub_len = PyString_GET_SIZE(sub_obj);
}
#ifdef Py_USING_UNICODE
else if (PyUnicode_Check(subobj)) {
else if (PyUnicode_Check(sub_obj)) {
Py_ssize_t count;
count = PyUnicode_Count((PyObject *)self, subobj, i, last);
count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
if (count == -1)
return NULL;
else
return PyInt_FromLong((long) count);
return PyInt_FromSsize_t(count);
}
#endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
return NULL;
string_adjust_indices(&i, &last, len);
string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
m = last + 1 - n;
if (n == 0)
return PyInt_FromSsize_t(m-i);
r = fastsearch(s + i, last - i, sub, n, FAST_COUNT);
if (r < 0)
r = 0; /* no match */
return PyInt_FromSsize_t(r);
return PyInt_FromSsize_t(
stringlib_count(str + start, end - start, sub, sub_len)
);
}
PyDoc_STRVAR(swapcase__doc__,
@ -2477,7 +2461,7 @@ return_self(PyStringObject *self)
}
Py_LOCAL(Py_ssize_t)
countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
{
Py_ssize_t count=0;
char *start=target;
@ -2580,7 +2564,7 @@ countstring(char *target, Py_ssize_t target_len,
}
/* Algorithms for difference cases of string replacement */
/* Algorithms for different cases of string replacement */
/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Py_LOCAL(PyStringObject *)

View File

@ -3857,7 +3857,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
#define STRINGLIB_CHAR Py_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
#define STRINGLIB_STR PyUnicode_AS_UNICODE
Py_LOCAL(int)
STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
@ -3918,67 +3920,33 @@ Py_ssize_t PyUnicode_Count(PyObject *str,
return result;
}
/* Locate substring within self, restricted to the start/end window after
   FIX_START_END(self) has been applied to it.

   direction > 0: forward search via fastsearch(); a hit is returned as an
                  index relative to self (hit + start).
   otherwise:     candidate positions are tried from (end - substring
                  length) down to start using Py_UNICODE_MATCH().

   An empty substring yields start (forward) or end (backward).
   Returns -1 when nothing matches. */
static Py_ssize_t findstring(PyUnicodeObject *self,
                             PyUnicodeObject *substring,
                             Py_ssize_t start,
                             Py_ssize_t end,
                             int direction)
{
    Py_ssize_t candidate;

    FIX_START_END(self);

    /* Empty needle: report the relevant edge of the window. */
    if (substring->length == 0) {
        if (direction > 0)
            return start;
        return end;
    }

    if (direction > 0) {
        candidate = fastsearch(
            PyUnicode_AS_UNICODE(self) + start, end - start,
            substring->str, substring->length, FAST_SEARCH
            );
        /* fastsearch() reports positions relative to the window start. */
        return (candidate >= 0) ? candidate + start : -1;
    }

    /* Backward scan, beginning at the last position where substring fits. */
    candidate = end - substring->length;
    while (candidate >= start) {
        if (Py_UNICODE_MATCH(self, candidate, substring))
            return candidate;
        candidate--;
    }

    return -1;
}
Py_ssize_t PyUnicode_Find(PyObject *str,
PyObject *substr,
PyObject *sub,
Py_ssize_t start,
Py_ssize_t end,
int direction)
{
Py_ssize_t result;
PyUnicodeObject* str_obj;
PyUnicodeObject* sub_obj;
str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
if (!str_obj)
str = PyUnicode_FromObject(str);
if (!str)
return -2;
sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr);
if (!sub_obj) {
Py_DECREF(str_obj);
sub = PyUnicode_FromObject(sub);
if (!sub) {
Py_DECREF(str);
return -2;
}
FIX_START_END(str_obj);
FIX_START_END((PyUnicodeObject*) str);
if (direction > 0)
result = stringlib_find(
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
start
);
result = stringlib_find_obj(str, sub, start, end);
else
result = stringlib_rfind(
str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
start
);
result = stringlib_rfind_obj(str, sub, start, end);
Py_DECREF(str);
Py_DECREF(sub);
Py_DECREF(str_obj);
Py_DECREF(sub_obj);
return result;
}
@ -5046,39 +5014,29 @@ onError:
int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
PyUnicodeObject *u, *v;
Py_ssize_t size;
Py_ssize_t pos;
PyObject *str, *sub;
int result;
/* Coerce the two arguments */
v = (PyUnicodeObject *) PyUnicode_FromObject(element);
if (!v) {
sub = PyUnicode_FromObject(element);
if (!sub) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires string as left operand");
return -1;
}
u = (PyUnicodeObject *) PyUnicode_FromObject(container);
if (!u) {
Py_DECREF(v);
str = PyUnicode_FromObject(container);
if (!str) {
Py_DECREF(sub);
return -1;
}
size = PyUnicode_GET_SIZE(v);
if (!size) {
pos = 0;
goto done;
}
result = stringlib_contains_obj(str, sub);
pos = fastsearch(
PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u),
PyUnicode_AS_UNICODE(v), size, FAST_SEARCH
);
Py_DECREF(str);
Py_DECREF(sub);
done:
Py_DECREF(u);
Py_DECREF(v);
return (pos != -1);
return result;
}
/* Concat to string or Unicode object giving a new Unicode object. */
@ -5305,23 +5263,26 @@ Return -1 on failure.");
static PyObject *
unicode_find(PyUnicodeObject *self, PyObject *args)
{
PyUnicodeObject *substring;
PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:find", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring);
if (substring == NULL)
substring = PyUnicode_FromObject(substring);
if (!substring)
return NULL;
result = PyInt_FromSsize_t(findstring(self, substring, start, end, 1));
FIX_START_END(self);
result = stringlib_find_obj((PyObject*) self, substring, start, end);
Py_DECREF(substring);
return result;
return PyInt_FromSsize_t(result);
}
static PyObject *
@ -5371,7 +5332,7 @@ static PyObject *
unicode_index(PyUnicodeObject *self, PyObject *args)
{
Py_ssize_t result;
PyUnicodeObject *substring;
PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
@ -5379,18 +5340,21 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring);
if (substring == NULL)
substring = PyUnicode_FromObject(substring);
if (!substring)
return NULL;
result = findstring(self, substring, start, end, 1);
FIX_START_END(self);
result = stringlib_find_obj((PyObject*) self, substring, start, end);
Py_DECREF(substring);
if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL;
}
return PyInt_FromSsize_t(result);
}
@ -6038,23 +6002,25 @@ Return -1 on failure.");
static PyObject *
unicode_rfind(PyUnicodeObject *self, PyObject *args)
{
PyUnicodeObject *substring;
PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rfind", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring);
if (substring == NULL)
substring = PyUnicode_FromObject(substring);
if (!substring)
return NULL;
result = PyInt_FromSsize_t(findstring(self, substring, start, end, -1));
FIX_START_END(self);
result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
Py_DECREF(substring);
return result;
return PyInt_FromSsize_t(result);
}
PyDoc_STRVAR(rindex__doc__,
@ -6065,22 +6031,24 @@ Like S.rfind() but raise ValueError when the substring is not found.");
static PyObject *
unicode_rindex(PyUnicodeObject *self, PyObject *args)
{
Py_ssize_t result;
PyUnicodeObject *substring;
PyObject *substring;
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
Py_ssize_t result;
if (!PyArg_ParseTuple(args, "O|O&O&:rindex", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring);
if (substring == NULL)
substring = PyUnicode_FromObject(substring);
if (!substring)
return NULL;
result = findstring(self, substring, start, end, -1);
FIX_START_END(self);
result = stringlib_rfind_obj((PyObject*)self, substring, start, end);
Py_DECREF(substring);
if (result < 0) {
PyErr_SetString(PyExc_ValueError, "substring not found");
return NULL;