mirror of https://github.com/python/cpython
gh-117431: Adapt str.find and friends to Argument Clinic (#117468)
This change gives a significant speedup, as the METH_FASTCALL calling convention is now used. The following methods are adapted:
- str.count
- str.find
- str.index
- str.rfind
- str.rindex
This commit is contained in:
parent
345194de8c
commit
7ecd55d604
|
@ -1503,15 +1503,15 @@ class StringLikeTest(BaseTest):
|
||||||
# issue 11828
|
# issue 11828
|
||||||
s = 'hello'
|
s = 'hello'
|
||||||
x = 'x'
|
x = 'x'
|
||||||
self.assertRaisesRegex(TypeError, r'^find\(', s.find,
|
self.assertRaisesRegex(TypeError, r'^find\b', s.find,
|
||||||
x, None, None, None)
|
x, None, None, None)
|
||||||
self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind,
|
self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind,
|
||||||
x, None, None, None)
|
x, None, None, None)
|
||||||
self.assertRaisesRegex(TypeError, r'^index\(', s.index,
|
self.assertRaisesRegex(TypeError, r'^index\b', s.index,
|
||||||
x, None, None, None)
|
x, None, None, None)
|
||||||
self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex,
|
self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex,
|
||||||
x, None, None, None)
|
x, None, None, None)
|
||||||
self.assertRaisesRegex(TypeError, r'^count\(', s.count,
|
self.assertRaisesRegex(TypeError, r'^count\b', s.count,
|
||||||
x, None, None, None)
|
x, None, None, None)
|
||||||
self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith,
|
self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith,
|
||||||
x, None, None, None)
|
x, None, None, None)
|
||||||
|
|
|
@ -1,2 +1,10 @@
|
||||||
Improve the performance of :meth:`str.startswith` and :meth:`str.endswith`
|
Improve the performance of the following :class:`str` methods
|
||||||
by adapting them to the :c:macro:`METH_FASTCALL` calling convention.
|
by adapting them to the :c:macro:`METH_FASTCALL` calling convention:
|
||||||
|
|
||||||
|
* :meth:`~str.count`
|
||||||
|
* :meth:`~str.endswith`
|
||||||
|
* :meth:`~str.find`
|
||||||
|
* :meth:`~str.index`
|
||||||
|
* :meth:`~str.rfind`
|
||||||
|
* :meth:`~str.rindex`
|
||||||
|
* :meth:`~str.startswith`
|
||||||
|
|
|
@ -136,6 +136,61 @@ exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(unicode_count__doc__,
|
||||||
|
"count($self, sub[, start[, end]], /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Return the number of non-overlapping occurrences of substring sub in string S[start:end].\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arguments start and end are interpreted as in slice notation.");
|
||||||
|
|
||||||
|
#define UNICODE_COUNT_METHODDEF \
|
||||||
|
{"count", _PyCFunction_CAST(unicode_count), METH_FASTCALL, unicode_count__doc__},
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
unicode_count(PyObject *str, PyObject *const *args, Py_ssize_t nargs)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
PyObject *substr;
|
||||||
|
Py_ssize_t start = 0;
|
||||||
|
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||||
|
Py_ssize_t _return_value;
|
||||||
|
|
||||||
|
if (!_PyArg_CheckPositional("count", nargs, 1, 3)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!PyUnicode_Check(args[0])) {
|
||||||
|
_PyArg_BadArgument("count", "argument 1", "str", args[0]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
substr = args[0];
|
||||||
|
if (nargs < 2) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[1], &start)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (nargs < 3) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[2], &end)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
skip_optional:
|
||||||
|
_return_value = unicode_count_impl(str, substr, start, end);
|
||||||
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromSsize_t(_return_value);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(unicode_encode__doc__,
|
PyDoc_STRVAR(unicode_encode__doc__,
|
||||||
"encode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
|
"encode($self, /, encoding=\'utf-8\', errors=\'strict\')\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
|
@ -301,6 +356,118 @@ exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(unicode_find__doc__,
|
||||||
|
"find($self, sub, start=None, end=None, /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arguments start and end are interpreted as in slice notation.\n"
|
||||||
|
"Return -1 on failure.");
|
||||||
|
|
||||||
|
#define UNICODE_FIND_METHODDEF \
|
||||||
|
{"find", _PyCFunction_CAST(unicode_find), METH_FASTCALL, unicode_find__doc__},
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
unicode_find(PyObject *str, PyObject *const *args, Py_ssize_t nargs)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
PyObject *substr;
|
||||||
|
Py_ssize_t start = 0;
|
||||||
|
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||||
|
Py_ssize_t _return_value;
|
||||||
|
|
||||||
|
if (!_PyArg_CheckPositional("find", nargs, 1, 3)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!PyUnicode_Check(args[0])) {
|
||||||
|
_PyArg_BadArgument("find", "argument 1", "str", args[0]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
substr = args[0];
|
||||||
|
if (nargs < 2) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[1], &start)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (nargs < 3) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[2], &end)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
skip_optional:
|
||||||
|
_return_value = unicode_find_impl(str, substr, start, end);
|
||||||
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromSsize_t(_return_value);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(unicode_index__doc__,
|
||||||
|
"index($self, sub, start=None, end=None, /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arguments start and end are interpreted as in slice notation.\n"
|
||||||
|
"Raises ValueError when the substring is not found.");
|
||||||
|
|
||||||
|
#define UNICODE_INDEX_METHODDEF \
|
||||||
|
{"index", _PyCFunction_CAST(unicode_index), METH_FASTCALL, unicode_index__doc__},
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
unicode_index(PyObject *str, PyObject *const *args, Py_ssize_t nargs)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
PyObject *substr;
|
||||||
|
Py_ssize_t start = 0;
|
||||||
|
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||||
|
Py_ssize_t _return_value;
|
||||||
|
|
||||||
|
if (!_PyArg_CheckPositional("index", nargs, 1, 3)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!PyUnicode_Check(args[0])) {
|
||||||
|
_PyArg_BadArgument("index", "argument 1", "str", args[0]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
substr = args[0];
|
||||||
|
if (nargs < 2) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[1], &start)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (nargs < 3) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[2], &end)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
skip_optional:
|
||||||
|
_return_value = unicode_index_impl(str, substr, start, end);
|
||||||
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromSsize_t(_return_value);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(unicode_isascii__doc__,
|
PyDoc_STRVAR(unicode_isascii__doc__,
|
||||||
"isascii($self, /)\n"
|
"isascii($self, /)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
|
@ -892,6 +1059,118 @@ exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(unicode_rfind__doc__,
|
||||||
|
"rfind($self, sub, start=None, end=None, /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arguments start and end are interpreted as in slice notation.\n"
|
||||||
|
"Return -1 on failure.");
|
||||||
|
|
||||||
|
#define UNICODE_RFIND_METHODDEF \
|
||||||
|
{"rfind", _PyCFunction_CAST(unicode_rfind), METH_FASTCALL, unicode_rfind__doc__},
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
unicode_rfind(PyObject *str, PyObject *const *args, Py_ssize_t nargs)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
PyObject *substr;
|
||||||
|
Py_ssize_t start = 0;
|
||||||
|
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||||
|
Py_ssize_t _return_value;
|
||||||
|
|
||||||
|
if (!_PyArg_CheckPositional("rfind", nargs, 1, 3)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!PyUnicode_Check(args[0])) {
|
||||||
|
_PyArg_BadArgument("rfind", "argument 1", "str", args[0]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
substr = args[0];
|
||||||
|
if (nargs < 2) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[1], &start)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (nargs < 3) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[2], &end)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
skip_optional:
|
||||||
|
_return_value = unicode_rfind_impl(str, substr, start, end);
|
||||||
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromSsize_t(_return_value);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(unicode_rindex__doc__,
|
||||||
|
"rindex($self, sub, start=None, end=None, /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].\n"
|
||||||
|
"\n"
|
||||||
|
"Optional arguments start and end are interpreted as in slice notation.\n"
|
||||||
|
"Raises ValueError when the substring is not found.");
|
||||||
|
|
||||||
|
#define UNICODE_RINDEX_METHODDEF \
|
||||||
|
{"rindex", _PyCFunction_CAST(unicode_rindex), METH_FASTCALL, unicode_rindex__doc__},
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
unicode_rindex(PyObject *str, PyObject *const *args, Py_ssize_t nargs)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
PyObject *substr;
|
||||||
|
Py_ssize_t start = 0;
|
||||||
|
Py_ssize_t end = PY_SSIZE_T_MAX;
|
||||||
|
Py_ssize_t _return_value;
|
||||||
|
|
||||||
|
if (!_PyArg_CheckPositional("rindex", nargs, 1, 3)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!PyUnicode_Check(args[0])) {
|
||||||
|
_PyArg_BadArgument("rindex", "argument 1", "str", args[0]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
substr = args[0];
|
||||||
|
if (nargs < 2) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[1], &start)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (nargs < 3) {
|
||||||
|
goto skip_optional;
|
||||||
|
}
|
||||||
|
if (!_PyEval_SliceIndex(args[2], &end)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
skip_optional:
|
||||||
|
_return_value = unicode_rindex_impl(str, substr, start, end);
|
||||||
|
if ((_return_value == -1) && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = PyLong_FromSsize_t(_return_value);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(unicode_rjust__doc__,
|
PyDoc_STRVAR(unicode_rjust__doc__,
|
||||||
"rjust($self, width, fillchar=\' \', /)\n"
|
"rjust($self, width, fillchar=\' \', /)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
|
@ -1609,4 +1888,4 @@ skip_optional_pos:
|
||||||
exit:
|
exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
/*[clinic end generated code: output=1734aa1fcc9b076a input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=3aa49013ffa3fa93 input=a9049054013a1b77]*/
|
||||||
|
|
|
@ -9194,75 +9194,6 @@ _PyUnicode_InsertThousandsGrouping(
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Py_ssize_t
|
|
||||||
unicode_count_impl(PyObject *str,
|
|
||||||
PyObject *substr,
|
|
||||||
Py_ssize_t start,
|
|
||||||
Py_ssize_t end)
|
|
||||||
{
|
|
||||||
assert(PyUnicode_Check(str));
|
|
||||||
assert(PyUnicode_Check(substr));
|
|
||||||
|
|
||||||
Py_ssize_t result;
|
|
||||||
int kind1, kind2;
|
|
||||||
const void *buf1 = NULL, *buf2 = NULL;
|
|
||||||
Py_ssize_t len1, len2;
|
|
||||||
|
|
||||||
kind1 = PyUnicode_KIND(str);
|
|
||||||
kind2 = PyUnicode_KIND(substr);
|
|
||||||
if (kind1 < kind2)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
len1 = PyUnicode_GET_LENGTH(str);
|
|
||||||
len2 = PyUnicode_GET_LENGTH(substr);
|
|
||||||
ADJUST_INDICES(start, end, len1);
|
|
||||||
if (end - start < len2)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
buf1 = PyUnicode_DATA(str);
|
|
||||||
buf2 = PyUnicode_DATA(substr);
|
|
||||||
if (kind2 != kind1) {
|
|
||||||
buf2 = unicode_askind(kind2, buf2, len2, kind1);
|
|
||||||
if (!buf2)
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We don't reuse `anylib_count` here because of the explicit casts.
|
|
||||||
switch (kind1) {
|
|
||||||
case PyUnicode_1BYTE_KIND:
|
|
||||||
result = ucs1lib_count(
|
|
||||||
((const Py_UCS1*)buf1) + start, end - start,
|
|
||||||
buf2, len2, PY_SSIZE_T_MAX
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
case PyUnicode_2BYTE_KIND:
|
|
||||||
result = ucs2lib_count(
|
|
||||||
((const Py_UCS2*)buf1) + start, end - start,
|
|
||||||
buf2, len2, PY_SSIZE_T_MAX
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
case PyUnicode_4BYTE_KIND:
|
|
||||||
result = ucs4lib_count(
|
|
||||||
((const Py_UCS4*)buf1) + start, end - start,
|
|
||||||
buf2, len2, PY_SSIZE_T_MAX
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Py_UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
|
||||||
if (kind2 != kind1)
|
|
||||||
PyMem_Free((void *)buf2);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
onError:
|
|
||||||
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
|
||||||
if (kind2 != kind1)
|
|
||||||
PyMem_Free((void *)buf2);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_ssize_t
|
Py_ssize_t
|
||||||
PyUnicode_Count(PyObject *str,
|
PyUnicode_Count(PyObject *str,
|
||||||
PyObject *substr,
|
PyObject *substr,
|
||||||
|
@ -11131,47 +11062,87 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
|
||||||
Py_XDECREF(right);
|
Py_XDECREF(right);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*[clinic input]
|
||||||
Wraps asciilib_parse_args_finds() and additionally ensures that the
|
@text_signature "($self, sub[, start[, end]], /)"
|
||||||
first argument is a unicode object.
|
str.count as unicode_count -> Py_ssize_t
|
||||||
*/
|
|
||||||
|
|
||||||
static inline int
|
self as str: self
|
||||||
parse_args_finds_unicode(const char * function_name, PyObject *args,
|
sub as substr: unicode
|
||||||
PyObject **substring,
|
start: slice_index(accept={int, NoneType}, c_default='0') = None
|
||||||
Py_ssize_t *start, Py_ssize_t *end)
|
end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
|
||||||
|
/
|
||||||
|
|
||||||
|
Return the number of non-overlapping occurrences of substring sub in string S[start:end].
|
||||||
|
|
||||||
|
Optional arguments start and end are interpreted as in slice notation.
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_count_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end)
|
||||||
|
/*[clinic end generated code: output=8fcc3aef0b18edbf input=6f168ffd94be8785]*/
|
||||||
{
|
{
|
||||||
if (asciilib_parse_args_finds(function_name, args, substring, start, end)) {
|
assert(PyUnicode_Check(str));
|
||||||
if (ensure_unicode(*substring) < 0)
|
assert(PyUnicode_Check(substr));
|
||||||
return 0;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyDoc_STRVAR(count__doc__,
|
|
||||||
"S.count(sub[, start[, end]]) -> int\n\
|
|
||||||
\n\
|
|
||||||
Return the number of non-overlapping occurrences of substring sub in\n\
|
|
||||||
string S[start:end]. Optional arguments start and end are\n\
|
|
||||||
interpreted as in slice notation.");
|
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
unicode_count(PyObject *self, PyObject *args)
|
|
||||||
{
|
|
||||||
PyObject *substring = NULL; /* initialize to fix a compiler warning */
|
|
||||||
Py_ssize_t start = 0;
|
|
||||||
Py_ssize_t end = PY_SSIZE_T_MAX;
|
|
||||||
Py_ssize_t result;
|
Py_ssize_t result;
|
||||||
|
int kind1, kind2;
|
||||||
|
const void *buf1 = NULL, *buf2 = NULL;
|
||||||
|
Py_ssize_t len1, len2;
|
||||||
|
|
||||||
if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
|
kind1 = PyUnicode_KIND(str);
|
||||||
return NULL;
|
kind2 = PyUnicode_KIND(substr);
|
||||||
|
if (kind1 < kind2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
result = unicode_count_impl(self, substring, start, end);
|
len1 = PyUnicode_GET_LENGTH(str);
|
||||||
if (result == -1)
|
len2 = PyUnicode_GET_LENGTH(substr);
|
||||||
return NULL;
|
ADJUST_INDICES(start, end, len1);
|
||||||
|
if (end - start < len2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
return PyLong_FromSsize_t(result);
|
buf1 = PyUnicode_DATA(str);
|
||||||
|
buf2 = PyUnicode_DATA(substr);
|
||||||
|
if (kind2 != kind1) {
|
||||||
|
buf2 = unicode_askind(kind2, buf2, len2, kind1);
|
||||||
|
if (!buf2)
|
||||||
|
goto onError;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We don't reuse `anylib_count` here because of the explicit casts.
|
||||||
|
switch (kind1) {
|
||||||
|
case PyUnicode_1BYTE_KIND:
|
||||||
|
result = ucs1lib_count(
|
||||||
|
((const Py_UCS1*)buf1) + start, end - start,
|
||||||
|
buf2, len2, PY_SSIZE_T_MAX
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
case PyUnicode_2BYTE_KIND:
|
||||||
|
result = ucs2lib_count(
|
||||||
|
((const Py_UCS2*)buf1) + start, end - start,
|
||||||
|
buf2, len2, PY_SSIZE_T_MAX
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
case PyUnicode_4BYTE_KIND:
|
||||||
|
result = ucs4lib_count(
|
||||||
|
((const Py_UCS4*)buf1) + start, end - start,
|
||||||
|
buf2, len2, PY_SSIZE_T_MAX
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
Py_UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
||||||
|
if (kind2 != kind1)
|
||||||
|
PyMem_Free((void *)buf2);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
onError:
|
||||||
|
assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
|
||||||
|
if (kind2 != kind1)
|
||||||
|
PyMem_Free((void *)buf2);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
@ -11282,33 +11253,25 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(find__doc__,
|
/*[clinic input]
|
||||||
"S.find(sub[, start[, end]]) -> int\n\
|
str.find as unicode_find = str.count
|
||||||
\n\
|
|
||||||
Return the lowest index in S where substring sub is found,\n\
|
|
||||||
such that sub is contained within S[start:end]. Optional\n\
|
|
||||||
arguments start and end are interpreted as in slice notation.\n\
|
|
||||||
\n\
|
|
||||||
Return -1 on failure.");
|
|
||||||
|
|
||||||
static PyObject *
|
Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||||
unicode_find(PyObject *self, PyObject *args)
|
|
||||||
|
Optional arguments start and end are interpreted as in slice notation.
|
||||||
|
Return -1 on failure.
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_find_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end)
|
||||||
|
/*[clinic end generated code: output=51dbe6255712e278 input=4a89d2d68ef57256]*/
|
||||||
{
|
{
|
||||||
/* initialize variables to prevent gcc warning */
|
Py_ssize_t result = any_find_slice(str, substr, start, end, 1);
|
||||||
PyObject *substring = NULL;
|
if (result < 0) {
|
||||||
Py_ssize_t start = 0;
|
return -1;
|
||||||
Py_ssize_t end = 0;
|
}
|
||||||
Py_ssize_t result;
|
return result;
|
||||||
|
|
||||||
if (!parse_args_finds_unicode("find", args, &substring, &start, &end))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
result = any_find_slice(self, substring, start, end, 1);
|
|
||||||
|
|
||||||
if (result == -2)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
return PyLong_FromSsize_t(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -11351,38 +11314,28 @@ unicode_hash(PyObject *self)
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(index__doc__,
|
/*[clinic input]
|
||||||
"S.index(sub[, start[, end]]) -> int\n\
|
str.index as unicode_index = str.count
|
||||||
\n\
|
|
||||||
Return the lowest index in S where substring sub is found,\n\
|
|
||||||
such that sub is contained within S[start:end]. Optional\n\
|
|
||||||
arguments start and end are interpreted as in slice notation.\n\
|
|
||||||
\n\
|
|
||||||
Raises ValueError when the substring is not found.");
|
|
||||||
|
|
||||||
static PyObject *
|
Return the lowest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||||
unicode_index(PyObject *self, PyObject *args)
|
|
||||||
|
Optional arguments start and end are interpreted as in slice notation.
|
||||||
|
Raises ValueError when the substring is not found.
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_index_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end)
|
||||||
|
/*[clinic end generated code: output=77558288837cdf40 input=d986aeac0be14a1c]*/
|
||||||
{
|
{
|
||||||
/* initialize variables to prevent gcc warning */
|
Py_ssize_t result = any_find_slice(str, substr, start, end, 1);
|
||||||
Py_ssize_t result;
|
if (result == -1) {
|
||||||
PyObject *substring = NULL;
|
|
||||||
Py_ssize_t start = 0;
|
|
||||||
Py_ssize_t end = 0;
|
|
||||||
|
|
||||||
if (!parse_args_finds_unicode("index", args, &substring, &start, &end))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
result = any_find_slice(self, substring, start, end, 1);
|
|
||||||
|
|
||||||
if (result == -2)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
if (result < 0) {
|
|
||||||
PyErr_SetString(PyExc_ValueError, "substring not found");
|
PyErr_SetString(PyExc_ValueError, "substring not found");
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
else if (result < 0) {
|
||||||
return PyLong_FromSsize_t(result);
|
return -1;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
@ -12462,67 +12415,49 @@ unicode_repr(PyObject *unicode)
|
||||||
return repr;
|
return repr;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(rfind__doc__,
|
/*[clinic input]
|
||||||
"S.rfind(sub[, start[, end]]) -> int\n\
|
str.rfind as unicode_rfind = str.count
|
||||||
\n\
|
|
||||||
Return the highest index in S where substring sub is found,\n\
|
|
||||||
such that sub is contained within S[start:end]. Optional\n\
|
|
||||||
arguments start and end are interpreted as in slice notation.\n\
|
|
||||||
\n\
|
|
||||||
Return -1 on failure.");
|
|
||||||
|
|
||||||
static PyObject *
|
Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||||
unicode_rfind(PyObject *self, PyObject *args)
|
|
||||||
|
Optional arguments start and end are interpreted as in slice notation.
|
||||||
|
Return -1 on failure.
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_rfind_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end)
|
||||||
|
/*[clinic end generated code: output=880b29f01dd014c8 input=898361fb71f59294]*/
|
||||||
{
|
{
|
||||||
/* initialize variables to prevent gcc warning */
|
Py_ssize_t result = any_find_slice(str, substr, start, end, -1);
|
||||||
PyObject *substring = NULL;
|
if (result < 0) {
|
||||||
Py_ssize_t start = 0;
|
return -1;
|
||||||
Py_ssize_t end = 0;
|
}
|
||||||
Py_ssize_t result;
|
return result;
|
||||||
|
|
||||||
if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
result = any_find_slice(self, substring, start, end, -1);
|
|
||||||
|
|
||||||
if (result == -2)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
return PyLong_FromSsize_t(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(rindex__doc__,
|
/*[clinic input]
|
||||||
"S.rindex(sub[, start[, end]]) -> int\n\
|
str.rindex as unicode_rindex = str.count
|
||||||
\n\
|
|
||||||
Return the highest index in S where substring sub is found,\n\
|
|
||||||
such that sub is contained within S[start:end]. Optional\n\
|
|
||||||
arguments start and end are interpreted as in slice notation.\n\
|
|
||||||
\n\
|
|
||||||
Raises ValueError when the substring is not found.");
|
|
||||||
|
|
||||||
static PyObject *
|
Return the highest index in S where substring sub is found, such that sub is contained within S[start:end].
|
||||||
unicode_rindex(PyObject *self, PyObject *args)
|
|
||||||
|
Optional arguments start and end are interpreted as in slice notation.
|
||||||
|
Raises ValueError when the substring is not found.
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
unicode_rindex_impl(PyObject *str, PyObject *substr, Py_ssize_t start,
|
||||||
|
Py_ssize_t end)
|
||||||
|
/*[clinic end generated code: output=5f3aef124c867fe1 input=35943dead6c1ea9d]*/
|
||||||
{
|
{
|
||||||
/* initialize variables to prevent gcc warning */
|
Py_ssize_t result = any_find_slice(str, substr, start, end, -1);
|
||||||
PyObject *substring = NULL;
|
if (result == -1) {
|
||||||
Py_ssize_t start = 0;
|
|
||||||
Py_ssize_t end = 0;
|
|
||||||
Py_ssize_t result;
|
|
||||||
|
|
||||||
if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
result = any_find_slice(self, substring, start, end, -1);
|
|
||||||
|
|
||||||
if (result == -2)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
if (result < 0) {
|
|
||||||
PyErr_SetString(PyExc_ValueError, "substring not found");
|
PyErr_SetString(PyExc_ValueError, "substring not found");
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
else if (result < 0) {
|
||||||
return PyLong_FromSsize_t(result);
|
return -1;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
@ -13562,16 +13497,16 @@ static PyMethodDef unicode_methods[] = {
|
||||||
UNICODE_CASEFOLD_METHODDEF
|
UNICODE_CASEFOLD_METHODDEF
|
||||||
UNICODE_TITLE_METHODDEF
|
UNICODE_TITLE_METHODDEF
|
||||||
UNICODE_CENTER_METHODDEF
|
UNICODE_CENTER_METHODDEF
|
||||||
{"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
|
UNICODE_COUNT_METHODDEF
|
||||||
UNICODE_EXPANDTABS_METHODDEF
|
UNICODE_EXPANDTABS_METHODDEF
|
||||||
{"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
|
UNICODE_FIND_METHODDEF
|
||||||
UNICODE_PARTITION_METHODDEF
|
UNICODE_PARTITION_METHODDEF
|
||||||
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
|
UNICODE_INDEX_METHODDEF
|
||||||
UNICODE_LJUST_METHODDEF
|
UNICODE_LJUST_METHODDEF
|
||||||
UNICODE_LOWER_METHODDEF
|
UNICODE_LOWER_METHODDEF
|
||||||
UNICODE_LSTRIP_METHODDEF
|
UNICODE_LSTRIP_METHODDEF
|
||||||
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
|
UNICODE_RFIND_METHODDEF
|
||||||
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
|
UNICODE_RINDEX_METHODDEF
|
||||||
UNICODE_RJUST_METHODDEF
|
UNICODE_RJUST_METHODDEF
|
||||||
UNICODE_RSTRIP_METHODDEF
|
UNICODE_RSTRIP_METHODDEF
|
||||||
UNICODE_RPARTITION_METHODDEF
|
UNICODE_RPARTITION_METHODDEF
|
||||||
|
|
Loading…
Reference in New Issue