Apply patch diff.txt from SF feature request
http://www.python.org/sf/444708 This adds the optional argument for str.strip to unicode.strip too and makes it possible to call str.strip with a unicode argument and unicode.strip with a str argument.
This commit is contained in:
parent
a7cc43b9e8
commit
de02bcb265
|
@ -235,17 +235,28 @@ The functions defined in this module are:
|
|||
\function{joinfields()} was only used with two arguments.)
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{lstrip}{s}
|
||||
Return a copy of \var{s} but without leading whitespace characters.
|
||||
\begin{funcdesc}{lstrip}{s\optional{, chars}}
|
||||
Return a copy of the string with leading characters removed. If
|
||||
\var{chars} is omitted or \code{None}, whitespace characters are
|
||||
removed. If given and not \code{None}, \var{chars} must be a string;
|
||||
the characters in the string will be stripped from the beginning of
|
||||
\var{s}.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{rstrip}{s}
|
||||
Return a copy of \var{s} but without trailing whitespace
|
||||
characters.
|
||||
\begin{funcdesc}{rstrip}{s\optional{, chars}}
|
||||
Return a copy of the string with trailing characters removed. If
|
||||
\var{chars} is omitted or \code{None}, whitespace characters are
|
||||
removed. If given and not \code{None}, \var{chars} must be a string;
|
||||
the characters in the string will be stripped from the end of the
|
||||
string \var{s}.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{strip}{s}
|
||||
Return a copy of \var{s} without leading or trailing whitespace.
|
||||
\begin{funcdesc}{strip}{s\optional{, chars}}
|
||||
Return a copy of the string with leading and trailing characters
|
||||
removed. If \var{chars} is omitted or \code{None}, whitespace
|
||||
characters are removed. If given and not \code{None}, \var{chars}
|
||||
must be a string; the characters in the string will be stripped from
|
||||
both ends of \var{s}.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{swapcase}{s}
|
||||
|
|
|
@ -1040,6 +1040,13 @@ extern DL_IMPORT(int) PyUnicode_Contains(
|
|||
PyObject *element /* Element string */
|
||||
);
|
||||
|
||||
/* Strip from self the leading and/or trailing characters that occur
   in sepobj.  Externally visible so that str.strip(unicode) can
   call it. */
|
||||
extern DL_IMPORT(PyObject *) _PyUnicode_XStrip(
|
||||
PyUnicodeObject *self, /* String to strip */
|
||||
int striptype, /* 0 = left, 1 = right, 2 = both ends */
|
||||
PyObject *sepobj /* Unicode object holding the characters to strip */
|
||||
);
|
||||
|
||||
/* === Characters Type APIs =============================================== */
|
||||
|
||||
/* These should not be used directly. Use the Py_UNICODE_IS* and
|
||||
|
|
|
@ -169,12 +169,18 @@ def run_method_tests(test):
|
|||
test('rstrip', ' hello ', ' hello', None)
|
||||
test('strip', 'hello', 'hello', None)
|
||||
|
||||
# strip/lstrip/rstrip with real arg
|
||||
# strip/lstrip/rstrip with str arg
|
||||
test('strip', 'xyzzyhelloxyzzy', 'hello', 'xyz')
|
||||
test('lstrip', 'xyzzyhelloxyzzy', 'helloxyzzy', 'xyz')
|
||||
test('rstrip', 'xyzzyhelloxyzzy', 'xyzzyhello', 'xyz')
|
||||
test('strip', 'hello', 'hello', 'xyz')
|
||||
|
||||
# strip/lstrip/rstrip with unicode arg
|
||||
test('strip', 'xyzzyhelloxyzzy', u'hello', u'xyz')
|
||||
test('lstrip', 'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
|
||||
test('rstrip', 'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
|
||||
test('strip', 'hello', u'hello', u'xyz')
|
||||
|
||||
test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS')
|
||||
test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def')
|
||||
|
||||
|
|
|
@ -169,6 +169,24 @@ test('lstrip', u' hello ', u'hello ')
|
|||
test('rstrip', u' hello ', u' hello')
|
||||
test('strip', u'hello', u'hello')
|
||||
|
||||
# strip/lstrip/rstrip with None arg
|
||||
test('strip', u' hello ', u'hello', None)
|
||||
test('lstrip', u' hello ', u'hello ', None)
|
||||
test('rstrip', u' hello ', u' hello', None)
|
||||
test('strip', u'hello', u'hello', None)
|
||||
|
||||
# strip/lstrip/rstrip with unicode arg
|
||||
test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
|
||||
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
|
||||
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
|
||||
test('strip', u'hello', u'hello', u'xyz')
|
||||
|
||||
# strip/lstrip/rstrip with str arg
|
||||
test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
|
||||
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
|
||||
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
|
||||
test('strip', u'hello', u'hello', 'xyz')
|
||||
|
||||
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
|
||||
|
||||
if 0:
|
||||
|
|
|
@ -1005,7 +1005,9 @@ static PyBufferProcs string_as_buffer = {
|
|||
#define BOTHSTRIP 2
|
||||
|
||||
/* Arrays indexed by above */
|
||||
static const char *stripname[] = {"lstrip", "rstrip", "strip"};
|
||||
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
|
||||
|
||||
#define STRIPNAME(i) (stripformat[i]+3)
|
||||
|
||||
|
||||
static PyObject *
|
||||
|
@ -1449,15 +1451,26 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
|
|||
{
|
||||
PyObject *sep = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep))
|
||||
if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
|
||||
return NULL;
|
||||
|
||||
if (sep != NULL && sep != Py_None) {
|
||||
/* XXX What about Unicode? */
|
||||
if (!PyString_Check(sep)) {
|
||||
if (PyString_Check(sep))
|
||||
return do_xstrip(self, striptype, sep);
|
||||
else if (PyUnicode_Check(sep)) {
|
||||
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
|
||||
PyObject *res;
|
||||
if (uniself==NULL)
|
||||
return NULL;
|
||||
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
|
||||
striptype, sep);
|
||||
Py_DECREF(uniself);
|
||||
return res;
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%s arg must be None or string",
|
||||
stripname[striptype]);
|
||||
"%s arg must be None, str or unicode",
|
||||
STRIPNAME(striptype));
|
||||
return NULL;
|
||||
}
|
||||
return do_xstrip(self, striptype, sep);
|
||||
|
@ -1468,11 +1481,12 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
|
|||
|
||||
|
||||
static char strip__doc__[] =
|
||||
"S.strip([sep]) -> string\n\
|
||||
"S.strip([sep]) -> string or unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with leading and trailing\n\
|
||||
whitespace removed.\n\
|
||||
If sep is given and not None, remove characters in sep instead.";
|
||||
If sep is given and not None, remove characters in sep instead.\n\
|
||||
If sep is unicode, S will be converted to unicode before stripping";
|
||||
|
||||
static PyObject *
|
||||
string_strip(PyStringObject *self, PyObject *args)
|
||||
|
@ -1485,10 +1499,11 @@ string_strip(PyStringObject *self, PyObject *args)
|
|||
|
||||
|
||||
static char lstrip__doc__[] =
|
||||
"S.lstrip([sep]) -> string\n\
|
||||
"S.lstrip([sep]) -> string or unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with leading whitespace removed.\n\
|
||||
If sep is given and not None, remove characters in sep instead.";
|
||||
If sep is given and not None, remove characters in sep instead.\n\
|
||||
If sep is unicode, S will be converted to unicode before stripping";
|
||||
|
||||
static PyObject *
|
||||
string_lstrip(PyStringObject *self, PyObject *args)
|
||||
|
@ -1501,10 +1516,11 @@ string_lstrip(PyStringObject *self, PyObject *args)
|
|||
|
||||
|
||||
static char rstrip__doc__[] =
|
||||
"S.rstrip([sep]) -> string\n\
|
||||
"S.rstrip([sep]) -> string or unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with trailing whitespace removed.\n\
|
||||
If sep is given and not None, remove characters in sep instead.";
|
||||
If sep is given and not None, remove characters in sep instead.\n\
|
||||
If sep is unicode, S will be converted to unicode before stripping";
|
||||
|
||||
static PyObject *
|
||||
string_rstrip(PyStringObject *self, PyObject *args)
|
||||
|
|
|
@ -3503,35 +3503,6 @@ PyObject *split(PyUnicodeObject *self,
|
|||
return split_substring(self,list,substring,maxcount);
|
||||
}
|
||||
|
||||
static
|
||||
PyObject *strip(PyUnicodeObject *self,
|
||||
int left,
|
||||
int right)
|
||||
{
|
||||
Py_UNICODE *p = self->str;
|
||||
int start = 0;
|
||||
int end = self->length;
|
||||
|
||||
if (left)
|
||||
while (start < end && Py_UNICODE_ISSPACE(p[start]))
|
||||
start++;
|
||||
|
||||
if (right)
|
||||
while (end > start && Py_UNICODE_ISSPACE(p[end-1]))
|
||||
end--;
|
||||
|
||||
if (start == 0 && end == self->length && PyUnicode_CheckExact(self)) {
|
||||
/* couldn't strip anything off, return original string */
|
||||
Py_INCREF(self);
|
||||
return (PyObject*) self;
|
||||
}
|
||||
|
||||
return (PyObject*) PyUnicode_FromUnicode(
|
||||
self->str + start,
|
||||
end - start
|
||||
);
|
||||
}
|
||||
|
||||
static
|
||||
PyObject *replace(PyUnicodeObject *self,
|
||||
PyUnicodeObject *str1,
|
||||
|
@ -4464,17 +4435,173 @@ unicode_lower(PyUnicodeObject *self)
|
|||
return fixup(self, fixlower);
|
||||
}
|
||||
|
||||
static char lstrip__doc__[] =
|
||||
"S.lstrip() -> unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with leading whitespace removed.";
|
||||
/* Which end(s) of the string a strip operation works on */
#define LEFTSTRIP 0
|
||||
#define RIGHTSTRIP 1
|
||||
#define BOTHSTRIP 2
|
||||
|
||||
/* Argument format strings for PyArg_ParseTuple(), indexed by the strip
   type constants above: one optional object argument, with the method
   name after the colon */
|
||||
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
|
||||
|
||||
/* Method name for strip type i: the stripformat entry with its
   three-character "|O:" prefix skipped */
#define STRIPNAME(i) (stripformat[i]+3)
|
||||
|
||||
/* Return a pointer to the first occurrence of c among the first n
   elements of s, or NULL if c does not occur there.  This is the
   Py_UNICODE counterpart of memchr(). */
static const Py_UNICODE *
unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n)
{
    /* The index has the same type as the bound n, so the loop test is
       not a mixed signed/unsigned comparison and the index cannot
       overflow before reaching n. */
    size_t i;

    for (i = 0; i < n; ++i)
        if (s[i] == c)
            return s + i;
    return NULL;
}
|
||||
|
||||
/* externally visible for str.strip(unicode) */
|
||||
PyObject *
|
||||
_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
|
||||
{
|
||||
Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
|
||||
int len = PyUnicode_GET_SIZE(self);
|
||||
Py_UNICODE *sep = PyUnicode_AS_UNICODE(sepobj);
|
||||
int seplen = PyUnicode_GET_SIZE(sepobj);
|
||||
int i, j;
|
||||
|
||||
i = 0;
|
||||
if (striptype != RIGHTSTRIP) {
|
||||
while (i < len && unicode_memchr(sep, s[i], seplen)) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
j = len;
|
||||
if (striptype != LEFTSTRIP) {
|
||||
do {
|
||||
j--;
|
||||
} while (j >= i && unicode_memchr(sep, s[j], seplen));
|
||||
j++;
|
||||
}
|
||||
|
||||
if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
|
||||
Py_INCREF(self);
|
||||
return (PyObject*)self;
|
||||
}
|
||||
else
|
||||
return PyUnicode_FromUnicode(s+i, j-i);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
unicode_lstrip(PyUnicodeObject *self)
|
||||
do_strip(PyUnicodeObject *self, int striptype)
|
||||
{
|
||||
return strip(self, 1, 0);
|
||||
Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
|
||||
int len = PyUnicode_GET_SIZE(self), i, j;
|
||||
|
||||
i = 0;
|
||||
if (striptype != RIGHTSTRIP) {
|
||||
while (i < len && Py_UNICODE_ISSPACE(s[i])) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
j = len;
|
||||
if (striptype != LEFTSTRIP) {
|
||||
do {
|
||||
j--;
|
||||
} while (j >= i && Py_UNICODE_ISSPACE(s[j]));
|
||||
j++;
|
||||
}
|
||||
|
||||
if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
|
||||
Py_INCREF(self);
|
||||
return (PyObject*)self;
|
||||
}
|
||||
else
|
||||
return PyUnicode_FromUnicode(s+i, j-i);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
|
||||
{
|
||||
PyObject *sep = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
|
||||
return NULL;
|
||||
|
||||
if (sep != NULL && sep != Py_None) {
|
||||
if (PyUnicode_Check(sep))
|
||||
return _PyUnicode_XStrip(self, striptype, sep);
|
||||
else if (PyString_Check(sep)) {
|
||||
PyObject *res;
|
||||
sep = PyUnicode_FromObject(sep);
|
||||
if (sep==NULL)
|
||||
return NULL;
|
||||
res = _PyUnicode_XStrip(self, striptype, sep);
|
||||
Py_DECREF(sep);
|
||||
return res;
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%s arg must be None, unicode or str",
|
||||
STRIPNAME(striptype));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return do_strip(self, striptype);
|
||||
}
|
||||
|
||||
|
||||
static char strip__doc__[] =
|
||||
"S.strip([sep]) -> unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with leading and trailing\n\
|
||||
whitespace removed.\n\
|
||||
If sep is given and not None, remove characters in sep instead.\n\
|
||||
If sep is a str, it will be converted to unicode before stripping";
|
||||
|
||||
static PyObject *
|
||||
unicode_strip(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
if (PyTuple_GET_SIZE(args) == 0)
|
||||
return do_strip(self, BOTHSTRIP); /* Common case */
|
||||
else
|
||||
return do_argstrip(self, BOTHSTRIP, args);
|
||||
}
|
||||
|
||||
|
||||
static char lstrip__doc__[] =
|
||||
"S.lstrip([sep]) -> unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with leading whitespace removed.\n\
|
||||
If sep is given and not None, remove characters in sep instead.\n\
|
||||
If sep is a str, it will be converted to unicode before stripping";
|
||||
|
||||
static PyObject *
|
||||
unicode_lstrip(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
if (PyTuple_GET_SIZE(args) == 0)
|
||||
return do_strip(self, LEFTSTRIP); /* Common case */
|
||||
else
|
||||
return do_argstrip(self, LEFTSTRIP, args);
|
||||
}
|
||||
|
||||
|
||||
static char rstrip__doc__[] =
|
||||
"S.rstrip([sep]) -> unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with trailing whitespace removed.\n\
|
||||
If sep is given and not None, remove characters in sep instead.\n\
|
||||
If sep is a str, it will be converted to unicode before stripping";
|
||||
|
||||
static PyObject *
|
||||
unicode_rstrip(PyUnicodeObject *self, PyObject *args)
|
||||
{
|
||||
if (PyTuple_GET_SIZE(args) == 0)
|
||||
return do_strip(self, RIGHTSTRIP); /* Common case */
|
||||
else
|
||||
return do_argstrip(self, RIGHTSTRIP, args);
|
||||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
unicode_repeat(PyUnicodeObject *str, int len)
|
||||
{
|
||||
|
@ -4677,17 +4804,6 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args)
|
|||
return (PyObject*) pad(self, width - self->length, 0, ' ');
|
||||
}
|
||||
|
||||
static char rstrip__doc__[] =
|
||||
"S.rstrip() -> unicode\n\
|
||||
\n\
|
||||
Return a copy of the string S with trailing whitespace removed.";
|
||||
|
||||
static PyObject *
|
||||
unicode_rstrip(PyUnicodeObject *self)
|
||||
{
|
||||
return strip(self, 0, 1);
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
unicode_slice(PyUnicodeObject *self, int start, int end)
|
||||
{
|
||||
|
@ -4783,17 +4899,6 @@ PyObject *unicode_str(PyUnicodeObject *self)
|
|||
return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
|
||||
}
|
||||
|
||||
static char strip__doc__[] =
|
||||
"S.strip() -> unicode\n\
|
||||
\n\
|
||||
Return a copy of S with leading and trailing whitespace removed.";
|
||||
|
||||
static PyObject *
|
||||
unicode_strip(PyUnicodeObject *self)
|
||||
{
|
||||
return strip(self, 1, 1);
|
||||
}
|
||||
|
||||
static char swapcase__doc__[] =
|
||||
"S.swapcase() -> unicode\n\
|
||||
\n\
|
||||
|
@ -4966,14 +5071,14 @@ static PyMethodDef unicode_methods[] = {
|
|||
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
|
||||
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
|
||||
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
|
||||
{"lstrip", (PyCFunction) unicode_lstrip, METH_NOARGS, lstrip__doc__},
|
||||
{"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
|
||||
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
|
||||
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
|
||||
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
|
||||
{"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
|
||||
{"rstrip", (PyCFunction) unicode_rstrip, METH_NOARGS, rstrip__doc__},
|
||||
{"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
|
||||
{"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
|
||||
{"strip", (PyCFunction) unicode_strip, METH_NOARGS, strip__doc__},
|
||||
{"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
|
||||
{"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
|
||||
{"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
|
||||
{"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
|
||||
|
|
Loading…
Reference in New Issue