Committing patch #591250 which provides "str1 in str2" when str1 is a

string longer than 1 character.
This commit is contained in:
Barry Warsaw 2002-08-06 16:58:21 +00:00
parent b57089cdf8
commit 817918cc3c
8 changed files with 140 additions and 99 deletions

View File

@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority
and \var{j} are integers:
\begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
\lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{}
\lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)}
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
equal to \var{x}, else \code{1}}{}
equal to \var{x}, else \code{1}}{(1)}
\hline
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)}
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
\hline
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)}
\lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)}
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
\lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)}
\hline
\lineiii{len(\var{s})}{length of \var{s}}{}
\lineiii{min(\var{s})}{smallest item of \var{s}}{}
@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{}
Notes:
\begin{description}
\item[(1)] Values of \var{n} less than \code{0} are treated as
\item[(1)] When \var{s} is a string or Unicode string object the
\code{in} and \code{not in} operations act like a substring test. In
Python versions before 2.3, \var{x} had to be a string of length 1.
In Python 2.3 and beyond, \var{x} may be a string of any length.
\item[(2)] Values of \var{n} less than \code{0} are treated as
\code{0} (which yields an empty sequence of the same type as
\var{s}). Note also that the copies are shallow; nested structures
are not copied. This often haunts new Python programmers; consider:
@ -489,12 +494,12 @@ Notes:
[[3], [5], [7]]
\end{verbatim}
\item[(2)] If \var{i} or \var{j} is negative, the index is relative to
\item[(3)] If \var{i} or \var{j} is negative, the index is relative to
the end of the string: \code{len(\var{s}) + \var{i}} or
\code{len(\var{s}) + \var{j}} is substituted. But note that \code{-0} is
still \code{0}.
\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as
\item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as
the sequence of items with index \var{k} such that \code{\var{i} <=
\var{k} < \var{j}}. If \var{i} or \var{j} is greater than
\code{len(\var{s})}, use \code{len(\var{s})}. If \var{i} is omitted,

View File

@ -1,7 +1,7 @@
"""Common tests shared by test_string and test_userstring"""
import string
from test.test_support import verify, verbose, TestFailed, have_unicode
from test.test_support import verify, vereq, verbose, TestFailed, have_unicode
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
@ -295,3 +295,23 @@ def run_method_tests(test):
data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
verify('hello world'.encode('zlib') == data)
verify(data.decode('zlib') == 'hello world')
def test_exception(lhs, rhs, msg):
    """Require that ``lhs in rhs`` raises TypeError.

    If the containment test completes without a TypeError, TestFailed is
    raised with ``msg``.  Any other exception propagates unchanged.
    """
    try:
        lhs in rhs
    except TypeError:
        return
    raise TestFailed(msg)
def run_contains_tests(test):
    """Check substring containment (``in``) between plain strings.

    Each entry is (needle, haystack, expected result of
    ``needle in haystack``).  The ``test`` argument is accepted but not
    used by this function.
    """
    cases = [
        ('', '', True),
        ('', 'abc', True),
        ('\0', 'abc', False),
        ('\0', '\0abc', True),
        ('\0', 'abc\0', True),
        ('a', '\0abc', True),
        ('asdf', 'asdf', True),
        ('asdf', 'asd', False),
        ('asdf', '', False),
    ]
    for needle, haystack, expected in cases:
        vereq(needle in haystack, expected)

View File

@ -45,17 +45,8 @@ except TypeError:
check('c' in 'abc', "'c' not in 'abc'")
check('d' not in 'abc', "'d' in 'abc'")
try:
'' in 'abc'
check(0, "'' in 'abc' did not raise error")
except TypeError:
pass
try:
'ab' in 'abc'
check(0, "'ab' in 'abc' did not raise error")
except TypeError:
pass
check('' in '', "'' not in ''")
check('' in 'abc', "'' not in 'abc'")
try:
None in 'abc'
@ -71,17 +62,12 @@ if have_unicode:
check('c' in unicode('abc'), "'c' not in u'abc'")
check('d' not in unicode('abc'), "'d' in u'abc'")
try:
'' in unicode('abc')
check(0, "'' in u'abc' did not raise error")
except TypeError:
pass
try:
'ab' in unicode('abc')
check(0, "'ab' in u'abc' did not raise error")
except TypeError:
pass
check('' in unicode(''), "'' not in u''")
check(unicode('') in '', "u'' not in ''")
check(unicode('') in unicode(''), "u'' not in u''")
check('' in unicode('abc'), "'' not in u'abc'")
check(unicode('') in 'abc', "u'' not in 'abc'")
check(unicode('') in unicode('abc'), "u'' not in u'abc'")
try:
None in unicode('abc')
@ -94,35 +80,11 @@ if have_unicode:
check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
try:
unicode('') in unicode('abc')
check(0, "u'' in u'abc' did not raise error")
except TypeError:
pass
try:
unicode('ab') in unicode('abc')
check(0, "u'ab' in u'abc' did not raise error")
except TypeError:
pass
# Test Unicode char in string
check(unicode('c') in 'abc', "u'c' not in 'abc'")
check(unicode('d') not in 'abc', "u'd' in 'abc'")
try:
unicode('') in 'abc'
check(0, "u'' in 'abc' did not raise error")
except TypeError:
pass
try:
unicode('ab') in 'abc'
check(0, "u'ab' in 'abc' did not raise error")
except TypeError:
pass
# A collection of tests on builtin sequence types
a = range(10)
for i in a:

View File

@ -51,6 +51,7 @@ def test(name, input, output, *args):
string_tests.run_module_tests(test)
string_tests.run_method_tests(test)
string_tests.run_contains_tests(test)
string.whitespace
string.lowercase

View File

@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
from test.test_support import verify, verbose, TestFailed
from test.test_support import verify, vereq, verbose, TestFailed
import sys, string
if not sys.platform.startswith('java'):
@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c
# Contains:
print 'Testing Unicode contains method...',
verify(('a' in u'abdb') == 1)
verify(('a' in u'bdab') == 1)
verify(('a' in u'bdaba') == 1)
verify(('a' in u'bdba') == 1)
verify(('a' in u'bdba') == 1)
verify((u'a' in u'bdba') == 1)
verify((u'a' in u'bdb') == 0)
verify((u'a' in 'bdb') == 0)
verify((u'a' in 'bdba') == 1)
verify((u'a' in ('a',1,None)) == 1)
verify((u'a' in (1,None,'a')) == 1)
verify((u'a' in (1,None,u'a')) == 1)
verify(('a' in ('a',1,None)) == 1)
verify(('a' in (1,None,'a')) == 1)
verify(('a' in (1,None,u'a')) == 1)
verify(('a' in ('x',1,u'y')) == 0)
verify(('a' in ('x',1,None)) == 0)
vereq(('a' in u'abdb'), True)
vereq(('a' in u'bdab'), True)
vereq(('a' in u'bdaba'), True)
vereq(('a' in u'bdba'), True)
vereq(('a' in u'bdba'), True)
vereq((u'a' in u'bdba'), True)
vereq((u'a' in u'bdb'), False)
vereq((u'a' in 'bdb'), False)
vereq((u'a' in 'bdba'), True)
vereq((u'a' in ('a',1,None)), True)
vereq((u'a' in (1,None,'a')), True)
vereq((u'a' in (1,None,u'a')), True)
vereq(('a' in ('a',1,None)), True)
vereq(('a' in (1,None,'a')), True)
vereq(('a' in (1,None,u'a')), True)
vereq(('a' in ('x',1,u'y')), False)
vereq(('a' in ('x',1,None)), False)
print 'done.'
# Formatting:
@ -758,3 +758,42 @@ print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
def test_exception(lhs, rhs, msg):
    """Require that ``lhs in rhs`` raises TypeError.

    If the containment test completes without a TypeError, TestFailed is
    raised with ``msg``.  Any other exception propagates unchanged.
    """
    try:
        lhs in rhs
    except TypeError:
        return
    raise TestFailed(msg)
def run_contains_tests():
    """Check substring containment across str/unicode operand mixes.

    Each row covers one needle/haystack pair in three flavours:
    unicode-in-str, str-in-unicode and unicode-in-unicode.  Every entry is
    (needle, haystack, expected result of ``needle in haystack``).
    """
    cases = [
        (u'', '', True), ('', u'', True), (u'', u'', True),
        (u'', 'abc', True), ('', u'abc', True), (u'', u'abc', True),
        (u'\0', 'abc', False), ('\0', u'abc', False), (u'\0', u'abc', False),
        (u'\0', '\0abc', True), ('\0', u'\0abc', True), (u'\0', u'\0abc', True),
        (u'\0', 'abc\0', True), ('\0', u'abc\0', True), (u'\0', u'abc\0', True),
        (u'a', '\0abc', True), ('a', u'\0abc', True), (u'a', u'\0abc', True),
        (u'asdf', 'asdf', True), ('asdf', u'asdf', True), (u'asdf', u'asdf', True),
        (u'asdf', 'asd', False), ('asdf', u'asd', False), (u'asdf', u'asd', False),
        (u'asdf', '', False), ('asdf', u'', False), (u'asdf', u'', False),
    ]
    for needle, haystack, expected in cases:
        vereq(needle in haystack, expected)
run_contains_tests()

View File

@ -41,3 +41,4 @@ def test(methodname, input, output, *args):
print (methodname, input, output, args, res[0], res[1], res[2])
string_tests.run_method_tests(test)
string_tests.run_contains_tests(test)

View File

@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j)
/* Containment test for string objects: implements "el in a".
 *
 * Returns 1 if el occurs as a contiguous substring of a, 0 if it does
 * not, and -1 with TypeError set if el is neither a string nor (when
 * Py_USING_UNICODE is defined) a Unicode object.  A Unicode el is handed
 * off to PyUnicode_Contains().  The empty string is contained in every
 * string.
 */
static int
string_contains(PyObject *a, PyObject *el)
{
    const char *lhs, *rhs, *end;
    int size, len;

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(el))
        return PyUnicode_Contains(a, el);
#endif
    if (!PyString_Check(el)) {
        PyErr_SetString(PyExc_TypeError,
            "'in <string>' requires string as left operand");
        return -1;
    }
    size = PyString_Size(el);
    len = PyString_Size(a);
    rhs = PyString_AS_STRING(el);
    lhs = PyString_AS_STRING(a);

    /* optimize for a single character */
    if (size == 1)
        return memchr(lhs, *rhs, len) != NULL;

    /* A needle longer than the haystack can never match.  Return before
       computing `end`, which would otherwise point in front of the
       buffer (undefined pointer arithmetic). */
    if (size > len)
        return 0;

    /* `end` is the last position at which a match of `size` bytes still
       fits inside the haystack. */
    end = lhs + (len - size);
    for (; lhs <= end; lhs++) {
        if (memcmp(lhs, rhs, size) == 0)
            return 1;
    }
    return 0;
}

View File

@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
PyUnicodeObject *u = NULL, *v = NULL;
int result;
register const Py_UNICODE *p, *e;
register Py_UNICODE ch;
int result, size;
register const Py_UNICODE *lhs, *end, *rhs;
/* Coerce the two arguments */
v = (PyUnicodeObject *)PyUnicode_FromObject(element);
if (v == NULL) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires character as left operand");
"'in <string>' requires string as left operand");
goto onError;
}
u = (PyUnicodeObject *)PyUnicode_FromObject(container);
@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container,
goto onError;
}
/* Check v in u */
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires character as left operand");
goto onError;
}
ch = *PyUnicode_AS_UNICODE(v);
p = PyUnicode_AS_UNICODE(u);
e = p + PyUnicode_GET_SIZE(u);
size = PyUnicode_GET_SIZE(v);
rhs = PyUnicode_AS_UNICODE(v);
lhs = PyUnicode_AS_UNICODE(u);
result = 0;
while (p < e) {
if (*p++ == ch) {
result = 1;
break;
if (size == 1) {
end = lhs + PyUnicode_GET_SIZE(u);
while (lhs < end) {
if (*lhs++ == *rhs) {
result = 1;
break;
}
}
}
else {
end = lhs + (PyUnicode_GET_SIZE(u) - size);
while (lhs <= end) {
if (memcmp(lhs++, rhs, size) == 0) {
result = 1;
break;
}
}
}