Committing patch #591250 which provides "str1 in str2" when str1 is a

string longer than 1 character.
2002-08-06 16:58:21 +00:00 · 2002-08-06 16:58:21 +00:00 · 817918cc3c
parent b57089cdf8
commit 817918cc3c
8 changed files with 140 additions and 99 deletions
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority
 and \var{j} are integers:

 \begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
-  \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{}
+  \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)}
  \lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
-equal to \var{x}, else \code{1}}{}
+equal to \var{x}, else \code{1}}{(1)}
  \hline
  \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
-  \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)}
+  \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
  \hline
-  \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)}
-  \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)}
+  \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
+  \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)}
  \hline
  \lineiii{len(\var{s})}{length of \var{s}}{}
  \lineiii{min(\var{s})}{smallest item of \var{s}}{}
@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{}
 Notes:

 \begin{description}
-\item[(1)] Values of \var{n} less than \code{0} are treated as
+\item[(1)] When \var{s} is a string or Unicode string object, the
+\code{in} and \code{not in} operations act like a substring test.  In
+Python versions before 2.3, \var{x} had to be a string of length 1.
+In Python 2.3 and beyond, \var{x} may be a string of any length.
+
+\item[(2)] Values of \var{n} less than \code{0} are treated as
  \code{0} (which yields an empty sequence of the same type as
  \var{s}).  Note also that the copies are shallow; nested structures
  are not copied.  This often haunts new Python programmers; consider:
@ -489,12 +494,12 @@ Notes:
 [[3], [5], [7]]
 \end{verbatim}

-\item[(2)] If \var{i} or \var{j} is negative, the index is relative to
+\item[(3)] If \var{i} or \var{j} is negative, the index is relative to
  the end of the string: \code{len(\var{s}) + \var{i}} or
  \code{len(\var{s}) + \var{j}} is substituted.  But note that \code{-0} is
  still \code{0}.

-\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as
+\item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as
  the sequence of items with index \var{k} such that \code{\var{i} <=
  \var{k} < \var{j}}.  If \var{i} or \var{j} is greater than
  \code{len(\var{s})}, use \code{len(\var{s})}.  If \var{i} is omitted,
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@ -1,7 +1,7 @@
 """Common tests shared by test_string and test_userstring"""

 import string
-from test.test_support import verify, verbose, TestFailed, have_unicode
+from test.test_support import verify, vereq, verbose, TestFailed, have_unicode

 transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'

@ -295,3 +295,23 @@ def run_method_tests(test):
        data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
        verify('hello world'.encode('zlib') == data)
        verify(data.decode('zlib') == 'hello world')
+
+def test_exception(lhs, rhs, msg):  # passes only when evaluating `lhs in rhs` raises TypeError
+    try:
+        lhs in rhs
+    except TypeError:
+        pass  # the expected outcome
+    else:
+        raise TestFailed, msg  # no TypeError was raised: fail with the caller's message
+
+def run_contains_tests(test):  # substring `in` checks on plain string literals; `test` is not referenced in the body
+    vereq('' in '', True)  # an empty left operand is expected to be found, even in an empty string
+    vereq('' in 'abc', True)
+    vereq('\0' in 'abc', False)  # NUL character absent from the right operand
+    vereq('\0' in '\0abc', True)  # NUL character at the start
+    vereq('\0' in 'abc\0', True)  # NUL character at the end
+    vereq('a' in '\0abc', True)  # ordinary character located after a NUL
+    vereq('asdf' in 'asdf', True)  # multi-character left operand equal to the right operand
+    vereq('asdf' in 'asd', False)  # left operand longer than the right operand
+    vereq('asdf' in '', False)  # non-empty left operand, empty right operand
+
--- a/Lib/test/test_contains.py
+++ b/Lib/test/test_contains.py
@ -45,17 +45,8 @@ except TypeError:
 check('c' in 'abc', "'c' not in 'abc'")
 check('d' not in 'abc', "'d' in 'abc'")

-try:
-    '' in 'abc'
-    check(0, "'' in 'abc' did not raise error")
-except TypeError:
-    pass
-
-try:
-    'ab' in 'abc'
-    check(0, "'ab' in 'abc' did not raise error")
-except TypeError:
-    pass
+check('' in '', "'' not in ''")
+check('' in 'abc', "'' not in 'abc'")

 try:
    None in 'abc'
@ -71,17 +62,12 @@ if have_unicode:
    check('c' in unicode('abc'), "'c' not in u'abc'")
    check('d' not in unicode('abc'), "'d' in u'abc'")

-    try:
-        '' in unicode('abc')
-        check(0, "'' in u'abc' did not raise error")
-    except TypeError:
-        pass
-
-    try:
-        'ab' in unicode('abc')
-        check(0, "'ab' in u'abc' did not raise error")
-    except TypeError:
-        pass
+    check('' in unicode(''), "'' not in u''")
+    check(unicode('') in '', "u'' not in ''")
+    check(unicode('') in unicode(''), "u'' not in u''")
+    check('' in unicode('abc'), "'' not in u'abc'")
+    check(unicode('') in 'abc', "u'' not in 'abc'")
+    check(unicode('') in unicode('abc'), "u'' not in u'abc'")

    try:
        None in unicode('abc')
@ -94,35 +80,11 @@ if have_unicode:
    check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
    check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")

-    try:
-        unicode('') in unicode('abc')
-        check(0, "u'' in u'abc' did not raise error")
-    except TypeError:
-        pass
-
-    try:
-        unicode('ab') in unicode('abc')
-        check(0, "u'ab' in u'abc' did not raise error")
-    except TypeError:
-        pass
-
    # Test Unicode char in string

    check(unicode('c') in 'abc', "u'c' not in 'abc'")
    check(unicode('d') not in 'abc', "u'd' in 'abc'")

-    try:
-        unicode('') in 'abc'
-        check(0, "u'' in 'abc' did not raise error")
-    except TypeError:
-        pass
-
-    try:
-        unicode('ab') in 'abc'
-        check(0, "u'ab' in 'abc' did not raise error")
-    except TypeError:
-        pass
-
 # A collection of tests on builtin sequence types
 a = range(10)
 for i in a:
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@ -51,6 +51,7 @@ def test(name, input, output, *args):

 string_tests.run_module_tests(test)
 string_tests.run_method_tests(test)
+string_tests.run_contains_tests(test)

 string.whitespace
 string.lowercase
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

 """#"
-from test.test_support import verify, verbose, TestFailed
+from test.test_support import verify, vereq, verbose, TestFailed
 import sys, string

 if not sys.platform.startswith('java'):
@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c

 # Contains:
 print 'Testing Unicode contains method...',
-verify(('a' in u'abdb') == 1)
-verify(('a' in u'bdab') == 1)
-verify(('a' in u'bdaba') == 1)
-verify(('a' in u'bdba') == 1)
-verify(('a' in u'bdba') == 1)
-verify((u'a' in u'bdba') == 1)
-verify((u'a' in u'bdb') == 0)
-verify((u'a' in 'bdb') == 0)
-verify((u'a' in 'bdba') == 1)
-verify((u'a' in ('a',1,None)) == 1)
-verify((u'a' in (1,None,'a')) == 1)
-verify((u'a' in (1,None,u'a')) == 1)
-verify(('a' in ('a',1,None)) == 1)
-verify(('a' in (1,None,'a')) == 1)
-verify(('a' in (1,None,u'a')) == 1)
-verify(('a' in ('x',1,u'y')) == 0)
-verify(('a' in ('x',1,None)) == 0)
+vereq(('a' in u'abdb'), True)
+vereq(('a' in u'bdab'), True)
+vereq(('a' in u'bdaba'), True)
+vereq(('a' in u'bdba'), True)
+vereq(('a' in u'bdba'), True)
+vereq((u'a' in u'bdba'), True)
+vereq((u'a' in u'bdb'), False)
+vereq((u'a' in 'bdb'), False)
+vereq((u'a' in 'bdba'), True)
+vereq((u'a' in ('a',1,None)), True)
+vereq((u'a' in (1,None,'a')), True)
+vereq((u'a' in (1,None,u'a')), True)
+vereq(('a' in ('a',1,None)), True)
+vereq(('a' in (1,None,'a')), True)
+vereq(('a' in (1,None,u'a')), True)
+vereq(('a' in ('x',1,u'y')), False)
+vereq(('a' in ('x',1,None)), False)
 print 'done.'

 # Formatting:
@ -758,3 +758,42 @@ print u'abc\n',
 print u'def\n'
 print u'def\n'
 print 'done.'
+
+def test_exception(lhs, rhs, msg):  # passes only when evaluating `lhs in rhs` raises TypeError
+    try:
+        lhs in rhs
+    except TypeError:
+        pass  # the expected outcome
+    else:
+        raise TestFailed, msg  # no TypeError was raised: fail with the caller's message
+
+def run_contains_tests():  # substring `in` checks; each case is tried as unicode-in-str, str-in-unicode and unicode-in-unicode
+    vereq(u'' in '', True)  # empty left operand, empty right operand
+    vereq('' in u'', True)
+    vereq(u'' in u'', True)
+    vereq(u'' in 'abc', True)  # empty left operand, non-empty right operand
+    vereq('' in u'abc', True)
+    vereq(u'' in u'abc', True)
+    vereq(u'\0' in 'abc', False)  # NUL character absent from the right operand
+    vereq('\0' in u'abc', False)
+    vereq(u'\0' in u'abc', False)
+    vereq(u'\0' in '\0abc', True)  # NUL character at the start
+    vereq('\0' in u'\0abc', True)
+    vereq(u'\0' in u'\0abc', True)
+    vereq(u'\0' in 'abc\0', True)  # NUL character at the end
+    vereq('\0' in u'abc\0', True)
+    vereq(u'\0' in u'abc\0', True)
+    vereq(u'a' in '\0abc', True)  # ordinary character located after a NUL
+    vereq('a' in u'\0abc', True)
+    vereq(u'a' in u'\0abc', True)
+    vereq(u'asdf' in 'asdf', True)  # multi-character left operand equal to the right operand
+    vereq('asdf' in u'asdf', True)
+    vereq(u'asdf' in u'asdf', True)
+    vereq(u'asdf' in 'asd', False)  # left operand longer than the right operand
+    vereq('asdf' in u'asd', False)
+    vereq(u'asdf' in u'asd', False)
+    vereq(u'asdf' in '', False)  # non-empty left operand, empty right operand
+    vereq('asdf' in u'', False)
+    vereq(u'asdf' in u'', False)
+
+run_contains_tests()
--- a/Lib/test/test_userstring.py
+++ b/Lib/test/test_userstring.py
@ -41,3 +41,4 @@ def test(methodname, input, output, *args):
        print (methodname, input, output, args, res[0], res[1], res[2])

 string_tests.run_method_tests(test)
+string_tests.run_contains_tests(test)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j)
 static int
 string_contains(PyObject *a, PyObject *el)
 {
-	register char *s, *end;
-	register char c;
+	const char *lhs, *rhs, *end;	/* lhs scans a (the container); rhs is el (the substring sought) */
+	int size;			/* length of el */
+	/* Returns 1 if el occurs in a as a substring, 0 if it does not, and
+	   -1 with TypeError set when el is not a string.  A Unicode el is
+	   handed to PyUnicode_Contains(). */
 #ifdef Py_USING_UNICODE
 	if (PyUnicode_Check(el))
 		return PyUnicode_Contains(a, el);
 #endif
-	if (!PyString_Check(el) || PyString_Size(el) != 1) {
+	if (!PyString_Check(el)) {
 		PyErr_SetString(PyExc_TypeError,
-		    "'in <string>' requires character as left operand");
+			      "'in <string>' requires string as left operand");
 		return -1;
 	}
-	c = PyString_AsString(el)[0];
-	s = PyString_AsString(a);
-	end = s + PyString_Size(a);
-	while (s < end) {
-		if (c == *s++)
+	size = PyString_Size(el);
+	rhs = PyString_AS_STRING(el);
+	lhs = PyString_AS_STRING(a);
+
+	/* optimize for a single character */
+	if (size == 1)
+		return memchr(lhs, *rhs, PyString_Size(a)) != NULL;
+
+	/* end is one past the last byte of a.  Comparing the remaining
+	   length with size avoids forming a pointer before the start of
+	   the buffer when el is longer than a. */
+	end = lhs + PyString_Size(a);
+	while (end - lhs >= size) {
+		/* size == 0 compares equal at once, so '' is in every string */
+		if (memcmp(lhs++, rhs, size) == 0)
 			return 1;
 	}
+
 	return 0;
 }

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container,
 		       PyObject *element)
 {
    PyUnicodeObject *u = NULL, *v = NULL;
-    int result;
-    register const Py_UNICODE *p, *e;
-    register Py_UNICODE ch;
+    int result, size;
+    register const Py_UNICODE *lhs, *end, *rhs;

    /* Coerce the two arguments */
    v = (PyUnicodeObject *)PyUnicode_FromObject(element);
    if (v == NULL) {
 	PyErr_SetString(PyExc_TypeError,
-	    "'in <string>' requires character as left operand");
+	    "'in <string>' requires string as left operand");
 	goto onError;
    }
    u = (PyUnicodeObject *)PyUnicode_FromObject(container);
@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container,
 	goto onError;
    }

-    /* Check v in u */
-    if (PyUnicode_GET_SIZE(v) != 1) {
-	PyErr_SetString(PyExc_TypeError,
-	    "'in <string>' requires character as left operand");
-	goto onError;
-    }
-    ch = *PyUnicode_AS_UNICODE(v);
-    p = PyUnicode_AS_UNICODE(u);
-    e = p + PyUnicode_GET_SIZE(u);
+    size = PyUnicode_GET_SIZE(v);
+    rhs = PyUnicode_AS_UNICODE(v);
+    lhs = PyUnicode_AS_UNICODE(u);
+
    result = 0;
-    while (p < e) {
-	if (*p++ == ch) {
-	    result = 1;
-	    break;
+    if (size == 1) {
+	end = lhs + PyUnicode_GET_SIZE(u);
+	while (lhs < end) {
+	    if (*lhs++ == *rhs) {
+		result = 1;
+		break;
+	    }
+	}
+    }
+    else {
+	end = lhs + (PyUnicode_GET_SIZE(u) - size);
+	while (lhs <= end) {
+	    if (memcmp(lhs++, rhs, size) == 0) {
+		result = 1;
+		break;
+	    }
 	}
    }