From 817918cc3c10d0ed6b14e0e3f2bc0c5227c508cd Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Tue, 6 Aug 2002 16:58:21 +0000 Subject: [PATCH] Committing patch #591250 which provides "str1 in str2" when str1 is a string of longer than 1 character. --- Doc/lib/libstdtypes.tex | 21 +++++++---- Lib/test/string_tests.py | 22 ++++++++++- Lib/test/test_contains.py | 54 ++++---------------------- Lib/test/test_string.py | 1 + Lib/test/test_unicode.py | 75 ++++++++++++++++++++++++++++--------- Lib/test/test_userstring.py | 1 + Objects/stringobject.c | 25 ++++++++----- Objects/unicodeobject.c | 40 +++++++++++--------- 8 files changed, 140 insertions(+), 99 deletions(-) diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex index 87d5402d05c..df602cda988 100644 --- a/Doc/lib/libstdtypes.tex +++ b/Doc/lib/libstdtypes.tex @@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority and \var{j} are integers: \begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes} - \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{} + \lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)} \lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is -equal to \var{x}, else \code{1}}{} +equal to \var{x}, else \code{1}}{(1)} \hline \lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{} - \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)} + \lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)} \hline - \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)} - \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)} + \lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)} + \lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)} \hline 
\lineiii{len(\var{s})}{length of \var{s}}{} \lineiii{min(\var{s})}{smallest item of \var{s}}{} @@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{} Notes: \begin{description} -\item[(1)] Values of \var{n} less than \code{0} are treated as +\item[(1)] When \var{s} is a string or Unicode string object the +\code{in} and \code{not in} operations act like a substring test. In +Python versions before 2.3, \var{x} had to be a string of length 1. +In Python 2.3 and beyond, \var{x} may be a string of any length. + +\item[(2)] Values of \var{n} less than \code{0} are treated as \code{0} (which yields an empty sequence of the same type as \var{s}). Note also that the copies are shallow; nested structures are not copied. This often haunts new Python programmers; consider: @@ -489,12 +494,12 @@ Notes: [[3], [5], [7]] \end{verbatim} -\item[(2)] If \var{i} or \var{j} is negative, the index is relative to +\item[(3)] If \var{i} or \var{j} is negative, the index is relative to the end of the string: \code{len(\var{s}) + \var{i}} or \code{len(\var{s}) + \var{j}} is substituted. But note that \code{-0} is still \code{0}. -\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as +\item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as the sequence of items with index \var{k} such that \code{\var{i} <= \var{k} < \var{j}}. If \var{i} or \var{j} is greater than \code{len(\var{s})}, use \code{len(\var{s})}. 
If \var{i} is omitted, diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 47d7510c075..836836b1f09 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1,7 +1,7 @@ """Common tests shared by test_string and test_userstring""" import string -from test.test_support import verify, verbose, TestFailed, have_unicode +from test.test_support import verify, vereq, verbose, TestFailed, have_unicode transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' @@ -295,3 +295,23 @@ def run_method_tests(test): data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]' verify('hello world'.encode('zlib') == data) verify(data.decode('zlib') == 'hello world') + +def test_exception(lhs, rhs, msg): + try: + lhs in rhs + except TypeError: + pass + else: + raise TestFailed, msg + +def run_contains_tests(test): + vereq('' in '', True) + vereq('' in 'abc', True) + vereq('\0' in 'abc', False) + vereq('\0' in '\0abc', True) + vereq('\0' in 'abc\0', True) + vereq('a' in '\0abc', True) + vereq('asdf' in 'asdf', True) + vereq('asdf' in 'asd', False) + vereq('asdf' in '', False) + diff --git a/Lib/test/test_contains.py b/Lib/test/test_contains.py index 9abed1512bd..04eedf1757a 100644 --- a/Lib/test/test_contains.py +++ b/Lib/test/test_contains.py @@ -45,17 +45,8 @@ 
except TypeError: check('c' in 'abc', "'c' not in 'abc'") check('d' not in 'abc', "'d' in 'abc'") -try: - '' in 'abc' - check(0, "'' in 'abc' did not raise error") -except TypeError: - pass - -try: - 'ab' in 'abc' - check(0, "'ab' in 'abc' did not raise error") -except TypeError: - pass +check('' in '', "'' not in ''") +check('' in 'abc', "'' not in 'abc'") try: None in 'abc' @@ -71,17 +62,12 @@ if have_unicode: check('c' in unicode('abc'), "'c' not in u'abc'") check('d' not in unicode('abc'), "'d' in u'abc'") - try: - '' in unicode('abc') - check(0, "'' in u'abc' did not raise error") - except TypeError: - pass - - try: - 'ab' in unicode('abc') - check(0, "'ab' in u'abc' did not raise error") - except TypeError: - pass + check('' in unicode(''), "'' not in u''") + check(unicode('') in '', "u'' not in ''") + check(unicode('') in unicode(''), "u'' not in u''") + check('' in unicode('abc'), "'' not in u'abc'") + check(unicode('') in 'abc', "u'' not in 'abc'") + check(unicode('') in unicode('abc'), "u'' not in u'abc'") try: None in unicode('abc') @@ -94,35 +80,11 @@ if have_unicode: check(unicode('c') in unicode('abc'), "u'c' not in u'abc'") check(unicode('d') not in unicode('abc'), "u'd' in u'abc'") - try: - unicode('') in unicode('abc') - check(0, "u'' in u'abc' did not raise error") - except TypeError: - pass - - try: - unicode('ab') in unicode('abc') - check(0, "u'ab' in u'abc' did not raise error") - except TypeError: - pass - # Test Unicode char in string check(unicode('c') in 'abc', "u'c' not in 'abc'") check(unicode('d') not in 'abc', "u'd' in 'abc'") - try: - unicode('') in 'abc' - check(0, "u'' in 'abc' did not raise error") - except TypeError: - pass - - try: - unicode('ab') in 'abc' - check(0, "u'ab' in 'abc' did not raise error") - except TypeError: - pass - # A collection of tests on builtin sequence types a = range(10) for i in a: diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py index af8c1bce66d..c92f5f7d1f7 100644 --- 
a/Lib/test/test_string.py +++ b/Lib/test/test_string.py @@ -51,6 +51,7 @@ def test(name, input, output, *args): string_tests.run_module_tests(test) string_tests.run_method_tests(test) +string_tests.run_contains_tests(test) string.whitespace string.lowercase diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 028e97ad802..f38467ad0d7 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. """#" -from test.test_support import verify, verbose, TestFailed +from test.test_support import verify, vereq, verbose, TestFailed import sys, string if not sys.platform.startswith('java'): @@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c # Contains: print 'Testing Unicode contains method...', -verify(('a' in u'abdb') == 1) -verify(('a' in u'bdab') == 1) -verify(('a' in u'bdaba') == 1) -verify(('a' in u'bdba') == 1) -verify(('a' in u'bdba') == 1) -verify((u'a' in u'bdba') == 1) -verify((u'a' in u'bdb') == 0) -verify((u'a' in 'bdb') == 0) -verify((u'a' in 'bdba') == 1) -verify((u'a' in ('a',1,None)) == 1) -verify((u'a' in (1,None,'a')) == 1) -verify((u'a' in (1,None,u'a')) == 1) -verify(('a' in ('a',1,None)) == 1) -verify(('a' in (1,None,'a')) == 1) -verify(('a' in (1,None,u'a')) == 1) -verify(('a' in ('x',1,u'y')) == 0) -verify(('a' in ('x',1,None)) == 0) +vereq(('a' in u'abdb'), True) +vereq(('a' in u'bdab'), True) +vereq(('a' in u'bdaba'), True) +vereq(('a' in u'bdba'), True) +vereq(('a' in u'bdba'), True) +vereq((u'a' in u'bdba'), True) +vereq((u'a' in u'bdb'), False) +vereq((u'a' in 'bdb'), False) +vereq((u'a' in 'bdba'), True) +vereq((u'a' in ('a',1,None)), True) +vereq((u'a' in (1,None,'a')), True) +vereq((u'a' in (1,None,u'a')), True) +vereq(('a' in ('a',1,None)), True) +vereq(('a' in (1,None,'a')), True) +vereq(('a' in (1,None,u'a')), True) +vereq(('a' in ('x',1,u'y')), 
False) +vereq(('a' in ('x',1,None)), False) print 'done.' # Formatting: @@ -758,3 +758,42 @@ print u'abc\n', print u'def\n' print u'def\n' print 'done.' + +def test_exception(lhs, rhs, msg): + try: + lhs in rhs + except TypeError: + pass + else: + raise TestFailed, msg + +def run_contains_tests(): + vereq(u'' in '', True) + vereq('' in u'', True) + vereq(u'' in u'', True) + vereq(u'' in 'abc', True) + vereq('' in u'abc', True) + vereq(u'' in u'abc', True) + vereq(u'\0' in 'abc', False) + vereq('\0' in u'abc', False) + vereq(u'\0' in u'abc', False) + vereq(u'\0' in '\0abc', True) + vereq('\0' in u'\0abc', True) + vereq(u'\0' in u'\0abc', True) + vereq(u'\0' in 'abc\0', True) + vereq('\0' in u'abc\0', True) + vereq(u'\0' in u'abc\0', True) + vereq(u'a' in '\0abc', True) + vereq('a' in u'\0abc', True) + vereq(u'a' in u'\0abc', True) + vereq(u'asdf' in 'asdf', True) + vereq('asdf' in u'asdf', True) + vereq(u'asdf' in u'asdf', True) + vereq(u'asdf' in 'asd', False) + vereq('asdf' in u'asd', False) + vereq(u'asdf' in u'asd', False) + vereq(u'asdf' in '', False) + vereq('asdf' in u'', False) + vereq(u'asdf' in u'', False) + +run_contains_tests() diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py index 78af807b53d..5492f2e526d 100755 --- a/Lib/test/test_userstring.py +++ b/Lib/test/test_userstring.py @@ -41,3 +41,4 @@ def test(methodname, input, output, *args): print (methodname, input, output, args, res[0], res[1], res[2]) string_tests.run_method_tests(test) +string_tests.run_contains_tests(test) diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 3c1b303a503..1d5277c0c78 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j) static int string_contains(PyObject *a, PyObject *el) { - register char *s, *end; - register char c; + const char *lhs, *rhs, *end; + int size; #ifdef Py_USING_UNICODE if (PyUnicode_Check(el)) return 
PyUnicode_Contains(a, el); #endif - if (!PyString_Check(el) || PyString_Size(el) != 1) { + if (!PyString_Check(el)) { PyErr_SetString(PyExc_TypeError, - "'in ' requires character as left operand"); + "'in ' requires string as left operand"); return -1; } - c = PyString_AsString(el)[0]; - s = PyString_AsString(a); - end = s + PyString_Size(a); - while (s < end) { - if (c == *s++) + size = PyString_Size(el); + rhs = PyString_AS_STRING(el); + lhs = PyString_AS_STRING(a); + + /* optimize for a single character */ + if (size == 1) + return memchr(lhs, *rhs, PyString_Size(a)) != NULL; + + end = lhs + (PyString_Size(a) - size); + while (lhs <= end) { + if (memcmp(lhs++, rhs, size) == 0) return 1; } + return 0; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6ca709b8d30..a577bfd4d77 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container, PyObject *element) { PyUnicodeObject *u = NULL, *v = NULL; - int result; - register const Py_UNICODE *p, *e; - register Py_UNICODE ch; + int result, size; + register const Py_UNICODE *lhs, *end, *rhs; /* Coerce the two arguments */ v = (PyUnicodeObject *)PyUnicode_FromObject(element); if (v == NULL) { PyErr_SetString(PyExc_TypeError, - "'in ' requires character as left operand"); + "'in ' requires string as left operand"); goto onError; } u = (PyUnicodeObject *)PyUnicode_FromObject(container); @@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container, goto onError; } - /* Check v in u */ - if (PyUnicode_GET_SIZE(v) != 1) { - PyErr_SetString(PyExc_TypeError, - "'in ' requires character as left operand"); - goto onError; - } - ch = *PyUnicode_AS_UNICODE(v); - p = PyUnicode_AS_UNICODE(u); - e = p + PyUnicode_GET_SIZE(u); + size = PyUnicode_GET_SIZE(v); + rhs = PyUnicode_AS_UNICODE(v); + lhs = PyUnicode_AS_UNICODE(u); + result = 0; - while (p < e) { - if (*p++ == ch) { - result = 1; - break; + if (size == 1) { + end = lhs + 
PyUnicode_GET_SIZE(u); + while (lhs < end) { + if (*lhs++ == *rhs) { + result = 1; + break; + } + } + } + else { + end = lhs + (PyUnicode_GET_SIZE(u) - size); + while (lhs <= end) { + if (memcmp(lhs++, rhs, size * sizeof(Py_UNICODE)) == 0) { /* memcmp() takes a byte count; size is in Py_UNICODE units */ + result = 1; + break; + } } }