diff --git a/Lib/re.py b/Lib/re.py index 41987739a2b..f6bac0871da 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -66,8 +66,8 @@ def escape(pattern): alphanum=string.letters+'_'+string.digits for char in pattern: if char not in alphanum: - if char == '\000': result.append(r'\000') - else: result.append('\\' + char) + if char=='\000': result.append('\\000') + else: result.append('\\'+char) else: result.append(char) return string.join(result, '') @@ -132,9 +132,9 @@ class RegexObject: def subn(self, repl, source, count=0): """Return a 2-tuple containing (new_string, number). new_string is the string obtained by replacing the leftmost - non-overlapping occurrences of the pattern in string by the - replacement repl. number is the number of substitutions that - were made.""" + non-overlapping occurrences of the pattern in the source + string by the replacement repl. number is the number of + substitutions that were made.""" if count < 0: raise error, "negative substitution count" @@ -174,7 +174,7 @@ class RegexObject: return (string.join(results, ''), n) def split(self, source, maxsplit=0): - """Split \var{string} by the occurrences of the pattern, + """Split the source string by the occurrences of the pattern, returning a list containing the resulting substrings.""" if maxsplit < 0: diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py index 9bbfa54ebd5..a42857a76b3 100755 --- a/Lib/test/re_tests.py +++ b/Lib/test/re_tests.py @@ -2,7 +2,7 @@ # -*- mode: python -*- # $Id$ -# Re test suite and benchmark suite v1.5b2 +# Re test suite and benchmark suite v1.5 # The 3 possible outcomes for each pattern [SUCCEED, FAIL, SYNTAX_ERROR] = range(3) @@ -62,23 +62,20 @@ tests = [ ('(?Pa)', 'a', SUCCEED, 'g1', 'a'), ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'), - + # Test octal escapes - ('\\1', 'a', SYNTAX_ERROR), + ('\\1', 'a', SYNTAX_ERROR), # Backreference + ('[\\1]', '\1', SUCCEED, 'found', '\1'), # Character ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'), 
('\\141', 'a', SUCCEED, 'found', 'a'), ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'), - # Test that a literal \0 is handled everywhere - ('\0', '\0', SUCCEED, 'found', '\0'), + # Test \0 is handled everywhere (r'\0', '\0', SUCCEED, 'found', '\0'), - ('[\0a]', '\0', SUCCEED, 'found', '\0'), - ('[a\0]', '\0', SUCCEED, 'found', '\0'), - ('[^a\0]', '\0', FAIL), (r'[\0a]', '\0', SUCCEED, 'found', '\0'), (r'[a\0]', '\0', SUCCEED, 'found', '\0'), (r'[^a\0]', '\0', FAIL), - + # Test various letter escapes (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'), (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'), @@ -103,6 +100,8 @@ tests = [ ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), + (')', '', SYNTAX_ERROR), # Unmatched right bracket + ('', '', SUCCEED, 'found', ''), # Empty pattern ('abc', 'abc', SUCCEED, 'found', 'abc'), ('abc', 'xbc', FAIL), ('abc', 'axc', FAIL), @@ -393,9 +392,6 @@ tests = [ ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'), ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'), ('[k]', 'ab', FAIL), -# XXX -# ('abcd', 'abcd', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'abcd-$&-\\abcd'), -# ('a(bc)d', 'abcd', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'bc-$1-\\bc'), ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'), ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'), ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'), diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index ffb5c669bf4..d5b16c662b1 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2,12 +2,36 @@ # -*- mode: python -*- # $Id$ +import sys +sys.path=['.']+sys.path + from test_support import verbose, TestFailed import re import sys, os, string, traceback # Misc tests from Tim Peters' re.doc +if verbose: + print 'Running tests on re.search and re.match' + +try: + assert re.search('x*', 
'axx').span(0) == (0, 0) + assert re.search('x*', 'axx').span() == (0, 0) + assert re.search('x+', 'axx').span(0) == (1, 3) + assert re.search('x+', 'axx').span() == (1, 3) + assert re.search('x', 'aaa') == None +except: + raise TestFailed, "re.search" + +try: + assert re.match('a*', 'xxx').span(0) == (0, 0) + assert re.match('a*', 'xxx').span() == (0, 0) + assert re.match('x*', 'xxxa').span(0) == (0, 3) + assert re.match('x*', 'xxxa').span() == (0, 3) + assert re.match('a+', 'xxx') == None +except: + raise TestFailed, "re.match" + if verbose: print 'Running tests on re.sub' @@ -19,25 +43,30 @@ try: return str(int_value + 1) assert re.sub(r'\d+', bump_num, '08.2 -2 23x99y') == '9.3 -3 24x100y' + assert re.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3) == '9.3 -3 23x99y' assert re.sub('.', lambda m: r"\n", 'x') == '\\n' assert re.sub('.', r"\n", 'x') == '\n' s = r"\1\1" assert re.sub('(.)', s, 'x') == 'xx' - assert re.sub('(.)', re.escape(s), 'x') == s + assert re.sub('(.)', re.escape(s), 'x') == s assert re.sub('(.)', lambda m: s, 'x') == s assert re.sub('(?Px)', '\g\g', 'xx') == 'xxxx' + assert re.sub('(?Px)', '\g\g<1>', 'xx') == 'xxxx' assert re.sub('(?Px)', '\g\g', 'xx') == 'xxxx' + assert re.sub('(?Px)', '\g<1>\g<1>', 'xx') == 'xxxx' - assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\bBZ\aAwWsSdD' + assert re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D' assert re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a' assert re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)) + assert re.sub('^\s*', 'X', 'test') == 'Xtest' except AssertionError: raise TestFailed, "re.sub" + try: assert re.sub('a', 'b', 'aaaaa') == 'bbbbb' assert re.sub('a', 'b', 'aaaaa', 1) == 'baaaa' @@ -75,6 +104,13 @@ except re.error, reason: else: raise TestFailed, "symbolic reference" +try: + re.sub('(?Px)', '\g<1a1>', 'xx') +except re.error, reason: + pass +else: + raise 
TestFailed, "symbolic reference" + try: re.sub('(?Px)', '\g', 'xx') except IndexError, reason: @@ -104,9 +140,13 @@ try: assert re.subn("b+", "x", "bbbb BBBB") == ('x BBBB', 1) assert re.subn("b+", "x", "xyz") == ('xyz', 0) assert re.subn("b*", "x", "xyz") == ('xxxyxzx', 4) + assert re.subn("b*", "x", "xyz", 2) == ('xxxyz', 2) except AssertionError: raise TestFailed, "re.subn" +if verbose: + print 'Running tests on re.split' + try: assert re.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c'] assert re.split(":*", ":a:b::c") == ['', 'a', 'b', 'c'] @@ -117,7 +157,6 @@ try: assert re.split("(b)|(:+)", ":a:b::c") == \ ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'] assert re.split("(?:b)|(?::+)", ":a:b::c") == ['', 'a', '', '', 'c'] - except AssertionError: raise TestFailed, "re.split" @@ -130,16 +169,55 @@ try: except AssertionError: raise TestFailed, "qualified re.split" -if verbose: - print 'Pickling a RegexObject instance' - import pickle - pat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') - s = pickle.dumps(pat) - pat = pickle.loads(s) +try: + # No groups at all + m = re.match('a', 'a') ; assert m.groups() == () + # A single group + m = re.match('(a)', 'a') ; assert m.groups() == ('a',) + + pat = re.compile('((a)|(b))(c)?') + assert pat.match('a').groups() == ('a', 'a', None, None) + assert pat.match('b').groups() == ('b', None, 'b', None) + assert pat.match('ac').groups() == ('a', 'a', None, 'c') + assert pat.match('bc').groups() == ('b', None, 'b', 'c') +except AssertionError: + raise TestFailed, "match .groups() method" + +try: + # A single group + m = re.match('(a)', 'a') + assert m.group(0) == 'a' ; assert m.group(0) == 'a' + assert m.group(1) == 'a' ; assert m.group(1, 1) == ('a', 'a') + + pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') + assert pat.match('a').group(1, 2, 3) == ('a', None, None) + assert pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None) + assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c') +except AssertionError: + 
raise TestFailed, "match .group() method" + +try: + p="" + for i in range(0, 256): + p = p + chr(i) + assert re.match(re.escape(chr(i)), chr(i)) != None + assert re.match(re.escape(chr(i)), chr(i)).span() == (0,1) + + pat=re.compile( re.escape(p) ) + assert pat.match(p) != None + assert pat.match(p).span() == (0,256) +except AssertionError: + raise TestFailed, "re.escape" + if verbose: - print 'Running tests on re.split' - + print 'Pickling a RegexObject instance' + +import pickle +pat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') +s = pickle.dumps(pat) +pat = pickle.loads(s) + try: assert re.I == re.IGNORECASE assert re.L == re.LOCALE @@ -156,11 +234,13 @@ for flags in [re.I, re.M, re.X, re.S, re.L]: print 'Exception raised on flag', flags from re_tests import * + if verbose: print 'Running re_tests test suite' else: # To save time, only run the first and last 10 tests - pass #tests = tests[:10] + tests[-10:] + #tests = tests[:10] + tests[-10:] + pass for t in tests: sys.stdout.flush() @@ -180,7 +260,7 @@ for t in tests: print '=== Syntax error:', t except KeyboardInterrupt: raise KeyboardInterrupt except: - print '*** Unexpected error ***' + print '*** Unexpected error ***', t if verbose: traceback.print_exc(file=sys.stdout) else: @@ -250,4 +330,3 @@ for t in tests: result=obj.search(s) if result==None: print '=== Fails on locale-sensitive match', t -