From df02d0b3f0f438e6a773528010cc360d01b8f393 Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Fri, 30 Jun 2000 07:08:20 +0000 Subject: [PATCH] - fixed default value handling in group/groupdict - added test suite --- Lib/sre.py | 2 +- Lib/test/output/test_sre | 14 ++ Lib/test/test_sre.py | 385 +++++++++++++++++++++++++++++++++++++++ Modules/_sre.c | 41 +++-- 4 files changed, 423 insertions(+), 19 deletions(-) create mode 100644 Lib/test/output/test_sre create mode 100644 Lib/test/test_sre.py diff --git a/Lib/sre.py b/Lib/sre.py index d5bb462e7bc..fef4983e5cf 100644 --- a/Lib/sre.py +++ b/Lib/sre.py @@ -20,7 +20,7 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE S = DOTALL = sre_compile.SRE_FLAG_DOTALL X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE -# sre extensions (may or may not be in 1.6 final) +# sre extensions (may or may not be in 2.0 final) T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE U = UNICODE = sre_compile.SRE_FLAG_UNICODE diff --git a/Lib/test/output/test_sre b/Lib/test/output/test_sre new file mode 100644 index 00000000000..65eadf92b0d --- /dev/null +++ b/Lib/test/output/test_sre @@ -0,0 +1,14 @@ +test_sre +test_support -- test failed re module pickle +test_support -- test failed re module cPickle +=== Syntax error: ('(?P<foo_123>a)(?P=foo_123)', 'aa', 0, 'g1', 'a') +=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') +=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') +=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/' +=== Syntax error: ('(?P<id>aa)(?P=id)', 'aaaa', 0, 'found+"-"+id', 'aaaa-aa') +=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a' +=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A' +=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad') +=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad') +=== 
Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad') +=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py new file mode 100644 index 00000000000..bc336c1bc53 --- /dev/null +++ b/Lib/test/test_sre.py @@ -0,0 +1,385 @@ +# FIXME: this is basically test_re.py, with a few + +import sys +sys.path=['.']+sys.path + +from test_support import verbose, TestFailed +import sre +import sys, os, string, traceback + +# Misc tests from Tim Peters' re.doc + +if verbose: + print 'Running tests on sre.search and sre.match' + +try: + assert sre.search('x*', 'axx').span(0) == (0, 0) + assert sre.search('x*', 'axx').span() == (0, 0) + assert sre.search('x+', 'axx').span(0) == (1, 3) + assert sre.search('x+', 'axx').span() == (1, 3) + assert sre.search('x', 'aaa') == None +except: + raise TestFailed, "sre.search" + +try: + assert sre.match('a*', 'xxx').span(0) == (0, 0) + assert sre.match('a*', 'xxx').span() == (0, 0) + assert sre.match('x*', 'xxxa').span(0) == (0, 3) + assert sre.match('x*', 'xxxa').span() == (0, 3) + assert sre.match('a+', 'xxx') == None +except: + raise TestFailed, "sre.search" + +if verbose: + print 'Running tests on sre.sub' + +try: + assert sre.sub("(?i)b+", "x", "bbbb BBBB") == 'x x' + + def bump_num(matchobj): + int_value = int(matchobj.group(0)) + return str(int_value + 1) + + assert sre.sub(r'\d+', bump_num, '08.2 -2 23x99y') == '9.3 -3 24x100y' + assert sre.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3) == '9.3 -3 23x99y' + + assert sre.sub('.', lambda m: r"\n", 'x') == '\\n' + assert sre.sub('.', r"\n", 'x') == '\n' + + s = r"\1\1" + assert sre.sub('(.)', s, 'x') == 'xx' + assert sre.sub('(.)', sre.escape(s), 'x') == s + assert sre.sub('(.)', lambda m: s, 'x') == s + + assert sre.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx' + assert sre.sub('(?P<a>x)', '\g<a>\g<1>', 'xx') == 'xxxx' + assert sre.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx') == 'xxxx' + assert sre.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx') == 'xxxx' + + assert sre.sub('a', 
r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D' + assert sre.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a' + assert sre.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)) + + assert sre.sub('^\s*', 'X', 'test') == 'Xtest' +except AssertionError: + raise TestFailed, "sre.sub" + + +try: + assert sre.sub('a', 'b', 'aaaaa') == 'bbbbb' + assert sre.sub('a', 'b', 'aaaaa', 1) == 'baaaa' +except AssertionError: + raise TestFailed, "qualified sre.sub" + +if verbose: + print 'Running tests on symbolic references' + +try: + sre.sub('(?P<a>x)', '\g<a', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)', '\g<', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)', '\g', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)', '\g<a a>', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)', '\g<1a1>', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)', '\g<ab>', 'xx') +except IndexError, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +try: + sre.sub('(?P<a>x)|(?P<b>y)', '\\2', 'xx') +except sre.error, reason: + pass +else: + raise TestFailed, "symbolic reference" + +if verbose: + print 'Running tests on sre.subn' + +try: + assert sre.subn("(?i)b+", "x", "bbbb BBBB") == ('x x', 2) + assert sre.subn("b+", "x", "bbbb BBBB") == ('x BBBB', 1) + assert sre.subn("b+", "x", "xyz") == ('xyz', 0) + assert sre.subn("b*", "x", "xyz") == ('xxxyxzx', 4) + assert sre.subn("b*", "x", "xyz", 2) == ('xxxyz', 2) +except AssertionError: + raise TestFailed, "sre.subn" + +if verbose: + print 'Running tests on sre.split' + +try: + assert sre.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c'] + assert sre.split(":*", 
":a:b::c") == ['', 'a', 'b', 'c'] + assert sre.split("(:*)", ":a:b::c") == ['', ':', 'a', ':', 'b', '::', 'c'] + assert sre.split("(?::*)", ":a:b::c") == ['', 'a', 'b', 'c'] + assert sre.split("(:)*", ":a:b::c") == ['', ':', 'a', ':', 'b', ':', 'c'] + assert sre.split("([b:]+)", ":a:b::c") == ['', ':', 'a', ':b::', 'c'] +# FIXME: group problem +# assert sre.split("(b)|(:+)", ":a:b::c") == \ +# ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'] + assert sre.split("(?:b)|(?::+)", ":a:b::c") == ['', 'a', '', '', 'c'] +except AssertionError: + raise TestFailed, "sre.split" + +try: + assert sre.split(":", ":a:b::c", 2) == ['', 'a', 'b::c'] + assert sre.split(':', 'a:b:c:d', 2) == ['a', 'b', 'c:d'] + + assert sre.split("(:)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c'] + assert sre.split("(:*)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c'] +except AssertionError: + raise TestFailed, "qualified sre.split" + +if verbose: + print "Running tests on sre.findall" + +try: + assert sre.findall(":+", "abc") == [] + assert sre.findall(":+", "a:b::c:::d") == [":", "::", ":::"] + assert sre.findall("(:+)", "a:b::c:::d") == [":", "::", ":::"] + assert sre.findall("(:)(:*)", "a:b::c:::d") == [(":", ""), + (":", ":"), + (":", "::")] +except AssertionError: + raise TestFailed, "sre.findall" + +if verbose: + print "Running tests on sre.match" + +try: + # No groups at all + m = sre.match('a', 'a') ; assert m.groups() == () + # A single group + m = sre.match('(a)', 'a') ; assert m.groups() == ('a',) + + pat = sre.compile('((a)|(b))(c)?') + assert pat.match('a').groups() == ('a', 'a', None, None) + assert pat.match('b').groups() == ('b', None, 'b', None) + assert pat.match('ac').groups() == ('a', 'a', None, 'c') + assert pat.match('bc').groups() == ('b', None, 'b', 'c') + assert pat.match('bc').groups("") == ('b', "", 'b', 'c') +except AssertionError: + raise TestFailed, "match .groups() method" + +try: + # A single group + m = sre.match('(a)', 'a') + assert m.group(0) == 
'a' ; assert m.group(0) == 'a' + assert m.group(1) == 'a' ; assert m.group(1, 1) == ('a', 'a') + + pat = sre.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') + assert pat.match('a').group(1, 2, 3) == ('a', None, None) + assert pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None) + assert pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c') +except AssertionError: + raise TestFailed, "match .group() method" + +if verbose: + print "Running tests on sre.escape" + +try: + p="" + for i in range(0, 256): + p = p + chr(i) + assert sre.match(sre.escape(chr(i)), chr(i)) != None + assert sre.match(sre.escape(chr(i)), chr(i)).span() == (0,1) + + pat=sre.compile( sre.escape(p) ) + assert pat.match(p) != None + assert pat.match(p).span() == (0,256) +except AssertionError: + raise TestFailed, "sre.escape" + + +if verbose: + print 'Pickling a SRE_Pattern instance' + +try: + import pickle + pat = sre.compile('a(?:b|(c|e){1,2}?|d)+?(.)') + s = pickle.dumps(pat) + pat = pickle.loads(s) +except: + print TestFailed, 're module pickle' # expected + +try: + import cPickle + pat = sre.compile('a(?:b|(c|e){1,2}?|d)+?(.)') + s = cPickle.dumps(pat) + pat = cPickle.loads(s) +except: + print TestFailed, 're module cPickle' # expected + +try: + assert sre.I == sre.IGNORECASE + assert sre.L == sre.LOCALE + assert sre.M == sre.MULTILINE + assert sre.S == sre.DOTALL + assert sre.X == sre.VERBOSE + assert sre.T == sre.TEMPLATE + assert sre.U == sre.UNICODE +except AssertionError: + raise TestFailed, 're module constants' + +for flags in [sre.I, sre.M, sre.X, sre.S, sre.L, sre.T, sre.U]: + try: + r = sre.compile('^pattern$', flags) + except: + print 'Exception raised on flag', flags + +from re_tests import * + +if verbose: + print 'Running re_tests test suite' +else: + # To save time, only run the first and last 10 tests + #tests = tests[:10] + tests[-10:] + pass + +for t in tests: + sys.stdout.flush() + pattern=s=outcome=repl=expected=None + if len(t)==5: + pattern, s, outcome, repl, expected = t + elif 
len(t)==3: + pattern, s, outcome = t + else: + raise ValueError, ('Test tuples should have 3 or 5 fields',t) + + try: + obj=sre.compile(pattern) + except sre.error: + if outcome==SYNTAX_ERROR: pass # Expected a syntax error + else: + print '=== Syntax error:', t + except KeyboardInterrupt: raise KeyboardInterrupt + except: + print '*** Unexpected error ***', t + if verbose: + traceback.print_exc(file=sys.stdout) + else: + try: + result=obj.search(s) + except (sre.error), msg: + print '=== Unexpected exception', t, repr(msg) + if outcome==SYNTAX_ERROR: + # This should have been a syntax error; forget it. + pass + elif outcome==FAIL: + if result is None: pass # No match, as expected + else: print '=== Succeeded incorrectly', t + elif outcome==SUCCEED: + if result is not None: + # Matched, as expected, so now we compute the + # result string and compare it to our expected result. + start, end = result.span(0) + vardict={'found': result.group(0), + 'groups': result.group(), + 'flags': result.re.flags} + for i in range(1, 100): + try: + gi = result.group(i) + # Special hack because else the string concat fails: + if gi is None: + gi = "None" + except IndexError: + gi = "Error" + vardict['g%d' % i] = gi + for i in result.re.groupindex.keys(): + try: + gi = result.group(i) + if gi is None: + gi = "None" + except IndexError: + gi = "Error" + vardict[i] = gi + repl=eval(repl, vardict) + if repl!=expected: + print '=== grouping error', t, + print repr(repl)+' should be '+repr(expected) + else: + print '=== Failed incorrectly', t + continue + + # Try the match on a unicode string, and check that it + # still succeeds. + result=obj.search(unicode(s, "latin-1")) + if result==None: + print '=== Fails on unicode match', t + + # Try the match on a unicode pattern, and check that it + # still succeeds. 
+ obj=sre.compile(unicode(pattern, "latin-1")) + result=obj.search(s) + if result==None: + print '=== Fails on unicode pattern match', t + + # Try the match with the search area limited to the extent + # of the match and see if it still succeeds. \B will + # break (because it won't match at the end or start of a + # string), so we'll ignore patterns that feature it. + + if pattern[:2]!='\\B' and pattern[-2:]!='\\B': + obj=sre.compile(pattern) + result=obj.search(s, result.start(0), result.end(0)+1) + if result==None: + print '=== Failed on range-limited match', t + + # Try the match with IGNORECASE enabled, and check that it + # still succeeds. + obj=sre.compile(pattern, sre.IGNORECASE) + result=obj.search(s) + if result==None: + print '=== Fails on case-insensitive match', t + + # Try the match with LOCALE enabled, and check that it + # still succeeds. + obj=sre.compile(pattern, sre.LOCALE) + result=obj.search(s) + if result==None: + print '=== Fails on locale-sensitive match', t + + # Try the match with UNICODE enabled, and check that it + # still succeeds. 
+ obj=sre.compile(pattern, sre.UNICODE) + result=obj.search(s) + if result==None: + print '=== Fails on unicode-sensitive match', t diff --git a/Modules/_sre.c b/Modules/_sre.c index 7b1adbd177f..6fcd65ea739 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1566,7 +1566,7 @@ match_dealloc(MatchObject* self) } static PyObject* -match_getslice_by_index(MatchObject* self, int index) +match_getslice_by_index(MatchObject* self, int index, PyObject* def) { if (index < 0 || index >= self->groups) { /* raise IndexError if we were given a bad group number */ @@ -1578,9 +1578,9 @@ match_getslice_by_index(MatchObject* self, int index) } if (self->string == Py_None || self->mark[index+index] < 0) { - /* return None if the string or group is undefined */ - Py_INCREF(Py_None); - return Py_None; + /* return default value if the string or group is undefined */ + Py_INCREF(def); + return def; } return PySequence_GetSlice( @@ -1605,9 +1605,9 @@ match_getindex(MatchObject* self, PyObject* index) } static PyObject* -match_getslice(MatchObject* self, PyObject* index) +match_getslice(MatchObject* self, PyObject* index, PyObject* def) { - return match_getslice_by_index(self, match_getindex(self, index)); + return match_getslice_by_index(self, match_getindex(self, index), def); } static PyObject* @@ -1620,10 +1620,10 @@ match_group(MatchObject* self, PyObject* args) switch (size) { case 0: - result = match_getslice(self, Py_False); + result = match_getslice(self, Py_False, Py_None); break; case 1: - result = match_getslice(self, PyTuple_GET_ITEM(args, 0)); + result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); break; default: /* fetch multiple items */ @@ -1631,7 +1631,9 @@ match_group(MatchObject* self, PyObject* args) if (!result) return NULL; for (i = 0; i < size; i++) { - PyObject* item = match_getslice(self, PyTuple_GET_ITEM(args, i)); + PyObject* item = match_getslice( + self, PyTuple_GET_ITEM(args, i), Py_None + ); if (!item) { Py_DECREF(result); return NULL; @@ 
-1649,7 +1651,9 @@ match_groups(MatchObject* self, PyObject* args) PyObject* result; int index; - /* FIXME: handle default value! */ + PyObject* def = Py_None; + if (!PyArg_ParseTuple(args, "|O", &def)) + return NULL; result = PyTuple_New(self->groups-1); if (!result) @@ -1657,8 +1661,7 @@ match_groups(MatchObject* self, PyObject* args) for (index = 1; index < self->groups; index++) { PyObject* item; - /* FIXME: handle default! */ - item = match_getslice_by_index(self, index); + item = match_getslice_by_index(self, index, def); if (!item) { Py_DECREF(result); return NULL; @@ -1676,17 +1679,19 @@ match_groupdict(MatchObject* self, PyObject* args) PyObject* keys; int index; - /* FIXME: handle default value! */ + PyObject* def = Py_None; + if (!PyArg_ParseTuple(args, "|O", &def)) + return NULL; result = PyDict_New(); - if (!result) - return NULL; - if (!self->pattern->groupindex) + if (!result || !self->pattern->groupindex) return result; keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) + if (!keys) { + Py_DECREF(result); return NULL; + } for (index = 0; index < PyList_GET_SIZE(keys); index++) { PyObject* key; @@ -1697,7 +1702,7 @@ match_groupdict(MatchObject* self, PyObject* args) Py_DECREF(result); return NULL; } - item = match_getslice(self, key); + item = match_getslice(self, key, def); if (!item) { Py_DECREF(key); Py_DECREF(keys);