sre.Scanner fixes (from Greg Chapman). Also added a Scanner sanity check to the test suite.

Added a few missing exception checks in the _sre module.
This commit is contained in:
Fredrik Lundh 2001-10-21 18:04:11 +00:00
parent bec95b9d88
commit 1296a8d77e
3 changed files with 43 additions and 5 deletions

View File

@ -330,15 +330,16 @@ copy_reg.pickle(_pattern_type, _pickle, _compile)
# experimental stuff (see python-dev discussions for details) # experimental stuff (see python-dev discussions for details)
class Scanner: class Scanner:
def __init__(self, lexicon): def __init__(self, lexicon, flags=0):
from sre_constants import BRANCH, SUBPATTERN from sre_constants import BRANCH, SUBPATTERN
self.lexicon = lexicon self.lexicon = lexicon
# combine phrases into a compound pattern # combine phrases into a compound pattern
p = [] p = []
s = sre_parse.Pattern() s = sre_parse.Pattern()
s.flags = flags
for phrase, action in lexicon: for phrase, action in lexicon:
p.append(sre_parse.SubPattern(s, [ p.append(sre_parse.SubPattern(s, [
(SUBPATTERN, (len(p), sre_parse.parse(phrase))), (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
])) ]))
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
s.groups = len(p) s.groups = len(p)
@ -346,16 +347,16 @@ class Scanner:
def scan(self, string): def scan(self, string):
result = [] result = []
append = result.append append = result.append
match = self.scanner.match match = self.scanner.scanner(string).match
i = 0 i = 0
while 1: while 1:
m = match(string, i) m = match()
if not m: if not m:
break break
j = m.end() j = m.end()
if i == j: if i == j:
break break
action = self.lexicon[m.lastindex][1] action = self.lexicon[m.lastindex-1][1]
if callable(action): if callable(action):
self.match = m self.match = m
action = action(self, m.group()) action = action(self, m.group())

View File

@ -223,6 +223,26 @@ pat = sre.compile(sre.escape(p))
test(r"""pat.match(p) is not None""", 1) test(r"""pat.match(p) is not None""", 1)
test(r"""pat.match(p).span()""", (0,256)) test(r"""pat.match(p).span()""", (0,256))
if verbose:
print 'Running tests on sre.Scanner'
def s_ident(scanner, token): return token
def s_operator(scanner, token): return "op%s" % token
def s_float(scanner, token): return float(token)
def s_int(scanner, token): return int(token)
scanner = sre.Scanner([
(r"[a-zA-Z_]\w*", s_ident),
(r"\d+\.\d*", s_float),
(r"\d+", s_int),
(r"=|\+|-|\*|/", s_operator),
(r"\s+", None),
])
# sanity check
test('scanner.scan("sum = 3*foo + 312.50 + bar")',
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))
if verbose: if verbose:
print 'Pickling a SRE_Pattern instance' print 'Pickling a SRE_Pattern instance'

View File

@ -1800,6 +1800,11 @@ join(PyObject* list, PyObject* pattern)
return NULL; return NULL;
} }
args = PyTuple_New(1); args = PyTuple_New(1);
if (!args) {
Py_DECREF(function);
Py_DECREF(joiner);
return NULL;
}
PyTuple_SET_ITEM(args, 0, list); PyTuple_SET_ITEM(args, 0, list);
result = PyObject_CallObject(function, args); result = PyObject_CallObject(function, args);
Py_DECREF(args); /* also removes list */ Py_DECREF(args); /* also removes list */
@ -1896,6 +1901,10 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
return NULL; return NULL;
list = PyList_New(0); list = PyList_New(0);
if (!list) {
state_fini(&state);
return NULL;
}
while (state.start <= state.end) { while (state.start <= state.end) {
@ -1995,6 +2004,10 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
return NULL; return NULL;
list = PyList_New(0); list = PyList_New(0);
if (!list) {
state_fini(&state);
return NULL;
}
n = 0; n = 0;
last = state.start; last = state.start;
@ -2110,6 +2123,10 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
return NULL; return NULL;
list = PyList_New(0); list = PyList_New(0);
if (!list) {
state_fini(&state);
return NULL;
}
n = i = 0; n = i = 0;