sre.Scanner fixes (from Greg Chapman). Also added a Scanner sanity
check to the test suite. Added a few missing exception checks in the _sre module.
This commit is contained in:
parent
bec95b9d88
commit
1296a8d77e
11
Lib/sre.py
11
Lib/sre.py
|
@ -330,15 +330,16 @@ copy_reg.pickle(_pattern_type, _pickle, _compile)
|
||||||
# experimental stuff (see python-dev discussions for details)
|
# experimental stuff (see python-dev discussions for details)
|
||||||
|
|
||||||
class Scanner:
|
class Scanner:
|
||||||
def __init__(self, lexicon):
|
def __init__(self, lexicon, flags=0):
|
||||||
from sre_constants import BRANCH, SUBPATTERN
|
from sre_constants import BRANCH, SUBPATTERN
|
||||||
self.lexicon = lexicon
|
self.lexicon = lexicon
|
||||||
# combine phrases into a compound pattern
|
# combine phrases into a compound pattern
|
||||||
p = []
|
p = []
|
||||||
s = sre_parse.Pattern()
|
s = sre_parse.Pattern()
|
||||||
|
s.flags = flags
|
||||||
for phrase, action in lexicon:
|
for phrase, action in lexicon:
|
||||||
p.append(sre_parse.SubPattern(s, [
|
p.append(sre_parse.SubPattern(s, [
|
||||||
(SUBPATTERN, (len(p), sre_parse.parse(phrase))),
|
(SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
|
||||||
]))
|
]))
|
||||||
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
|
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
|
||||||
s.groups = len(p)
|
s.groups = len(p)
|
||||||
|
@ -346,16 +347,16 @@ class Scanner:
|
||||||
def scan(self, string):
|
def scan(self, string):
|
||||||
result = []
|
result = []
|
||||||
append = result.append
|
append = result.append
|
||||||
match = self.scanner.match
|
match = self.scanner.scanner(string).match
|
||||||
i = 0
|
i = 0
|
||||||
while 1:
|
while 1:
|
||||||
m = match(string, i)
|
m = match()
|
||||||
if not m:
|
if not m:
|
||||||
break
|
break
|
||||||
j = m.end()
|
j = m.end()
|
||||||
if i == j:
|
if i == j:
|
||||||
break
|
break
|
||||||
action = self.lexicon[m.lastindex][1]
|
action = self.lexicon[m.lastindex-1][1]
|
||||||
if callable(action):
|
if callable(action):
|
||||||
self.match = m
|
self.match = m
|
||||||
action = action(self, m.group())
|
action = action(self, m.group())
|
||||||
|
|
|
@ -223,6 +223,26 @@ pat = sre.compile(sre.escape(p))
|
||||||
test(r"""pat.match(p) is not None""", 1)
|
test(r"""pat.match(p) is not None""", 1)
|
||||||
test(r"""pat.match(p).span()""", (0,256))
|
test(r"""pat.match(p).span()""", (0,256))
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print 'Running tests on sre.Scanner'
|
||||||
|
|
||||||
|
def s_ident(scanner, token): return token
|
||||||
|
def s_operator(scanner, token): return "op%s" % token
|
||||||
|
def s_float(scanner, token): return float(token)
|
||||||
|
def s_int(scanner, token): return int(token)
|
||||||
|
|
||||||
|
scanner = sre.Scanner([
|
||||||
|
(r"[a-zA-Z_]\w*", s_ident),
|
||||||
|
(r"\d+\.\d*", s_float),
|
||||||
|
(r"\d+", s_int),
|
||||||
|
(r"=|\+|-|\*|/", s_operator),
|
||||||
|
(r"\s+", None),
|
||||||
|
])
|
||||||
|
|
||||||
|
# sanity check
|
||||||
|
test('scanner.scan("sum = 3*foo + 312.50 + bar")',
|
||||||
|
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Pickling a SRE_Pattern instance'
|
print 'Pickling a SRE_Pattern instance'
|
||||||
|
|
||||||
|
|
|
@ -1800,6 +1800,11 @@ join(PyObject* list, PyObject* pattern)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
args = PyTuple_New(1);
|
args = PyTuple_New(1);
|
||||||
|
if (!args) {
|
||||||
|
Py_DECREF(function);
|
||||||
|
Py_DECREF(joiner);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
PyTuple_SET_ITEM(args, 0, list);
|
PyTuple_SET_ITEM(args, 0, list);
|
||||||
result = PyObject_CallObject(function, args);
|
result = PyObject_CallObject(function, args);
|
||||||
Py_DECREF(args); /* also removes list */
|
Py_DECREF(args); /* also removes list */
|
||||||
|
@ -1896,6 +1901,10 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
list = PyList_New(0);
|
list = PyList_New(0);
|
||||||
|
if (!list) {
|
||||||
|
state_fini(&state);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
while (state.start <= state.end) {
|
while (state.start <= state.end) {
|
||||||
|
|
||||||
|
@ -1995,6 +2004,10 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
list = PyList_New(0);
|
list = PyList_New(0);
|
||||||
|
if (!list) {
|
||||||
|
state_fini(&state);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
n = 0;
|
n = 0;
|
||||||
last = state.start;
|
last = state.start;
|
||||||
|
@ -2110,6 +2123,10 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
list = PyList_New(0);
|
list = PyList_New(0);
|
||||||
|
if (!list) {
|
||||||
|
state_fini(&state);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
n = i = 0;
|
n = i = 0;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue