SF patch #720991 by Gary Herron:
A small fix for bug #545855 and Greg Chapman's addition of op code SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of pattern '*?' on a long string.
This commit is contained in:
parent
44c62ef5ee
commit
41c99e7f96
|
@ -55,8 +55,11 @@ def _compile(code, pattern, flags):
|
|||
_compile(code, av[2], flags)
|
||||
emit(OPCODES[SUCCESS])
|
||||
code[skip] = len(code) - skip
|
||||
elif _simple(av) and op == MAX_REPEAT:
|
||||
emit(OPCODES[REPEAT_ONE])
|
||||
elif _simple(av) and op != REPEAT:
|
||||
if op == MAX_REPEAT:
|
||||
emit(OPCODES[REPEAT_ONE])
|
||||
else:
|
||||
emit(OPCODES[MIN_REPEAT_ONE])
|
||||
skip = len(code); emit(0)
|
||||
emit(av[0])
|
||||
emit(av[1])
|
||||
|
|
|
@ -60,6 +60,7 @@ RANGE = "range"
|
|||
REPEAT = "repeat"
|
||||
REPEAT_ONE = "repeat_one"
|
||||
SUBPATTERN = "subpattern"
|
||||
MIN_REPEAT_ONE = "min_repeat_one"
|
||||
|
||||
# positions
|
||||
AT_BEGINNING = "at_beginning"
|
||||
|
@ -120,7 +121,8 @@ OPCODES = [
|
|||
RANGE,
|
||||
REPEAT,
|
||||
REPEAT_ONE,
|
||||
SUBPATTERN
|
||||
SUBPATTERN,
|
||||
MIN_REPEAT_ONE
|
||||
|
||||
]
|
||||
|
||||
|
|
|
@ -419,7 +419,7 @@ def _parse(source, state):
|
|||
set.append(code1)
|
||||
set.append((LITERAL, ord("-")))
|
||||
break
|
||||
else:
|
||||
elif this:
|
||||
if this[0] == "\\":
|
||||
code2 = _class_escape(source, this)
|
||||
else:
|
||||
|
@ -431,6 +431,8 @@ def _parse(source, state):
|
|||
if hi < lo:
|
||||
raise error, "bad character range"
|
||||
set.append((RANGE, (lo, hi)))
|
||||
else:
|
||||
raise error, "unexpected end of regular expression"
|
||||
else:
|
||||
if code1[0] is IN:
|
||||
code1 = code1[1][0]
|
||||
|
|
|
@ -83,6 +83,19 @@ test(r"""sre.match(r'(a)?a','a').lastindex""", None)
|
|||
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
|
||||
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
|
||||
|
||||
# bug 545855 -- This pattern failed to cause a compile error as it
|
||||
# should, instead provoking a TypeError.
|
||||
test(r"""sre.compile('foo[a-')""", None, sre.error)
|
||||
|
||||
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
|
||||
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
|
||||
# pattern '*?' on a long string.
|
||||
test(r"""sre.match('.*?c', 10000*'ab'+'cd').end(0)""", 20001)
|
||||
test(r"""sre.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0)""", 20003)
|
||||
test(r"""sre.match('.*?cd', 20000*'abc'+'de').end(0)""", 60001)
|
||||
# non-simple '*?' still recurses and hits the recursion limit
|
||||
test(r"""sre.search('(a|b)*?c', 10000*'ab'+'cd').end(0)""", None, RuntimeError)
|
||||
|
||||
if verbose:
|
||||
print 'Running tests on sre.sub'
|
||||
|
||||
|
|
|
@ -54,6 +54,10 @@ Core and builtins
|
|||
Extension modules
|
||||
-----------------
|
||||
|
||||
- The .*? pattern in the re module is now special-cased to avoid the
|
||||
recursion limit. (SF patch #720991 -- many thanks to Gary Herron
|
||||
and Greg Chapman.)
|
||||
|
||||
- New function sys.call_tracing() allows pdb to debug code
|
||||
recursively.
|
||||
|
||||
|
|
|
@ -993,6 +993,66 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
|||
}
|
||||
return 0;
|
||||
|
||||
case SRE_OP_MIN_REPEAT_ONE:
|
||||
/* match repeated sequence (minimizing regexp) */
|
||||
|
||||
/* this operator only works if the repeated item is
|
||||
exactly one character wide, and we're not already
|
||||
collecting backtracking points. for other cases,
|
||||
use the MIN_REPEAT operator */
|
||||
|
||||
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
|
||||
|
||||
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
|
||||
pattern[1], pattern[2]));
|
||||
|
||||
if (ptr + pattern[1] > end)
|
||||
return 0; /* cannot match */
|
||||
|
||||
state->ptr = ptr;
|
||||
|
||||
if (pattern[1] == 0)
|
||||
count = 0;
|
||||
else {
|
||||
/* count using pattern min as the maximum */
|
||||
count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
|
||||
|
||||
if (count < 0)
|
||||
return count; /* exception */
|
||||
if (count < (int) pattern[1])
|
||||
return 0; /* did not match minimum number of times */
|
||||
ptr += count; /* advance past minimum matches of repeat */
|
||||
}
|
||||
|
||||
if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
|
||||
/* tail is empty. we're finished */
|
||||
state->ptr = ptr;
|
||||
return 1;
|
||||
|
||||
} else {
|
||||
/* general case */
|
||||
int matchmax = ((int)pattern[2] == 65535);
|
||||
int c;
|
||||
lastmark = state->lastmark;
|
||||
while (matchmax || count <= (int) pattern[2]) {
|
||||
state->ptr = ptr;
|
||||
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
|
||||
if (i)
|
||||
return i;
|
||||
state->ptr = ptr;
|
||||
c = SRE_COUNT(state, pattern+3, 1, level+1);
|
||||
if (c < 0)
|
||||
return c;
|
||||
if (c == 0)
|
||||
break;
|
||||
assert(c == 1);
|
||||
ptr++;
|
||||
count++;
|
||||
}
|
||||
lastmark_restore(state, lastmark);
|
||||
}
|
||||
return 0;
|
||||
|
||||
case SRE_OP_REPEAT:
|
||||
/* create repeat context. all the hard work is done
|
||||
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#define SRE_OP_REPEAT 27
|
||||
#define SRE_OP_REPEAT_ONE 28
|
||||
#define SRE_OP_SUBPATTERN 29
|
||||
#define SRE_OP_MIN_REPEAT_ONE 30
|
||||
#define SRE_AT_BEGINNING 0
|
||||
#define SRE_AT_BEGINNING_LINE 1
|
||||
#define SRE_AT_BEGINNING_STRING 2
|
||||
|
|
Loading…
Reference in New Issue