SF patch #720991 by Gary Herron:
A small fix for bug #545855 and Greg Chapman's addition of op code SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of pattern '*?' on a long string.
This commit is contained in:
parent
44c62ef5ee
commit
41c99e7f96
|
@ -55,8 +55,11 @@ def _compile(code, pattern, flags):
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
emit(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
elif _simple(av) and op == MAX_REPEAT:
|
elif _simple(av) and op != REPEAT:
|
||||||
emit(OPCODES[REPEAT_ONE])
|
if op == MAX_REPEAT:
|
||||||
|
emit(OPCODES[REPEAT_ONE])
|
||||||
|
else:
|
||||||
|
emit(OPCODES[MIN_REPEAT_ONE])
|
||||||
skip = len(code); emit(0)
|
skip = len(code); emit(0)
|
||||||
emit(av[0])
|
emit(av[0])
|
||||||
emit(av[1])
|
emit(av[1])
|
||||||
|
|
|
@ -60,6 +60,7 @@ RANGE = "range"
|
||||||
REPEAT = "repeat"
|
REPEAT = "repeat"
|
||||||
REPEAT_ONE = "repeat_one"
|
REPEAT_ONE = "repeat_one"
|
||||||
SUBPATTERN = "subpattern"
|
SUBPATTERN = "subpattern"
|
||||||
|
MIN_REPEAT_ONE = "min_repeat_one"
|
||||||
|
|
||||||
# positions
|
# positions
|
||||||
AT_BEGINNING = "at_beginning"
|
AT_BEGINNING = "at_beginning"
|
||||||
|
@ -120,7 +121,8 @@ OPCODES = [
|
||||||
RANGE,
|
RANGE,
|
||||||
REPEAT,
|
REPEAT,
|
||||||
REPEAT_ONE,
|
REPEAT_ONE,
|
||||||
SUBPATTERN
|
SUBPATTERN,
|
||||||
|
MIN_REPEAT_ONE
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -419,7 +419,7 @@ def _parse(source, state):
|
||||||
set.append(code1)
|
set.append(code1)
|
||||||
set.append((LITERAL, ord("-")))
|
set.append((LITERAL, ord("-")))
|
||||||
break
|
break
|
||||||
else:
|
elif this:
|
||||||
if this[0] == "\\":
|
if this[0] == "\\":
|
||||||
code2 = _class_escape(source, this)
|
code2 = _class_escape(source, this)
|
||||||
else:
|
else:
|
||||||
|
@ -431,6 +431,8 @@ def _parse(source, state):
|
||||||
if hi < lo:
|
if hi < lo:
|
||||||
raise error, "bad character range"
|
raise error, "bad character range"
|
||||||
set.append((RANGE, (lo, hi)))
|
set.append((RANGE, (lo, hi)))
|
||||||
|
else:
|
||||||
|
raise error, "unexpected end of regular expression"
|
||||||
else:
|
else:
|
||||||
if code1[0] is IN:
|
if code1[0] is IN:
|
||||||
code1 = code1[1][0]
|
code1 = code1[1][0]
|
||||||
|
|
|
@ -83,6 +83,19 @@ test(r"""sre.match(r'(a)?a','a').lastindex""", None)
|
||||||
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
|
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
|
||||||
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
|
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
|
||||||
|
|
||||||
|
# bug 545855 -- This pattern failed to cause a compile error as it
|
||||||
|
# should, instead provoking a TypeError.
|
||||||
|
test(r"""sre.compile('foo[a-')""", None, sre.error)
|
||||||
|
|
||||||
|
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
|
||||||
|
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
|
||||||
|
# pattern '*?' on a long string.
|
||||||
|
test(r"""sre.match('.*?c', 10000*'ab'+'cd').end(0)""", 20001)
|
||||||
|
test(r"""sre.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0)""", 20003)
|
||||||
|
test(r"""sre.match('.*?cd', 20000*'abc'+'de').end(0)""", 60001)
|
||||||
|
# non-simple '*?' still recurses and hits the recursion limit
|
||||||
|
test(r"""sre.search('(a|b)*?c', 10000*'ab'+'cd').end(0)""", None, RuntimeError)
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Running tests on sre.sub'
|
print 'Running tests on sre.sub'
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,10 @@ Core and builtins
|
||||||
Extension modules
|
Extension modules
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- The .*? pattern in the re module is now special-cased to avoid the
|
||||||
|
recursion limit. (SF patch #720991 -- many thanks to Gary Herron
|
||||||
|
and Greg Chapman.)
|
||||||
|
|
||||||
- New function sys.call_tracing() allows pdb to debug code
|
- New function sys.call_tracing() allows pdb to debug code
|
||||||
recursively.
|
recursively.
|
||||||
|
|
||||||
|
|
|
@ -993,6 +993,66 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
case SRE_OP_MIN_REPEAT_ONE:
|
||||||
|
/* match repeated sequence (minimizing regexp) */
|
||||||
|
|
||||||
|
/* this operator only works if the repeated item is
|
||||||
|
exactly one character wide, and we're not already
|
||||||
|
collecting backtracking points. for other cases,
|
||||||
|
use the MIN_REPEAT operator */
|
||||||
|
|
||||||
|
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
|
||||||
|
|
||||||
|
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
|
||||||
|
pattern[1], pattern[2]));
|
||||||
|
|
||||||
|
if (ptr + pattern[1] > end)
|
||||||
|
return 0; /* cannot match */
|
||||||
|
|
||||||
|
state->ptr = ptr;
|
||||||
|
|
||||||
|
if (pattern[1] == 0)
|
||||||
|
count = 0;
|
||||||
|
else {
|
||||||
|
/* count using pattern min as the maximum */
|
||||||
|
count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
|
||||||
|
|
||||||
|
if (count < 0)
|
||||||
|
return count; /* exception */
|
||||||
|
if (count < (int) pattern[1])
|
||||||
|
return 0; /* did not match minimum number of times */
|
||||||
|
ptr += count; /* advance past minimum matches of repeat */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
|
||||||
|
/* tail is empty. we're finished */
|
||||||
|
state->ptr = ptr;
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
/* general case */
|
||||||
|
int matchmax = ((int)pattern[2] == 65535);
|
||||||
|
int c;
|
||||||
|
lastmark = state->lastmark;
|
||||||
|
while (matchmax || count <= (int) pattern[2]) {
|
||||||
|
state->ptr = ptr;
|
||||||
|
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
|
||||||
|
if (i)
|
||||||
|
return i;
|
||||||
|
state->ptr = ptr;
|
||||||
|
c = SRE_COUNT(state, pattern+3, 1, level+1);
|
||||||
|
if (c < 0)
|
||||||
|
return c;
|
||||||
|
if (c == 0)
|
||||||
|
break;
|
||||||
|
assert(c == 1);
|
||||||
|
ptr++;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
lastmark_restore(state, lastmark);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
|
||||||
case SRE_OP_REPEAT:
|
case SRE_OP_REPEAT:
|
||||||
/* create repeat context. all the hard work is done
|
/* create repeat context. all the hard work is done
|
||||||
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
|
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
#define SRE_OP_REPEAT 27
|
#define SRE_OP_REPEAT 27
|
||||||
#define SRE_OP_REPEAT_ONE 28
|
#define SRE_OP_REPEAT_ONE 28
|
||||||
#define SRE_OP_SUBPATTERN 29
|
#define SRE_OP_SUBPATTERN 29
|
||||||
|
#define SRE_OP_MIN_REPEAT_ONE 30
|
||||||
#define SRE_AT_BEGINNING 0
|
#define SRE_AT_BEGINNING 0
|
||||||
#define SRE_AT_BEGINNING_LINE 1
|
#define SRE_AT_BEGINNING_LINE 1
|
||||||
#define SRE_AT_BEGINNING_STRING 2
|
#define SRE_AT_BEGINNING_STRING 2
|
||||||
|
|
Loading…
Reference in New Issue