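- fixed grouping error bug: the group marks of a repeated subpattern are now saved on the backtracking stack and restored on retry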

- renamed the "group" operator to "groupref" (and "group_ignore" to "groupref_ignore")
This commit is contained in:
Fredrik Lundh 2000-07-03 21:31:48 +00:00
parent 6f01398236
commit 72b82ba16d
6 changed files with 55 additions and 31 deletions

View File

@ -148,18 +148,25 @@ def _compile(code, pattern, flags):
skip = len(code); emit(0) skip = len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
mark = MAXCODE
if av[2][0][0] == SUBPATTERN:
# repeated subpattern
gid, foo = av[2][0][1]
if gid:
mark = (gid-1)*2
emit(mark)
_compile(code, av[2], flags) _compile(code, av[2], flags)
emit(OPCODES[SUCCESS]) emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip code[skip] = len(code) - skip
elif op is SUBPATTERN: elif op is SUBPATTERN:
group = av[0] gid = av[0]
if group: if gid:
emit(OPCODES[MARK]) emit(OPCODES[MARK])
emit((group-1)*2) emit((gid-1)*2)
_compile(code, av[1], flags) _compile(code, av[1], flags)
if group: if gid:
emit(OPCODES[MARK]) emit(OPCODES[MARK])
emit((group-1)*2+1) emit((gid-1)*2+1)
elif op in (SUCCESS, FAILURE): elif op in (SUCCESS, FAILURE):
emit(OPCODES[op]) emit(OPCODES[op])
elif op in (ASSERT, ASSERT_NOT): elif op in (ASSERT, ASSERT_NOT):
@ -207,7 +214,7 @@ def _compile(code, pattern, flags):
emit(CHCODES[CH_UNICODE[av]]) emit(CHCODES[CH_UNICODE[av]])
else: else:
emit(CHCODES[av]) emit(CHCODES[av])
elif op is GROUP: elif op is GROUPREF:
if flags & SRE_FLAG_IGNORECASE: if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]]) emit(OPCODES[OP_IGNORE[op]])
else: else:

View File

@ -29,8 +29,8 @@ BRANCH = "branch"
CALL = "call" CALL = "call"
CATEGORY = "category" CATEGORY = "category"
CHARSET = "charset" CHARSET = "charset"
GROUP = "group" GROUPREF = "groupref"
GROUP_IGNORE = "group_ignore" GROUPREF_IGNORE = "groupref_ignore"
IN = "in" IN = "in"
IN_IGNORE = "in_ignore" IN_IGNORE = "in_ignore"
INDEX = "index" INDEX = "index"
@ -90,7 +90,7 @@ OPCODES = [
CALL, CALL,
CATEGORY, CATEGORY,
CHARSET, CHARSET,
GROUP, GROUP_IGNORE, GROUPREF, GROUPREF_IGNORE,
INDEX, INDEX,
IN, IN_IGNORE, IN, IN_IGNORE,
INFO, INFO,
@ -136,7 +136,7 @@ CHCODES = makedict(CHCODES)
# replacement operations for "ignore case" mode # replacement operations for "ignore case" mode
OP_IGNORE = { OP_IGNORE = {
GROUP: GROUP_IGNORE, GROUPREF: GROUPREF_IGNORE,
IN: IN_IGNORE, IN: IN_IGNORE,
LITERAL: LITERAL_IGNORE, LITERAL: LITERAL_IGNORE,
NOT_LITERAL: NOT_LITERAL_IGNORE NOT_LITERAL: NOT_LITERAL_IGNORE

View File

@ -241,7 +241,7 @@ def _escape(source, escape, state):
if group: if group:
if (not source.next or if (not source.next or
not _group(escape + source.next, state.groups)): not _group(escape + source.next, state.groups)):
return GROUP, group return GROUPREF, group
escape = escape + source.get() escape = escape + source.get()
elif source.next in OCTDIGITS: elif source.next in OCTDIGITS:
escape = escape + source.get() escape = escape + source.get()
@ -450,7 +450,7 @@ def _parse(source, state):
gid = state.groupdict.get(name) gid = state.groupdict.get(name)
if gid is None: if gid is None:
raise error, "unknown group name" raise error, "unknown group name"
subpattern.append((GROUP, gid)) subpattern.append((GROUPREF, gid))
elif source.match("#"): elif source.match("#"):
index = "" index = ""
while 1: while 1:

View File

@ -1,7 +1,4 @@
test_sre test_sre
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') === Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')

View File

@ -406,6 +406,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
int stackbase; int stackbase;
int lastmark; int lastmark;
int i, count; int i, count;
SRE_STACK* sp;
/* FIXME: this is a hack! */ /* FIXME: this is a hack! */
void* mark_copy[SRE_MARK_SIZE]; void* mark_copy[SRE_MARK_SIZE];
@ -571,8 +572,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
/* set mark */ /* set mark */
/* args: <mark> */ /* args: <mark> */
TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0])); TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
if (state->lastmark < pattern[0]) if (state->lastmark < pattern[0]+1)
state->lastmark = pattern[0]; state->lastmark = pattern[0]+1;
if (!mark) { if (!mark) {
mark = mark_copy; mark = mark_copy;
memcpy(mark, state->mark, state->lastmark*sizeof(void*)); memcpy(mark, state->mark, state->lastmark*sizeof(void*));
@ -780,10 +781,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
#endif #endif
case SRE_OP_MAX_REPEAT: case SRE_OP_MAX_REPEAT:
/* match repeated sequence (maximizing regexp). repeated /* match repeated sequence (maximizing regexp) */
group should end with a MAX_UNTIL code */ /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
/* args: <skip> <min> <max> <item> */
TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr), TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
pattern[1], pattern[2])); pattern[1], pattern[2]));
@ -793,7 +792,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
/* match minimum number of items */ /* match minimum number of items */
while (count < (int) pattern[1]) { while (count < (int) pattern[1]) {
i = SRE_MATCH(state, pattern + 3); i = SRE_MATCH(state, pattern + 4);
if (i < 0) if (i < 0)
return i; return i;
if (!i) if (!i)
@ -817,8 +816,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
points to the stack */ points to the stack */
while (pattern[2] == 65535 || count < (int) pattern[2]) { while (pattern[2] == 65535 || count < (int) pattern[2]) {
void *mark0, *mark1;
if (pattern[3] != 65535) {
mark0 = state->mark[pattern[3]];
mark1 = state->mark[pattern[3]+1];
}
state->stackbase = stack; state->stackbase = stack;
i = SRE_MATCH(state, pattern + 3); i = SRE_MATCH(state, pattern + 4);
state->stackbase = stackbase; /* rewind */ state->stackbase = stackbase; /* rewind */
if (i < 0) if (i < 0)
return i; return i;
@ -837,8 +841,14 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
return i; /* out of memory */ return i; /* out of memory */
} }
TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr))); TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
state->stack[stack].ptr = ptr; sp = state->stack + stack;
state->stack[stack].pattern = pattern + pattern[0]; sp->ptr = ptr;
sp->pattern = pattern + pattern[0];
sp->mark = pattern[3];
if (pattern[3] != 65535) {
sp->mark0 = mark0;
sp->mark1 = mark1;
}
stack++; stack++;
/* move forward */ /* move forward */
ptr = state->ptr; ptr = state->ptr;
@ -855,13 +865,15 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
case SRE_OP_MIN_REPEAT: case SRE_OP_MIN_REPEAT:
/* match repeated sequence (minimizing regexp) */ /* match repeated sequence (minimizing regexp) */
/* args: <skip> <1=min> <2=max> <3=save> <4=item> */
TRACE(("%8d: min repeat %d %d\n", PTR(ptr), TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
pattern[1], pattern[2])); pattern[1], pattern[2]));
count = 0; count = 0;
state->ptr = ptr; state->ptr = ptr;
/* match minimum number of items */ /* match minimum number of items */
while (count < (int) pattern[1]) { while (count < (int) pattern[1]) {
i = SRE_MATCH(state, pattern + 3); i = SRE_MATCH(state, pattern + 4);
if (i < 0) if (i < 0)
return i; return i;
if (!i) if (!i)
@ -877,7 +889,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
goto success; goto success;
} }
state->ptr = ptr; /* backtrack */ state->ptr = ptr; /* backtrack */
i = SRE_MATCH(state, pattern + 3); i = SRE_MATCH(state, pattern + 4);
if (i < 0) if (i < 0)
return i; return i;
if (!i) if (!i)
@ -940,15 +952,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
} }
failure: failure:
TRACE(("%8d: leave (failure)\n", PTR(ptr)));
if (stack-- > stackbase) { if (stack-- > stackbase) {
ptr = state->stack[stack].ptr; sp = state->stack + stack;
pattern = state->stack[stack].pattern; ptr = sp->ptr;
pattern = sp->pattern;
if (sp->mark != 65535) {
state->mark[sp->mark] = sp->mark0;
state->mark[sp->mark+1] = sp->mark1;
}
TRACE(("%8d: retry (%d)\n", PTR(ptr), stack)); TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
goto retry; goto retry;
} }
TRACE(("%8d: leave (failure)\n", PTR(ptr)));
state->stackbase = stackbase;
state->lastmark = lastmark; state->lastmark = lastmark;
state->stackbase = stackbase;
if (mark) if (mark)
memcpy(state->mark, mark, state->lastmark*sizeof(void*)); memcpy(state->mark, mark, state->lastmark*sizeof(void*));
return 0; return 0;

View File

@ -46,6 +46,9 @@ typedef struct {
/* stack elements */ /* stack elements */
SRE_CODE* pattern; SRE_CODE* pattern;
void* ptr; void* ptr;
int mark;
void* mark0;
void* mark1;
} SRE_STACK; } SRE_STACK;
/* FIXME: <fl> shouldn't be a constant, really... */ /* FIXME: <fl> shouldn't be a constant, really... */