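- fixed grouping error bug (a group inside a maximizing repeat now has its marks saved and restored on backtracking)
- renamed the "group" operator to "groupref" (and "group_ignore" to "groupref_ignore")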
This commit is contained in:
parent
6f01398236
commit
72b82ba16d
|
@ -148,18 +148,25 @@ def _compile(code, pattern, flags):
|
||||||
skip = len(code); emit(0)
|
skip = len(code); emit(0)
|
||||||
emit(av[0])
|
emit(av[0])
|
||||||
emit(av[1])
|
emit(av[1])
|
||||||
|
mark = MAXCODE
|
||||||
|
if av[2][0][0] == SUBPATTERN:
|
||||||
|
# repeated subpattern
|
||||||
|
gid, foo = av[2][0][1]
|
||||||
|
if gid:
|
||||||
|
mark = (gid-1)*2
|
||||||
|
emit(mark)
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
emit(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
elif op is SUBPATTERN:
|
elif op is SUBPATTERN:
|
||||||
group = av[0]
|
gid = av[0]
|
||||||
if group:
|
if gid:
|
||||||
emit(OPCODES[MARK])
|
emit(OPCODES[MARK])
|
||||||
emit((group-1)*2)
|
emit((gid-1)*2)
|
||||||
_compile(code, av[1], flags)
|
_compile(code, av[1], flags)
|
||||||
if group:
|
if gid:
|
||||||
emit(OPCODES[MARK])
|
emit(OPCODES[MARK])
|
||||||
emit((group-1)*2+1)
|
emit((gid-1)*2+1)
|
||||||
elif op in (SUCCESS, FAILURE):
|
elif op in (SUCCESS, FAILURE):
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
elif op in (ASSERT, ASSERT_NOT):
|
elif op in (ASSERT, ASSERT_NOT):
|
||||||
|
@ -207,7 +214,7 @@ def _compile(code, pattern, flags):
|
||||||
emit(CHCODES[CH_UNICODE[av]])
|
emit(CHCODES[CH_UNICODE[av]])
|
||||||
else:
|
else:
|
||||||
emit(CHCODES[av])
|
emit(CHCODES[av])
|
||||||
elif op is GROUP:
|
elif op is GROUPREF:
|
||||||
if flags & SRE_FLAG_IGNORECASE:
|
if flags & SRE_FLAG_IGNORECASE:
|
||||||
emit(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -29,8 +29,8 @@ BRANCH = "branch"
|
||||||
CALL = "call"
|
CALL = "call"
|
||||||
CATEGORY = "category"
|
CATEGORY = "category"
|
||||||
CHARSET = "charset"
|
CHARSET = "charset"
|
||||||
GROUP = "group"
|
GROUPREF = "groupref"
|
||||||
GROUP_IGNORE = "group_ignore"
|
GROUPREF_IGNORE = "groupref_ignore"
|
||||||
IN = "in"
|
IN = "in"
|
||||||
IN_IGNORE = "in_ignore"
|
IN_IGNORE = "in_ignore"
|
||||||
INDEX = "index"
|
INDEX = "index"
|
||||||
|
@ -90,7 +90,7 @@ OPCODES = [
|
||||||
CALL,
|
CALL,
|
||||||
CATEGORY,
|
CATEGORY,
|
||||||
CHARSET,
|
CHARSET,
|
||||||
GROUP, GROUP_IGNORE,
|
GROUPREF, GROUPREF_IGNORE,
|
||||||
INDEX,
|
INDEX,
|
||||||
IN, IN_IGNORE,
|
IN, IN_IGNORE,
|
||||||
INFO,
|
INFO,
|
||||||
|
@ -136,7 +136,7 @@ CHCODES = makedict(CHCODES)
|
||||||
|
|
||||||
# replacement operations for "ignore case" mode
|
# replacement operations for "ignore case" mode
|
||||||
OP_IGNORE = {
|
OP_IGNORE = {
|
||||||
GROUP: GROUP_IGNORE,
|
GROUPREF: GROUPREF_IGNORE,
|
||||||
IN: IN_IGNORE,
|
IN: IN_IGNORE,
|
||||||
LITERAL: LITERAL_IGNORE,
|
LITERAL: LITERAL_IGNORE,
|
||||||
NOT_LITERAL: NOT_LITERAL_IGNORE
|
NOT_LITERAL: NOT_LITERAL_IGNORE
|
||||||
|
|
|
@ -241,7 +241,7 @@ def _escape(source, escape, state):
|
||||||
if group:
|
if group:
|
||||||
if (not source.next or
|
if (not source.next or
|
||||||
not _group(escape + source.next, state.groups)):
|
not _group(escape + source.next, state.groups)):
|
||||||
return GROUP, group
|
return GROUPREF, group
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
elif source.next in OCTDIGITS:
|
elif source.next in OCTDIGITS:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
|
@ -450,7 +450,7 @@ def _parse(source, state):
|
||||||
gid = state.groupdict.get(name)
|
gid = state.groupdict.get(name)
|
||||||
if gid is None:
|
if gid is None:
|
||||||
raise error, "unknown group name"
|
raise error, "unknown group name"
|
||||||
subpattern.append((GROUP, gid))
|
subpattern.append((GROUPREF, gid))
|
||||||
elif source.match("#"):
|
elif source.match("#"):
|
||||||
index = ""
|
index = ""
|
||||||
while 1:
|
while 1:
|
||||||
|
|
|
@ -1,7 +1,4 @@
|
||||||
test_sre
|
test_sre
|
||||||
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
||||||
=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
|
=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
|
||||||
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
|
|
||||||
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
|
|
||||||
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
|
|
||||||
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
||||||
|
|
|
@ -406,6 +406,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
int stackbase;
|
int stackbase;
|
||||||
int lastmark;
|
int lastmark;
|
||||||
int i, count;
|
int i, count;
|
||||||
|
SRE_STACK* sp;
|
||||||
|
|
||||||
/* FIXME: this is a hack! */
|
/* FIXME: this is a hack! */
|
||||||
void* mark_copy[SRE_MARK_SIZE];
|
void* mark_copy[SRE_MARK_SIZE];
|
||||||
|
@ -571,8 +572,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
/* set mark */
|
/* set mark */
|
||||||
/* args: <mark> */
|
/* args: <mark> */
|
||||||
TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
|
TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
|
||||||
if (state->lastmark < pattern[0])
|
if (state->lastmark < pattern[0]+1)
|
||||||
state->lastmark = pattern[0];
|
state->lastmark = pattern[0]+1;
|
||||||
if (!mark) {
|
if (!mark) {
|
||||||
mark = mark_copy;
|
mark = mark_copy;
|
||||||
memcpy(mark, state->mark, state->lastmark*sizeof(void*));
|
memcpy(mark, state->mark, state->lastmark*sizeof(void*));
|
||||||
|
@ -780,10 +781,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
case SRE_OP_MAX_REPEAT:
|
case SRE_OP_MAX_REPEAT:
|
||||||
/* match repeated sequence (maximizing regexp). repeated
|
/* match repeated sequence (maximizing regexp) */
|
||||||
group should end with a MAX_UNTIL code */
|
/* args: <skip> <1=min> <2=max> <3=save> <4=item> */
|
||||||
|
|
||||||
/* args: <skip> <min> <max> <item> */
|
|
||||||
|
|
||||||
TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
|
TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
|
||||||
pattern[1], pattern[2]));
|
pattern[1], pattern[2]));
|
||||||
|
@ -793,7 +792,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
/* match minimum number of items */
|
/* match minimum number of items */
|
||||||
while (count < (int) pattern[1]) {
|
while (count < (int) pattern[1]) {
|
||||||
i = SRE_MATCH(state, pattern + 3);
|
i = SRE_MATCH(state, pattern + 4);
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return i;
|
return i;
|
||||||
if (!i)
|
if (!i)
|
||||||
|
@ -817,8 +816,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
points to the stack */
|
points to the stack */
|
||||||
|
|
||||||
while (pattern[2] == 65535 || count < (int) pattern[2]) {
|
while (pattern[2] == 65535 || count < (int) pattern[2]) {
|
||||||
|
void *mark0, *mark1;
|
||||||
|
if (pattern[3] != 65535) {
|
||||||
|
mark0 = state->mark[pattern[3]];
|
||||||
|
mark1 = state->mark[pattern[3]+1];
|
||||||
|
}
|
||||||
state->stackbase = stack;
|
state->stackbase = stack;
|
||||||
i = SRE_MATCH(state, pattern + 3);
|
i = SRE_MATCH(state, pattern + 4);
|
||||||
state->stackbase = stackbase; /* rewind */
|
state->stackbase = stackbase; /* rewind */
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return i;
|
return i;
|
||||||
|
@ -837,8 +841,14 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
return i; /* out of memory */
|
return i; /* out of memory */
|
||||||
}
|
}
|
||||||
TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
|
TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
|
||||||
state->stack[stack].ptr = ptr;
|
sp = state->stack + stack;
|
||||||
state->stack[stack].pattern = pattern + pattern[0];
|
sp->ptr = ptr;
|
||||||
|
sp->pattern = pattern + pattern[0];
|
||||||
|
sp->mark = pattern[3];
|
||||||
|
if (pattern[3] != 65535) {
|
||||||
|
sp->mark0 = mark0;
|
||||||
|
sp->mark1 = mark1;
|
||||||
|
}
|
||||||
stack++;
|
stack++;
|
||||||
/* move forward */
|
/* move forward */
|
||||||
ptr = state->ptr;
|
ptr = state->ptr;
|
||||||
|
@ -855,13 +865,15 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
case SRE_OP_MIN_REPEAT:
|
case SRE_OP_MIN_REPEAT:
|
||||||
/* match repeated sequence (minimizing regexp) */
|
/* match repeated sequence (minimizing regexp) */
|
||||||
|
/* args: <skip> <1=min> <2=max> <3=save> <4=item> */
|
||||||
|
|
||||||
TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
|
TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
|
||||||
pattern[1], pattern[2]));
|
pattern[1], pattern[2]));
|
||||||
count = 0;
|
count = 0;
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
/* match minimum number of items */
|
/* match minimum number of items */
|
||||||
while (count < (int) pattern[1]) {
|
while (count < (int) pattern[1]) {
|
||||||
i = SRE_MATCH(state, pattern + 3);
|
i = SRE_MATCH(state, pattern + 4);
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return i;
|
return i;
|
||||||
if (!i)
|
if (!i)
|
||||||
|
@ -877,7 +889,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
state->ptr = ptr; /* backtrack */
|
state->ptr = ptr; /* backtrack */
|
||||||
i = SRE_MATCH(state, pattern + 3);
|
i = SRE_MATCH(state, pattern + 4);
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
return i;
|
return i;
|
||||||
if (!i)
|
if (!i)
|
||||||
|
@ -940,15 +952,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
}
|
}
|
||||||
|
|
||||||
failure:
|
failure:
|
||||||
|
TRACE(("%8d: leave (failure)\n", PTR(ptr)));
|
||||||
if (stack-- > stackbase) {
|
if (stack-- > stackbase) {
|
||||||
ptr = state->stack[stack].ptr;
|
sp = state->stack + stack;
|
||||||
pattern = state->stack[stack].pattern;
|
ptr = sp->ptr;
|
||||||
|
pattern = sp->pattern;
|
||||||
|
if (sp->mark != 65535) {
|
||||||
|
state->mark[sp->mark] = sp->mark0;
|
||||||
|
state->mark[sp->mark+1] = sp->mark1;
|
||||||
|
}
|
||||||
TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
|
TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
TRACE(("%8d: leave (failure)\n", PTR(ptr)));
|
|
||||||
state->stackbase = stackbase;
|
|
||||||
state->lastmark = lastmark;
|
state->lastmark = lastmark;
|
||||||
|
state->stackbase = stackbase;
|
||||||
if (mark)
|
if (mark)
|
||||||
memcpy(state->mark, mark, state->lastmark*sizeof(void*));
|
memcpy(state->mark, mark, state->lastmark*sizeof(void*));
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -46,6 +46,9 @@ typedef struct {
|
||||||
/* stack elements */
|
/* stack elements */
|
||||||
SRE_CODE* pattern;
|
SRE_CODE* pattern;
|
||||||
void* ptr;
|
void* ptr;
|
||||||
|
int mark;
|
||||||
|
void* mark0;
|
||||||
|
void* mark1;
|
||||||
} SRE_STACK;
|
} SRE_STACK;
|
||||||
|
|
||||||
/* FIXME: <fl> shouldn't be a constant, really... */
|
/* FIXME: <fl> shouldn't be a constant, really... */
|
||||||
|
|
Loading…
Reference in New Issue