- untabified sre_constants.py
This commit is contained in:
parent
a4657f736c
commit
43b3b49b5a
|
@ -26,52 +26,12 @@ def _compile(code, pattern, flags):
|
|||
# internal: compile a (sub)pattern
|
||||
emit = code.append
|
||||
for op, av in pattern:
|
||||
if op is ANY:
|
||||
if flags & SRE_FLAG_DOTALL:
|
||||
emit(OPCODES[op])
|
||||
else:
|
||||
emit(OPCODES[CATEGORY])
|
||||
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
|
||||
elif op in (SUCCESS, FAILURE):
|
||||
emit(OPCODES[op])
|
||||
elif op is AT:
|
||||
emit(OPCODES[op])
|
||||
if flags & SRE_FLAG_MULTILINE:
|
||||
emit(ATCODES[AT_MULTILINE[av]])
|
||||
else:
|
||||
emit(ATCODES[av])
|
||||
elif op is BRANCH:
|
||||
emit(OPCODES[op])
|
||||
tail = []
|
||||
for av in av[1]:
|
||||
skip = len(code); emit(0)
|
||||
_compile(code, av, flags)
|
||||
emit(OPCODES[JUMP])
|
||||
tail.append(len(code)); emit(0)
|
||||
code[skip] = len(code) - skip
|
||||
emit(0) # end of branch
|
||||
for tail in tail:
|
||||
code[tail] = len(code) - tail
|
||||
elif op is CALL:
|
||||
emit(OPCODES[op])
|
||||
skip = len(code); emit(0)
|
||||
_compile(code, av, flags)
|
||||
emit(OPCODES[SUCCESS])
|
||||
code[skip] = len(code) - skip
|
||||
elif op is CATEGORY:
|
||||
emit(OPCODES[op])
|
||||
if flags & SRE_FLAG_LOCALE:
|
||||
emit(CHCODES[CH_LOCALE[av]])
|
||||
elif flags & SRE_FLAG_UNICODE:
|
||||
emit(CHCODES[CH_UNICODE[av]])
|
||||
else:
|
||||
emit(CHCODES[av])
|
||||
elif op is GROUP:
|
||||
if op in (LITERAL, NOT_LITERAL):
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
else:
|
||||
emit(OPCODES[op])
|
||||
emit(av-1)
|
||||
emit(ord(av))
|
||||
elif op is IN:
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
|
@ -101,15 +61,12 @@ def _compile(code, pattern, flags):
|
|||
raise error, "internal: unsupported set operator"
|
||||
emit(OPCODES[FAILURE])
|
||||
code[skip] = len(code) - skip
|
||||
elif op in (LITERAL, NOT_LITERAL):
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
else:
|
||||
elif op is ANY:
|
||||
if flags & SRE_FLAG_DOTALL:
|
||||
emit(OPCODES[op])
|
||||
emit(ord(av))
|
||||
elif op is MARK:
|
||||
emit(OPCODES[op])
|
||||
emit(av)
|
||||
else:
|
||||
emit(OPCODES[CATEGORY])
|
||||
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
|
||||
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
|
||||
if flags & SRE_FLAG_TEMPLATE:
|
||||
emit(OPCODES[REPEAT])
|
||||
|
@ -150,6 +107,49 @@ def _compile(code, pattern, flags):
|
|||
if group:
|
||||
emit(OPCODES[MARK])
|
||||
emit((group-1)*2+1)
|
||||
elif op in (SUCCESS, FAILURE):
|
||||
emit(OPCODES[op])
|
||||
elif op in (ASSERT, ASSERT_NOT, CALL):
|
||||
emit(OPCODES[op])
|
||||
skip = len(code); emit(0)
|
||||
_compile(code, av, flags)
|
||||
emit(OPCODES[SUCCESS])
|
||||
code[skip] = len(code) - skip
|
||||
elif op is AT:
|
||||
emit(OPCODES[op])
|
||||
if flags & SRE_FLAG_MULTILINE:
|
||||
emit(ATCODES[AT_MULTILINE[av]])
|
||||
else:
|
||||
emit(ATCODES[av])
|
||||
elif op is BRANCH:
|
||||
emit(OPCODES[op])
|
||||
tail = []
|
||||
for av in av[1]:
|
||||
skip = len(code); emit(0)
|
||||
_compile(code, av, flags)
|
||||
emit(OPCODES[JUMP])
|
||||
tail.append(len(code)); emit(0)
|
||||
code[skip] = len(code) - skip
|
||||
emit(0) # end of branch
|
||||
for tail in tail:
|
||||
code[tail] = len(code) - tail
|
||||
elif op is CATEGORY:
|
||||
emit(OPCODES[op])
|
||||
if flags & SRE_FLAG_LOCALE:
|
||||
emit(CHCODES[CH_LOCALE[av]])
|
||||
elif flags & SRE_FLAG_UNICODE:
|
||||
emit(CHCODES[CH_UNICODE[av]])
|
||||
else:
|
||||
emit(CHCODES[av])
|
||||
elif op is GROUP:
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
else:
|
||||
emit(OPCODES[op])
|
||||
emit(av-1)
|
||||
elif op is MARK:
|
||||
emit(OPCODES[op])
|
||||
emit(av)
|
||||
else:
|
||||
raise ValueError, ("unsupported operand type", op)
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ SUCCESS = "success"
|
|||
|
||||
ANY = "any"
|
||||
ASSERT = "assert"
|
||||
ASSERT_NOT = "assert_not"
|
||||
AT = "at"
|
||||
BRANCH = "branch"
|
||||
CALL = "call"
|
||||
|
@ -81,7 +82,7 @@ OPCODES = [
|
|||
FAILURE, SUCCESS,
|
||||
|
||||
ANY,
|
||||
ASSERT,
|
||||
ASSERT, ASSERT_NOT,
|
||||
AT,
|
||||
BRANCH,
|
||||
CALL,
|
||||
|
@ -121,8 +122,8 @@ def makedict(list):
|
|||
d = {}
|
||||
i = 0
|
||||
for item in list:
|
||||
d[item] = i
|
||||
i = i + 1
|
||||
d[item] = i
|
||||
i = i + 1
|
||||
return d
|
||||
|
||||
OPCODES = makedict(OPCODES)
|
||||
|
@ -176,12 +177,27 @@ SRE_FLAG_VERBOSE = 64
|
|||
if __name__ == "__main__":
|
||||
import string
|
||||
def dump(f, d, prefix):
|
||||
items = d.items()
|
||||
items.sort(lambda a, b: cmp(a[1], b[1]))
|
||||
for k, v in items:
|
||||
f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
|
||||
items = d.items()
|
||||
items.sort(lambda a, b: cmp(a[1], b[1]))
|
||||
for k, v in items:
|
||||
f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
|
||||
f = open("sre_constants.h", "w")
|
||||
f.write("/* generated from sre_constants.py */\n")
|
||||
f.write("""\
|
||||
/*
|
||||
* Secret Labs' Regular Expression Engine
|
||||
*
|
||||
* regular expression matching engine
|
||||
*
|
||||
* NOTE: This file is generated by sre_constants.py. If you need
|
||||
* to change anything in here, edit sre_constants.py and run it.
|
||||
*
|
||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||
*
|
||||
* See the _sre.c file for information on usage and redistribution.
|
||||
*/
|
||||
|
||||
""")
|
||||
|
||||
dump(f, OPCODES, "SRE_OP")
|
||||
dump(f, ATCODES, "SRE")
|
||||
dump(f, CHCODES, "SRE")
|
||||
|
|
|
@ -470,6 +470,25 @@ def _parse(source, state, flags=0):
|
|||
if source.next is None or source.next == ")":
|
||||
break
|
||||
source.get()
|
||||
elif source.next in ("=", "!"):
|
||||
# lookahead assertions
|
||||
char = source.get()
|
||||
b = []
|
||||
while 1:
|
||||
p = _parse(source, state, flags)
|
||||
if source.next == ")":
|
||||
if b:
|
||||
b.append(p)
|
||||
p = _branch(state, b)
|
||||
if char == "=":
|
||||
subpattern.append((ASSERT, p))
|
||||
else:
|
||||
subpattern.append((ASSERT_NOT, p))
|
||||
break
|
||||
elif source.match("|"):
|
||||
b.append(p)
|
||||
else:
|
||||
raise error, "pattern not properly closed"
|
||||
else:
|
||||
# flags
|
||||
while FLAGS.has_key(source.next):
|
||||
|
|
|
@ -6,7 +6,4 @@ test_support -- test failed re module cPickle
|
|||
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
|
||||
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
|
||||
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
|
||||
=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
|
||||
=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
|
||||
=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
|
||||
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
* 00-06-28 fl fixed findall (0.9.1)
|
||||
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
|
||||
* 00-06-30 fl tuning, fast search (0.9.3)
|
||||
* 00-06-30 fl added assert (lookahead) primitives (0.9.4)
|
||||
*
|
||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||
*
|
||||
|
@ -30,7 +31,7 @@
|
|||
|
||||
#ifndef SRE_RECURSIVE
|
||||
|
||||
char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
|
||||
char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
|
@ -576,11 +577,10 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
pattern += pattern[0];
|
||||
break;
|
||||
|
||||
#if 0
|
||||
case SRE_OP_CALL:
|
||||
/* match subpattern, without backtracking */
|
||||
case SRE_OP_ASSERT:
|
||||
/* assert subpattern */
|
||||
/* args: <skip> <pattern> */
|
||||
TRACE(("%8d: subpattern\n", PTR(ptr)));
|
||||
TRACE(("%8d: assert subpattern\n", PTR(ptr)));
|
||||
state->ptr = ptr;
|
||||
i = SRE_MATCH(state, pattern + 1);
|
||||
if (i < 0)
|
||||
|
@ -588,9 +588,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (!i)
|
||||
goto failure;
|
||||
pattern += pattern[0];
|
||||
ptr = state->ptr;
|
||||
break;
|
||||
#endif
|
||||
|
||||
case SRE_OP_ASSERT_NOT:
|
||||
/* assert not subpattern */
|
||||
/* args: <skip> <pattern> */
|
||||
TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
|
||||
state->ptr = ptr;
|
||||
i = SRE_MATCH(state, pattern + 1);
|
||||
if (i < 0)
|
||||
return i;
|
||||
if (i)
|
||||
goto failure;
|
||||
pattern += pattern[0];
|
||||
break;
|
||||
|
||||
#if 0
|
||||
case SRE_OP_MAX_REPEAT_ONE:
|
||||
|
|
|
@ -1,29 +1,42 @@
|
|||
/* generated from sre_constants.py */
|
||||
/*
|
||||
* Secret Labs' Regular Expression Engine
|
||||
*
|
||||
* regular expression matching engine
|
||||
*
|
||||
* NOTE: This file is generated by sre_constants.py. If you need
|
||||
* to change anything in here, edit sre_constants.py and run it.
|
||||
*
|
||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||
*
|
||||
* See the _sre.c file for information on usage and redistribution.
|
||||
*/
|
||||
|
||||
#define SRE_OP_FAILURE 0
|
||||
#define SRE_OP_SUCCESS 1
|
||||
#define SRE_OP_ANY 2
|
||||
#define SRE_OP_ASSERT 3
|
||||
#define SRE_OP_AT 4
|
||||
#define SRE_OP_BRANCH 5
|
||||
#define SRE_OP_CALL 6
|
||||
#define SRE_OP_CATEGORY 7
|
||||
#define SRE_OP_GROUP 8
|
||||
#define SRE_OP_GROUP_IGNORE 9
|
||||
#define SRE_OP_IN 10
|
||||
#define SRE_OP_IN_IGNORE 11
|
||||
#define SRE_OP_INFO 12
|
||||
#define SRE_OP_JUMP 13
|
||||
#define SRE_OP_LITERAL 14
|
||||
#define SRE_OP_LITERAL_IGNORE 15
|
||||
#define SRE_OP_MARK 16
|
||||
#define SRE_OP_MAX_REPEAT 17
|
||||
#define SRE_OP_MAX_REPEAT_ONE 18
|
||||
#define SRE_OP_MIN_REPEAT 19
|
||||
#define SRE_OP_NOT_LITERAL 20
|
||||
#define SRE_OP_NOT_LITERAL_IGNORE 21
|
||||
#define SRE_OP_NEGATE 22
|
||||
#define SRE_OP_RANGE 23
|
||||
#define SRE_OP_REPEAT 24
|
||||
#define SRE_OP_ASSERT_NOT 4
|
||||
#define SRE_OP_AT 5
|
||||
#define SRE_OP_BRANCH 6
|
||||
#define SRE_OP_CALL 7
|
||||
#define SRE_OP_CATEGORY 8
|
||||
#define SRE_OP_GROUP 9
|
||||
#define SRE_OP_GROUP_IGNORE 10
|
||||
#define SRE_OP_IN 11
|
||||
#define SRE_OP_IN_IGNORE 12
|
||||
#define SRE_OP_INFO 13
|
||||
#define SRE_OP_JUMP 14
|
||||
#define SRE_OP_LITERAL 15
|
||||
#define SRE_OP_LITERAL_IGNORE 16
|
||||
#define SRE_OP_MARK 17
|
||||
#define SRE_OP_MAX_REPEAT 18
|
||||
#define SRE_OP_MAX_REPEAT_ONE 19
|
||||
#define SRE_OP_MIN_REPEAT 20
|
||||
#define SRE_OP_NOT_LITERAL 21
|
||||
#define SRE_OP_NOT_LITERAL_IGNORE 22
|
||||
#define SRE_OP_NEGATE 23
|
||||
#define SRE_OP_RANGE 24
|
||||
#define SRE_OP_REPEAT 25
|
||||
#define SRE_AT_BEGINNING 0
|
||||
#define SRE_AT_BEGINNING_LINE 1
|
||||
#define SRE_AT_BOUNDARY 2
|
||||
|
|
Loading…
Reference in New Issue