Issue #22434: Constants in sre_constants are now named constants (enum-like).
This commit is contained in:
parent
bf764a1912
commit
c7f7d3897e
|
@ -13,7 +13,6 @@
|
|||
import _sre
|
||||
import sre_parse
|
||||
from sre_constants import *
|
||||
from _sre import MAXREPEAT
|
||||
|
||||
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
||||
|
||||
|
@ -38,65 +37,65 @@ def _compile(code, pattern, flags):
|
|||
for op, av in pattern:
|
||||
if op in LITERAL_CODES:
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
emit(OP_IGNORE[op])
|
||||
emit(_sre.getlower(av, flags))
|
||||
else:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
emit(av)
|
||||
elif op is IN:
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
emit(OP_IGNORE[op])
|
||||
def fixup(literal, flags=flags):
|
||||
return _sre.getlower(literal, flags)
|
||||
else:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
fixup = None
|
||||
skip = _len(code); emit(0)
|
||||
_compile_charset(av, flags, code, fixup)
|
||||
code[skip] = _len(code) - skip
|
||||
elif op is ANY:
|
||||
if flags & SRE_FLAG_DOTALL:
|
||||
emit(OPCODES[ANY_ALL])
|
||||
emit(ANY_ALL)
|
||||
else:
|
||||
emit(OPCODES[ANY])
|
||||
emit(ANY)
|
||||
elif op in REPEATING_CODES:
|
||||
if flags & SRE_FLAG_TEMPLATE:
|
||||
raise error("internal: unsupported template operator")
|
||||
elif _simple(av) and op is not REPEAT:
|
||||
if op is MAX_REPEAT:
|
||||
emit(OPCODES[REPEAT_ONE])
|
||||
emit(REPEAT_ONE)
|
||||
else:
|
||||
emit(OPCODES[MIN_REPEAT_ONE])
|
||||
emit(MIN_REPEAT_ONE)
|
||||
skip = _len(code); emit(0)
|
||||
emit(av[0])
|
||||
emit(av[1])
|
||||
_compile(code, av[2], flags)
|
||||
emit(OPCODES[SUCCESS])
|
||||
emit(SUCCESS)
|
||||
code[skip] = _len(code) - skip
|
||||
else:
|
||||
emit(OPCODES[REPEAT])
|
||||
emit(REPEAT)
|
||||
skip = _len(code); emit(0)
|
||||
emit(av[0])
|
||||
emit(av[1])
|
||||
_compile(code, av[2], flags)
|
||||
code[skip] = _len(code) - skip
|
||||
if op is MAX_REPEAT:
|
||||
emit(OPCODES[MAX_UNTIL])
|
||||
emit(MAX_UNTIL)
|
||||
else:
|
||||
emit(OPCODES[MIN_UNTIL])
|
||||
emit(MIN_UNTIL)
|
||||
elif op is SUBPATTERN:
|
||||
if av[0]:
|
||||
emit(OPCODES[MARK])
|
||||
emit(MARK)
|
||||
emit((av[0]-1)*2)
|
||||
# _compile_info(code, av[1], flags)
|
||||
_compile(code, av[1], flags)
|
||||
if av[0]:
|
||||
emit(OPCODES[MARK])
|
||||
emit(MARK)
|
||||
emit((av[0]-1)*2+1)
|
||||
elif op in SUCCESS_CODES:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
elif op in ASSERT_CODES:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
skip = _len(code); emit(0)
|
||||
if av[0] >= 0:
|
||||
emit(0) # look ahead
|
||||
|
@ -106,57 +105,57 @@ def _compile(code, pattern, flags):
|
|||
raise error("look-behind requires fixed-width pattern")
|
||||
emit(lo) # look behind
|
||||
_compile(code, av[1], flags)
|
||||
emit(OPCODES[SUCCESS])
|
||||
emit(SUCCESS)
|
||||
code[skip] = _len(code) - skip
|
||||
elif op is CALL:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
skip = _len(code); emit(0)
|
||||
_compile(code, av, flags)
|
||||
emit(OPCODES[SUCCESS])
|
||||
emit(SUCCESS)
|
||||
code[skip] = _len(code) - skip
|
||||
elif op is AT:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
if flags & SRE_FLAG_MULTILINE:
|
||||
av = AT_MULTILINE.get(av, av)
|
||||
if flags & SRE_FLAG_LOCALE:
|
||||
av = AT_LOCALE.get(av, av)
|
||||
elif flags & SRE_FLAG_UNICODE:
|
||||
av = AT_UNICODE.get(av, av)
|
||||
emit(ATCODES[av])
|
||||
emit(av)
|
||||
elif op is BRANCH:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
tail = []
|
||||
tailappend = tail.append
|
||||
for av in av[1]:
|
||||
skip = _len(code); emit(0)
|
||||
# _compile_info(code, av, flags)
|
||||
_compile(code, av, flags)
|
||||
emit(OPCODES[JUMP])
|
||||
emit(JUMP)
|
||||
tailappend(_len(code)); emit(0)
|
||||
code[skip] = _len(code) - skip
|
||||
emit(0) # end of branch
|
||||
for tail in tail:
|
||||
code[tail] = _len(code) - tail
|
||||
elif op is CATEGORY:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
if flags & SRE_FLAG_LOCALE:
|
||||
av = CH_LOCALE[av]
|
||||
elif flags & SRE_FLAG_UNICODE:
|
||||
av = CH_UNICODE[av]
|
||||
emit(CHCODES[av])
|
||||
emit(av)
|
||||
elif op is GROUPREF:
|
||||
if flags & SRE_FLAG_IGNORECASE:
|
||||
emit(OPCODES[OP_IGNORE[op]])
|
||||
emit(OP_IGNORE[op])
|
||||
else:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
emit(av-1)
|
||||
elif op is GROUPREF_EXISTS:
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
emit(av[0]-1)
|
||||
skipyes = _len(code); emit(0)
|
||||
_compile(code, av[1], flags)
|
||||
if av[2]:
|
||||
emit(OPCODES[JUMP])
|
||||
emit(JUMP)
|
||||
skipno = _len(code); emit(0)
|
||||
code[skipyes] = _len(code) - skipyes + 1
|
||||
_compile(code, av[2], flags)
|
||||
|
@ -170,7 +169,7 @@ def _compile_charset(charset, flags, code, fixup=None):
|
|||
# compile charset subprogram
|
||||
emit = code.append
|
||||
for op, av in _optimize_charset(charset, fixup):
|
||||
emit(OPCODES[op])
|
||||
emit(op)
|
||||
if op is NEGATE:
|
||||
pass
|
||||
elif op is LITERAL:
|
||||
|
@ -184,14 +183,14 @@ def _compile_charset(charset, flags, code, fixup=None):
|
|||
code.extend(av)
|
||||
elif op is CATEGORY:
|
||||
if flags & SRE_FLAG_LOCALE:
|
||||
emit(CHCODES[CH_LOCALE[av]])
|
||||
emit(CH_LOCALE[av])
|
||||
elif flags & SRE_FLAG_UNICODE:
|
||||
emit(CHCODES[CH_UNICODE[av]])
|
||||
emit(CH_UNICODE[av])
|
||||
else:
|
||||
emit(CHCODES[av])
|
||||
emit(av)
|
||||
else:
|
||||
raise error("internal: unsupported set operator")
|
||||
emit(OPCODES[FAILURE])
|
||||
emit(FAILURE)
|
||||
|
||||
def _optimize_charset(charset, fixup):
|
||||
# internal: optimize character set
|
||||
|
@ -414,7 +413,7 @@ def _compile_info(code, pattern, flags):
|
|||
## print "*** CHARSET", charset
|
||||
# add an info block
|
||||
emit = code.append
|
||||
emit(OPCODES[INFO])
|
||||
emit(INFO)
|
||||
skip = len(code); emit(0)
|
||||
# literal flag
|
||||
mask = 0
|
||||
|
@ -460,7 +459,7 @@ def _code(p, flags):
|
|||
# compile the pattern
|
||||
_compile(code, p.data, flags)
|
||||
|
||||
code.append(OPCODES[SUCCESS])
|
||||
code.append(SUCCESS)
|
||||
|
||||
return code
|
||||
|
||||
|
@ -475,7 +474,7 @@ def compile(p, flags=0):
|
|||
|
||||
code = _code(p, flags)
|
||||
|
||||
# print code
|
||||
# print(code)
|
||||
|
||||
# map in either direction
|
||||
groupindex = p.pattern.groupdict
|
||||
|
|
|
@ -23,138 +23,81 @@ from _sre import MAXREPEAT, MAXGROUPS
|
|||
class error(Exception):
    """Exception type raised by the SRE modules.

    Raised with a short message string, e.g. when the compiler meets an
    operator it does not support.
    """
|
||||
|
||||
|
||||
class _NamedIntConstant(int):
|
||||
def __new__(cls, value, name):
|
||||
self = super(_NamedIntConstant, cls).__new__(cls, value)
|
||||
self.name = name
|
||||
return self
|
||||
|
||||
def __str__(self):
|
||||
return self.name
|
||||
|
||||
__repr__ = __str__
|
||||
|
||||
MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')
|
||||
|
||||
def _makecodes(names):
    """Build named integer constants from a whitespace-separated string.

    Each name in *names* is numbered by its position (starting at 0) and
    wrapped in a ``_NamedIntConstant``.  Every constant is also published
    in this module's global namespace under its own name.

    Returns the list of constants in positional order.
    """
    # str.split() with no separator already ignores leading and trailing
    # whitespace, so no separate strip() is needed.
    constants = []
    for index, label in enumerate(names.split()):
        constants.append(_NamedIntConstant(index, label))

    # Publish only after every constant has been created successfully.
    namespace = globals()
    for constant in constants:
        namespace[constant.name] = constant
    return constants
|
||||
|
||||
# operators
|
||||
# failure=0 success=1 (just because it looks better that way :-)
|
||||
OPCODES = _makecodes("""
|
||||
FAILURE SUCCESS
|
||||
|
||||
FAILURE = "failure"
|
||||
SUCCESS = "success"
|
||||
ANY ANY_ALL
|
||||
ASSERT ASSERT_NOT
|
||||
AT
|
||||
BRANCH
|
||||
CALL
|
||||
CATEGORY
|
||||
CHARSET BIGCHARSET
|
||||
GROUPREF GROUPREF_EXISTS GROUPREF_IGNORE
|
||||
IN IN_IGNORE
|
||||
INFO
|
||||
JUMP
|
||||
LITERAL LITERAL_IGNORE
|
||||
MARK
|
||||
MAX_UNTIL
|
||||
MIN_UNTIL
|
||||
NOT_LITERAL NOT_LITERAL_IGNORE
|
||||
NEGATE
|
||||
RANGE
|
||||
REPEAT
|
||||
REPEAT_ONE
|
||||
SUBPATTERN
|
||||
MIN_REPEAT_ONE
|
||||
RANGE_IGNORE
|
||||
|
||||
ANY = "any"
|
||||
ANY_ALL = "any_all"
|
||||
ASSERT = "assert"
|
||||
ASSERT_NOT = "assert_not"
|
||||
AT = "at"
|
||||
BIGCHARSET = "bigcharset"
|
||||
BRANCH = "branch"
|
||||
CALL = "call"
|
||||
CATEGORY = "category"
|
||||
CHARSET = "charset"
|
||||
GROUPREF = "groupref"
|
||||
GROUPREF_IGNORE = "groupref_ignore"
|
||||
GROUPREF_EXISTS = "groupref_exists"
|
||||
IN = "in"
|
||||
IN_IGNORE = "in_ignore"
|
||||
INFO = "info"
|
||||
JUMP = "jump"
|
||||
LITERAL = "literal"
|
||||
LITERAL_IGNORE = "literal_ignore"
|
||||
MARK = "mark"
|
||||
MAX_REPEAT = "max_repeat"
|
||||
MAX_UNTIL = "max_until"
|
||||
MIN_REPEAT = "min_repeat"
|
||||
MIN_UNTIL = "min_until"
|
||||
NEGATE = "negate"
|
||||
NOT_LITERAL = "not_literal"
|
||||
NOT_LITERAL_IGNORE = "not_literal_ignore"
|
||||
RANGE = "range"
|
||||
RANGE_IGNORE = "range_ignore"
|
||||
REPEAT = "repeat"
|
||||
REPEAT_ONE = "repeat_one"
|
||||
SUBPATTERN = "subpattern"
|
||||
MIN_REPEAT_ONE = "min_repeat_one"
|
||||
MIN_REPEAT MAX_REPEAT
|
||||
""")
|
||||
del OPCODES[-2:] # remove MIN_REPEAT and MAX_REPEAT
|
||||
|
||||
# positions
|
||||
AT_BEGINNING = "at_beginning"
|
||||
AT_BEGINNING_LINE = "at_beginning_line"
|
||||
AT_BEGINNING_STRING = "at_beginning_string"
|
||||
AT_BOUNDARY = "at_boundary"
|
||||
AT_NON_BOUNDARY = "at_non_boundary"
|
||||
AT_END = "at_end"
|
||||
AT_END_LINE = "at_end_line"
|
||||
AT_END_STRING = "at_end_string"
|
||||
AT_LOC_BOUNDARY = "at_loc_boundary"
|
||||
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
|
||||
AT_UNI_BOUNDARY = "at_uni_boundary"
|
||||
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
|
||||
ATCODES = _makecodes("""
|
||||
AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING
|
||||
AT_BOUNDARY AT_NON_BOUNDARY
|
||||
AT_END AT_END_LINE AT_END_STRING
|
||||
AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY
|
||||
AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY
|
||||
""")
|
||||
|
||||
# categories
|
||||
CATEGORY_DIGIT = "category_digit"
|
||||
CATEGORY_NOT_DIGIT = "category_not_digit"
|
||||
CATEGORY_SPACE = "category_space"
|
||||
CATEGORY_NOT_SPACE = "category_not_space"
|
||||
CATEGORY_WORD = "category_word"
|
||||
CATEGORY_NOT_WORD = "category_not_word"
|
||||
CATEGORY_LINEBREAK = "category_linebreak"
|
||||
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
|
||||
CATEGORY_LOC_WORD = "category_loc_word"
|
||||
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
|
||||
CATEGORY_UNI_DIGIT = "category_uni_digit"
|
||||
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
|
||||
CATEGORY_UNI_SPACE = "category_uni_space"
|
||||
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
|
||||
CATEGORY_UNI_WORD = "category_uni_word"
|
||||
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
|
||||
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
|
||||
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
|
||||
CHCODES = _makecodes("""
|
||||
CATEGORY_DIGIT CATEGORY_NOT_DIGIT
|
||||
CATEGORY_SPACE CATEGORY_NOT_SPACE
|
||||
CATEGORY_WORD CATEGORY_NOT_WORD
|
||||
CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK
|
||||
CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD
|
||||
CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT
|
||||
CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE
|
||||
CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD
|
||||
CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
|
||||
""")
|
||||
|
||||
OPCODES = [
|
||||
|
||||
# failure=0 success=1 (just because it looks better that way :-)
|
||||
FAILURE, SUCCESS,
|
||||
|
||||
ANY, ANY_ALL,
|
||||
ASSERT, ASSERT_NOT,
|
||||
AT,
|
||||
BRANCH,
|
||||
CALL,
|
||||
CATEGORY,
|
||||
CHARSET, BIGCHARSET,
|
||||
GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
|
||||
IN, IN_IGNORE,
|
||||
INFO,
|
||||
JUMP,
|
||||
LITERAL, LITERAL_IGNORE,
|
||||
MARK,
|
||||
MAX_UNTIL,
|
||||
MIN_UNTIL,
|
||||
NOT_LITERAL, NOT_LITERAL_IGNORE,
|
||||
NEGATE,
|
||||
RANGE,
|
||||
REPEAT,
|
||||
REPEAT_ONE,
|
||||
SUBPATTERN,
|
||||
MIN_REPEAT_ONE,
|
||||
RANGE_IGNORE,
|
||||
|
||||
]
|
||||
|
||||
ATCODES = [
|
||||
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
|
||||
AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
|
||||
AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
|
||||
AT_UNI_NON_BOUNDARY
|
||||
]
|
||||
|
||||
CHCODES = [
|
||||
CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
|
||||
CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
|
||||
CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
|
||||
CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
|
||||
CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
|
||||
CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
|
||||
CATEGORY_UNI_NOT_LINEBREAK
|
||||
]
|
||||
|
||||
def makedict(list):
    """Map each item of *list* to its position in the sequence.

    Positions start at 0.  If an item occurs more than once, the index of
    its last occurrence wins.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept
    # because it is part of the function's existing signature.
    return {entry: position for position, entry in enumerate(list)}
|
||||
|
||||
OPCODES = makedict(OPCODES)
|
||||
ATCODES = makedict(ATCODES)
|
||||
CHCODES = makedict(CHCODES)
|
||||
|
||||
# replacement operations for "ignore case" mode
|
||||
OP_IGNORE = {
|
||||
|
@ -220,9 +163,9 @@ SRE_INFO_CHARSET = 4 # pattern starts with character from given set
|
|||
|
||||
if __name__ == "__main__":
|
||||
def dump(f, d, prefix):
|
||||
items = sorted(d.items(), key=lambda a: a[1])
|
||||
for k, v in items:
|
||||
f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
|
||||
items = sorted(d)
|
||||
for item in items:
|
||||
f.write("#define %s_%s %d\n" % (prefix, item, item))
|
||||
f = open("sre_constants.h", "w")
|
||||
f.write("""\
|
||||
/*
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
# XXX: show string offset and offending character for all errors
|
||||
|
||||
from sre_constants import *
|
||||
from _sre import MAXREPEAT
|
||||
|
||||
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
||||
REPEAT_CHARS = "*+?{"
|
||||
|
@ -103,24 +102,24 @@ class SubPattern:
|
|||
nl = True
|
||||
seqtypes = (tuple, list)
|
||||
for op, av in self.data:
|
||||
print(level*" " + op, end='')
|
||||
print(level*" " + str(op), end='')
|
||||
if op == IN:
|
||||
# member sublanguage
|
||||
print()
|
||||
for op, a in av:
|
||||
print((level+1)*" " + op, a)
|
||||
print((level+1)*" " + str(op), a)
|
||||
elif op == BRANCH:
|
||||
print()
|
||||
for i, a in enumerate(av[1]):
|
||||
if i:
|
||||
print(level*" " + "or")
|
||||
print(level*" " + "OR")
|
||||
a.dump(level+1)
|
||||
elif op == GROUPREF_EXISTS:
|
||||
condgroup, item_yes, item_no = av
|
||||
print('', condgroup)
|
||||
item_yes.dump(level+1)
|
||||
if item_no:
|
||||
print(level*" " + "else")
|
||||
print(level*" " + "ELSE")
|
||||
item_no.dump(level+1)
|
||||
elif isinstance(av, seqtypes):
|
||||
nl = False
|
||||
|
|
|
@ -1285,22 +1285,22 @@ class ReTests(unittest.TestCase):
|
|||
with captured_stdout() as out:
|
||||
re.compile(pat, re.DEBUG)
|
||||
dump = '''\
|
||||
subpattern 1
|
||||
literal 46
|
||||
subpattern None
|
||||
branch
|
||||
in
|
||||
literal 99
|
||||
literal 104
|
||||
or
|
||||
literal 112
|
||||
literal 121
|
||||
subpattern None
|
||||
groupref_exists 1
|
||||
at at_end
|
||||
else
|
||||
literal 58
|
||||
literal 32
|
||||
SUBPATTERN 1
|
||||
LITERAL 46
|
||||
SUBPATTERN None
|
||||
BRANCH
|
||||
IN
|
||||
LITERAL 99
|
||||
LITERAL 104
|
||||
OR
|
||||
LITERAL 112
|
||||
LITERAL 121
|
||||
SUBPATTERN None
|
||||
GROUPREF_EXISTS 1
|
||||
AT AT_END
|
||||
ELSE
|
||||
LITERAL 58
|
||||
LITERAL 32
|
||||
'''
|
||||
self.assertEqual(out.getvalue(), dump)
|
||||
# Debug output is output again even a second time (bypassing
|
||||
|
|
Loading…
Reference in New Issue