bpo-30299: Display the bytecode when compiling a regex in debug mode. (#1491)
`re.compile(..., re.DEBUG)` now displays the compiled bytecode in human-readable form.
This commit is contained in:
parent
821a9d146b
commit
4ab6abfca4
|
@ -595,6 +595,150 @@ def _code(p, flags):
|
|||
|
||||
return code
|
||||
|
||||
def _hex_code(code):
    """Return the words of *code* as a bracketed, comma-separated hex list.

    Every word is rendered with a ``0x`` prefix and zero-padded to a total
    width of ``_sre.CODESIZE*2 + 2`` characters.
    """
    width = _sre.CODESIZE * 2 + 2
    words = ['%#0*x' % (width, word) for word in code]
    return '[%s]' % ', '.join(words)
|
||||
|
||||
def dis(code):
    """Print a human-readable listing of compiled pattern code to stdout.

    *code* is an indexable sequence of integer code words.  Every
    instruction is printed on its own line, prefixed with its offset in the
    sequence.  The offset is followed by ':' when an instruction printed
    earlier named it as a target, and by '.' otherwise.  The contents of
    nested constructs are listed, indented, beneath the instruction that
    owns them.

    Raises ValueError if an opcode is met that has no decoding rule here.
    """
    import sys

    targets = set()     # offsets already announced by a "(to N)" annotation
    depth = 0           # nesting depth; drives the indentation of each line
    offset_width = len(str(len(code) - 1))
    charset_words = 256 // _CODEBITS
    mapping_words = 256 // _sre.CODESIZE

    # Opcode families that share one operand layout.
    bare_ops = (SUCCESS, FAILURE, ANY, ANY_ALL,
                MAX_UNTIL, MIN_UNTIL, NEGATE)
    literal_ops = (LITERAL, NOT_LITERAL,
                   LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                   LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE)
    set_ops = (IN, IN_IGNORE, IN_LOC_IGNORE)
    range_ops = (RANGE, RANGE_IGNORE)
    index_ops = (MARK, GROUPREF, GROUPREF_IGNORE)
    repeat_ops = (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE)
    assert_ops = (ASSERT, ASSERT_NOT)

    def walk(begin, stop):
        """List the instructions stored in code[begin:stop]."""
        nonlocal depth

        def emit(*fields, to=None):
            # Instruction line: offset, target marker, then the fields.
            # The target is recorded before the marker is chosen.
            if to is not None:
                targets.add(to)
                fields = fields + ('(to %d)' % (to,),)
            marker = ':' if here in targets else '.'
            print('%*d%s ' % (offset_width, here, marker),
                  end=' '*(depth-1))
            print(*fields)

        def emit_detail(*fields):
            # Continuation line: no offset, only indentation.
            print(end=' '*(offset_width + 2*depth))
            print(*fields)

        depth += 1
        here = begin    # offset of the instruction being printed
        pos = begin     # read cursor into code
        while pos < stop:
            here = pos
            op = OPCODES[code[pos]]
            pos += 1
            if op in bare_ops:
                emit(op)
            elif op in literal_ops:
                char = code[pos]
                pos += 1
                emit(op, '%#02x (%r)' % (char, chr(char)))
            elif op is AT:
                name = str(ATCODES[code[pos]])
                pos += 1
                assert name[:3] == 'AT_'
                emit(op, name[3:])
            elif op is CATEGORY:
                name = str(CHCODES[code[pos]])
                pos += 1
                assert name[:9] == 'CATEGORY_'
                emit(op, name[9:])
            elif op in set_ops:
                skip = code[pos]
                emit(op, skip, to=pos+skip)
                walk(pos+1, pos+skip)
                pos += skip
            elif op in range_ops:
                low, high = code[pos: pos+2]
                pos += 2
                emit(op, '%#02x %#02x (%r-%r)' % (low, high, chr(low), chr(high)))
            elif op is CHARSET:
                emit(op, _hex_code(code[pos: pos + charset_words]))
                pos += charset_words
            elif op is BIGCHARSET:
                block_count = code[pos]
                pos += 1
                # Re-split the packed mapping words into individual bytes.
                raw = b''.join(word.to_bytes(_sre.CODESIZE, sys.byteorder)
                               for word in code[pos: pos + mapping_words])
                emit(op, block_count, list(raw))
                pos += mapping_words
                depth += 1
                for _ in range(block_count):
                    emit_detail(_hex_code(code[pos: pos + charset_words]))
                    pos += charset_words
                depth -= 1
            elif op in index_ops:
                index = code[pos]
                pos += 1
                emit(op, index)
            elif op is JUMP:
                skip = code[pos]
                emit(op, skip, to=pos+skip)
                pos += 1
            elif op is BRANCH:
                skip = code[pos]
                emit(op, skip, to=pos+skip)
                # Each alternative ends at the next skip word; a zero skip
                # closes the chain.
                while skip:
                    walk(pos+1, pos+skip)
                    pos += skip
                    here = pos
                    skip = code[pos]
                    if skip:
                        emit('branch', skip, to=pos+skip)
                    else:
                        emit(FAILURE)
                pos += 1
            elif op in repeat_ops:
                skip, least, most = code[pos: pos+3]
                if most == MAXREPEAT:
                    most = 'MAXREPEAT'
                emit(op, skip, least, most, to=pos+skip)
                walk(pos+3, pos+skip)
                pos += skip
            elif op is GROUPREF_EXISTS:
                group, skip = code[pos: pos+2]
                emit(op, group, skip, to=pos+skip)
                pos += 2
            elif op in assert_ops:
                skip, back = code[pos: pos+2]
                emit(op, skip, back, to=pos+skip)
                walk(pos+2, pos+skip)
                pos += skip
            elif op is INFO:
                skip, flags, least, most = code[pos: pos+4]
                if most == MAXREPEAT:
                    most = 'MAXREPEAT'
                emit(op, skip, bin(flags), least, most, to=pos+skip)
                cursor = pos + 4    # first word after the fixed header
                if flags & SRE_INFO_PREFIX:
                    prefix_len, prefix_skip = code[pos+4: pos+6]
                    emit_detail(' prefix_skip', prefix_skip)
                    cursor = pos + 6
                    prefix = code[cursor: cursor+prefix_len]
                    emit_detail(' prefix',
                                '[%s]' % ', '.join('%#02x' % x for x in prefix),
                                '(%r)' % ''.join(map(chr, prefix)))
                    cursor += prefix_len
                    emit_detail(' overlap', code[cursor: cursor+prefix_len])
                    cursor += prefix_len
                if flags & SRE_INFO_CHARSET:
                    depth += 1
                    emit_detail('in')
                    walk(cursor, pos+skip)
                    depth -= 1
                pos += skip
            else:
                raise ValueError(op)

        depth -= 1

    walk(0, len(code))
|
||||
|
||||
|
||||
def compile(p, flags=0):
|
||||
# internal: convert pattern list to internal format
|
||||
|
||||
|
@ -606,7 +750,9 @@ def compile(p, flags=0):
|
|||
|
||||
code = _code(p, flags)
|
||||
|
||||
# print(code)
|
||||
if flags & SRE_FLAG_DEBUG:
|
||||
print()
|
||||
dis(code)
|
||||
|
||||
# map in either direction
|
||||
groupindex = p.pattern.groupdict
|
||||
|
|
|
@ -1688,10 +1688,12 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(m.group(1), "")
|
||||
self.assertEqual(m.group(2), "y")
|
||||
|
||||
@cpython_only
|
||||
def test_debug_flag(self):
|
||||
pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
|
||||
with captured_stdout() as out:
|
||||
re.compile(pat, re.DEBUG)
|
||||
self.maxDiff = None
|
||||
dump = '''\
|
||||
SUBPATTERN 1 0 0
|
||||
LITERAL 46
|
||||
|
@ -1707,6 +1709,31 @@ GROUPREF_EXISTS 1
|
|||
ELSE
|
||||
LITERAL 58
|
||||
LITERAL 32
|
||||
|
||||
0. INFO 8 0b1 2 5 (to 9)
|
||||
prefix_skip 0
|
||||
prefix [0x2e] ('.')
|
||||
overlap [0]
|
||||
9: MARK 0
|
||||
11. LITERAL 0x2e ('.')
|
||||
13. MARK 1
|
||||
15. BRANCH 10 (to 26)
|
||||
17. IN 6 (to 24)
|
||||
19. LITERAL 0x63 ('c')
|
||||
21. LITERAL 0x68 ('h')
|
||||
23. FAILURE
|
||||
24: JUMP 9 (to 34)
|
||||
26: branch 7 (to 33)
|
||||
27. LITERAL 0x70 ('p')
|
||||
29. LITERAL 0x79 ('y')
|
||||
31. JUMP 2 (to 34)
|
||||
33: FAILURE
|
||||
34: GROUPREF_EXISTS 0 6 (to 41)
|
||||
37. AT END
|
||||
39. JUMP 5 (to 45)
|
||||
41: LITERAL 0x3a (':')
|
||||
43. LITERAL 0x20 (' ')
|
||||
45: SUCCESS
|
||||
'''
|
||||
self.assertEqual(out.getvalue(), dump)
|
||||
# Debug output is output again even a second time (bypassing
|
||||
|
|
|
@ -323,6 +323,9 @@ Extension Modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- bpo-30299: Compiling a regular expression in debug mode on CPython now displays
|
||||
the compiled bytecode in human-readable form.
|
||||
|
||||
- bpo-30048: Fixed ``Task.cancel()`` being ignored when the task is
|
||||
running a coroutine and the coroutine returned without any more ``await``.
|
||||
|
||||
|
|
Loading…
Reference in New Issue