bpo-30299: Display the bytecode when compiling a regex in debug mode. (#1491)
`re.compile(..., re.DEBUG)` now displays the compiled bytecode in human-readable form.
This commit is contained in:
parent
821a9d146b
commit
4ab6abfca4
|
@ -595,6 +595,150 @@ def _code(p, flags):
|
||||||
|
|
||||||
return code
|
return code
|
||||||
|
|
||||||
|
def _hex_code(code):
    """Format a sequence of code words as a bracketed list of hex literals.

    Every word is rendered with a ``0x`` prefix and zero-padded to
    ``_sre.CODESIZE*2`` hex digits, so all entries have the same width.
    The entries are joined with ``', '`` and wrapped in square brackets;
    an empty sequence yields ``'[]'``.
    """
    # Two hex digits per byte of a code word, plus two for the '0x' prefix.
    width = _sre.CODESIZE*2 + 2
    return '[%s]' % ', '.join('%#0*x' % (width, x) for x in code)
|
||||||
|
|
||||||
|
def dis(code):
    """Print a human-readable disassembly of compiled pattern code.

    *code* is the flat sequence of integer code words to decode.  Each
    instruction is printed on its own line as the offset of its opcode,
    a marker, the opcode and its operands.  The marker is ``':'`` when
    the offset has already been recorded as the target of a skip/jump
    operand and ``'.'`` otherwise.  Instructions that carry a skip operand
    get a trailing ``(to N)`` giving the absolute target offset.  Nested
    sub-blocks are indented by two columns per nesting level.

    Raises ValueError (with the opcode as argument) for an opcode this
    function has no decoding rule for.
    """
    import sys

    labels = set()      # absolute offsets seen so far as skip/jump targets
    level = 0           # current nesting depth; drives indentation
    offset_width = len(str(len(code) - 1))

    def dis_(start, end):
        # Disassemble code[start:end] one nesting level deeper.

        def print_(*args, to=None):
            # Print an instruction line labelled with the current ``start``
            # offset; ``to`` (if given) is recorded as a label and shown.
            if to is not None:
                labels.add(to)
                args += ('(to %d)' % (to,),)
            # Two columns per nesting level, matching print_2's 2*level.
            print('%*d%s ' % (offset_width, start, ':' if start in labels else '.'),
                  end='  '*(level-1))
            print(*args)

        def print_2(*args):
            # Print a continuation line (no offset column), aligned with
            # instruction lines of the current nesting level.
            print(end=' '*(offset_width + 2*level))
            print(*args)

        nonlocal level
        level += 1
        i = start
        while i < end:
            # ``start`` is what print_ shows as the offset of this line.
            start = i
            op = OPCODES[code[i]]
            i += 1
            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
                      MAX_UNTIL, MIN_UNTIL, NEGATE):
                # Opcodes without operands.
                print_(op)
            elif op in (LITERAL, NOT_LITERAL,
                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
                # One operand: a character code, shown in hex and as a char.
                arg = code[i]
                i += 1
                print_(op, '%#02x (%r)' % (arg, chr(arg)))
            elif op is AT:
                arg = code[i]
                i += 1
                arg = str(ATCODES[arg])
                assert arg[:3] == 'AT_'
                print_(op, arg[3:])     # drop the 'AT_' prefix
            elif op is CATEGORY:
                arg = code[i]
                i += 1
                arg = str(CHCODES[arg])
                assert arg[:9] == 'CATEGORY_'
                print_(op, arg[9:])     # drop the 'CATEGORY_' prefix
            elif op in (IN, IN_IGNORE, IN_LOC_IGNORE):
                # Skip word followed by a nested block of set items.
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op in (RANGE, RANGE_IGNORE):
                lo, hi = code[i: i+2]
                i += 2
                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
            elif op is CHARSET:
                # Fixed-size operand of 256//_CODEBITS code words.
                print_(op, _hex_code(code[i: i + 256//_CODEBITS]))
                i += 256//_CODEBITS
            elif op is BIGCHARSET:
                # Block count, then 256//_sre.CODESIZE words that are
                # expanded back into individual bytes (native byte order),
                # then ``arg`` blocks of 256//_CODEBITS words each.
                arg = code[i]
                i += 1
                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
                                        for x in code[i: i + 256//_sre.CODESIZE]))
                print_(op, arg, mapping)
                i += 256//_sre.CODESIZE
                level += 1
                for _ in range(arg):
                    print_2(_hex_code(code[i: i + 256//_CODEBITS]))
                    i += 256//_CODEBITS
                level -= 1
            elif op in (MARK, GROUPREF, GROUPREF_IGNORE):
                arg = code[i]
                i += 1
                print_(op, arg)
            elif op is JUMP:
                skip = code[i]
                print_(op, skip, to=i+skip)
                i += 1
            elif op is BRANCH:
                # A chain of alternatives, each introduced by a skip word;
                # a zero skip word terminates the chain.
                skip = code[i]
                print_(op, skip, to=i+skip)
                while skip:
                    dis_(i+1, i+skip)
                    i += skip
                    start = i   # label the next line with the new offset
                    skip = code[i]
                    if skip:
                        print_('branch', skip, to=i+skip)
                    else:
                        print_(FAILURE)
                i += 1
            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE):
                skip, lower, upper = code[i: i+3]
                if upper == MAXREPEAT:
                    upper = 'MAXREPEAT'
                print_(op, skip, lower, upper, to=i+skip)
                dis_(i+3, i+skip)
                i += skip
            elif op is GROUPREF_EXISTS:
                arg, skip = code[i: i+2]
                print_(op, arg, skip, to=i+skip)
                i += 2
            elif op in (ASSERT, ASSERT_NOT):
                skip, arg = code[i: i+2]
                print_(op, skip, arg, to=i+skip)
                dis_(i+2, i+skip)
                i += skip
            elif op is INFO:
                skip, flags, lower, upper = code[i: i+4]
                if upper == MAXREPEAT:
                    upper = 'MAXREPEAT'
                print_(op, skip, bin(flags), lower, upper, to=i+skip)
                # From here ``start`` is reused as a cursor into the
                # optional INFO payload; it is reset on the next iteration.
                start = i+4
                if flags & SRE_INFO_PREFIX:
                    prefix_len, prefix_skip = code[i+4: i+6]
                    # Labels carry two leading spaces: one indentation
                    # step below the INFO line.
                    print_2('  prefix_skip', prefix_skip)
                    start = i + 6
                    prefix = code[start: start+prefix_len]
                    print_2('  prefix',
                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
                            '(%r)' % ''.join(map(chr, prefix)))
                    start += prefix_len
                    print_2('  overlap', code[start: start+prefix_len])
                    start += prefix_len
                if flags & SRE_INFO_CHARSET:
                    level += 1
                    print_2('in')
                    dis_(start, i+skip)
                    level -= 1
                i += skip
            else:
                raise ValueError(op)

        level -= 1

    dis_(0, len(code))
|
||||||
|
|
||||||
|
|
||||||
def compile(p, flags=0):
|
def compile(p, flags=0):
|
||||||
# internal: convert pattern list to internal format
|
# internal: convert pattern list to internal format
|
||||||
|
|
||||||
|
@ -606,7 +750,9 @@ def compile(p, flags=0):
|
||||||
|
|
||||||
code = _code(p, flags)
|
code = _code(p, flags)
|
||||||
|
|
||||||
# print(code)
|
if flags & SRE_FLAG_DEBUG:
|
||||||
|
print()
|
||||||
|
dis(code)
|
||||||
|
|
||||||
# map in either direction
|
# map in either direction
|
||||||
groupindex = p.pattern.groupdict
|
groupindex = p.pattern.groupdict
|
||||||
|
|
|
@ -1688,10 +1688,12 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(m.group(1), "")
|
self.assertEqual(m.group(1), "")
|
||||||
self.assertEqual(m.group(2), "y")
|
self.assertEqual(m.group(2), "y")
|
||||||
|
|
||||||
|
@cpython_only
|
||||||
def test_debug_flag(self):
|
def test_debug_flag(self):
|
||||||
pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
|
pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
|
||||||
with captured_stdout() as out:
|
with captured_stdout() as out:
|
||||||
re.compile(pat, re.DEBUG)
|
re.compile(pat, re.DEBUG)
|
||||||
|
self.maxDiff = None
|
||||||
dump = '''\
|
dump = '''\
|
||||||
SUBPATTERN 1 0 0
|
SUBPATTERN 1 0 0
|
||||||
LITERAL 46
|
LITERAL 46
|
||||||
|
@ -1707,6 +1709,31 @@ GROUPREF_EXISTS 1
|
||||||
ELSE
|
ELSE
|
||||||
LITERAL 58
|
LITERAL 58
|
||||||
LITERAL 32
|
LITERAL 32
|
||||||
|
|
||||||
|
0. INFO 8 0b1 2 5 (to 9)
|
||||||
|
prefix_skip 0
|
||||||
|
prefix [0x2e] ('.')
|
||||||
|
overlap [0]
|
||||||
|
9: MARK 0
|
||||||
|
11. LITERAL 0x2e ('.')
|
||||||
|
13. MARK 1
|
||||||
|
15. BRANCH 10 (to 26)
|
||||||
|
17. IN 6 (to 24)
|
||||||
|
19. LITERAL 0x63 ('c')
|
||||||
|
21. LITERAL 0x68 ('h')
|
||||||
|
23. FAILURE
|
||||||
|
24: JUMP 9 (to 34)
|
||||||
|
26: branch 7 (to 33)
|
||||||
|
27. LITERAL 0x70 ('p')
|
||||||
|
29. LITERAL 0x79 ('y')
|
||||||
|
31. JUMP 2 (to 34)
|
||||||
|
33: FAILURE
|
||||||
|
34: GROUPREF_EXISTS 0 6 (to 41)
|
||||||
|
37. AT END
|
||||||
|
39. JUMP 5 (to 45)
|
||||||
|
41: LITERAL 0x3a (':')
|
||||||
|
43. LITERAL 0x20 (' ')
|
||||||
|
45: SUCCESS
|
||||||
'''
|
'''
|
||||||
self.assertEqual(out.getvalue(), dump)
|
self.assertEqual(out.getvalue(), dump)
|
||||||
# Debug output is output again even a second time (bypassing
|
# Debug output is output again even a second time (bypassing
|
||||||
|
|
|
@ -323,6 +323,9 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- bpo-30299: Compiling a regular expression in debug mode on CPython now displays
|
||||||
|
the compiled bytecode in human-readable form.
|
||||||
|
|
||||||
- bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is
|
- bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is
|
||||||
running coroutine and the coroutine returned without any more ``await``.
|
running coroutine and the coroutine returned without any more ``await``.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue