mirror of https://github.com/python/cpython
gh-91404: Use computed gotos and reduce indirection in re (#91495)
This commit is contained in:
parent
d104f4d21f
commit
1b34b5687b
|
@ -520,6 +520,12 @@ Optimizations
|
|||
becomes 272 bytes from 352 bytes on 64-bit platforms.
|
||||
(Contributed by Inada Naoki in :issue:`46845`.)
|
||||
|
||||
* :mod:`re`'s regular expression matching engine has been partially refactored,
|
||||
and now uses computed gotos (or "threaded code") on supported platforms. As a
|
||||
result, Python 3.11 executes the `pyperformance regular expression benchmarks
|
||||
<https://pyperformance.readthedocs.io/benchmarks.html#regex-dna>`_ up to 10%
|
||||
faster than Python 3.10.
|
||||
|
||||
|
||||
Faster CPython
|
||||
==============
|
||||
|
|
|
@ -1351,11 +1351,12 @@ regen-stdlib-module-names: build_all Programs/_testembed
|
|||
$(UPDATE_FILE) $(srcdir)/Python/stdlib_module_names.h $(srcdir)/Python/stdlib_module_names.h.new
|
||||
|
||||
regen-sre:
|
||||
# Regenerate Modules/_sre/sre_constants.h from Lib/re/_constants.py
|
||||
# using Tools/scripts/generate_sre_constants.py
|
||||
# Regenerate Modules/_sre/sre_constants.h and Modules/_sre/sre_targets.h
|
||||
# from Lib/re/_constants.py using Tools/scripts/generate_sre_constants.py
|
||||
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_sre_constants.py \
|
||||
$(srcdir)/Lib/re/_constants.py \
|
||||
$(srcdir)/Modules/_sre/sre_constants.h
|
||||
$(srcdir)/Modules/_sre/sre_constants.h \
|
||||
$(srcdir)/Modules/_sre/sre_targets.h
|
||||
|
||||
Python/compile.o Python/symtable.o Python/ast_unparse.o Python/ast.o Python/future.o: $(srcdir)/Include/internal/pycore_ast.h
|
||||
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Improve the performance of :mod:`re` matching by using computed gotos (or
|
||||
"threaded code") on supported platforms and removing expensive pointer
|
||||
indirections.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Secret Labs' Regular Expression Engine
|
||||
*
|
||||
* regular expression matching engine
|
||||
*
|
||||
* Auto-generated by Tools/scripts/generate_sre_constants.py from
|
||||
* Lib/re/_constants.py.
|
||||
*
|
||||
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||
*
|
||||
* See the sre.c file for information on usage and redistribution.
|
||||
*/
|
||||
|
||||
/*
 * Computed-goto dispatch table: one label address per SRE opcode.
 *
 * The generator emits the entries in sorted opcode order and asserts that
 * the opcode values are exactly 0, 1, 2, ..., so entry i is the TARGET_
 * label for the opcode whose numeric value is i.  The array length (44) is
 * the number of opcodes known to the generator.
 *
 * `&&label` is the GNU C "labels as values" extension; the initializer is
 * only valid where every TARGET_SRE_OP_* label is in scope.
 * NOTE(review): presumably this header is #included inside the body of the
 * matching function that defines those labels -- confirm at the include site.
 *
 * This file is generated; change Lib/re/_constants.py or the generator
 * script and regenerate instead of editing the entries by hand.
 */
static void *sre_targets[44] = {
|
||||
&&TARGET_SRE_OP_FAILURE,
|
||||
&&TARGET_SRE_OP_SUCCESS,
|
||||
&&TARGET_SRE_OP_ANY,
|
||||
&&TARGET_SRE_OP_ANY_ALL,
|
||||
&&TARGET_SRE_OP_ASSERT,
|
||||
&&TARGET_SRE_OP_ASSERT_NOT,
|
||||
&&TARGET_SRE_OP_AT,
|
||||
&&TARGET_SRE_OP_BRANCH,
|
||||
&&TARGET_SRE_OP_CALL,
|
||||
&&TARGET_SRE_OP_CATEGORY,
|
||||
&&TARGET_SRE_OP_CHARSET,
|
||||
&&TARGET_SRE_OP_BIGCHARSET,
|
||||
&&TARGET_SRE_OP_GROUPREF,
|
||||
&&TARGET_SRE_OP_GROUPREF_EXISTS,
|
||||
&&TARGET_SRE_OP_IN,
|
||||
&&TARGET_SRE_OP_INFO,
|
||||
&&TARGET_SRE_OP_JUMP,
|
||||
&&TARGET_SRE_OP_LITERAL,
|
||||
&&TARGET_SRE_OP_MARK,
|
||||
&&TARGET_SRE_OP_MAX_UNTIL,
|
||||
&&TARGET_SRE_OP_MIN_UNTIL,
|
||||
&&TARGET_SRE_OP_NOT_LITERAL,
|
||||
&&TARGET_SRE_OP_NEGATE,
|
||||
&&TARGET_SRE_OP_RANGE,
|
||||
&&TARGET_SRE_OP_REPEAT,
|
||||
&&TARGET_SRE_OP_REPEAT_ONE,
|
||||
&&TARGET_SRE_OP_SUBPATTERN,
|
||||
&&TARGET_SRE_OP_MIN_REPEAT_ONE,
|
||||
&&TARGET_SRE_OP_ATOMIC_GROUP,
|
||||
&&TARGET_SRE_OP_POSSESSIVE_REPEAT,
|
||||
&&TARGET_SRE_OP_POSSESSIVE_REPEAT_ONE,
|
||||
&&TARGET_SRE_OP_GROUPREF_IGNORE,
|
||||
&&TARGET_SRE_OP_IN_IGNORE,
|
||||
&&TARGET_SRE_OP_LITERAL_IGNORE,
|
||||
&&TARGET_SRE_OP_NOT_LITERAL_IGNORE,
|
||||
&&TARGET_SRE_OP_GROUPREF_LOC_IGNORE,
|
||||
&&TARGET_SRE_OP_IN_LOC_IGNORE,
|
||||
&&TARGET_SRE_OP_LITERAL_LOC_IGNORE,
|
||||
&&TARGET_SRE_OP_NOT_LITERAL_LOC_IGNORE,
|
||||
&&TARGET_SRE_OP_GROUPREF_UNI_IGNORE,
|
||||
&&TARGET_SRE_OP_IN_UNI_IGNORE,
|
||||
&&TARGET_SRE_OP_LITERAL_UNI_IGNORE,
|
||||
&&TARGET_SRE_OP_NOT_LITERAL_UNI_IGNORE,
|
||||
&&TARGET_SRE_OP_RANGE_UNI_IGNORE,
|
||||
};
|
|
@ -29,7 +29,11 @@ sre_constants_header = """\
|
|||
|
||||
"""
|
||||
|
||||
def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
|
||||
def main(
|
||||
infile="Lib/re/_constants.py",
|
||||
outfile_constants="Modules/_sre/sre_constants.h",
|
||||
outfile_targets="Modules/_sre/sre_targets.h",
|
||||
):
|
||||
ns = {}
|
||||
with open(infile) as fp:
|
||||
code = fp.read()
|
||||
|
@ -46,6 +50,11 @@ def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
|
|||
for value, name in sorted(items):
|
||||
yield "#define %s %d\n" % (name, value)
|
||||
|
||||
def dump_gotos(d, prefix):
    """Yield one C initializer line per entry of ``d``, in sorted order.

    Each yielded line is the address-of-label expression
    ``&&<prefix>_<entry>`` followed by a comma and a newline.  The entries
    must compare equal to 0, 1, 2, ... once sorted; a gap or duplicate
    trips the assertion, since the resulting table is indexed by position.
    """
    expected_index = 0
    for opcode in sorted(d):
        # Position in the emitted table must match the entry's own value.
        assert expected_index == opcode
        yield f" &&{prefix}_{opcode},\n"
        expected_index += 1
|
||||
|
||||
content = [sre_constants_header]
|
||||
content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
|
||||
content.extend(dump(ns["OPCODES"], "SRE_OP"))
|
||||
|
@ -54,7 +63,14 @@ def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
|
|||
content.extend(dump2(ns, "SRE_FLAG_"))
|
||||
content.extend(dump2(ns, "SRE_INFO_"))
|
||||
|
||||
update_file(outfile, ''.join(content))
|
||||
update_file(outfile_constants, ''.join(content))
|
||||
|
||||
content = [sre_constants_header]
|
||||
content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
|
||||
content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
|
||||
content.append("};\n")
|
||||
|
||||
update_file(outfile_targets, ''.join(content))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue