mirror of https://github.com/python/cpython
gh-114058: Foundations of the Tier2 redundancy eliminator (GH-115085)
--------- Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com> Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com> Co-authored-by: Guido van Rossum <gvanrossum@users.noreply.github.com>
This commit is contained in:
parent
ccc76c3e88
commit
7cce857622
|
@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated
|
|||
Python/Python-ast.c generated
|
||||
Python/executor_cases.c.h generated
|
||||
Python/generated_cases.c.h generated
|
||||
Python/tier2_redundancy_eliminator_bytecodes.c.h generated
|
||||
Python/opcode_targets.h generated
|
||||
Python/stdlib_module_names.h generated
|
||||
Tools/peg_generator/pegen/grammar_parser.py generated
|
||||
|
|
|
@ -120,6 +120,9 @@ typedef struct _optimization_stats {
|
|||
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
|
||||
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
|
||||
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
|
||||
uint64_t optimizer_attempts;
|
||||
uint64_t optimizer_successes;
|
||||
uint64_t optimizer_failure_reason_no_memory;
|
||||
} OptimizationStats;
|
||||
|
||||
typedef struct _rare_event_stats {
|
||||
|
|
|
@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
|
|||
[MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
|
||||
[MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 },
|
||||
[MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 },
|
||||
[NOP] = { true, INSTR_FMT_IX, 0 },
|
||||
[NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG },
|
||||
[POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG },
|
||||
[POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG },
|
||||
[POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG },
|
||||
|
@ -1156,10 +1156,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
|
|||
[LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
|
||||
[LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
|
||||
[LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
|
||||
[POP_BLOCK] = { true, -1, 0 },
|
||||
[SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG },
|
||||
[SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG },
|
||||
[SETUP_WITH] = { true, -1, HAS_ARG_FLAG },
|
||||
[POP_BLOCK] = { true, -1, HAS_PURE_FLAG },
|
||||
[SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG },
|
||||
[SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG },
|
||||
[SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG },
|
||||
[STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,13 @@ extern "C" {
|
|||
# error "this header requires Py_BUILD_CORE define"
|
||||
#endif
|
||||
|
||||
#include "pycore_uop_ids.h"
|
||||
|
||||
// This is the length of the trace we project initially.
|
||||
#define UOP_MAX_TRACE_LENGTH 512
|
||||
|
||||
#define TRACE_STACK_SIZE 5
|
||||
|
||||
int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
|
||||
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
|
||||
_PyBloomFilter *dependencies);
|
||||
|
|
|
@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
|
|||
|
||||
#ifdef NEED_OPCODE_METADATA
|
||||
const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
|
||||
[_NOP] = 0,
|
||||
[_NOP] = HAS_PURE_FLAG,
|
||||
[_RESUME_CHECK] = HAS_DEOPT_FLAG,
|
||||
[_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG,
|
||||
[_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG,
|
||||
|
@ -202,10 +202,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
|
|||
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
|
||||
[_EXIT_TRACE] = HAS_DEOPT_FLAG,
|
||||
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
|
||||
[_LOAD_CONST_INLINE] = 0,
|
||||
[_LOAD_CONST_INLINE_BORROW] = 0,
|
||||
[_LOAD_CONST_INLINE_WITH_NULL] = 0,
|
||||
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0,
|
||||
[_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
|
||||
[_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG,
|
||||
[_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG,
|
||||
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG,
|
||||
[_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
|
||||
[_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
|
||||
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
|
||||
|
|
|
@ -3,6 +3,7 @@ import opcode
|
|||
import sys
|
||||
import textwrap
|
||||
import unittest
|
||||
import gc
|
||||
|
||||
import _testinternalcapi
|
||||
|
||||
|
@ -556,6 +557,214 @@ class TestUops(unittest.TestCase):
|
|||
# too much already.
|
||||
self.assertEqual(count, 1)
|
||||
|
||||
class TestUopsOptimization(unittest.TestCase):
|
||||
|
||||
def test_int_type_propagation(self):
|
||||
def testfunc(loops):
|
||||
num = 0
|
||||
while num < loops:
|
||||
x = num + num
|
||||
a = x + 1
|
||||
num += 1
|
||||
return a
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
res = None
|
||||
with temporary_optimizer(opt):
|
||||
res = testfunc(32)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
self.assertEqual(res, 63)
|
||||
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
|
||||
guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
|
||||
self.assertGreaterEqual(len(binop_count), 3)
|
||||
self.assertLessEqual(len(guard_both_int_count), 1)
|
||||
|
||||
def test_int_type_propagation_through_frame(self):
|
||||
def double(x):
|
||||
return x + x
|
||||
def testfunc(loops):
|
||||
num = 0
|
||||
while num < loops:
|
||||
x = num + num
|
||||
a = double(x)
|
||||
num += 1
|
||||
return a
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
res = None
|
||||
with temporary_optimizer(opt):
|
||||
res = testfunc(32)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
self.assertEqual(res, 124)
|
||||
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
|
||||
guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
|
||||
self.assertGreaterEqual(len(binop_count), 3)
|
||||
self.assertLessEqual(len(guard_both_int_count), 1)
|
||||
|
||||
def test_int_type_propagation_from_frame(self):
|
||||
def double(x):
|
||||
return x + x
|
||||
def testfunc(loops):
|
||||
num = 0
|
||||
while num < loops:
|
||||
a = double(num)
|
||||
x = a + a
|
||||
num += 1
|
||||
return x
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
res = None
|
||||
with temporary_optimizer(opt):
|
||||
res = testfunc(32)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
self.assertEqual(res, 124)
|
||||
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
|
||||
guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
|
||||
self.assertGreaterEqual(len(binop_count), 3)
|
||||
self.assertLessEqual(len(guard_both_int_count), 1)
|
||||
|
||||
def test_int_impure_region(self):
|
||||
def testfunc(loops):
|
||||
num = 0
|
||||
while num < loops:
|
||||
x = num + num
|
||||
y = 1
|
||||
x // 2
|
||||
a = x + y
|
||||
num += 1
|
||||
return a
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
res = None
|
||||
with temporary_optimizer(opt):
|
||||
res = testfunc(64)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
|
||||
self.assertGreaterEqual(len(binop_count), 3)
|
||||
|
||||
def test_call_py_exact_args(self):
|
||||
def testfunc(n):
|
||||
def dummy(x):
|
||||
return x+1
|
||||
for i in range(n):
|
||||
dummy(i)
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
with temporary_optimizer(opt):
|
||||
testfunc(20)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
uops = {opname for opname, _, _ in ex}
|
||||
self.assertIn("_PUSH_FRAME", uops)
|
||||
self.assertIn("_BINARY_OP_ADD_INT", uops)
|
||||
self.assertNotIn("_CHECK_PEP_523", uops)
|
||||
|
||||
def test_int_type_propagate_through_range(self):
|
||||
def testfunc(n):
|
||||
|
||||
for i in range(n):
|
||||
x = i + i
|
||||
return x
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
with temporary_optimizer(opt):
|
||||
res = testfunc(20)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertEqual(res, 19 * 2)
|
||||
self.assertIsNotNone(ex)
|
||||
uops = {opname for opname, _, _ in ex}
|
||||
self.assertNotIn("_GUARD_BOTH_INT", uops)
|
||||
|
||||
def test_int_value_numbering(self):
|
||||
def testfunc(n):
|
||||
|
||||
y = 1
|
||||
for i in range(n):
|
||||
x = y
|
||||
z = x
|
||||
a = z
|
||||
b = a
|
||||
res = x + z + a + b
|
||||
return res
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
with temporary_optimizer(opt):
|
||||
res = testfunc(20)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertEqual(res, 4)
|
||||
self.assertIsNotNone(ex)
|
||||
uops = {opname for opname, _, _ in ex}
|
||||
self.assertIn("_GUARD_BOTH_INT", uops)
|
||||
guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
|
||||
self.assertEqual(len(guard_count), 1)
|
||||
|
||||
def test_comprehension(self):
|
||||
def testfunc(n):
|
||||
for _ in range(n):
|
||||
return [i for i in range(n)]
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
with temporary_optimizer(opt):
|
||||
testfunc(20)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
uops = {opname for opname, _, _ in ex}
|
||||
self.assertNotIn("_BINARY_OP_ADD_INT", uops)
|
||||
|
||||
def test_call_py_exact_args_disappearing(self):
|
||||
def dummy(x):
|
||||
return x+1
|
||||
|
||||
def testfunc(n):
|
||||
for i in range(n):
|
||||
dummy(i)
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
# Trigger specialization
|
||||
testfunc(8)
|
||||
with temporary_optimizer(opt):
|
||||
del dummy
|
||||
gc.collect()
|
||||
|
||||
def dummy(x):
|
||||
return x + 2
|
||||
testfunc(10)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
# Honestly as long as it doesn't crash it's fine.
|
||||
# Whether we get an executor or not is non-deterministic,
|
||||
# because it's decided by when the function is freed.
|
||||
# This test is a little implementation specific.
|
||||
|
||||
def test_promote_globals_to_constants(self):
|
||||
def testfunc(n):
|
||||
for i in range(n):
|
||||
x = range(i)
|
||||
return x
|
||||
|
||||
opt = _testinternalcapi.get_uop_optimizer()
|
||||
with temporary_optimizer(opt):
|
||||
testfunc(20)
|
||||
|
||||
ex = get_first_executor(testfunc)
|
||||
self.assertIsNotNone(ex)
|
||||
uops = {opname for opname, _, _ in ex}
|
||||
self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops)
|
||||
self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -33,6 +33,7 @@ with test_tools.imports_under_tool("cases_generator"):
|
|||
import parser
|
||||
from stack import Stack
|
||||
import tier1_generator
|
||||
import tier2_abstract_generator
|
||||
|
||||
|
||||
def handle_stderr():
|
||||
|
@ -793,5 +794,157 @@ class TestGeneratedCases(unittest.TestCase):
|
|||
self.run_cases_test(input, output)
|
||||
|
||||
|
||||
class TestGeneratedAbstractCases(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.maxDiff = None
|
||||
|
||||
self.temp_dir = tempfile.gettempdir()
|
||||
self.temp_input_filename = os.path.join(self.temp_dir, "input.txt")
|
||||
self.temp_input2_filename = os.path.join(self.temp_dir, "input2.txt")
|
||||
self.temp_output_filename = os.path.join(self.temp_dir, "output.txt")
|
||||
|
||||
def tearDown(self) -> None:
    """Remove the temporary input/output files used by a test.

    Cleanup is best-effort: a file that was never created, or that cannot
    be removed, is skipped so that the remaining files are still attempted.
    """
    for filename in (
        self.temp_input_filename,
        self.temp_input2_filename,
        self.temp_output_filename,
    ):
        try:
            os.remove(filename)
        except OSError:
            # Only filesystem errors are expected here.  A bare ``except:``
            # would also swallow KeyboardInterrupt and SystemExit.
            pass
    super().tearDown()
|
||||
|
||||
def run_cases_test(self, input: str, input2: str, expected: str):
|
||||
with open(self.temp_input_filename, "w+") as temp_input:
|
||||
temp_input.write(parser.BEGIN_MARKER)
|
||||
temp_input.write(input)
|
||||
temp_input.write(parser.END_MARKER)
|
||||
temp_input.flush()
|
||||
|
||||
with open(self.temp_input2_filename, "w+") as temp_input:
|
||||
temp_input.write(parser.BEGIN_MARKER)
|
||||
temp_input.write(input2)
|
||||
temp_input.write(parser.END_MARKER)
|
||||
temp_input.flush()
|
||||
|
||||
with handle_stderr():
|
||||
tier2_abstract_generator.generate_tier2_abstract_from_files(
|
||||
[self.temp_input_filename, self.temp_input2_filename],
|
||||
self.temp_output_filename
|
||||
)
|
||||
|
||||
with open(self.temp_output_filename) as temp_output:
|
||||
lines = temp_output.readlines()
|
||||
while lines and lines[0].startswith(("// ", "#", " #", "\n")):
|
||||
lines.pop(0)
|
||||
while lines and lines[-1].startswith(("#", "\n")):
|
||||
lines.pop(-1)
|
||||
actual = "".join(lines)
|
||||
self.assertEqual(actual.strip(), expected.strip())
|
||||
|
||||
def test_overridden_abstract(self):
|
||||
input = """
|
||||
pure op(OP, (--)) {
|
||||
spam();
|
||||
}
|
||||
"""
|
||||
input2 = """
|
||||
pure op(OP, (--)) {
|
||||
eggs();
|
||||
}
|
||||
"""
|
||||
output = """
|
||||
case OP: {
|
||||
eggs();
|
||||
break;
|
||||
}
|
||||
"""
|
||||
self.run_cases_test(input, input2, output)
|
||||
|
||||
def test_overridden_abstract_args(self):
|
||||
input = """
|
||||
pure op(OP, (arg1 -- out)) {
|
||||
spam();
|
||||
}
|
||||
op(OP2, (arg1 -- out)) {
|
||||
eggs();
|
||||
}
|
||||
"""
|
||||
input2 = """
|
||||
op(OP, (arg1 -- out)) {
|
||||
eggs();
|
||||
}
|
||||
"""
|
||||
output = """
|
||||
case OP: {
|
||||
_Py_UOpsSymType *arg1;
|
||||
_Py_UOpsSymType *out;
|
||||
arg1 = stack_pointer[-1];
|
||||
eggs();
|
||||
stack_pointer[-1] = out;
|
||||
break;
|
||||
}
|
||||
|
||||
case OP2: {
|
||||
_Py_UOpsSymType *out;
|
||||
out = sym_new_unknown(ctx);
|
||||
if (out == NULL) goto out_of_space;
|
||||
stack_pointer[-1] = out;
|
||||
break;
|
||||
}
|
||||
"""
|
||||
self.run_cases_test(input, input2, output)
|
||||
|
||||
def test_no_overridden_case(self):
|
||||
input = """
|
||||
pure op(OP, (arg1 -- out)) {
|
||||
spam();
|
||||
}
|
||||
|
||||
pure op(OP2, (arg1 -- out)) {
|
||||
}
|
||||
|
||||
"""
|
||||
input2 = """
|
||||
pure op(OP2, (arg1 -- out)) {
|
||||
}
|
||||
"""
|
||||
output = """
|
||||
case OP: {
|
||||
_Py_UOpsSymType *out;
|
||||
out = sym_new_unknown(ctx);
|
||||
if (out == NULL) goto out_of_space;
|
||||
stack_pointer[-1] = out;
|
||||
break;
|
||||
}
|
||||
|
||||
case OP2: {
|
||||
_Py_UOpsSymType *arg1;
|
||||
_Py_UOpsSymType *out;
|
||||
arg1 = stack_pointer[-1];
|
||||
stack_pointer[-1] = out;
|
||||
break;
|
||||
}
|
||||
"""
|
||||
self.run_cases_test(input, input2, output)
|
||||
|
||||
def test_missing_override_failure(self):
|
||||
input = """
|
||||
pure op(OP, (arg1 -- out)) {
|
||||
spam();
|
||||
}
|
||||
"""
|
||||
input2 = """
|
||||
pure op(OTHER, (arg1 -- out)) {
|
||||
}
|
||||
"""
|
||||
output = """
|
||||
"""
|
||||
with self.assertRaisesRegex(AssertionError, "All abstract uops"):
|
||||
self.run_cases_test(input, input2, output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -1863,6 +1863,10 @@ regen-cases:
|
|||
-o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c
|
||||
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \
|
||||
-o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c
|
||||
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \
|
||||
-o $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new \
|
||||
$(srcdir)/Python/tier2_redundancy_eliminator_bytecodes.c \
|
||||
$(srcdir)/Python/bytecodes.c
|
||||
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \
|
||||
-o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c
|
||||
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/uop_metadata_generator.py -o \
|
||||
|
@ -1874,6 +1878,7 @@ regen-cases:
|
|||
$(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new
|
||||
$(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new
|
||||
$(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new
|
||||
$(UPDATE_FILE) $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new
|
||||
$(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new
|
||||
|
||||
Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h
|
||||
|
@ -1895,7 +1900,8 @@ Python/optimizer.o: \
|
|||
|
||||
Python/optimizer_analysis.o: \
|
||||
$(srcdir)/Include/internal/pycore_opcode_metadata.h \
|
||||
$(srcdir)/Include/internal/pycore_optimizer.h
|
||||
$(srcdir)/Include/internal/pycore_optimizer.h \
|
||||
$(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h
|
||||
|
||||
Python/frozen.o: $(FROZEN_FILES_OUT)
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Implement the foundations of the Tier 2 redundancy eliminator.
|
|
@ -133,7 +133,7 @@ dummy_func(
|
|||
switch (opcode) {
|
||||
|
||||
// BEGIN BYTECODES //
|
||||
inst(NOP, (--)) {
|
||||
pure inst(NOP, (--)) {
|
||||
}
|
||||
|
||||
family(RESUME, 0) = {
|
||||
|
@ -411,12 +411,12 @@ dummy_func(
|
|||
// BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode.
|
||||
};
|
||||
|
||||
op(_GUARD_BOTH_INT, (left, right -- left: &PYLONG_TYPE, right: &PYLONG_TYPE)) {
|
||||
op(_GUARD_BOTH_INT, (left, right -- left, right)) {
|
||||
DEOPT_IF(!PyLong_CheckExact(left));
|
||||
DEOPT_IF(!PyLong_CheckExact(right));
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res: &PYLONG_TYPE)) {
|
||||
pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right);
|
||||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
|
||||
|
@ -424,7 +424,7 @@ dummy_func(
|
|||
ERROR_IF(res == NULL, error);
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_ADD_INT, (left, right -- res: &PYLONG_TYPE)) {
|
||||
pure op(_BINARY_OP_ADD_INT, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);
|
||||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
|
||||
|
@ -432,7 +432,7 @@ dummy_func(
|
|||
ERROR_IF(res == NULL, error);
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res: &PYLONG_TYPE)) {
|
||||
pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right);
|
||||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
|
||||
|
@ -447,12 +447,12 @@ dummy_func(
|
|||
macro(BINARY_OP_SUBTRACT_INT) =
|
||||
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;
|
||||
|
||||
op(_GUARD_BOTH_FLOAT, (left, right -- left: &PYFLOAT_TYPE, right: &PYFLOAT_TYPE)) {
|
||||
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
|
||||
DEOPT_IF(!PyFloat_CheckExact(left));
|
||||
DEOPT_IF(!PyFloat_CheckExact(right));
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) {
|
||||
pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
double dres =
|
||||
((PyFloatObject *)left)->ob_fval *
|
||||
|
@ -460,7 +460,7 @@ dummy_func(
|
|||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res);
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) {
|
||||
pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
double dres =
|
||||
((PyFloatObject *)left)->ob_fval +
|
||||
|
@ -468,7 +468,7 @@ dummy_func(
|
|||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res);
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) {
|
||||
pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
double dres =
|
||||
((PyFloatObject *)left)->ob_fval -
|
||||
|
@ -483,12 +483,12 @@ dummy_func(
|
|||
macro(BINARY_OP_SUBTRACT_FLOAT) =
|
||||
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;
|
||||
|
||||
op(_GUARD_BOTH_UNICODE, (left, right -- left: &PYUNICODE_TYPE, right: &PYUNICODE_TYPE)) {
|
||||
op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
|
||||
DEOPT_IF(!PyUnicode_CheckExact(left));
|
||||
DEOPT_IF(!PyUnicode_CheckExact(right));
|
||||
}
|
||||
|
||||
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res: &PYUNICODE_TYPE)) {
|
||||
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
res = PyUnicode_Concat(left, right);
|
||||
_Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc);
|
||||
|
@ -1877,7 +1877,7 @@ dummy_func(
|
|||
something was returned by a descriptor protocol). Set
|
||||
the second element of the stack to NULL, to signal
|
||||
CALL that it's not a method call.
|
||||
NULL | meth | arg1 | ... | argN
|
||||
meth | NULL | arg1 | ... | argN
|
||||
*/
|
||||
DECREF_INPUTS();
|
||||
ERROR_IF(attr == NULL, error);
|
||||
|
@ -1901,7 +1901,7 @@ dummy_func(
|
|||
LOAD_ATTR,
|
||||
};
|
||||
|
||||
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner: &(GUARD_TYPE_VERSION_TYPE + type_version))) {
|
||||
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
|
||||
PyTypeObject *tp = Py_TYPE(owner);
|
||||
assert(type_version != 0);
|
||||
DEOPT_IF(tp->tp_version_tag != type_version);
|
||||
|
@ -2082,7 +2082,7 @@ dummy_func(
|
|||
DISPATCH_INLINED(new_frame);
|
||||
}
|
||||
|
||||
op(_GUARD_DORV_VALUES, (owner -- owner: &GUARD_DORV_VALUES_TYPE)) {
|
||||
op(_GUARD_DORV_VALUES, (owner -- owner)) {
|
||||
assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
|
||||
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
|
||||
DEOPT_IF(!_PyDictOrValues_IsValues(dorv));
|
||||
|
@ -2711,7 +2711,7 @@ dummy_func(
|
|||
DEOPT_IF(r->len <= 0);
|
||||
}
|
||||
|
||||
op(_ITER_NEXT_RANGE, (iter -- iter, next: &PYLONG_TYPE)) {
|
||||
op(_ITER_NEXT_RANGE, (iter -- iter, next)) {
|
||||
_PyRangeIterObject *r = (_PyRangeIterObject *)iter;
|
||||
assert(Py_TYPE(r) == &PyRangeIter_Type);
|
||||
assert(r->len > 0);
|
||||
|
@ -2869,13 +2869,13 @@ dummy_func(
|
|||
exc_info->exc_value = Py_NewRef(new_exc);
|
||||
}
|
||||
|
||||
op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) {
|
||||
op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) {
|
||||
assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
|
||||
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
|
||||
DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv));
|
||||
}
|
||||
|
||||
op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner: &(GUARD_KEYS_VERSION_TYPE + keys_version))) {
|
||||
op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner)) {
|
||||
PyTypeObject *owner_cls = Py_TYPE(owner);
|
||||
PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
|
||||
DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version);
|
||||
|
@ -3090,7 +3090,7 @@ dummy_func(
|
|||
|
||||
macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL;
|
||||
|
||||
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable: &PYMETHOD_TYPE, null: &NULL_TYPE, unused[oparg])) {
|
||||
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
|
||||
DEOPT_IF(null != NULL);
|
||||
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type);
|
||||
}
|
||||
|
@ -3108,7 +3108,7 @@ dummy_func(
|
|||
DEOPT_IF(tstate->interp->eval_frame);
|
||||
}
|
||||
|
||||
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable: &(PYFUNCTION_TYPE_VERSION_TYPE + func_version), self_or_null, unused[oparg])) {
|
||||
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
|
||||
DEOPT_IF(!PyFunction_Check(callable));
|
||||
PyFunctionObject *func = (PyFunctionObject *)callable;
|
||||
DEOPT_IF(func->func_version != func_version);
|
||||
|
@ -4059,23 +4059,23 @@ dummy_func(
|
|||
DEOPT_IF(!current_executor->vm_data.valid);
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
|
||||
pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
|
||||
TIER_TWO_ONLY
|
||||
value = Py_NewRef(ptr);
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
|
||||
pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
|
||||
TIER_TWO_ONLY
|
||||
value = ptr;
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
|
||||
pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
|
||||
TIER_TWO_ONLY
|
||||
value = Py_NewRef(ptr);
|
||||
null = NULL;
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
|
||||
pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
|
||||
TIER_TWO_ONLY
|
||||
value = ptr;
|
||||
null = NULL;
|
||||
|
|
|
@ -1598,7 +1598,7 @@
|
|||
something was returned by a descriptor protocol). Set
|
||||
the second element of the stack to NULL, to signal
|
||||
CALL that it's not a method call.
|
||||
NULL | meth | arg1 | ... | argN
|
||||
meth | NULL | arg1 | ... | argN
|
||||
*/
|
||||
Py_DECREF(owner);
|
||||
if (attr == NULL) goto pop_1_error_tier_two;
|
||||
|
|
|
@ -3420,7 +3420,7 @@
|
|||
something was returned by a descriptor protocol). Set
|
||||
the second element of the stack to NULL, to signal
|
||||
CALL that it's not a method call.
|
||||
NULL | meth | arg1 | ... | argN
|
||||
meth | NULL | arg1 | ... | argN
|
||||
*/
|
||||
Py_DECREF(owner);
|
||||
if (attr == NULL) goto pop_1_error;
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
#include "pycore_uop_metadata.h" // Uop tables
|
||||
#undef NEED_OPCODE_METADATA
|
||||
|
||||
#define UOP_MAX_TRACE_LENGTH 512
|
||||
|
||||
#define MAX_EXECUTORS_SIZE 256
|
||||
|
||||
|
||||
|
@ -308,8 +306,6 @@ BRANCH_TO_GUARD[4][2] = {
|
|||
[POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
|
||||
};
|
||||
|
||||
#define TRACE_STACK_SIZE 5
|
||||
|
||||
#define CONFIDENCE_RANGE 1000
|
||||
#define CONFIDENCE_CUTOFF 333
|
||||
|
||||
|
@ -323,10 +319,11 @@ BRANCH_TO_GUARD[4][2] = {
|
|||
|
||||
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
|
||||
DPRINTF(2, \
|
||||
" ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \
|
||||
" ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \
|
||||
_PyUOpName(OPCODE), \
|
||||
(OPARG), \
|
||||
(uint64_t)(OPERAND)); \
|
||||
(uint64_t)(OPERAND), \
|
||||
TARGET); \
|
||||
assert(trace_length < max_length); \
|
||||
trace[trace_length].opcode = (OPCODE); \
|
||||
trace[trace_length].oparg = (OPARG); \
|
||||
|
@ -825,11 +822,13 @@ uop_optimize(
|
|||
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
|
||||
if (uop_optimize == NULL || *uop_optimize > '0') {
|
||||
err = _Py_uop_analyze_and_optimize(frame, buffer,
|
||||
UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies);
|
||||
UOP_MAX_TRACE_LENGTH,
|
||||
curr_stackentries, &dependencies);
|
||||
if (err <= 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
assert(err == 1);
|
||||
_PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies);
|
||||
if (executor == NULL) {
|
||||
return -1;
|
||||
|
|
|
@ -1,3 +1,14 @@
|
|||
/*
|
||||
* This file contains the support code for CPython's uops redundancy eliminator.
|
||||
* It also performs some simple optimizations.
|
||||
* It performs a traditional data-flow analysis[1] over the trace of uops.
|
||||
* Using the information gained, it chooses to emit, or skip certain instructions
|
||||
* if possible.
|
||||
*
|
||||
* [1] For information on data-flow analysis, please see
|
||||
* https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
|
||||
*
|
||||
* */
|
||||
#include "Python.h"
|
||||
#include "opcode.h"
|
||||
#include "pycore_dict.h"
|
||||
|
@ -9,10 +20,355 @@
|
|||
#include "pycore_dict.h"
|
||||
#include "pycore_long.h"
|
||||
#include "cpython/optimizer.h"
|
||||
#include "pycore_optimizer.h"
|
||||
#include "pycore_object.h"
|
||||
#include "pycore_dict.h"
|
||||
#include "pycore_function.h"
|
||||
#include "pycore_uop_metadata.h"
|
||||
#include "pycore_uop_ids.h"
|
||||
#include "pycore_range.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "pycore_optimizer.h"
|
||||
|
||||
// Holds locals, stack, locals, stack ... co_consts (in that order)
|
||||
#define MAX_ABSTRACT_INTERP_SIZE 4096
|
||||
|
||||
#define OVERALLOCATE_FACTOR 5
|
||||
|
||||
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * OVERALLOCATE_FACTOR)
|
||||
|
||||
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
|
||||
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
|
||||
static inline int get_lltrace(void) {
|
||||
char *uop_debug = Py_GETENV(DEBUG_ENV);
|
||||
int lltrace = 0;
|
||||
if (uop_debug != NULL && *uop_debug >= '0') {
|
||||
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
|
||||
}
|
||||
return lltrace;
|
||||
}
|
||||
#define DPRINTF(level, ...) \
|
||||
if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
|
||||
#else
|
||||
#define DPRINTF(level, ...)
|
||||
#endif
|
||||
|
||||
|
||||
// Flags for below.
|
||||
// Each value is parenthesized so that it expands as a single operand no
// matter what surrounds it.  Without the parentheses, `~NOT_NULL` would
// expand to `~1 << 3` and `IS_NULL + 1` to `1 << 2 + 1`.
#define KNOWN      (1 << 0)
#define TRUE_CONST (1 << 1)
#define IS_NULL    (1 << 2)
#define NOT_NULL   (1 << 3)
|
||||
|
||||
typedef struct {
|
||||
int flags;
|
||||
PyTypeObject *typ;
|
||||
// constant propagated value (might be NULL)
|
||||
PyObject *const_val;
|
||||
} _Py_UOpsSymType;
|
||||
|
||||
|
||||
typedef struct _Py_UOpsAbstractFrame {
|
||||
// Max stacklen
|
||||
int stack_len;
|
||||
int locals_len;
|
||||
|
||||
_Py_UOpsSymType **stack_pointer;
|
||||
_Py_UOpsSymType **stack;
|
||||
_Py_UOpsSymType **locals;
|
||||
} _Py_UOpsAbstractFrame;
|
||||
|
||||
|
||||
typedef struct ty_arena {
|
||||
int ty_curr_number;
|
||||
int ty_max_number;
|
||||
_Py_UOpsSymType arena[TY_ARENA_SIZE];
|
||||
} ty_arena;
|
||||
|
||||
// Tier 2 types meta interpreter
|
||||
typedef struct _Py_UOpsAbstractInterpContext {
|
||||
PyObject_HEAD
|
||||
// The current "executing" frame.
|
||||
_Py_UOpsAbstractFrame *frame;
|
||||
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
|
||||
int curr_frame_depth;
|
||||
|
||||
// Arena for the symbolic types.
|
||||
ty_arena t_arena;
|
||||
|
||||
_Py_UOpsSymType **n_consumed;
|
||||
_Py_UOpsSymType **limit;
|
||||
_Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
|
||||
} _Py_UOpsAbstractInterpContext;
|
||||
|
||||
static inline _Py_UOpsSymType* sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx);
|
||||
|
||||
// 0 on success, -1 on error.
|
||||
static _Py_UOpsAbstractFrame *
|
||||
ctx_frame_new(
|
||||
_Py_UOpsAbstractInterpContext *ctx,
|
||||
PyCodeObject *co,
|
||||
_Py_UOpsSymType **localsplus_start,
|
||||
int n_locals_already_filled,
|
||||
int curr_stackentries
|
||||
)
|
||||
{
|
||||
assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
|
||||
_Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
|
||||
|
||||
frame->stack_len = co->co_stacksize;
|
||||
frame->locals_len = co->co_nlocalsplus;
|
||||
|
||||
frame->locals = localsplus_start;
|
||||
frame->stack = frame->locals + co->co_nlocalsplus;
|
||||
frame->stack_pointer = frame->stack + curr_stackentries;
|
||||
ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
|
||||
if (ctx->n_consumed >= ctx->limit) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
// Initialize with the initial state of all local variables
|
||||
for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
|
||||
_Py_UOpsSymType *local = sym_new_unknown(ctx);
|
||||
if (local == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
frame->locals[i] = local;
|
||||
}
|
||||
|
||||
|
||||
// Initialize the stack as well
|
||||
for (int i = 0; i < curr_stackentries; i++) {
|
||||
_Py_UOpsSymType *stackvar = sym_new_unknown(ctx);
|
||||
if (stackvar == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
frame->stack[i] = stackvar;
|
||||
}
|
||||
|
||||
return frame;
|
||||
}
|
||||
|
||||
static void
|
||||
abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx)
|
||||
{
|
||||
if (ctx == NULL) {
|
||||
return;
|
||||
}
|
||||
ctx->curr_frame_depth = 0;
|
||||
int tys = ctx->t_arena.ty_curr_number;
|
||||
for (int i = 0; i < tys; i++) {
|
||||
Py_CLEAR(ctx->t_arena.arena[i].const_val);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
abstractcontext_init(
|
||||
_Py_UOpsAbstractInterpContext *ctx,
|
||||
PyCodeObject *co,
|
||||
int curr_stacklen,
|
||||
int ir_entries
|
||||
)
|
||||
{
|
||||
ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
|
||||
ctx->n_consumed = ctx->locals_and_stack;
|
||||
#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
|
||||
for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
|
||||
ctx->locals_and_stack[i] = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Setup the arena for sym expressions.
|
||||
ctx->t_arena.ty_curr_number = 0;
|
||||
ctx->t_arena.ty_max_number = TY_ARENA_SIZE;
|
||||
|
||||
// Frame setup
|
||||
ctx->curr_frame_depth = 0;
|
||||
_Py_UOpsAbstractFrame *frame = ctx_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
|
||||
if (frame == NULL) {
|
||||
return -1;
|
||||
}
|
||||
ctx->curr_frame_depth++;
|
||||
ctx->frame = frame;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
ctx_frame_pop(
|
||||
_Py_UOpsAbstractInterpContext *ctx
|
||||
)
|
||||
{
|
||||
_Py_UOpsAbstractFrame *frame = ctx->frame;
|
||||
|
||||
ctx->n_consumed = frame->locals;
|
||||
ctx->curr_frame_depth--;
|
||||
assert(ctx->curr_frame_depth >= 1);
|
||||
ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Takes a borrowed reference to const_val, turns that into a strong reference.
|
||||
static _Py_UOpsSymType*
|
||||
sym_new(_Py_UOpsAbstractInterpContext *ctx,
|
||||
PyObject *const_val)
|
||||
{
|
||||
_Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number];
|
||||
if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) {
|
||||
OPT_STAT_INC(optimizer_failure_reason_no_memory);
|
||||
DPRINTF(1, "out of space for symbolic expression type\n");
|
||||
return NULL;
|
||||
}
|
||||
ctx->t_arena.ty_curr_number++;
|
||||
self->const_val = NULL;
|
||||
self->typ = NULL;
|
||||
self->flags = 0;
|
||||
|
||||
if (const_val != NULL) {
|
||||
self->const_val = Py_NewRef(const_val);
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
static inline void
|
||||
sym_set_flag(_Py_UOpsSymType *sym, int flag)
|
||||
{
|
||||
sym->flags |= flag;
|
||||
}
|
||||
|
||||
static inline void
|
||||
sym_clear_flag(_Py_UOpsSymType *sym, int flag)
|
||||
{
|
||||
sym->flags &= (~flag);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
sym_has_flag(_Py_UOpsSymType *sym, int flag)
|
||||
{
|
||||
return (sym->flags & flag) != 0;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
sym_is_known(_Py_UOpsSymType *sym)
|
||||
{
|
||||
return sym_has_flag(sym, KNOWN);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
sym_is_not_null(_Py_UOpsSymType *sym)
|
||||
{
|
||||
return (sym->flags & (IS_NULL | NOT_NULL)) == NOT_NULL;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
sym_is_null(_Py_UOpsSymType *sym)
|
||||
{
|
||||
return (sym->flags & (IS_NULL | NOT_NULL)) == IS_NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
sym_set_type(_Py_UOpsSymType *sym, PyTypeObject *tp)
|
||||
{
|
||||
assert(PyType_Check(tp));
|
||||
sym->typ = tp;
|
||||
sym_set_flag(sym, KNOWN);
|
||||
sym_set_flag(sym, NOT_NULL);
|
||||
}
|
||||
|
||||
static inline void
|
||||
sym_set_null(_Py_UOpsSymType *sym)
|
||||
{
|
||||
sym_set_flag(sym, IS_NULL);
|
||||
sym_set_flag(sym, KNOWN);
|
||||
}
|
||||
|
||||
|
||||
static inline _Py_UOpsSymType*
|
||||
sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx)
|
||||
{
|
||||
return sym_new(ctx,NULL);
|
||||
}
|
||||
|
||||
static inline _Py_UOpsSymType*
|
||||
sym_new_known_notnull(_Py_UOpsAbstractInterpContext *ctx)
|
||||
{
|
||||
_Py_UOpsSymType *res = sym_new_unknown(ctx);
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
sym_set_flag(res, NOT_NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline _Py_UOpsSymType*
|
||||
sym_new_known_type(_Py_UOpsAbstractInterpContext *ctx,
|
||||
PyTypeObject *typ)
|
||||
{
|
||||
_Py_UOpsSymType *res = sym_new(ctx,NULL);
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
sym_set_type(res, typ);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Takes a borrowed reference to const_val.
|
||||
static inline _Py_UOpsSymType*
|
||||
sym_new_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val)
|
||||
{
|
||||
assert(const_val != NULL);
|
||||
_Py_UOpsSymType *temp = sym_new(
|
||||
ctx,
|
||||
const_val
|
||||
);
|
||||
if (temp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
sym_set_type(temp, Py_TYPE(const_val));
|
||||
sym_set_flag(temp, TRUE_CONST);
|
||||
sym_set_flag(temp, KNOWN);
|
||||
sym_set_flag(temp, NOT_NULL);
|
||||
return temp;
|
||||
}
|
||||
|
||||
static _Py_UOpsSymType*
|
||||
sym_new_null(_Py_UOpsAbstractInterpContext *ctx)
|
||||
{
|
||||
_Py_UOpsSymType *null_sym = sym_new_unknown(ctx);
|
||||
if (null_sym == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
sym_set_null(null_sym);
|
||||
return null_sym;
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
sym_matches_type(_Py_UOpsSymType *sym, PyTypeObject *typ)
|
||||
{
|
||||
assert(typ == NULL || PyType_Check(typ));
|
||||
if (!sym_has_flag(sym, KNOWN)) {
|
||||
return false;
|
||||
}
|
||||
return sym->typ == typ;
|
||||
}
|
||||
|
||||
|
||||
static inline bool
|
||||
op_is_end(uint32_t opcode)
|
||||
{
|
||||
return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP;
|
||||
}
|
||||
|
||||
static int
|
||||
get_mutations(PyObject* dict) {
|
||||
|
@ -199,14 +555,138 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
|
|||
builtins = func->func_builtins;
|
||||
break;
|
||||
}
|
||||
case _JUMP_TO_TOP:
|
||||
case _EXIT_TRACE:
|
||||
default:
|
||||
if (op_is_end(opcode)) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Number of entries on the current abstract frame's stack.  Relies on a
 * local `stack_pointer` and on `ctx` being in scope. */
#define STACK_LEVEL()     ((int)(stack_pointer - ctx->frame->stack))

/* The symbol held in local slot `idx` of the current frame (an lvalue). */
#define GETLOCAL(idx)          (ctx->frame->locals[idx])

/* Overwrite the instruction INST points to with opcode OP, oparg ARG and
 * operand OPERAND.  Wrapped in do/while(0) and with INST parenthesized so
 * the macro behaves as a single statement and accepts any pointer
 * expression; it must be followed by a semicolon. */
#define REPLACE_OP(INST, OP, ARG, OPERAND)    \
    do {                                      \
        (INST)->opcode = OP;                  \
        (INST)->oparg = ARG;                  \
        (INST)->operand = OPERAND;            \
    } while (0)

/* Shared body of the _LOAD_ATTR_* ops: `attr` becomes a fresh not-NULL
 * symbol and `null` a fresh NULL symbol; jumps to `error` when the arena is
 * exhausted.  The trailing semicolon is part of the macro because it is
 * expanded without one where it is used. */
#define _LOAD_ATTR_NOT_NULL \
    do { \
        attr = sym_new_known_notnull(ctx); \
        if (attr == NULL) { \
            goto error; \
        } \
        null = sym_new_null(ctx); \
        if (null == NULL) { \
            goto error; \
        } \
    } while (0);
|
||||
|
||||
|
||||
/* 1 for success, 0 for not ready, cannot error at the moment. */
|
||||
static int
|
||||
uop_redundancy_eliminator(
|
||||
PyCodeObject *co,
|
||||
_PyUOpInstruction *trace,
|
||||
int trace_len,
|
||||
int curr_stacklen
|
||||
)
|
||||
{
|
||||
|
||||
_Py_UOpsAbstractInterpContext context;
|
||||
_Py_UOpsAbstractInterpContext *ctx = &context;
|
||||
|
||||
if (abstractcontext_init(
|
||||
ctx,
|
||||
co, curr_stacklen,
|
||||
trace_len) < 0) {
|
||||
goto out_of_space;
|
||||
}
|
||||
|
||||
for (_PyUOpInstruction *this_instr = trace;
|
||||
this_instr < trace + trace_len && !op_is_end(this_instr->opcode);
|
||||
this_instr++) {
|
||||
|
||||
int oparg = this_instr->oparg;
|
||||
uint32_t opcode = this_instr->opcode;
|
||||
|
||||
_Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer;
|
||||
|
||||
DPRINTF(3, "Abstract interpreting %s:%d ",
|
||||
_PyOpcode_uop_name[opcode],
|
||||
oparg);
|
||||
switch (opcode) {
|
||||
#include "tier2_redundancy_eliminator_cases.c.h"
|
||||
|
||||
default:
|
||||
DPRINTF(1, "Unknown opcode in abstract interpreter\n");
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
assert(ctx->frame != NULL);
|
||||
DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
|
||||
ctx->frame->stack_pointer = stack_pointer;
|
||||
assert(STACK_LEVEL() >= 0);
|
||||
}
|
||||
|
||||
abstractcontext_fini(ctx);
|
||||
return 1;
|
||||
|
||||
out_of_space:
|
||||
DPRINTF(1, "Out of space in abstract interpreter\n");
|
||||
abstractcontext_fini(ctx);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
DPRINTF(1, "Encountered error in abstract interpreter\n");
|
||||
abstractcontext_fini(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
|
||||
{
|
||||
int last_set_ip = -1;
|
||||
bool maybe_invalid = false;
|
||||
for (int pc = 0; pc < buffer_size; pc++) {
|
||||
int opcode = buffer[pc].opcode;
|
||||
if (opcode == _SET_IP) {
|
||||
buffer[pc].opcode = NOP;
|
||||
last_set_ip = pc;
|
||||
}
|
||||
else if (opcode == _CHECK_VALIDITY) {
|
||||
if (maybe_invalid) {
|
||||
maybe_invalid = false;
|
||||
}
|
||||
else {
|
||||
buffer[pc].opcode = NOP;
|
||||
}
|
||||
}
|
||||
else if (op_is_end(opcode)) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
|
||||
maybe_invalid = true;
|
||||
if (last_set_ip >= 0) {
|
||||
buffer[last_set_ip].opcode = _SET_IP;
|
||||
}
|
||||
}
|
||||
if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
|
||||
if (last_set_ip >= 0) {
|
||||
buffer[last_set_ip].opcode = _SET_IP;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
|
||||
{
|
||||
|
@ -250,44 +730,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
|
||||
{
|
||||
int last_set_ip = -1;
|
||||
bool maybe_invalid = false;
|
||||
for (int pc = 0; pc < buffer_size; pc++) {
|
||||
int opcode = buffer[pc].opcode;
|
||||
if (opcode == _SET_IP) {
|
||||
buffer[pc].opcode = NOP;
|
||||
last_set_ip = pc;
|
||||
}
|
||||
else if (opcode == _CHECK_VALIDITY) {
|
||||
if (maybe_invalid) {
|
||||
maybe_invalid = false;
|
||||
}
|
||||
else {
|
||||
buffer[pc].opcode = NOP;
|
||||
}
|
||||
}
|
||||
else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
|
||||
maybe_invalid = true;
|
||||
if (last_set_ip >= 0) {
|
||||
buffer[last_set_ip].opcode = _SET_IP;
|
||||
}
|
||||
}
|
||||
if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
|
||||
if (last_set_ip >= 0) {
|
||||
buffer[last_set_ip].opcode = _SET_IP;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 0 - failure, no error raised, just fall back to Tier 1
|
||||
// -1 - failure, and raise error
|
||||
// 1 - optimizer success
|
||||
int
|
||||
_Py_uop_analyze_and_optimize(
|
||||
_PyInterpreterFrame *frame,
|
||||
|
@ -297,11 +742,33 @@ _Py_uop_analyze_and_optimize(
|
|||
_PyBloomFilter *dependencies
|
||||
)
|
||||
{
|
||||
OPT_STAT_INC(optimizer_attempts);
|
||||
|
||||
int err = remove_globals(frame, buffer, buffer_size, dependencies);
|
||||
if (err <= 0) {
|
||||
return err;
|
||||
if (err == 0) {
|
||||
goto not_ready;
|
||||
}
|
||||
if (err < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
peephole_opt(frame, buffer, buffer_size);
|
||||
remove_unneeded_uops(buffer, buffer_size);
|
||||
return 1;
|
||||
|
||||
err = uop_redundancy_eliminator(
|
||||
(PyCodeObject *)frame->f_executable, buffer,
|
||||
buffer_size, curr_stacklen);
|
||||
|
||||
if (err == 0) {
|
||||
goto not_ready;
|
||||
}
|
||||
assert(err == 1);
|
||||
|
||||
remove_unneeded_uops(buffer, buffer_size);
|
||||
|
||||
OPT_STAT_INC(optimizer_successes);
|
||||
return 1;
|
||||
not_ready:
|
||||
return 0;
|
||||
error:
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -240,6 +240,11 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
|
|||
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
|
||||
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
|
||||
|
||||
fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
|
||||
fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
|
||||
fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
|
||||
stats->optimizer_failure_reason_no_memory);
|
||||
|
||||
const char* const* names;
|
||||
for (int i = 0; i < 512; i++) {
|
||||
if (i < 256) {
|
||||
|
|
|
@ -0,0 +1,272 @@
|
|||
#include "Python.h"
|
||||
#include "pycore_uops.h"
|
||||
#include "pycore_uop_ids.h"
|
||||
|
||||
#define op(name, ...) /* NAME is ignored */
|
||||
|
||||
typedef struct _Py_UOpsSymType _Py_UOpsSymType;
|
||||
typedef struct _Py_UOpsAbstractInterpContext _Py_UOpsAbstractInterpContext;
|
||||
typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
|
||||
|
||||
static int
|
||||
dummy_func(void) {
|
||||
|
||||
PyCodeObject *code;
|
||||
int oparg;
|
||||
_Py_UOpsSymType *flag;
|
||||
_Py_UOpsSymType *left;
|
||||
_Py_UOpsSymType *right;
|
||||
_Py_UOpsSymType *value;
|
||||
_Py_UOpsSymType *res;
|
||||
_Py_UOpsSymType *iter;
|
||||
_Py_UOpsSymType *top;
|
||||
_Py_UOpsSymType *bottom;
|
||||
_Py_UOpsAbstractFrame *frame;
|
||||
_Py_UOpsAbstractInterpContext *ctx;
|
||||
_PyUOpInstruction *this_instr;
|
||||
_PyBloomFilter *dependencies;
|
||||
int modified;
|
||||
|
||||
// BEGIN BYTECODES //
|
||||
|
||||
op(_LOAD_FAST_CHECK, (-- value)) {
|
||||
value = GETLOCAL(oparg);
|
||||
// We guarantee this will error - just bail and don't optimize it.
|
||||
if (sym_is_null(value)) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
op(_LOAD_FAST, (-- value)) {
|
||||
value = GETLOCAL(oparg);
|
||||
}
|
||||
|
||||
op(_LOAD_FAST_AND_CLEAR, (-- value)) {
|
||||
value = GETLOCAL(oparg);
|
||||
_Py_UOpsSymType *temp = sym_new_null(ctx);
|
||||
if (temp == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
GETLOCAL(oparg) = temp;
|
||||
}
|
||||
|
||||
op(_STORE_FAST, (value --)) {
|
||||
GETLOCAL(oparg) = value;
|
||||
}
|
||||
|
||||
op(_PUSH_NULL, (-- res)) {
|
||||
res = sym_new_null(ctx);
|
||||
if (res == NULL) {
|
||||
goto out_of_space;
|
||||
};
|
||||
}
|
||||
|
||||
op(_GUARD_BOTH_INT, (left, right -- left, right)) {
|
||||
if (sym_matches_type(left, &PyLong_Type) &&
|
||||
sym_matches_type(right, &PyLong_Type)) {
|
||||
REPLACE_OP(this_instr, _NOP, 0, 0);
|
||||
}
|
||||
sym_set_type(left, &PyLong_Type);
|
||||
sym_set_type(right, &PyLong_Type);
|
||||
}
|
||||
|
||||
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
|
||||
if (sym_matches_type(left, &PyFloat_Type) &&
|
||||
sym_matches_type(right, &PyFloat_Type)) {
|
||||
REPLACE_OP(this_instr, _NOP, 0 ,0);
|
||||
}
|
||||
sym_set_type(left, &PyFloat_Type);
|
||||
sym_set_type(right, &PyFloat_Type);
|
||||
}
|
||||
|
||||
|
||||
op(_BINARY_OP_ADD_INT, (left, right -- res)) {
|
||||
// TODO constant propagation
|
||||
(void)left;
|
||||
(void)right;
|
||||
res = sym_new_known_type(ctx, &PyLong_Type);
|
||||
if (res == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
op(_LOAD_CONST, (-- value)) {
|
||||
// There should be no LOAD_CONST. It should be all
|
||||
// replaced by peephole_opt.
|
||||
Py_UNREACHABLE();
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
|
||||
value = sym_new_const(ctx, ptr);
|
||||
if (value == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
|
||||
value = sym_new_const(ctx, ptr);
|
||||
if (value == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
|
||||
value = sym_new_const(ctx, ptr);
|
||||
if (value == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
null = sym_new_null(ctx);
|
||||
if (null == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
|
||||
value = sym_new_const(ctx, ptr);
|
||||
if (value == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
null = sym_new_null(ctx);
|
||||
if (null == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
|
||||
assert(oparg > 0);
|
||||
top = bottom;
|
||||
}
|
||||
|
||||
op(_SWAP, (bottom, unused[oparg-2], top --
|
||||
top, unused[oparg-2], bottom)) {
|
||||
}
|
||||
|
||||
op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) {
|
||||
_LOAD_ATTR_NOT_NULL
|
||||
(void)index;
|
||||
(void)owner;
|
||||
}
|
||||
|
||||
op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) {
|
||||
_LOAD_ATTR_NOT_NULL
|
||||
(void)index;
|
||||
(void)owner;
|
||||
}
|
||||
|
||||
op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, null if (oparg & 1))) {
|
||||
_LOAD_ATTR_NOT_NULL
|
||||
(void)hint;
|
||||
(void)owner;
|
||||
}
|
||||
|
||||
op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) {
|
||||
_LOAD_ATTR_NOT_NULL
|
||||
(void)index;
|
||||
(void)owner;
|
||||
}
|
||||
|
||||
op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) {
|
||||
_LOAD_ATTR_NOT_NULL
|
||||
(void)descr;
|
||||
(void)owner;
|
||||
}
|
||||
|
||||
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
|
||||
sym_set_type(callable, &PyFunction_Type);
|
||||
(void)self_or_null;
|
||||
(void)func_version;
|
||||
}
|
||||
|
||||
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
|
||||
sym_set_null(null);
|
||||
sym_set_type(callable, &PyMethod_Type);
|
||||
}
|
||||
|
||||
op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
|
||||
int argcount = oparg;
|
||||
|
||||
(void)callable;
|
||||
|
||||
PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand;
|
||||
if (func == NULL) {
|
||||
goto error;
|
||||
}
|
||||
PyCodeObject *co = (PyCodeObject *)func->func_code;
|
||||
|
||||
assert(self_or_null != NULL);
|
||||
assert(args != NULL);
|
||||
if (sym_is_not_null(self_or_null)) {
|
||||
// Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM
|
||||
args--;
|
||||
argcount++;
|
||||
}
|
||||
|
||||
_Py_UOpsSymType **localsplus_start = ctx->n_consumed;
|
||||
int n_locals_already_filled = 0;
|
||||
// Can determine statically, so we interleave the new locals
|
||||
// and make the current stack the new locals.
|
||||
// This also sets up for true call inlining.
|
||||
if (sym_is_known(self_or_null)) {
|
||||
localsplus_start = args;
|
||||
n_locals_already_filled = argcount;
|
||||
}
|
||||
new_frame = ctx_frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
|
||||
if (new_frame == NULL){
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
|
||||
op(_POP_FRAME, (retval -- res)) {
|
||||
SYNC_SP();
|
||||
ctx->frame->stack_pointer = stack_pointer;
|
||||
ctx_frame_pop(ctx);
|
||||
stack_pointer = ctx->frame->stack_pointer;
|
||||
res = retval;
|
||||
}
|
||||
|
||||
op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) {
|
||||
SYNC_SP();
|
||||
ctx->frame->stack_pointer = stack_pointer;
|
||||
ctx->frame = new_frame;
|
||||
ctx->curr_frame_depth++;
|
||||
stack_pointer = new_frame->stack_pointer;
|
||||
}
|
||||
|
||||
op(_UNPACK_SEQUENCE, (seq -- values[oparg])) {
|
||||
/* This has to be done manually */
|
||||
(void)seq;
|
||||
for (int i = 0; i < oparg; i++) {
|
||||
values[i] = sym_new_unknown(ctx);
|
||||
if (values[i] == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) {
|
||||
/* This has to be done manually */
|
||||
(void)seq;
|
||||
int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1;
|
||||
for (int i = 0; i < totalargs; i++) {
|
||||
values[i] = sym_new_unknown(ctx);
|
||||
if (values[i] == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
op(_ITER_NEXT_RANGE, (iter -- iter, next)) {
|
||||
next = sym_new_known_type(ctx, &PyLong_Type);
|
||||
if (next == NULL) {
|
||||
goto out_of_space;
|
||||
}
|
||||
(void)iter;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// END BYTECODES //
|
||||
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -83,9 +83,11 @@ Python/deepfreeze/*.c
|
|||
Python/frozen_modules/*.h
|
||||
Python/generated_cases.c.h
|
||||
Python/executor_cases.c.h
|
||||
Python/tier2_redundancy_eliminator_cases.c.h
|
||||
|
||||
# not actually source
|
||||
Python/bytecodes.c
|
||||
Python/tier2_redundancy_eliminator_bytecodes.c
|
||||
|
||||
# mimalloc
|
||||
Objects/mimalloc/*.c
|
||||
|
|
|
@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error -
|
|||
## other
|
||||
Modules/_io/_iomodule.c - _PyIO_Module -
|
||||
Modules/_sqlite/module.c - _sqlite3module -
|
||||
Python/optimizer_analysis.c - _Py_PartitionRootNode_Type -
|
||||
Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type -
|
||||
Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type -
|
||||
Modules/clinic/md5module.c.h _md5_md5 _keywords -
|
||||
|
|
Can't render this file because it has a wrong number of fields in line 4.
|
|
@ -13,6 +13,9 @@ What's currently here:
|
|||
- `parser.py` helper for interactions with `parsing.py`
|
||||
- `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and
|
||||
write `Python/generated_cases.c.h` (and several other files)
|
||||
- `tier2_abstract_generator.py`: reads `Python/bytecodes.c` and
|
||||
`Python/tier2_redundancy_eliminator_bytecodes.c` and writes
|
||||
`Python/tier2_redundancy_eliminator_cases.c.h`
|
||||
- `stack.py`: code to handle generalized stack effects
|
||||
- `cwriter.py`: code which understands tokens and how to format C code;
|
||||
main class: `CWriter`
|
||||
|
|
|
@ -24,7 +24,6 @@ class Properties:
|
|||
|
||||
pure: bool
|
||||
passthrough: bool
|
||||
guard: bool
|
||||
|
||||
def dump(self, indent: str) -> None:
|
||||
print(indent, end="")
|
||||
|
@ -51,7 +50,6 @@ class Properties:
|
|||
has_free=any(p.has_free for p in properties),
|
||||
pure=all(p.pure for p in properties),
|
||||
passthrough=all(p.passthrough for p in properties),
|
||||
guard=all(p.guard for p in properties),
|
||||
)
|
||||
|
||||
|
||||
|
@ -73,7 +71,6 @@ SKIP_PROPERTIES = Properties(
|
|||
has_free=False,
|
||||
pure=False,
|
||||
passthrough=False,
|
||||
guard=False,
|
||||
)
|
||||
|
||||
|
||||
|
@ -273,7 +270,7 @@ def override_error(
|
|||
|
||||
def convert_stack_item(item: parser.StackEffect) -> StackItem:
|
||||
return StackItem(
|
||||
item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
|
||||
item.name, item.type, item.cond, (item.size or "1")
|
||||
)
|
||||
|
||||
|
||||
|
@ -473,7 +470,6 @@ def compute_properties(op: parser.InstDef) -> Properties:
|
|||
has_free=has_free,
|
||||
pure="pure" in op.annotations,
|
||||
passthrough=passthrough,
|
||||
guard=passthrough and deopts,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -109,10 +109,7 @@ and a piece of C code describing its semantics::
|
|||
NAME [":" type] [ "if" "(" C-expression ")" ]
|
||||
|
||||
type:
|
||||
NAME ["*"] | type_prop
|
||||
|
||||
type_prop:
|
||||
"&" "(" NAME ["+" NAME] ")"
|
||||
NAME ["*"]
|
||||
|
||||
stream:
|
||||
NAME "/" size
|
||||
|
@ -142,26 +139,7 @@ The following definitions may occur:
|
|||
The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
|
||||
The objects before the "--" are the objects on top of the stack at the start of
|
||||
the instruction. Those after the "--" are the objects on top of the stack at the
|
||||
end of the instruction. When prefixed by a `&`, the `type` production rule follows the
|
||||
`type_prop` production rule. This indicates the type of the value is of that specific type
|
||||
after the operation. In this case, the type may also contain 64-bit refinement information
|
||||
that is fetched from a previously defined operand in the instruction header, such as
|
||||
a type version tag. This follows the format `type + refinement`. The list of possible types
|
||||
and their refinements are below. They obey the following predicates:
|
||||
|
||||
|
||||
* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
|
||||
* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
|
||||
* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PYUNICODE_TYPE`
|
||||
* `NULL_TYPE`: `val == NULL`
|
||||
* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxillary`
|
||||
* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
|
||||
* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
|
||||
`_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
|
||||
* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxillary`
|
||||
* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
|
||||
* `PYFUNCTION_TYPE_VERSION_TYPE`:
|
||||
`PyFunction_Check(callable) && func->func_version == auxillary && code->co_argcount == oparg + (self_or_null != NULL)`
|
||||
end of the instruction.
|
||||
|
||||
|
||||
An `inst` without `stack_effect` is a transitional form to allow the original C code
|
||||
|
|
|
@ -75,11 +75,6 @@ class StackEffect(Node):
|
|||
size: str = "" # Optional `[size]`
|
||||
# Note: size cannot be combined with type or cond
|
||||
|
||||
# Optional `(type, refinement)`
|
||||
type_prop: None | tuple[str, None | str] = field(
|
||||
default_factory=lambda: None, init=True, compare=False, hash=False
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
items = [self.name, self.type, self.cond, self.size]
|
||||
while items and items[-1] == "":
|
||||
|
@ -260,25 +255,14 @@ class Parser(PLexer):
|
|||
|
||||
@contextual
|
||||
def stack_effect(self) -> StackEffect | None:
|
||||
# IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
|
||||
# IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
|
||||
# | IDENTIFIER '[' expression ']'
|
||||
if tkn := self.expect(lx.IDENTIFIER):
|
||||
type_text = ""
|
||||
type_prop = None
|
||||
if self.expect(lx.COLON):
|
||||
if i := self.expect(lx.IDENTIFIER):
|
||||
type_text = i.text.strip()
|
||||
type_text = self.require(lx.IDENTIFIER).text.strip()
|
||||
if self.expect(lx.TIMES):
|
||||
type_text += " *"
|
||||
if self.expect(lx.AND):
|
||||
consumed_bracket = self.expect(lx.LPAREN) is not None
|
||||
type_prop_text = self.require(lx.IDENTIFIER).text.strip()
|
||||
refinement = None
|
||||
if self.expect(lx.PLUS):
|
||||
refinement = self.require(lx.IDENTIFIER).text.strip()
|
||||
type_prop = (type_prop_text, refinement)
|
||||
if consumed_bracket:
|
||||
self.require(lx.RPAREN)
|
||||
cond_text = ""
|
||||
if self.expect(lx.IF):
|
||||
self.require(lx.LPAREN)
|
||||
|
@ -295,7 +279,7 @@ class Parser(PLexer):
|
|||
self.require(lx.RBRACKET)
|
||||
type_text = "PyObject **"
|
||||
size_text = size.text.strip()
|
||||
return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
|
||||
return StackEffect(tkn.text, type_text, cond_text, size_text)
|
||||
return None
|
||||
|
||||
@contextual
|
||||
|
|
|
@ -168,11 +168,11 @@ class Stack:
|
|||
self.top_offset.push(var)
|
||||
return ""
|
||||
|
||||
def flush(self, out: CWriter) -> None:
|
||||
def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None:
|
||||
out.start_line()
|
||||
for var in self.variables:
|
||||
if not var.peek:
|
||||
cast = "(PyObject *)" if var.type else ""
|
||||
cast = f"({cast_type})" if var.type else ""
|
||||
if var.name not in UNUSED and not var.is_array():
|
||||
if var.condition:
|
||||
out.emit(f"if ({var.condition}) ")
|
||||
|
|
|
@ -0,0 +1,235 @@
|
|||
"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter.
|
||||
Reads the instruction definitions from bytecodes.c and tier2_redundancy_eliminator_bytecodes.c.
|
||||
Writes the cases to tier2_redundancy_eliminator_cases.c.h, which is #included in Python/optimizer_analysis.c.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
from analyzer import (
|
||||
Analysis,
|
||||
Instruction,
|
||||
Uop,
|
||||
Part,
|
||||
analyze_files,
|
||||
Skip,
|
||||
StackItem,
|
||||
analysis_error,
|
||||
)
|
||||
from generators_common import (
|
||||
DEFAULT_INPUT,
|
||||
ROOT,
|
||||
write_header,
|
||||
emit_tokens,
|
||||
emit_to,
|
||||
replace_sync_sp,
|
||||
)
|
||||
from cwriter import CWriter
|
||||
from typing import TextIO, Iterator
|
||||
from lexer import Token
|
||||
from stack import StackOffset, Stack, SizeMismatch, UNUSED
|
||||
|
||||
DEFAULT_OUTPUT = ROOT / "Python/tier2_redundancy_eliminator_cases.c.h"
|
||||
DEFAULT_ABSTRACT_INPUT = ROOT / "Python/tier2_redundancy_eliminator_bytecodes.c"
|
||||
|
||||
|
||||
def validate_uop(override: Uop, uop: Uop) -> None:
    """Validate that *override* is an acceptable replacement for *uop*.

    Not implemented yet: every override is currently accepted.
    """
    # To do
    return None
|
||||
|
||||
|
||||
def type_name(var: StackItem) -> str:
    """Return the C type text used to declare *var* in the generated cases.

    Arrays are declared as pointers to symbol pointers, items with an
    explicit type keep it, and everything else is a symbol pointer.
    """
    if var.is_array():
        return "_Py_UOpsSymType **"
    return var.type if var.type else "_Py_UOpsSymType *"
|
||||
|
||||
|
||||
def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
    """Emit one C declaration per distinct stack variable of *uop*.

    Inputs are visited in reverse order and skipped entirely when
    *skip_inputs* is true; outputs that merely peek are never declared.
    Conditional variables are initialized to NULL.  The name "unused" is
    never declared.
    """
    declared = {"unused"}

    def declare(var: StackItem) -> None:
        # Each name is declared at most once per uop.
        if var.name in declared:
            return
        declared.add(var.name)
        if var.condition:
            out.emit(f"{type_name(var)}{var.name} = NULL;\n")
        else:
            out.emit(f"{type_name(var)}{var.name};\n")

    if not skip_inputs:
        for var in reversed(uop.stack.inputs):
            declare(var)
    for var in uop.stack.outputs:
        if not var.peek:
            declare(var)
|
||||
|
||||
|
||||
def decref_inputs(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction | None,
) -> None:
    """Replacement handler for ``DECREF_INPUTS``: emit nothing in its place.

    The three tokens following *tkn* are consumed from *tkn_iter*
    (presumably the ``(``, ``)`` and ``;`` of the macro call -- confirm
    against the tokenizer) and an empty string is emitted at *tkn*.
    """
    for _ in range(3):
        next(tkn_iter)
    out.emit_at("", tkn)
|
||||
|
||||
|
||||
def emit_default(out: CWriter, uop: Uop) -> None:
    """Emit the default abstract body for *uop*.

    Every output that is neither named "unused" nor a peek is assigned a
    fresh symbol: arrays get one unknown symbol per element, an output named
    "null" gets a NULL symbol and anything else an unknown symbol.  Each
    allocation is followed by a jump to ``out_of_space`` on failure.
    """
    for var in uop.stack.outputs:
        if var.name == "unused" or var.peek:
            continue
        if var.is_array():
            out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
            out.emit(f"{var.name}[_i] = sym_new_unknown(ctx);\n")
            out.emit(f"if ({var.name}[_i] == NULL) goto out_of_space;\n")
            out.emit("}\n")
            continue
        if var.name == "null":
            out.emit(f"{var.name} = sym_new_null(ctx);\n")
        else:
            out.emit(f"{var.name} = sym_new_unknown(ctx);\n")
        out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
|
||||
|
||||
|
||||
def write_uop(
    override: Uop | None,
    uop: Uop,
    out: CWriter,
    stack: Stack,
    debug: bool,
    skip_inputs: bool,
) -> None:
    """Write the body of one case of the abstract interpreter.

    The stack effect is taken from *override* when one is given, otherwise
    from *uop*.  Inputs are popped (and their code emitted unless
    *skip_inputs*), outputs are pushed either before the body or, for uops
    that store the stack pointer, after it.  With an override the cache
    entries of *uop* are declared from ``this_instr->operand`` and the
    override's tokens are emitted; without one the default body is emitted.
    Finally the stack is flushed.

    Raises the error built by ``analysis_error`` when the stack reports a
    ``SizeMismatch``.
    """
    try:
        prototype = override if override else uop
        is_override = override is not None
        out.start_line()
        for var in reversed(prototype.stack.inputs):
            popped = stack.pop(var)
            if not skip_inputs:
                out.emit(popped)
        if not prototype.properties.stores_sp:
            for var in prototype.stack.outputs:
                pushed = stack.push(var)
                if is_override or not var.peek:
                    out.emit(pushed)
        if debug:
            args = [
                var.name
                for var in prototype.stack.inputs
                if is_override or not var.peek
            ]
            out.emit(f'DEBUG_PRINTF({", ".join(args)});\n')
        if override:
            for cache in uop.caches:
                if cache.name == "unused":
                    continue
                if cache.size == 4:
                    c_type = cast = "PyObject *"
                else:
                    c_type = f"uint{cache.size*16}_t "
                    cast = f"uint{cache.size*16}_t"
                out.emit(f"{c_type}{cache.name} = ({cast})this_instr->operand;\n")
            replacement_funcs = {
                "DECREF_INPUTS": decref_inputs,
                "SYNC_SP": replace_sync_sp,
            }
            emit_tokens(out, override, stack, None, replacement_funcs)
        else:
            emit_default(out, uop)

        if prototype.properties.stores_sp:
            # Outputs of such uops are pushed only after the body has run.
            for var in prototype.stack.outputs:
                if is_override or not var.peek:
                    out.emit(stack.push(var))
        out.start_line()
        stack.flush(out, cast_type="_Py_UOpsSymType *")
    except SizeMismatch as ex:
        raise analysis_error(ex.args[0], uop.body[0])
|
||||
|
||||
|
||||
# NOTE(review): presumably uop names the generator should leave out; nothing
# in the visible part of this file reads SKIPS -- confirm it is still used.
SKIPS = ("_EXTENDED_ARG",)
|
||||
|
||||
|
||||
def generate_abstract_interpreter(
    filenames: list[str],
    abstract: Analysis,
    base: Analysis,
    outfile: TextIO,
    debug: bool,
) -> None:
    """Write the abstract-interpreter cases to *outfile*.

    After the file header, one ``case NAME: { ... break; }`` block is
    written for each uop in *base*. A uop that also appears in *abstract*
    is validated against it and uses that definition as its override.
    Tier-one-only and super uops are skipped silently; uops that are not
    viable get a C comment instead of a case.

    Every uop named in *abstract* must also exist in *base*; this is
    checked with an ``assert`` before any case is written.
    """
    write_header(__file__, filenames, outfile)
    out = CWriter(outfile, 2, False)
    out.emit("\n")

    base_uop_names = {base_uop.name for base_uop in base.uops.values()}
    for abstract_uop_name in abstract.uops:
        assert abstract_uop_name in base_uop_names,\
            f"All abstract uops should override base uops, but {abstract_uop_name} is not."

    for uop in base.uops.values():
        override: Uop | None = None
        if uop.name in abstract.uops:
            override = abstract.uops[uop.name]
            validate_uop(override, uop)
        if uop.properties.tier_one_only or uop.is_super():
            continue
        if not uop.is_viable():
            out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n")
            continue

        out.emit(f"case {uop.name}: {{\n")
        # Inputs are declared only when an override supplies the body.
        declared = override if override else uop
        declare_variables(declared, out, skip_inputs=not override)
        stack = Stack()
        write_uop(override, uop, out, stack, debug, skip_inputs=(override is None))
        out.start_line()
        out.emit("break;\n")
        out.emit("}")
        out.emit("\n\n")
|
||||
|
||||
|
||||
def generate_tier2_abstract_from_files(
    filenames: list[str], outfilename: str, debug: bool=False
) -> None:
    """Analyze two definition files and write the generated cases.

    *filenames* must hold exactly two paths: the base definitions first,
    the abstract cases second. Each is analyzed on its own and the result
    is written to *outfilename*, which is opened for writing.
    """
    assert len(filenames) == 2, "Need a base file and an abstract cases file."
    base_analysis = analyze_files([filenames[0]])
    abstract_analysis = analyze_files([filenames[1]])
    with open(outfilename, "w") as generated:
        generate_abstract_interpreter(
            filenames, abstract_analysis, base_analysis, generated, debug
        )
|
||||
|
||||
|
||||
# Command-line interface used by the ``__main__`` block of this file.
arg_parser = argparse.ArgumentParser(
    description="Generate the code for the tier 2 interpreter.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# Path the generated code is written to; falls back to DEFAULT_OUTPUT.
arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)


# nargs=1 makes this a required positional stored as a one-element list.
arg_parser.add_argument("input", nargs=1, help="Abstract interpreter definition file")

# All remaining command-line arguments are collected into this list,
# which is empty when none are given.
arg_parser.add_argument(
    "base", nargs=argparse.REMAINDER, help="The base instruction definition file(s)"
)

# Boolean flag; passed on as the ``debug`` argument of the generator.
arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true")
|
||||
|
||||
# Script entry point: parse the command line, analyze the abstract and base
# definition files, and write the generated cases to the output path.
if __name__ == "__main__":
    args = arg_parser.parse_args()
    if len(args.base) == 0:
        # NOTE(review): both defaults are appended to args.input while
        # args.base stays empty, so analyze_files(args.base) below receives
        # an empty list. Confirm whether DEFAULT_INPUT was meant to go to
        # args.base instead. It is also assumed that both appends belong to
        # this `if` -- confirm against the upstream file.
        args.input.append(DEFAULT_INPUT)
        args.input.append(DEFAULT_ABSTRACT_INPUT)
    abstract = analyze_files(args.input)
    base = analyze_files(args.base)
    with open(args.output, "w") as outfile:
        generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug)
|
Loading…
Reference in New Issue