gh-114058: Foundations of the Tier2 redundancy eliminator (GH-115085)

---------

Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
Co-authored-by: Jules <57632293+JuliaPoo@users.noreply.github.com>
Co-authored-by: Guido van Rossum <gvanrossum@users.noreply.github.com>
This commit is contained in:
Ken Jin 2024-02-13 21:24:48 +08:00 committed by GitHub
parent ccc76c3e88
commit 7cce857622
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 3137 additions and 140 deletions

1
.gitattributes vendored
View File

@ -94,6 +94,7 @@ Programs/test_frozenmain.h generated
Python/Python-ast.c generated
Python/executor_cases.c.h generated
Python/generated_cases.c.h generated
Python/tier2_redundancy_eliminator_bytecodes.c.h generated
Python/opcode_targets.h generated
Python/stdlib_module_names.h generated
Tools/peg_generator/pegen/grammar_parser.py generated

View File

@ -120,6 +120,9 @@ typedef struct _optimization_stats {
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];
uint64_t optimizer_attempts;
uint64_t optimizer_successes;
uint64_t optimizer_failure_reason_no_memory;
} OptimizationStats;
typedef struct _rare_event_stats {

View File

@ -1094,7 +1094,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
[MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 },
[MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 },
[NOP] = { true, INSTR_FMT_IX, 0 },
[NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG },
[POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG },
[POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG },
[POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG },
@ -1156,10 +1156,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
[LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
[POP_BLOCK] = { true, -1, 0 },
[SETUP_CLEANUP] = { true, -1, HAS_ARG_FLAG },
[SETUP_FINALLY] = { true, -1, HAS_ARG_FLAG },
[SETUP_WITH] = { true, -1, HAS_ARG_FLAG },
[POP_BLOCK] = { true, -1, HAS_PURE_FLAG },
[SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG },
[SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG },
[SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG },
[STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
};
#endif

View File

@ -8,6 +8,13 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif
#include "pycore_uop_ids.h"
// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 512
#define TRACE_STACK_SIZE 5
int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
_PyBloomFilter *dependencies);

View File

@ -16,7 +16,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];
#ifdef NEED_OPCODE_METADATA
const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_NOP] = 0,
[_NOP] = HAS_PURE_FLAG,
[_RESUME_CHECK] = HAS_DEOPT_FLAG,
[_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG,
[_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG,
@ -202,10 +202,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
[_EXIT_TRACE] = HAS_DEOPT_FLAG,
[_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
[_LOAD_CONST_INLINE] = 0,
[_LOAD_CONST_INLINE_BORROW] = 0,
[_LOAD_CONST_INLINE_WITH_NULL] = 0,
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = 0,
[_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG,
[_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG,
[_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
[_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,

View File

@ -3,6 +3,7 @@ import opcode
import sys
import textwrap
import unittest
import gc
import _testinternalcapi
@ -556,6 +557,214 @@ class TestUops(unittest.TestCase):
# too much already.
self.assertEqual(count, 1)
class TestUopsOptimization(unittest.TestCase):
    """End-to-end tests for the tier-2 abstract interpreter: type
    propagation, value numbering, guard elimination and global->constant
    promotion on uop traces."""

    def test_int_type_propagation(self):
        # Once _GUARD_BOTH_INT has proven both operands are ints, later
        # int adds should not need their own guard.
        def testfunc(loops):
            num = 0
            while num < loops:
                x = num + num
                a = x + 1
                num += 1
            return a

        opt = _testinternalcapi.get_uop_optimizer()
        res = None
        with temporary_optimizer(opt):
            res = testfunc(32)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        self.assertEqual(res, 63)
        binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
        guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
        self.assertGreaterEqual(len(binop_count), 3)
        self.assertLessEqual(len(guard_both_int_count), 1)

    def test_int_type_propagation_through_frame(self):
        # Type knowledge must flow *into* an inlined callee frame.
        def double(x):
            return x + x
        def testfunc(loops):
            num = 0
            while num < loops:
                x = num + num
                a = double(x)
                num += 1
            return a

        opt = _testinternalcapi.get_uop_optimizer()
        res = None
        with temporary_optimizer(opt):
            res = testfunc(32)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        self.assertEqual(res, 124)
        binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
        guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
        self.assertGreaterEqual(len(binop_count), 3)
        self.assertLessEqual(len(guard_both_int_count), 1)

    def test_int_type_propagation_from_frame(self):
        # Type knowledge must flow back *out of* an inlined callee frame.
        def double(x):
            return x + x
        def testfunc(loops):
            num = 0
            while num < loops:
                a = double(num)
                x = a + a
                num += 1
            return x

        opt = _testinternalcapi.get_uop_optimizer()
        res = None
        with temporary_optimizer(opt):
            res = testfunc(32)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        self.assertEqual(res, 124)
        binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
        guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
        self.assertGreaterEqual(len(binop_count), 3)
        self.assertLessEqual(len(guard_both_int_count), 1)

    def test_int_impure_region(self):
        # A discarded impure operation (x // 2) must not invalidate the
        # int type information already gathered for the adds.
        def testfunc(loops):
            num = 0
            while num < loops:
                x = num + num
                y = 1
                x // 2
                a = x + y
                num += 1
            return a

        opt = _testinternalcapi.get_uop_optimizer()
        res = None
        with temporary_optimizer(opt):
            res = testfunc(64)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"]
        self.assertGreaterEqual(len(binop_count), 3)

    def test_call_py_exact_args(self):
        # Calls into an exact-args Python function should be inlined
        # (_PUSH_FRAME present) and the PEP 523 check elided.
        def testfunc(n):
            def dummy(x):
                return x+1
            for i in range(n):
                dummy(i)

        opt = _testinternalcapi.get_uop_optimizer()
        with temporary_optimizer(opt):
            testfunc(20)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        uops = {opname for opname, _, _ in ex}
        self.assertIn("_PUSH_FRAME", uops)
        self.assertIn("_BINARY_OP_ADD_INT", uops)
        self.assertNotIn("_CHECK_PEP_523", uops)

    def test_int_type_propagate_through_range(self):
        # range() iteration yields known ints, so no int guard is needed.
        def testfunc(n):
            for i in range(n):
                x = i + i
            return x

        opt = _testinternalcapi.get_uop_optimizer()
        with temporary_optimizer(opt):
            res = testfunc(20)

        ex = get_first_executor(testfunc)
        self.assertEqual(res, 19 * 2)
        self.assertIsNotNone(ex)
        uops = {opname for opname, _, _ in ex}
        self.assertNotIn("_GUARD_BOTH_INT", uops)

    def test_int_value_numbering(self):
        # x, z, a, b all alias the same value; a single guard must suffice.
        def testfunc(n):
            y = 1
            for i in range(n):
                x = y
                z = x
                a = z
                b = a
                res = x + z + a + b
            return res

        opt = _testinternalcapi.get_uop_optimizer()
        with temporary_optimizer(opt):
            res = testfunc(20)

        ex = get_first_executor(testfunc)
        self.assertEqual(res, 4)
        self.assertIsNotNone(ex)
        uops = {opname for opname, _, _ in ex}
        self.assertIn("_GUARD_BOTH_INT", uops)
        guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"]
        self.assertEqual(len(guard_count), 1)

    def test_comprehension(self):
        # Comprehension in the traced loop: no int add should appear.
        def testfunc(n):
            for _ in range(n):
                return [i for i in range(n)]

        opt = _testinternalcapi.get_uop_optimizer()
        with temporary_optimizer(opt):
            testfunc(20)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        uops = {opname for opname, _, _ in ex}
        self.assertNotIn("_BINARY_OP_ADD_INT", uops)

    def test_call_py_exact_args_disappearing(self):
        def dummy(x):
            return x+1

        def testfunc(n):
            for i in range(n):
                dummy(i)

        opt = _testinternalcapi.get_uop_optimizer()
        # Trigger specialization
        testfunc(8)
        with temporary_optimizer(opt):
            del dummy
            gc.collect()

            def dummy(x):
                return x + 2
            testfunc(10)

        ex = get_first_executor(testfunc)
        # Honestly as long as it doesn't crash it's fine.
        # Whether we get an executor or not is non-deterministic,
        # because it's decided by when the function is freed.
        # This test is a little implementation specific.

    def test_promote_globals_to_constants(self):
        # The builtin `range` should be promoted to an inlined constant.
        def testfunc(n):
            for i in range(n):
                x = range(i)
            return x

        opt = _testinternalcapi.get_uop_optimizer()
        with temporary_optimizer(opt):
            testfunc(20)

        ex = get_first_executor(testfunc)
        self.assertIsNotNone(ex)
        uops = {opname for opname, _, _ in ex}
        self.assertNotIn("_LOAD_GLOBAL_BUILTIN", uops)
        self.assertIn("_LOAD_CONST_INLINE_BORROW_WITH_NULL", uops)
if __name__ == "__main__":
unittest.main()

View File

@ -33,6 +33,7 @@ with test_tools.imports_under_tool("cases_generator"):
import parser
from stack import Stack
import tier1_generator
import tier2_abstract_generator
def handle_stderr():
@ -793,5 +794,157 @@ class TestGeneratedCases(unittest.TestCase):
self.run_cases_test(input, output)
class TestGeneratedAbstractCases(unittest.TestCase):
    """Tests for the tier-2 abstract-case generator
    (tier2_abstract_generator), which overlays hand-written abstract cases
    (input2) on top of the base bytecode definitions (input).

    NOTE(review): the indentation inside the expected-output string
    literals was lost in transit; it is reconstructed here to match the
    generator's conventional output — confirm against the generator.
    """

    def setUp(self) -> None:
        super().setUp()
        self.maxDiff = None

        self.temp_dir = tempfile.gettempdir()
        self.temp_input_filename = os.path.join(self.temp_dir, "input.txt")
        self.temp_input2_filename = os.path.join(self.temp_dir, "input2.txt")
        self.temp_output_filename = os.path.join(self.temp_dir, "output.txt")

    def tearDown(self) -> None:
        # Best-effort cleanup: some files may never have been created.
        for filename in [
            self.temp_input_filename,
            self.temp_input2_filename,
            self.temp_output_filename,
        ]:
            try:
                os.remove(filename)
            except:
                pass
        super().tearDown()

    def run_cases_test(self, input: str, input2: str, expected: str):
        # `input` holds the base definitions, `input2` the abstract
        # overrides; the generated output is compared to `expected` after
        # stripping header/preprocessor noise from the generated file.
        with open(self.temp_input_filename, "w+") as temp_input:
            temp_input.write(parser.BEGIN_MARKER)
            temp_input.write(input)
            temp_input.write(parser.END_MARKER)
            temp_input.flush()

        with open(self.temp_input2_filename, "w+") as temp_input:
            temp_input.write(parser.BEGIN_MARKER)
            temp_input.write(input2)
            temp_input.write(parser.END_MARKER)
            temp_input.flush()

        with handle_stderr():
            tier2_abstract_generator.generate_tier2_abstract_from_files(
                [self.temp_input_filename, self.temp_input2_filename],
                self.temp_output_filename
            )

        with open(self.temp_output_filename) as temp_output:
            lines = temp_output.readlines()
            # Drop the generated header comment and preprocessor lines.
            while lines and lines[0].startswith(("// ", "#", " #", "\n")):
                lines.pop(0)
            while lines and lines[-1].startswith(("#", "\n")):
                lines.pop(-1)
        actual = "".join(lines)
        self.assertEqual(actual.strip(), expected.strip())

    def test_overridden_abstract(self):
        # An override with the same stack effect replaces the base body.
        input = """
        pure op(OP, (--)) {
            spam();
        }
        """
        input2 = """
        pure op(OP, (--)) {
            eggs();
        }
        """
        output = """
        case OP: {
            eggs();
            break;
        }
        """
        self.run_cases_test(input, input2, output)

    def test_overridden_abstract_args(self):
        # Overridden op keeps its stack handling; non-overridden pure ops
        # get the default "unknown output" body.
        input = """
        pure op(OP, (arg1 -- out)) {
            spam();
        }
        op(OP2, (arg1 -- out)) {
            eggs();
        }
        """
        input2 = """
        op(OP, (arg1 -- out)) {
            eggs();
        }
        """
        output = """
        case OP: {
            _Py_UOpsSymType *arg1;
            _Py_UOpsSymType *out;
            arg1 = stack_pointer[-1];
            eggs();
            stack_pointer[-1] = out;
            break;
        }

        case OP2: {
            _Py_UOpsSymType *out;
            out = sym_new_unknown(ctx);
            if (out == NULL) goto out_of_space;
            stack_pointer[-1] = out;
            break;
        }
        """
        self.run_cases_test(input, input2, output)

    def test_no_overridden_case(self):
        input = """
        pure op(OP, (arg1 -- out)) {
            spam();
        }

        pure op(OP2, (arg1 -- out)) {
        }

        """
        input2 = """
        pure op(OP2, (arg1 -- out)) {
        }
        """
        output = """
        case OP: {
            _Py_UOpsSymType *out;
            out = sym_new_unknown(ctx);
            if (out == NULL) goto out_of_space;
            stack_pointer[-1] = out;
            break;
        }

        case OP2: {
            _Py_UOpsSymType *arg1;
            _Py_UOpsSymType *out;
            arg1 = stack_pointer[-1];
            stack_pointer[-1] = out;
            break;
        }
        """
        self.run_cases_test(input, input2, output)

    def test_missing_override_failure(self):
        # An override whose name matches no base uop must be rejected.
        input = """
        pure op(OP, (arg1 -- out)) {
            spam();
        }
        """
        input2 = """
        pure op(OTHER, (arg1 -- out)) {
        }
        """
        output = """
        """
        with self.assertRaisesRegex(AssertionError, "All abstract uops"):
            self.run_cases_test(input, input2, output)
if __name__ == "__main__":
unittest.main()

View File

@ -1863,6 +1863,10 @@ regen-cases:
-o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \
-o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_abstract_generator.py \
-o $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new \
$(srcdir)/Python/tier2_redundancy_eliminator_bytecodes.c \
$(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/opcode_metadata_generator.py \
-o $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/uop_metadata_generator.py -o \
@ -1874,6 +1878,7 @@ regen-cases:
$(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new
$(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new
$(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new
$(UPDATE_FILE) $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h $(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h.new
$(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new
Python/compile.o: $(srcdir)/Include/internal/pycore_opcode_metadata.h
@ -1895,7 +1900,8 @@ Python/optimizer.o: \
Python/optimizer_analysis.o: \
$(srcdir)/Include/internal/pycore_opcode_metadata.h \
$(srcdir)/Include/internal/pycore_optimizer.h
$(srcdir)/Include/internal/pycore_optimizer.h \
$(srcdir)/Python/tier2_redundancy_eliminator_cases.c.h
Python/frozen.o: $(FROZEN_FILES_OUT)

View File

@ -0,0 +1 @@
Implement the foundations of the Tier 2 redundancy eliminator.

View File

@ -133,7 +133,7 @@ dummy_func(
switch (opcode) {
// BEGIN BYTECODES //
inst(NOP, (--)) {
pure inst(NOP, (--)) {
}
family(RESUME, 0) = {
@ -411,12 +411,12 @@ dummy_func(
// BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode.
};
op(_GUARD_BOTH_INT, (left, right -- left: &PYLONG_TYPE, right: &PYLONG_TYPE)) {
op(_GUARD_BOTH_INT, (left, right -- left, right)) {
DEOPT_IF(!PyLong_CheckExact(left));
DEOPT_IF(!PyLong_CheckExact(right));
}
pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res: &PYLONG_TYPE)) {
pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
@ -424,7 +424,7 @@ dummy_func(
ERROR_IF(res == NULL, error);
}
pure op(_BINARY_OP_ADD_INT, (left, right -- res: &PYLONG_TYPE)) {
pure op(_BINARY_OP_ADD_INT, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
@ -432,7 +432,7 @@ dummy_func(
ERROR_IF(res == NULL, error);
}
pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res: &PYLONG_TYPE)) {
pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right);
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free);
@ -447,12 +447,12 @@ dummy_func(
macro(BINARY_OP_SUBTRACT_INT) =
_GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;
op(_GUARD_BOTH_FLOAT, (left, right -- left: &PYFLOAT_TYPE, right: &PYFLOAT_TYPE)) {
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
DEOPT_IF(!PyFloat_CheckExact(left));
DEOPT_IF(!PyFloat_CheckExact(right));
}
pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) {
pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
double dres =
((PyFloatObject *)left)->ob_fval *
@ -460,7 +460,7 @@ dummy_func(
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res);
}
pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) {
pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
double dres =
((PyFloatObject *)left)->ob_fval +
@ -468,7 +468,7 @@ dummy_func(
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res);
}
pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res: &PYFLOAT_TYPE)) {
pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
double dres =
((PyFloatObject *)left)->ob_fval -
@ -483,12 +483,12 @@ dummy_func(
macro(BINARY_OP_SUBTRACT_FLOAT) =
_GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;
op(_GUARD_BOTH_UNICODE, (left, right -- left: &PYUNICODE_TYPE, right: &PYUNICODE_TYPE)) {
op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
DEOPT_IF(!PyUnicode_CheckExact(left));
DEOPT_IF(!PyUnicode_CheckExact(right));
}
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res: &PYUNICODE_TYPE)) {
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
STAT_INC(BINARY_OP, hit);
res = PyUnicode_Concat(left, right);
_Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc);
@ -1877,7 +1877,7 @@ dummy_func(
something was returned by a descriptor protocol). Set
the second element of the stack to NULL, to signal
CALL that it's not a method call.
NULL | meth | arg1 | ... | argN
meth | NULL | arg1 | ... | argN
*/
DECREF_INPUTS();
ERROR_IF(attr == NULL, error);
@ -1901,7 +1901,7 @@ dummy_func(
LOAD_ATTR,
};
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner: &(GUARD_TYPE_VERSION_TYPE + type_version))) {
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
PyTypeObject *tp = Py_TYPE(owner);
assert(type_version != 0);
DEOPT_IF(tp->tp_version_tag != type_version);
@ -2082,7 +2082,7 @@ dummy_func(
DISPATCH_INLINED(new_frame);
}
op(_GUARD_DORV_VALUES, (owner -- owner: &GUARD_DORV_VALUES_TYPE)) {
op(_GUARD_DORV_VALUES, (owner -- owner)) {
assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner);
DEOPT_IF(!_PyDictOrValues_IsValues(dorv));
@ -2711,7 +2711,7 @@ dummy_func(
DEOPT_IF(r->len <= 0);
}
op(_ITER_NEXT_RANGE, (iter -- iter, next: &PYLONG_TYPE)) {
op(_ITER_NEXT_RANGE, (iter -- iter, next)) {
_PyRangeIterObject *r = (_PyRangeIterObject *)iter;
assert(Py_TYPE(r) == &PyRangeIter_Type);
assert(r->len > 0);
@ -2869,13 +2869,13 @@ dummy_func(
exc_info->exc_value = Py_NewRef(new_exc);
}
op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner: &GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE)) {
op(_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, (owner -- owner)) {
assert(Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
PyDictOrValues *dorv = _PyObject_DictOrValuesPointer(owner);
DEOPT_IF(!_PyDictOrValues_IsValues(*dorv) && !_PyObject_MakeInstanceAttributesFromDict(owner, dorv));
}
op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner: &(GUARD_KEYS_VERSION_TYPE + keys_version))) {
op(_GUARD_KEYS_VERSION, (keys_version/2, owner -- owner)) {
PyTypeObject *owner_cls = Py_TYPE(owner);
PyHeapTypeObject *owner_heap_type = (PyHeapTypeObject *)owner_cls;
DEOPT_IF(owner_heap_type->ht_cached_keys->dk_version != keys_version);
@ -3090,7 +3090,7 @@ dummy_func(
macro(CALL) = _SPECIALIZE_CALL + unused/2 + _CALL;
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable: &PYMETHOD_TYPE, null: &NULL_TYPE, unused[oparg])) {
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
DEOPT_IF(null != NULL);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type);
}
@ -3108,7 +3108,7 @@ dummy_func(
DEOPT_IF(tstate->interp->eval_frame);
}
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable: &(PYFUNCTION_TYPE_VERSION_TYPE + func_version), self_or_null, unused[oparg])) {
op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
DEOPT_IF(!PyFunction_Check(callable));
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version);
@ -4059,23 +4059,23 @@ dummy_func(
DEOPT_IF(!current_executor->vm_data.valid);
}
op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
pure op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
TIER_TWO_ONLY
value = Py_NewRef(ptr);
}
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
TIER_TWO_ONLY
value = ptr;
}
op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
pure op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
TIER_TWO_ONLY
value = Py_NewRef(ptr);
null = NULL;
}
op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
pure op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
TIER_TWO_ONLY
value = ptr;
null = NULL;

View File

@ -1598,7 +1598,7 @@
something was returned by a descriptor protocol). Set
the second element of the stack to NULL, to signal
CALL that it's not a method call.
NULL | meth | arg1 | ... | argN
meth | NULL | arg1 | ... | argN
*/
Py_DECREF(owner);
if (attr == NULL) goto pop_1_error_tier_two;

View File

@ -3420,7 +3420,7 @@
something was returned by a descriptor protocol). Set
the second element of the stack to NULL, to signal
CALL that it's not a method call.
NULL | meth | arg1 | ... | argN
meth | NULL | arg1 | ... | argN
*/
Py_DECREF(owner);
if (attr == NULL) goto pop_1_error;

View File

@ -17,8 +17,6 @@
#include "pycore_uop_metadata.h" // Uop tables
#undef NEED_OPCODE_METADATA
#define UOP_MAX_TRACE_LENGTH 512
#define MAX_EXECUTORS_SIZE 256
@ -308,8 +306,6 @@ BRANCH_TO_GUARD[4][2] = {
[POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
};
#define TRACE_STACK_SIZE 5
#define CONFIDENCE_RANGE 1000
#define CONFIDENCE_CUTOFF 333
@ -323,10 +319,11 @@ BRANCH_TO_GUARD[4][2] = {
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
DPRINTF(2, \
" ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \
" ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \
_PyUOpName(OPCODE), \
(OPARG), \
(uint64_t)(OPERAND)); \
(uint64_t)(OPERAND), \
TARGET); \
assert(trace_length < max_length); \
trace[trace_length].opcode = (OPCODE); \
trace[trace_length].oparg = (OPARG); \
@ -825,11 +822,13 @@ uop_optimize(
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
if (uop_optimize == NULL || *uop_optimize > '0') {
err = _Py_uop_analyze_and_optimize(frame, buffer,
UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies);
UOP_MAX_TRACE_LENGTH,
curr_stackentries, &dependencies);
if (err <= 0) {
return err;
}
}
assert(err == 1);
_PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies);
if (executor == NULL) {
return -1;

View File

@ -1,3 +1,14 @@
/*
 * This file contains the support code for CPython's uops redundancy
 * eliminator; it also performs some simple optimizations.
 * It runs a traditional data-flow analysis[1] over the trace of uops and,
 * using the information gained, chooses to emit or skip certain
 * instructions where possible.
 *
 * [1] For an introduction to data-flow analysis, see
 * https://clang.llvm.org/docs/DataFlowAnalysisIntro.html
 */
#include "Python.h"
#include "opcode.h"
#include "pycore_dict.h"
@ -9,10 +20,355 @@
#include "pycore_dict.h"
#include "pycore_long.h"
#include "cpython/optimizer.h"
#include "pycore_optimizer.h"
#include "pycore_object.h"
#include "pycore_dict.h"
#include "pycore_function.h"
#include "pycore_uop_metadata.h"
#include "pycore_uop_ids.h"
#include "pycore_range.h"
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include "pycore_optimizer.h"
// Holds locals, stack, locals, stack ... co_consts (in that order)
#define MAX_ABSTRACT_INTERP_SIZE 4096
#define OVERALLOCATE_FACTOR 5
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * OVERALLOCATE_FACTOR)
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)
#ifdef Py_DEBUG
static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG";
static inline int get_lltrace(void) {
char *uop_debug = Py_GETENV(DEBUG_ENV);
int lltrace = 0;
if (uop_debug != NULL && *uop_debug >= '0') {
lltrace = *uop_debug - '0'; // TODO: Parse an int and all that
}
return lltrace;
}
#define DPRINTF(level, ...) \
if (get_lltrace() >= (level)) { printf(__VA_ARGS__); }
#else
#define DPRINTF(level, ...)
#endif
// Flags describing what is known about a symbolic value.
// Parenthesized so the macros expand safely inside larger expressions
// (e.g. `flags & KNOWN | OTHER` would otherwise misparse).
#define KNOWN (1 << 0)
#define TRUE_CONST (1 << 1)
#define IS_NULL (1 << 2)
#define NOT_NULL (1 << 3)

// One symbolic value on the abstract stack or in the abstract locals.
typedef struct {
    // Bitwise OR of KNOWN / TRUE_CONST / IS_NULL / NOT_NULL above.
    int flags;
    // Type known for this value, or NULL when unknown.
    PyTypeObject *typ;
    // constant propagated value (might be NULL)
    PyObject *const_val;
} _Py_UOpsSymType;

// One (possibly inlined) frame tracked by the abstract interpreter.
typedef struct _Py_UOpsAbstractFrame {
    // Max stacklen
    int stack_len;
    int locals_len;

    _Py_UOpsSymType **stack_pointer;
    _Py_UOpsSymType **stack;
    _Py_UOpsSymType **locals;
} _Py_UOpsAbstractFrame;

// Bump arena holding all _Py_UOpsSymType objects for one analysis run.
typedef struct ty_arena {
    int ty_curr_number;
    int ty_max_number;
    _Py_UOpsSymType arena[TY_ARENA_SIZE];
} ty_arena;

// Tier 2 types meta interpreter
typedef struct _Py_UOpsAbstractInterpContext {
    PyObject_HEAD
    // The current "executing" frame.
    _Py_UOpsAbstractFrame *frame;
    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
    int curr_frame_depth;

    // Arena for the symbolic types.
    ty_arena t_arena;

    // Bump-allocation cursor / end sentinel for locals_and_stack below.
    _Py_UOpsSymType **n_consumed;
    _Py_UOpsSymType **limit;
    _Py_UOpsSymType *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
} _Py_UOpsAbstractInterpContext;
static inline _Py_UOpsSymType* sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx);

// Set up the next abstract frame in ctx->frames, carving its locals+stack
// storage out of ctx->locals_and_stack starting at localsplus_start.
// Locals below n_locals_already_filled are left for the caller to fill.
// Returns the new frame, or NULL when the storage arena would overflow.
static _Py_UOpsAbstractFrame *
ctx_frame_new(
    _Py_UOpsAbstractInterpContext *ctx,
    PyCodeObject *co,
    _Py_UOpsSymType **localsplus_start,
    int n_locals_already_filled,
    int curr_stackentries
)
{
    assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
    _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];

    frame->stack_len = co->co_stacksize;
    frame->locals_len = co->co_nlocalsplus;

    frame->locals = localsplus_start;
    frame->stack = frame->locals + co->co_nlocalsplus;
    frame->stack_pointer = frame->stack + curr_stackentries;
    // Reserve the whole locals+stack region; bail out before touching it
    // if that runs past the arena's limit.
    ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
    if (ctx->n_consumed >= ctx->limit) {
        return NULL;
    }

    // Initialize with the initial state of all local variables
    for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
        _Py_UOpsSymType *local = sym_new_unknown(ctx);
        if (local == NULL) {
            return NULL;
        }
        frame->locals[i] = local;
    }

    // Initialize the stack as well
    for (int i = 0; i < curr_stackentries; i++) {
        _Py_UOpsSymType *stackvar = sym_new_unknown(ctx);
        if (stackvar == NULL) {
            return NULL;
        }
        frame->stack[i] = stackvar;
    }

    return frame;
}
// Tear down an abstract-interpreter context: drop the strong references
// held by the symbol arena and reset the frame depth.
// A NULL ctx is tolerated and ignored.
static void
abstractcontext_fini(_Py_UOpsAbstractInterpContext *ctx)
{
    if (ctx != NULL) {
        ctx->curr_frame_depth = 0;
        int n_types = ctx->t_arena.ty_curr_number;
        for (int idx = 0; idx < n_types; idx++) {
            Py_CLEAR(ctx->t_arena.arena[idx].const_val);
        }
    }
}
// Initialize *ctx with a single root frame for code object `co` whose
// stack already holds curr_stacklen entries.
// Returns 0 on success, -1 on error (out of arena space).
// NOTE(review): `ir_entries` appears unused here — confirm before removing.
static int
abstractcontext_init(
    _Py_UOpsAbstractInterpContext *ctx,
    PyCodeObject *co,
    int curr_stacklen,
    int ir_entries
)
{
    ctx->limit = ctx->locals_and_stack + MAX_ABSTRACT_INTERP_SIZE;
    ctx->n_consumed = ctx->locals_and_stack;
#ifdef Py_DEBUG // Aids debugging a little. There should never be NULL in the abstract interpreter.
    for (int i = 0 ; i < MAX_ABSTRACT_INTERP_SIZE; i++) {
        ctx->locals_and_stack[i] = NULL;
    }
#endif

    // Setup the arena for sym expressions.
    ctx->t_arena.ty_curr_number = 0;
    ctx->t_arena.ty_max_number = TY_ARENA_SIZE;

    // Frame setup
    ctx->curr_frame_depth = 0;
    _Py_UOpsAbstractFrame *frame = ctx_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
    if (frame == NULL) {
        return -1;
    }
    ctx->curr_frame_depth++;
    ctx->frame = frame;
    return 0;
}
// Pop the innermost abstract frame and hand its locals+stack storage back
// to the bump arena. The root frame is never popped (asserted).
// Always returns 0.
static int
ctx_frame_pop(
    _Py_UOpsAbstractInterpContext *ctx
)
{
    // Rewinding n_consumed to the popped frame's locals releases its storage.
    ctx->n_consumed = ctx->frame->locals;

    ctx->curr_frame_depth--;
    assert(ctx->curr_frame_depth >= 1);
    ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];

    return 0;
}
// Takes a borrowed reference to const_val, turns that into a strong reference.
// Allocate a blank symbol from the arena; returns NULL (with stats bumped)
// once the arena is exhausted.
static _Py_UOpsSymType*
sym_new(_Py_UOpsAbstractInterpContext *ctx,
        PyObject *const_val)
{
    if (ctx->t_arena.ty_curr_number >= ctx->t_arena.ty_max_number) {
        OPT_STAT_INC(optimizer_failure_reason_no_memory);
        DPRINTF(1, "out of space for symbolic expression type\n");
        return NULL;
    }
    _Py_UOpsSymType *self = &ctx->t_arena.arena[ctx->t_arena.ty_curr_number++];
    self->flags = 0;
    self->typ = NULL;
    self->const_val = (const_val == NULL) ? NULL : Py_NewRef(const_val);
    return self;
}
// Set one (or more OR-ed) flag bit(s) on sym.
static inline void
sym_set_flag(_Py_UOpsSymType *sym, int flag)
{
    sym->flags |= flag;
}

// Clear one (or more OR-ed) flag bit(s) on sym.
static inline void
sym_clear_flag(_Py_UOpsSymType *sym, int flag)
{
    sym->flags &= (~flag);
}

// True when any of the given flag bit(s) is set.
static inline bool
sym_has_flag(_Py_UOpsSymType *sym, int flag)
{
    return (sym->flags & flag) != 0;
}

// True when something definite is known about sym's value.
static inline bool
sym_is_known(_Py_UOpsSymType *sym)
{
    return sym_has_flag(sym, KNOWN);
}

// True only when sym is *proven* non-NULL (NOT_NULL set, IS_NULL clear).
static inline bool
sym_is_not_null(_Py_UOpsSymType *sym)
{
    return (sym->flags & (IS_NULL | NOT_NULL)) == NOT_NULL;
}

// True only when sym is *proven* NULL (IS_NULL set, NOT_NULL clear).
static inline bool
sym_is_null(_Py_UOpsSymType *sym)
{
    return (sym->flags & (IS_NULL | NOT_NULL)) == IS_NULL;
}
// Record sym's exact type; a typed value is necessarily known and non-NULL.
static inline void
sym_set_type(_Py_UOpsSymType *sym, PyTypeObject *tp)
{
    assert(PyType_Check(tp));
    sym->typ = tp;
    sym_set_flag(sym, KNOWN);
    sym_set_flag(sym, NOT_NULL);
}

// Mark sym as a known-NULL stack slot.
static inline void
sym_set_null(_Py_UOpsSymType *sym)
{
    sym_set_flag(sym, IS_NULL);
    sym_set_flag(sym, KNOWN);
}
// New symbol with nothing known about it (NULL on arena exhaustion).
static inline _Py_UOpsSymType*
sym_new_unknown(_Py_UOpsAbstractInterpContext *ctx)
{
    return sym_new(ctx,NULL);
}

// New symbol known only to be non-NULL (NULL on arena exhaustion).
static inline _Py_UOpsSymType*
sym_new_known_notnull(_Py_UOpsAbstractInterpContext *ctx)
{
    _Py_UOpsSymType *res = sym_new_unknown(ctx);
    if (res == NULL) {
        return NULL;
    }
    sym_set_flag(res, NOT_NULL);
    return res;
}

// New symbol with a known exact type (NULL on arena exhaustion).
static inline _Py_UOpsSymType*
sym_new_known_type(_Py_UOpsAbstractInterpContext *ctx,
                   PyTypeObject *typ)
{
    _Py_UOpsSymType *res = sym_new(ctx,NULL);
    if (res == NULL) {
        return NULL;
    }
    sym_set_type(res, typ);
    return res;
}
// Takes a borrowed reference to const_val.
// Returns a symbol known to be exactly the constant const_val (hence also
// typed and non-NULL), or NULL when the arena is out of space.
static inline _Py_UOpsSymType*
sym_new_const(_Py_UOpsAbstractInterpContext *ctx, PyObject *const_val)
{
    assert(const_val != NULL);
    _Py_UOpsSymType *temp = sym_new(
        ctx,
        const_val
    );
    if (temp == NULL) {
        return NULL;
    }
    // sym_set_type() already sets KNOWN and NOT_NULL, so only TRUE_CONST
    // needs to be added explicitly (the original set all three).
    sym_set_type(temp, Py_TYPE(const_val));
    sym_set_flag(temp, TRUE_CONST);
    return temp;
}
// New symbol representing a known-NULL stack slot
// (returns NULL on arena exhaustion).
static _Py_UOpsSymType*
sym_new_null(_Py_UOpsAbstractInterpContext *ctx)
{
    _Py_UOpsSymType *res = sym_new_unknown(ctx);
    if (res != NULL) {
        sym_set_null(res);
    }
    return res;
}
// True iff sym is known and its recorded type is exactly typ.
// typ may be NULL, meaning "known but with no recorded type".
static inline bool
sym_matches_type(_Py_UOpsSymType *sym, PyTypeObject *typ)
{
    assert(typ == NULL || PyType_Check(typ));
    return sym_has_flag(sym, KNOWN) && sym->typ == typ;
}
// True for the uops that terminate a trace.
static inline bool
op_is_end(uint32_t opcode)
{
    switch (opcode) {
        case _EXIT_TRACE:
        case _JUMP_TO_TOP:
            return true;
        default:
            return false;
    }
}
static int
get_mutations(PyObject* dict) {
@ -199,14 +555,138 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
builtins = func->func_builtins;
break;
}
case _JUMP_TO_TOP:
case _EXIT_TRACE:
return 1;
default:
if (op_is_end(opcode)) {
return 1;
}
break;
}
}
return 0;
}
// Current abstract stack depth of the active frame.
#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))
// Abstract value of local variable idx in the active frame.
#define GETLOCAL(idx) ((ctx->frame->locals[idx]))

// Rewrite *INST in place. Wrapped in do/while(0) so the three assignments
// form a single statement and compose safely with unbraced if/else bodies.
#define REPLACE_OP(INST, OP, ARG, OPERAND) \
    do { \
        (INST)->opcode = (OP); \
        (INST)->oparg = (ARG); \
        (INST)->operand = (OPERAND); \
    } while (0)

// Produce a known-not-NULL `attr` and a known-NULL `null` in the current case.
// NOTE(review): the trailing semicolon after while(0) makes this macro a
// complete statement on its own; left unchanged because the generated
// callers appear to rely on it — confirm before normalizing.
#define _LOAD_ATTR_NOT_NULL \
    do { \
        attr = sym_new_known_notnull(ctx); \
        if (attr == NULL) { \
            goto error; \
        } \
        null = sym_new_null(ctx); \
        if (null == NULL) { \
            goto error; \
        } \
    } while (0);
/* 1 for success, 0 for not ready, cannot error at the moment. */
// Abstractly interpret the uop trace, dispatching each opcode through the
// generated cases in tier2_redundancy_eliminator_cases.c.h. The generated
// cases may jump to `out_of_space` or `error` below.
static int
uop_redundancy_eliminator(
    PyCodeObject *co,
    _PyUOpInstruction *trace,
    int trace_len,
    int curr_stacklen
)
{

    _Py_UOpsAbstractInterpContext context;
    _Py_UOpsAbstractInterpContext *ctx = &context;

    if (abstractcontext_init(
        ctx,
        co, curr_stacklen,
        trace_len) < 0) {
        goto out_of_space;
    }

    // Walk the trace until its end marker (_EXIT_TRACE / _JUMP_TO_TOP).
    for (_PyUOpInstruction *this_instr = trace;
         this_instr < trace + trace_len && !op_is_end(this_instr->opcode);
         this_instr++) {

        int oparg = this_instr->oparg;
        uint32_t opcode = this_instr->opcode;

        // Local copy; the generated cases mutate it and it is written back
        // to the (possibly different, after _PUSH_FRAME) frame below.
        _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer;

        DPRINTF(3, "Abstract interpreting %s:%d ",
                _PyOpcode_uop_name[opcode],
                oparg);
        switch (opcode) {
#include "tier2_redundancy_eliminator_cases.c.h"

            default:
                DPRINTF(1, "Unknown opcode in abstract interpreter\n");
                Py_UNREACHABLE();
        }
        assert(ctx->frame != NULL);
        DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
        ctx->frame->stack_pointer = stack_pointer;
        assert(STACK_LEVEL() >= 0);
    }

    abstractcontext_fini(ctx);
    return 1;

out_of_space:
    DPRINTF(1, "Out of space in abstract interpreter\n");
    abstractcontext_fini(ctx);
    return 0;

// Reached only from the generated cases above.
error:
    DPRINTF(1, "Encountered error in abstract interpreter\n");
    abstractcontext_fini(ctx);
    return 0;
}
// Strip uops that are not needed:
//  - Every _SET_IP is tentatively NOPed; it is reinstated (via
//    last_set_ip) only before a uop that escapes or can raise
//    (HAS_ESCAPES_FLAG / HAS_ERROR_FLAG) or before _PUSH_FRAME.
//  - _CHECK_VALIDITY is kept only when a preceding uop could have
//    escaped (maybe_invalid); back-to-back checks collapse to one.
static void
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
{
    int last_set_ip = -1;
    bool maybe_invalid = false;
    for (int pc = 0; pc < buffer_size; pc++) {
        int opcode = buffer[pc].opcode;
        if (opcode == _SET_IP) {
            // Tentative removal; may be undone below.
            buffer[pc].opcode = NOP;
            last_set_ip = pc;
        }
        else if (opcode == _CHECK_VALIDITY) {
            if (maybe_invalid) {
                maybe_invalid = false;
            }
            else {
                buffer[pc].opcode = NOP;
            }
        }
        else if (op_is_end(opcode)) {
            break;
        }
        else {
            if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
                maybe_invalid = true;
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
            if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
        }
    }
}
static void
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
{
@ -250,44 +730,9 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
}
}
// Strip uops that are not needed:
//  - Every _SET_IP is tentatively NOPed; it is reinstated (via
//    last_set_ip) only before a uop that escapes or can raise
//    (HAS_ESCAPES_FLAG / HAS_ERROR_FLAG) or before _PUSH_FRAME.
//  - _CHECK_VALIDITY is kept only when a preceding uop could have
//    escaped (maybe_invalid); back-to-back checks collapse to one.
static void
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
{
    int last_set_ip = -1;
    bool maybe_invalid = false;
    for (int pc = 0; pc < buffer_size; pc++) {
        int opcode = buffer[pc].opcode;
        if (opcode == _SET_IP) {
            // Tentative removal; may be undone below.
            buffer[pc].opcode = NOP;
            last_set_ip = pc;
        }
        else if (opcode == _CHECK_VALIDITY) {
            if (maybe_invalid) {
                maybe_invalid = false;
            }
            else {
                buffer[pc].opcode = NOP;
            }
        }
        else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
            // End of trace.
            break;
        }
        else {
            if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
                maybe_invalid = true;
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
            if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) {
                if (last_set_ip >= 0) {
                    buffer[last_set_ip].opcode = _SET_IP;
                }
            }
        }
    }
}
// 0 - failure, no error raised, just fall back to Tier 1
// -1 - failure, and raise error
// 1 - optimizer success
int
_Py_uop_analyze_and_optimize(
_PyInterpreterFrame *frame,
@ -297,11 +742,33 @@ _Py_uop_analyze_and_optimize(
_PyBloomFilter *dependencies
)
{
OPT_STAT_INC(optimizer_attempts);
int err = remove_globals(frame, buffer, buffer_size, dependencies);
if (err <= 0) {
return err;
if (err == 0) {
goto not_ready;
}
if (err < 0) {
goto error;
}
peephole_opt(frame, buffer, buffer_size);
err = uop_redundancy_eliminator(
(PyCodeObject *)frame->f_executable, buffer,
buffer_size, curr_stacklen);
if (err == 0) {
goto not_ready;
}
assert(err == 1);
remove_unneeded_uops(buffer, buffer_size);
OPT_STAT_INC(optimizer_successes);
return 1;
not_ready:
return 0;
error:
return -1;
}

View File

@ -240,6 +240,11 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
stats->optimizer_failure_reason_no_memory);
const char* const* names;
for (int i = 0; i < 512; i++) {
if (i < 256) {

View File

@ -0,0 +1,272 @@
#include "Python.h"
#include "pycore_uops.h"
#include "pycore_uop_ids.h"

// op() bodies below are parsed by the cases generator, never compiled
// directly; the macro only keeps this file syntactically valid C.
#define op(name, ...) /* NAME is ignored */

// Opaque to this file; the real definitions live in optimizer_analysis.c,
// which #includes the generated cases.
typedef struct _Py_UOpsSymType _Py_UOpsSymType;
typedef struct _Py_UOpsAbstractInterpContext _Py_UOpsAbstractInterpContext;
typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
// Abstract-interpreter overrides for selected uops. Each op() body is
// extracted by tier2_abstract_generator.py; uops without an override get
// a generated default that pushes unknown symbols.
static int
dummy_func(void) {

    // These declarations tell the generator the C types of the names
    // used in the op() bodies below.
    PyCodeObject *code;
    int oparg;
    _Py_UOpsSymType *flag;
    _Py_UOpsSymType *left;
    _Py_UOpsSymType *right;
    _Py_UOpsSymType *value;
    _Py_UOpsSymType *res;
    _Py_UOpsSymType *iter;
    _Py_UOpsSymType *top;
    _Py_UOpsSymType *bottom;
    _Py_UOpsAbstractFrame *frame;
    _Py_UOpsAbstractInterpContext *ctx;
    _PyUOpInstruction *this_instr;
    _PyBloomFilter *dependencies;
    int modified;

// BEGIN BYTECODES //

    op(_LOAD_FAST_CHECK, (-- value)) {
        value = GETLOCAL(oparg);
        // We guarantee this will error - just bail and don't optimize it.
        if (sym_is_null(value)) {
            goto out_of_space;
        }
    }

    op(_LOAD_FAST, (-- value)) {
        value = GETLOCAL(oparg);
    }

    op(_LOAD_FAST_AND_CLEAR, (-- value)) {
        value = GETLOCAL(oparg);
        // The uop clears the local, so its abstract value becomes NULL.
        _Py_UOpsSymType *temp = sym_new_null(ctx);
        if (temp == NULL) {
            goto out_of_space;
        }
        GETLOCAL(oparg) = temp;
    }

    op(_STORE_FAST, (value --)) {
        GETLOCAL(oparg) = value;
    }

    op(_PUSH_NULL, (-- res)) {
        res = sym_new_null(ctx);
        if (res == NULL) {
            goto out_of_space;
        };
    }

    op(_GUARD_BOTH_INT, (left, right -- left, right)) {
        // Guard is redundant if both operands are already known ints.
        if (sym_matches_type(left, &PyLong_Type) &&
            sym_matches_type(right, &PyLong_Type)) {
            REPLACE_OP(this_instr, _NOP, 0, 0);
        }
        // After the guard both operands are known to be ints.
        sym_set_type(left, &PyLong_Type);
        sym_set_type(right, &PyLong_Type);
    }

    op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
        // Guard is redundant if both operands are already known floats.
        if (sym_matches_type(left, &PyFloat_Type) &&
            sym_matches_type(right, &PyFloat_Type)) {
            REPLACE_OP(this_instr, _NOP, 0, 0);
        }
        sym_set_type(left, &PyFloat_Type);
        sym_set_type(right, &PyFloat_Type);
    }

    op(_BINARY_OP_ADD_INT, (left, right -- res)) {
        // TODO constant propagation
        (void)left;
        (void)right;
        res = sym_new_known_type(ctx, &PyLong_Type);
        if (res == NULL) {
            goto out_of_space;
        }
    }

    op(_LOAD_CONST, (-- value)) {
        // There should be no LOAD_CONST. It should be all
        // replaced by peephole_opt.
        Py_UNREACHABLE();
    }

    op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
        value = sym_new_const(ctx, ptr);
        if (value == NULL) {
            goto out_of_space;
        }
    }

    op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
        value = sym_new_const(ctx, ptr);
        if (value == NULL) {
            goto out_of_space;
        }
    }

    op(_LOAD_CONST_INLINE_WITH_NULL, (ptr/4 -- value, null)) {
        value = sym_new_const(ctx, ptr);
        if (value == NULL) {
            goto out_of_space;
        }
        null = sym_new_null(ctx);
        if (null == NULL) {
            goto out_of_space;
        }
    }

    op(_LOAD_CONST_INLINE_BORROW_WITH_NULL, (ptr/4 -- value, null)) {
        value = sym_new_const(ctx, ptr);
        if (value == NULL) {
            goto out_of_space;
        }
        null = sym_new_null(ctx);
        if (null == NULL) {
            goto out_of_space;
        }
    }

    op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) {
        assert(oparg > 0);
        top = bottom;
    }

    op(_SWAP, (bottom, unused[oparg-2], top --
        top, unused[oparg-2], bottom)) {
    }

    op(_LOAD_ATTR_INSTANCE_VALUE, (index/1, owner -- attr, null if (oparg & 1))) {
        _LOAD_ATTR_NOT_NULL
        (void)index;
        (void)owner;
    }

    op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) {
        _LOAD_ATTR_NOT_NULL
        (void)index;
        (void)owner;
    }

    op(_LOAD_ATTR_WITH_HINT, (hint/1, owner -- attr, null if (oparg & 1))) {
        _LOAD_ATTR_NOT_NULL
        (void)hint;
        (void)owner;
    }

    op(_LOAD_ATTR_SLOT, (index/1, owner -- attr, null if (oparg & 1))) {
        _LOAD_ATTR_NOT_NULL
        (void)index;
        (void)owner;
    }

    op(_LOAD_ATTR_CLASS, (descr/4, owner -- attr, null if (oparg & 1))) {
        _LOAD_ATTR_NOT_NULL
        (void)descr;
        (void)owner;
    }

    op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
        sym_set_type(callable, &PyFunction_Type);
        (void)self_or_null;
        (void)func_version;
    }

    op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
        sym_set_null(null);
        sym_set_type(callable, &PyMethod_Type);
    }

    op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
        int argcount = oparg;

        (void)callable;

        // The function object was stashed two uops ahead by the
        // trace-projection pass.
        PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand;
        if (func == NULL) {
            goto error;
        }
        PyCodeObject *co = (PyCodeObject *)func->func_code;

        assert(self_or_null != NULL);
        assert(args != NULL);
        if (sym_is_not_null(self_or_null)) {
            // Bound method fiddling, same as _INIT_CALL_PY_EXACT_ARGS in VM
            args--;
            argcount++;
        }

        _Py_UOpsSymType **localsplus_start = ctx->n_consumed;
        int n_locals_already_filled = 0;
        // Can determine statically, so we interleave the new locals
        // and make the current stack the new locals.
        // This also sets up for true call inlining.
        if (sym_is_known(self_or_null)) {
            localsplus_start = args;
            n_locals_already_filled = argcount;
        }
        new_frame = ctx_frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
        if (new_frame == NULL){
            goto out_of_space;
        }
    }

    op(_POP_FRAME, (retval -- res)) {
        SYNC_SP();
        // Return to the caller's abstract frame.
        ctx->frame->stack_pointer = stack_pointer;
        ctx_frame_pop(ctx);
        stack_pointer = ctx->frame->stack_pointer;
        res = retval;
    }

    op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) {
        SYNC_SP();
        // Switch to the callee's abstract frame.
        ctx->frame->stack_pointer = stack_pointer;
        ctx->frame = new_frame;
        ctx->curr_frame_depth++;
        stack_pointer = new_frame->stack_pointer;
    }

    op(_UNPACK_SEQUENCE, (seq -- values[oparg])) {
        /* This has to be done manually */
        (void)seq;
        for (int i = 0; i < oparg; i++) {
            values[i] = sym_new_unknown(ctx);
            if (values[i] == NULL) {
                goto out_of_space;
            }
        }
    }

    op(_UNPACK_EX, (seq -- values[oparg & 0xFF], unused, unused[oparg >> 8])) {
        /* This has to be done manually */
        (void)seq;
        int totalargs = (oparg & 0xFF) + (oparg >> 8) + 1;
        for (int i = 0; i < totalargs; i++) {
            values[i] = sym_new_unknown(ctx);
            if (values[i] == NULL) {
                goto out_of_space;
            }
        }
    }

    op(_ITER_NEXT_RANGE, (iter -- iter, next)) {
        // A range iterator always yields ints.
        next = sym_new_known_type(ctx, &PyLong_Type);
        if (next == NULL) {
            goto out_of_space;
        }
        (void)iter;
    }

// END BYTECODES //

}

File diff suppressed because it is too large Load Diff

View File

@ -83,9 +83,11 @@ Python/deepfreeze/*.c
Python/frozen_modules/*.h
Python/generated_cases.c.h
Python/executor_cases.c.h
Python/tier2_redundancy_eliminator_cases.c.h
# not actually source
Python/bytecodes.c
Python/tier2_redundancy_eliminator_bytecodes.c
# mimalloc
Objects/mimalloc/*.c

View File

@ -734,6 +734,6 @@ Modules/expat/xmlrole.c - error -
## other
Modules/_io/_iomodule.c - _PyIO_Module -
Modules/_sqlite/module.c - _sqlite3module -
Python/optimizer_analysis.c - _Py_PartitionRootNode_Type -
Python/optimizer_analysis.c - _Py_UOpsAbstractFrame_Type -
Python/optimizer_analysis.c - _Py_UOpsAbstractInterpContext_Type -
Modules/clinic/md5module.c.h _md5_md5 _keywords -

Can't render this file because it has a wrong number of fields in line 4.

View File

@ -13,6 +13,9 @@ What's currently here:
- `parser.py` helper for interactions with `parsing.py`
- `tierN_generator.py`: a couple of driver scripts to read `Python/bytecodes.c` and
write `Python/generated_cases.c.h` (and several other files)
- `tier2_abstract_generator.py`: reads `Python/bytecodes.c` and
`Python/tier2_redundancy_eliminator_bytecodes.c` and writes
`Python/tier2_redundancy_eliminator_cases.c.h`
- `stack.py`: code to handle generalized stack effects
- `cwriter.py`: code which understands tokens and how to format C code;
main class: `CWriter`

View File

@ -24,7 +24,6 @@ class Properties:
pure: bool
passthrough: bool
guard: bool
def dump(self, indent: str) -> None:
print(indent, end="")
@ -51,7 +50,6 @@ class Properties:
has_free=any(p.has_free for p in properties),
pure=all(p.pure for p in properties),
passthrough=all(p.passthrough for p in properties),
guard=all(p.guard for p in properties),
)
@ -73,7 +71,6 @@ SKIP_PROPERTIES = Properties(
has_free=False,
pure=False,
passthrough=False,
guard=False,
)
@ -273,7 +270,7 @@ def override_error(
def convert_stack_item(item: parser.StackEffect) -> StackItem:
return StackItem(
item.name, item.type, item.cond, (item.size or "1"), type_prop=item.type_prop
item.name, item.type, item.cond, (item.size or "1")
)
@ -473,7 +470,6 @@ def compute_properties(op: parser.InstDef) -> Properties:
has_free=has_free,
pure="pure" in op.annotations,
passthrough=passthrough,
guard=passthrough and deopts,
)

View File

@ -109,10 +109,7 @@ and a piece of C code describing its semantics::
NAME [":" type] [ "if" "(" C-expression ")" ]
type:
NAME ["*"] | type_prop
type_prop:
"&" "(" NAME ["+" NAME] ")"
NAME ["*"]
stream:
NAME "/" size
@ -142,26 +139,7 @@ The following definitions may occur:
The optional `type` in an `object` is the C type. It defaults to `PyObject *`.
The objects before the "--" are the objects on top of the stack at the start of
the instruction. Those after the "--" are the objects on top of the stack at the
end of the instruction. When prefixed by a `&`, the `type` production rule follows the
`type_prop` production rule. This indicates the type of the value is of that specific type
after the operation. In this case, the type may also contain 64-bit refinement information
that is fetched from a previously defined operand in the instruction header, such as
a type version tag. This follows the format `type + refinement`. The list of possible types
and their refinements are below. They obey the following predicates:
* `PYLONG_TYPE`: `Py_TYPE(val) == &PyLong_Type`
* `PYFLOAT_TYPE`: `Py_TYPE(val) == &PyFloat_Type`
* `PYUNICODE_TYPE`: `Py_TYPE(val) == &PyUnicode_Type`
* `NULL_TYPE`: `val == NULL`
* `GUARD_TYPE_VERSION_TYPE`: `type->tp_version_tag == auxiliary`
* `GUARD_DORV_VALUES_TYPE`: `_PyDictOrValues_IsValues(obj)`
* `GUARD_DORV_VALUES_INST_ATTR_FROM_DICT_TYPE`:
`_PyDictOrValues_IsValues(obj) || _PyObject_MakeInstanceAttributesFromDict(obj, dorv)`
* `GUARD_KEYS_VERSION_TYPE`: `owner_heap_type->ht_cached_keys->dk_version == auxiliary`
* `PYMETHOD_TYPE`: `Py_TYPE(val) == &PyMethod_Type`
* `PYFUNCTION_TYPE_VERSION_TYPE`:
`PyFunction_Check(callable) && func->func_version == auxiliary && code->co_argcount == oparg + (self_or_null != NULL)`
end of the instruction.
An `inst` without `stack_effect` is a transitional form to allow the original C code

View File

@ -75,11 +75,6 @@ class StackEffect(Node):
size: str = "" # Optional `[size]`
# Note: size cannot be combined with type or cond
# Optional `(type, refinement)`
type_prop: None | tuple[str, None | str] = field(
default_factory=lambda: None, init=True, compare=False, hash=False
)
def __repr__(self) -> str:
items = [self.name, self.type, self.cond, self.size]
while items and items[-1] == "":
@ -260,25 +255,14 @@ class Parser(PLexer):
@contextual
def stack_effect(self) -> StackEffect | None:
# IDENTIFIER [':' [IDENTIFIER [TIMES]] ['&' '(' IDENTIFIER ['+' IDENTIFIER] ')']] ['if' '(' expression ')']
# IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
# | IDENTIFIER '[' expression ']'
if tkn := self.expect(lx.IDENTIFIER):
type_text = ""
type_prop = None
if self.expect(lx.COLON):
if i := self.expect(lx.IDENTIFIER):
type_text = i.text.strip()
if self.expect(lx.TIMES):
type_text += " *"
if self.expect(lx.AND):
consumed_bracket = self.expect(lx.LPAREN) is not None
type_prop_text = self.require(lx.IDENTIFIER).text.strip()
refinement = None
if self.expect(lx.PLUS):
refinement = self.require(lx.IDENTIFIER).text.strip()
type_prop = (type_prop_text, refinement)
if consumed_bracket:
self.require(lx.RPAREN)
type_text = self.require(lx.IDENTIFIER).text.strip()
if self.expect(lx.TIMES):
type_text += " *"
cond_text = ""
if self.expect(lx.IF):
self.require(lx.LPAREN)
@ -295,7 +279,7 @@ class Parser(PLexer):
self.require(lx.RBRACKET)
type_text = "PyObject **"
size_text = size.text.strip()
return StackEffect(tkn.text, type_text, cond_text, size_text, type_prop)
return StackEffect(tkn.text, type_text, cond_text, size_text)
return None
@contextual

View File

@ -168,11 +168,11 @@ class Stack:
self.top_offset.push(var)
return ""
def flush(self, out: CWriter) -> None:
def flush(self, out: CWriter, cast_type: str = "PyObject *") -> None:
out.start_line()
for var in self.variables:
if not var.peek:
cast = "(PyObject *)" if var.type else ""
cast = f"({cast_type})" if var.type else ""
if var.name not in UNUSED and not var.is_array():
if var.condition:
out.emit(f"if ({var.condition}) ")

View File

@ -0,0 +1,235 @@
"""Generate the cases for the tier 2 redundancy eliminator/abstract interpreter.
Reads the instruction definitions from bytecodes.c and tier2_redundancy_eliminator_bytecodes.c.
Writes the cases to tier2_redundancy_eliminator_cases.c.h, which is #included in Python/optimizer_analysis.c.
"""
import argparse
import os.path
import sys
from analyzer import (
Analysis,
Instruction,
Uop,
Part,
analyze_files,
Skip,
StackItem,
analysis_error,
)
from generators_common import (
DEFAULT_INPUT,
ROOT,
write_header,
emit_tokens,
emit_to,
replace_sync_sp,
)
from cwriter import CWriter
from typing import TextIO, Iterator
from lexer import Token
from stack import StackOffset, Stack, SizeMismatch, UNUSED
DEFAULT_OUTPUT = ROOT / "Python/tier2_redundancy_eliminator_cases.c.h"
DEFAULT_ABSTRACT_INPUT = ROOT / "Python/tier2_redundancy_eliminator_bytecodes.c"
def validate_uop(override: Uop, uop: Uop) -> None:
    """Check that an abstract override is consistent with its base uop.

    Validation is not implemented yet; every override is accepted.
    """
    pass
def type_name(var: StackItem) -> str:
    """Return the C type used for *var* in the abstract interpreter.

    Array stack items become arrays of symbol pointers; an explicit type
    annotation on the item wins; everything else is one symbol pointer.
    """
    # Note: plain string literals here -- the originals were f-strings
    # with no placeholders.
    if var.is_array():
        return "_Py_UOpsSymType **"
    if var.type:
        return var.type
    return "_Py_UOpsSymType *"
def _declare_variable(var: StackItem, out: CWriter, seen: set[str]) -> None:
    """Emit a C declaration for *var* unless its name was already emitted."""
    if var.name in seen:
        return
    seen.add(var.name)
    if var.condition:
        # Conditional stack items may be absent at runtime; start as NULL.
        out.emit(f"{type_name(var)}{var.name} = NULL;\n")
    else:
        out.emit(f"{type_name(var)}{var.name};\n")


def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None:
    """Emit C declarations for the uop's stack inputs and outputs.

    Inputs are skipped entirely when *skip_inputs* is true (the default
    generated body never reads them); peeked outputs are never declared.
    """
    # The input/output loops previously duplicated the declaration logic;
    # it now lives in _declare_variable.
    seen = {"unused"}
    if not skip_inputs:
        for var in reversed(uop.stack.inputs):
            _declare_variable(var, out, seen)
    for var in uop.stack.outputs:
        if not var.peek:
            _declare_variable(var, out, seen)
def decref_inputs(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction | None,
) -> None:
    """Replace a DECREF_INPUTS() call with nothing.

    The abstract interpreter owns no references, so the call's three
    remaining tokens are consumed and an empty string is emitted in
    the call's place.
    """
    for _ in range(3):
        next(tkn_iter)
    out.emit_at("", tkn)
def emit_default(out: CWriter, uop: Uop) -> None:
    """Emit the default body for a uop with no abstract override.

    Every non-peek output becomes an unknown symbol, except outputs
    named ``null`` which become NULL symbols; array outputs are filled
    element by element.
    """
    # The original iterated with enumerate() but never used the index.
    for var in uop.stack.outputs:
        if var.name == "unused" or var.peek:
            continue
        if var.is_array():
            out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
            out.emit(f"{var.name}[_i] = sym_new_unknown(ctx);\n")
            out.emit(f"if ({var.name}[_i] == NULL) goto out_of_space;\n")
            out.emit("}\n")
        elif var.name == "null":
            out.emit(f"{var.name} = sym_new_null(ctx);\n")
            out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
        else:
            out.emit(f"{var.name} = sym_new_unknown(ctx);\n")
            out.emit(f"if ({var.name} == NULL) goto out_of_space;\n")
def write_uop(
    override: Uop | None,
    uop: Uop,
    out: CWriter,
    stack: Stack,
    debug: bool,
    skip_inputs: bool,
) -> None:
    """Emit the body of one abstract-interpreter case.

    Uses the *override* body when one exists, otherwise the generated
    default (see emit_default). Stack bookkeeping always follows the
    override's signature when present, so overrides may re-declare
    peeked items.
    """
    try:
        # The override's stack signature (not the base uop's) drives
        # the pops/pushes when an override exists.
        prototype = override if override else uop
        is_override = override is not None
        out.start_line()
        for var in reversed(prototype.stack.inputs):
            res = stack.pop(var)
            if not skip_inputs:
                out.emit(res)
        if not prototype.properties.stores_sp:
            for i, var in enumerate(prototype.stack.outputs):
                res = stack.push(var)
                if not var.peek or is_override:
                    out.emit(res)
        if debug:
            args = []
            for var in prototype.stack.inputs:
                if not var.peek or is_override:
                    args.append(var.name)
            out.emit(f'DEBUG_PRINTF({", ".join(args)});\n')
        if override:
            # Materialize cache entries as locals; 4-unit caches hold a
            # PyObject pointer, smaller ones a fixed-width integer.
            for cache in uop.caches:
                if cache.name != "unused":
                    if cache.size == 4:
                        type = cast = "PyObject *"
                    else:
                        type = f"uint{cache.size*16}_t "
                        cast = f"uint{cache.size*16}_t"
                    out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n")
        if override:
            replacement_funcs = {
                "DECREF_INPUTS": decref_inputs,
                "SYNC_SP": replace_sync_sp,
            }
            emit_tokens(out, override, stack, None, replacement_funcs)
        else:
            emit_default(out, uop)

        if prototype.properties.stores_sp:
            # Pushes were deferred until after the body for sp-storing uops.
            for i, var in enumerate(prototype.stack.outputs):
                if not var.peek or is_override:
                    out.emit(stack.push(var))
        out.start_line()
        stack.flush(out, cast_type="_Py_UOpsSymType *")
    except SizeMismatch as ex:
        # Re-anchor the error on the uop's body for a useful location.
        raise analysis_error(ex.args[0], uop.body[0])
# Uops that never get a generated abstract case.
SKIPS = ("_EXTENDED_ARG",)
def generate_abstract_interpreter(
    filenames: list[str],
    abstract: Analysis,
    base: Analysis,
    outfile: TextIO,
    debug: bool,
) -> None:
    """Write the abstract-interpreter switch cases to *outfile*.

    Every viable tier-2 uop in *base* gets a ``case``: if *abstract*
    provides an override, its body is emitted; otherwise a default body
    that pushes unknown symbols is generated.
    """
    write_header(__file__, filenames, outfile)
    out = CWriter(outfile, 2, False)
    out.emit("\n")
    # Set comprehension instead of set([list comprehension]) (ruff C403).
    base_uop_names = {uop.name for uop in base.uops.values()}
    for abstract_uop_name in abstract.uops:
        assert abstract_uop_name in base_uop_names, \
            f"All abstract uops should override base uops, but {abstract_uop_name} is not."
    for uop in base.uops.values():
        override: Uop | None = None
        if uop.name in abstract.uops:
            override = abstract.uops[uop.name]
            validate_uop(override, uop)
        # Skip uops that can never appear in a tier-2 trace.
        if uop.properties.tier_one_only:
            continue
        if uop.is_super():
            continue
        if not uop.is_viable():
            out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 */\n\n")
            continue
        out.emit(f"case {uop.name}: {{\n")
        if override:
            declare_variables(override, out, skip_inputs=False)
        else:
            # Default bodies never read their inputs.
            declare_variables(uop, out, skip_inputs=True)
        stack = Stack()
        write_uop(override, uop, out, stack, debug, skip_inputs=(override is None))
        out.start_line()
        out.emit("break;\n")
        out.emit("}")
        out.emit("\n\n")
def generate_tier2_abstract_from_files(
    filenames: list[str], outfilename: str, debug: bool = False
) -> None:
    """Generate the abstract cases from a (base, abstract) file pair.

    ``filenames[0]`` is the base instruction definition file and
    ``filenames[1]`` the abstract-override file.
    """
    assert len(filenames) == 2, "Need a base file and an abstract cases file."
    base_analysis = analyze_files([filenames[0]])
    abstract_analysis = analyze_files([filenames[1]])
    with open(outfilename, "w") as outfile:
        generate_abstract_interpreter(
            filenames, abstract_analysis, base_analysis, outfile, debug
        )
# Command-line interface; mirrors the other cases generators.
arg_parser = argparse.ArgumentParser(
    description="Generate the code for the tier 2 interpreter.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument("input", nargs=1, help="Abstract interpreter definition file")
# REMAINDER: everything after the first positional is a base file.
arg_parser.add_argument(
    "base", nargs=argparse.REMAINDER, help="The base instruction definition file(s)"
)
arg_parser.add_argument("-d", "--debug", help="Insert debug calls", action="store_true")
if __name__ == "__main__":
    args = arg_parser.parse_args()
    if len(args.base) == 0:
        # No explicit base files given: fall back to the defaults.
        # NOTE(review): both defaults are appended to args.input while
        # args.base stays empty, so analyze_files(args.base) below runs
        # on an empty list - confirm this fallback is intended.
        args.input.append(DEFAULT_INPUT)
        args.input.append(DEFAULT_ABSTRACT_INPUT)
    abstract = analyze_files(args.input)
    base = analyze_files(args.base)
    with open(args.output, "w") as outfile:
        generate_abstract_interpreter(args.input, abstract, base, outfile, args.debug)