gh-87092: expose the compiler's codegen to python for unit tests (GH-99111)

This commit is contained in:
Irit Katriel 2022-11-14 13:56:40 +00:00 committed by GitHub
parent 06d4e02c3b
commit a3ac9232f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 323 additions and 99 deletions

View File

@ -40,6 +40,13 @@ extern int _PyAST_Optimize(
_PyASTOptimizeState *state);
/* Access compiler internals for unit testing */
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
PyObject *ast,
PyObject *filename,
PyCompilerFlags *flags,
int optimize);
PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg(
PyObject *instructions,
PyObject *consts);

View File

@ -781,6 +781,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(arguments));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(argv));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(as_integer_ratio));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ast));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(attribute));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(authorizer_callback));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(autocommit));

View File

@ -267,6 +267,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(arguments)
STRUCT_FOR_ID(argv)
STRUCT_FOR_ID(as_integer_ratio)
STRUCT_FOR_ID(ast)
STRUCT_FOR_ID(attribute)
STRUCT_FOR_ID(authorizer_callback)
STRUCT_FOR_ID(autocommit)

View File

@ -773,6 +773,7 @@ extern "C" {
INIT_ID(arguments), \
INIT_ID(argv), \
INIT_ID(as_integer_ratio), \
INIT_ID(ast), \
INIT_ID(attribute), \
INIT_ID(authorizer_callback), \
INIT_ID(autocommit), \

View File

@ -440,6 +440,8 @@ _PyUnicode_InitStaticStrings(void) {
PyUnicode_InternInPlace(&string);
string = &_Py_ID(as_integer_ratio);
PyUnicode_InternInPlace(&string);
string = &_Py_ID(ast);
PyUnicode_InternInPlace(&string);
string = &_Py_ID(attribute);
PyUnicode_InternInPlace(&string);
string = &_Py_ID(authorizer_callback);

View File

@ -3,7 +3,7 @@
import unittest
import dis
import io
from _testinternalcapi import optimize_cfg
from _testinternalcapi import compiler_codegen, optimize_cfg
_UNSPECIFIED = object()
@ -44,8 +44,7 @@ class BytecodeTestCase(unittest.TestCase):
msg = msg % (opname, argval, disassembly)
self.fail(msg)
class CfgOptimizationTestCase(unittest.TestCase):
class CompilationStepTestCase(unittest.TestCase):
HAS_ARG = set(dis.hasarg)
HAS_TARGET = set(dis.hasjrel + dis.hasjabs + dis.hasexc)
@ -58,24 +57,35 @@ class CfgOptimizationTestCase(unittest.TestCase):
self.last_label += 1
return self.last_label
def complete_insts_info(self, insts):
# fill in omitted fields in location, and oparg 0 for ops with no arg.
instructions = []
for item in insts:
if isinstance(item, int):
instructions.append(item)
else:
assert isinstance(item, tuple)
inst = list(reversed(item))
opcode = dis.opmap[inst.pop()]
oparg = inst.pop() if opcode in self.HAS_ARG_OR_TARGET else 0
loc = inst + [-1] * (4 - len(inst))
instructions.append((opcode, oparg, *loc))
return instructions
def assertInstructionsMatch(self, actual_, expected_):
# get two lists where each entry is a label or
# an instruction tuple. Compare them, while mapping
# each actual label to a corresponding expected label
# based on their locations.
self.assertIsInstance(actual_, list)
self.assertIsInstance(expected_, list)
actual = self.normalize_insts(actual_)
expected = self.normalize_insts(expected_)
self.assertEqual(len(actual), len(expected))
# compare instructions
for act, exp in zip(actual, expected):
if isinstance(act, int):
self.assertEqual(exp, act)
continue
self.assertIsInstance(exp, tuple)
self.assertIsInstance(act, tuple)
# crop comparison to the provided expected values
if len(act) > len(exp):
act = act[:len(exp)]
self.assertEqual(exp, act)
def normalize_insts(self, insts):
""" Map labels to instruction index.
Remove labels which are not used as jump targets.
Map opcodes to opnames.
"""
labels_map = {}
targets = set()
@ -107,31 +117,32 @@ class CfgOptimizationTestCase(unittest.TestCase):
res.append((opcode, arg, *loc))
return res
class CodegenTestCase(CompilationStepTestCase):
def generate_code(self, ast):
insts = compiler_codegen(ast, "my_file.py", 0)
return insts
class CfgOptimizationTestCase(CompilationStepTestCase):
def complete_insts_info(self, insts):
# fill in omitted fields in location, and oparg 0 for ops with no arg.
instructions = []
for item in insts:
if isinstance(item, int):
instructions.append(item)
else:
assert isinstance(item, tuple)
inst = list(reversed(item))
opcode = dis.opmap[inst.pop()]
oparg = inst.pop() if opcode in self.HAS_ARG_OR_TARGET else 0
loc = inst + [-1] * (4 - len(inst))
instructions.append((opcode, oparg, *loc))
return instructions
def get_optimized(self, insts, consts):
insts = self.complete_insts_info(insts)
insts = optimize_cfg(insts, consts)
return insts, consts
def compareInstructions(self, actual_, expected_):
# get two lists where each entry is a label or
# an instruction tuple. Compare them, while mapping
# each actual label to a corresponding expected label
# based on their locations.
self.assertIsInstance(actual_, list)
self.assertIsInstance(expected_, list)
actual = self.normalize_insts(actual_)
expected = self.normalize_insts(expected_)
self.assertEqual(len(actual), len(expected))
# compare instructions
for act, exp in zip(actual, expected):
if isinstance(act, int):
self.assertEqual(exp, act)
continue
self.assertIsInstance(exp, tuple)
self.assertIsInstance(act, tuple)
# pad exp with -1's (if location info is incomplete)
exp += (-1,) * (len(act) - len(exp))
self.assertEqual(exp, act)

View File

@ -0,0 +1,50 @@
from test.support.bytecode_helper import CodegenTestCase
# Tests for the code-generation stage of the compiler.
# Examine the un-optimized code generated from the AST.
class IsolatedCodeGenTests(CodegenTestCase):
def codegen_test(self, snippet, expected_insts):
import ast
a = ast.parse(snippet, "my_file.py", "exec");
insts = self.generate_code(a)
self.assertInstructionsMatch(insts, expected_insts)
def test_if_expression(self):
snippet = "42 if True else 24"
false_lbl = self.Label()
expected = [
('RESUME', 0, 0),
('LOAD_CONST', 0, 1),
('POP_JUMP_IF_FALSE', false_lbl := self.Label(), 1),
('LOAD_CONST', 1, 1),
('JUMP', exit_lbl := self.Label()),
false_lbl,
('LOAD_CONST', 2, 1),
exit_lbl,
('POP_TOP', None),
]
self.codegen_test(snippet, expected)
def test_for_loop(self):
snippet = "for x in l:\n\tprint(x)"
false_lbl = self.Label()
expected = [
('RESUME', 0, 0),
('LOAD_NAME', 0, 1),
('GET_ITER', None, 1),
loop_lbl := self.Label(),
('FOR_ITER', exit_lbl := self.Label(), 1),
('STORE_NAME', None, 1),
('PUSH_NULL', None, 2),
('LOAD_NAME', None, 2),
('LOAD_NAME', None, 2),
('CALL', None, 2),
('POP_TOP', None),
('JUMP', loop_lbl),
exit_lbl,
('END_FOR', None),
]
self.codegen_test(snippet, expected)

View File

@ -984,7 +984,7 @@ class DirectiCfgOptimizerTests(CfgOptimizationTestCase):
if expected_consts is None:
expected_consts = consts
opt_insts, opt_consts = self.get_optimized(insts, consts)
self.compareInstructions(opt_insts, expected_insts)
self.assertInstructionsMatch(opt_insts, expected_insts)
self.assertEqual(opt_consts, expected_consts)
def test_conditional_jump_forward_non_const_condition(self):

View File

@ -14,7 +14,7 @@
#include "Python.h"
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_compile.h" // _PyCompile_OptimizeCfg()
#include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg
#include "pycore_fileutils.h" // _Py_normpath
#include "pycore_frame.h" // _PyInterpreterFrame
#include "pycore_gc.h" // PyGC_Head
@ -529,6 +529,26 @@ set_eval_frame_record(PyObject *self, PyObject *list)
Py_RETURN_NONE;
}
/*[clinic input]
_testinternalcapi.compiler_codegen -> object
ast: object
filename: object
optimize: int
Apply compiler code generation to an AST.
[clinic start generated code]*/
static PyObject *
_testinternalcapi_compiler_codegen_impl(PyObject *module, PyObject *ast,
PyObject *filename, int optimize)
/*[clinic end generated code: output=fbbbbfb34700c804 input=e9fbe6562f7f75e4]*/
{
PyCompilerFlags *flags = NULL;
return _PyCompile_CodeGen(ast, filename, flags, optimize);
}
/*[clinic input]
@ -612,6 +632,7 @@ static PyMethodDef TestMethods[] = {
{"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
{"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL},
{"set_eval_frame_record", set_eval_frame_record, METH_O, NULL},
_TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF
_TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF
{"get_interp_settings", get_interp_settings, METH_VARARGS, NULL},
{NULL, NULL} /* sentinel */

View File

@ -8,6 +8,69 @@ preserve
#endif
PyDoc_STRVAR(_testinternalcapi_compiler_codegen__doc__,
"compiler_codegen($module, /, ast, filename, optimize)\n"
"--\n"
"\n"
"Apply compiler code generation to an AST.");
#define _TESTINTERNALCAPI_COMPILER_CODEGEN_METHODDEF \
{"compiler_codegen", _PyCFunction_CAST(_testinternalcapi_compiler_codegen), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_compiler_codegen__doc__},
static PyObject *
_testinternalcapi_compiler_codegen_impl(PyObject *module, PyObject *ast,
PyObject *filename, int optimize);
static PyObject *
_testinternalcapi_compiler_codegen(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 3
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_item = { &_Py_ID(ast), &_Py_ID(filename), &_Py_ID(optimize), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"ast", "filename", "optimize", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "compiler_codegen",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[3];
PyObject *ast;
PyObject *filename;
int optimize;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 3, 3, 0, argsbuf);
if (!args) {
goto exit;
}
ast = args[0];
filename = args[1];
optimize = _PyLong_AsInt(args[2]);
if (optimize == -1 && PyErr_Occurred()) {
goto exit;
}
return_value = _testinternalcapi_compiler_codegen_impl(module, ast, filename, optimize);
exit:
return return_value;
}
PyDoc_STRVAR(_testinternalcapi_optimize_cfg__doc__,
"optimize_cfg($module, /, instructions, consts)\n"
"--\n"
@ -65,4 +128,4 @@ _testinternalcapi_optimize_cfg(PyObject *module, PyObject *const *args, Py_ssize
exit:
return return_value;
}
/*[clinic end generated code: output=3b1fd713290f68a9 input=a9049054013a1b77]*/
/*[clinic end generated code: output=efe95836482fd542 input=a9049054013a1b77]*/

View File

@ -122,7 +122,7 @@
(opcode) == STORE_FAST__STORE_FAST)
#define IS_TOP_LEVEL_AWAIT(c) ( \
(c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
(c->c_flags.cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
&& (c->u->u_ste->ste_type == ModuleBlock))
typedef _PyCompilerSrcLocation location;
@ -418,7 +418,7 @@ struct compiler {
PyObject *c_filename;
struct symtable *c_st;
PyFutureFeatures c_future; /* module's __future__ */
PyCompilerFlags *c_flags;
PyCompilerFlags c_flags;
int c_optimize; /* optimization level */
int c_interactive; /* true if in interactive mode */
@ -583,11 +583,11 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
return result;
}
static int
compiler_init(struct compiler *c)
{
memset(c, 0, sizeof(struct compiler));
static int
compiler_setup(struct compiler *c, mod_ty mod, PyObject *filename,
PyCompilerFlags flags, int optimize, PyArena *arena)
{
c->c_const_cache = PyDict_New();
if (!c->c_const_cache) {
return 0;
@ -595,57 +595,65 @@ compiler_init(struct compiler *c)
c->c_stack = PyList_New(0);
if (!c->c_stack) {
Py_CLEAR(c->c_const_cache);
return 0;
}
return 1;
}
PyCodeObject *
_PyAST_Compile(mod_ty mod, PyObject *filename, PyCompilerFlags *flags,
int optimize, PyArena *arena)
{
struct compiler c;
PyCodeObject *co = NULL;
PyCompilerFlags local_flags = _PyCompilerFlags_INIT;
int merged;
if (!compiler_init(&c))
return NULL;
c.c_filename = Py_NewRef(filename);
c.c_arena = arena;
if (!_PyFuture_FromAST(mod, filename, &c.c_future)) {
goto finally;
c->c_filename = Py_NewRef(filename);
c->c_arena = arena;
if (!_PyFuture_FromAST(mod, filename, &c->c_future)) {
return 0;
}
if (!flags) {
flags = &local_flags;
}
merged = c.c_future.ff_features | flags->cf_flags;
c.c_future.ff_features = merged;
flags->cf_flags = merged;
c.c_flags = flags;
c.c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize;
c.c_nestlevel = 0;
int merged = c->c_future.ff_features | flags.cf_flags;
c->c_future.ff_features = merged;
flags.cf_flags = merged;
c->c_flags = flags;
c->c_optimize = (optimize == -1) ? _Py_GetConfig()->optimization_level : optimize;
c->c_nestlevel = 0;
_PyASTOptimizeState state;
state.optimize = c.c_optimize;
state.optimize = c->c_optimize;
state.ff_features = merged;
if (!_PyAST_Optimize(mod, arena, &state)) {
goto finally;
return 0;
}
c.c_st = _PySymtable_Build(mod, filename, &c.c_future);
if (c.c_st == NULL) {
if (!PyErr_Occurred())
c->c_st = _PySymtable_Build(mod, filename, &c->c_future);
if (c->c_st == NULL) {
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_SystemError, "no symtable");
goto finally;
}
return 0;
}
return 1;
}
static struct compiler*
new_compiler(mod_ty mod, PyObject *filename, PyCompilerFlags *pflags,
int optimize, PyArena *arena)
{
PyCompilerFlags flags = pflags ? *pflags : _PyCompilerFlags_INIT;
struct compiler *c = PyMem_Calloc(1, sizeof(struct compiler));
if (c == NULL) {
return NULL;
}
if (!compiler_setup(c, mod, filename, flags, optimize, arena)) {
compiler_free(c);
return NULL;
}
return c;
}
PyCodeObject *
_PyAST_Compile(mod_ty mod, PyObject *filename, PyCompilerFlags *pflags,
int optimize, PyArena *arena)
{
struct compiler *c = new_compiler(mod, filename, pflags, optimize, arena);
if (c == NULL) {
return NULL;
}
co = compiler_mod(&c, mod);
finally:
compiler_free(&c);
PyCodeObject *co = compiler_mod(c, mod);
compiler_free(c);
assert(co || PyErr_Occurred());
return co;
}
@ -656,8 +664,9 @@ compiler_free(struct compiler *c)
if (c->c_st)
_PySymtable_Free(c->c_st);
Py_XDECREF(c->c_filename);
Py_DECREF(c->c_const_cache);
Py_DECREF(c->c_stack);
Py_XDECREF(c->c_const_cache);
Py_XDECREF(c->c_stack);
PyMem_Free(c);
}
static PyObject *
@ -2136,15 +2145,13 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts)
return 1;
}
static PyCodeObject *
compiler_mod(struct compiler *c, mod_ty mod)
static int
compiler_codegen(struct compiler *c, mod_ty mod)
{
PyCodeObject *co;
int addNone = 1;
_Py_DECLARE_STR(anon_module, "<module>");
if (!compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE,
mod, 1)) {
return NULL;
return 0;
}
location loc = LOCATION(1, 1, 0, 0);
switch (mod->kind) {
@ -2163,7 +2170,6 @@ compiler_mod(struct compiler *c, mod_ty mod)
break;
case Expression_kind:
VISIT_IN_SCOPE(c, expr, mod->v.Expression.body);
addNone = 0;
break;
default:
PyErr_Format(PyExc_SystemError,
@ -2171,7 +2177,17 @@ compiler_mod(struct compiler *c, mod_ty mod)
mod->kind);
return 0;
}
co = assemble(c, addNone);
return 1;
}
static PyCodeObject *
compiler_mod(struct compiler *c, mod_ty mod)
{
int addNone = mod->kind != Expression_kind;
if (!compiler_codegen(c, mod)) {
return NULL;
}
PyCodeObject *co = assemble(c, addNone);
compiler_exit_scope(c);
return co;
}
@ -8229,7 +8245,7 @@ compute_code_flags(struct compiler *c)
}
/* (Only) inherit compilerflags in PyCF_MASK */
flags |= (c->c_flags->cf_flags & PyCF_MASK);
flags |= (c->c_flags.cf_flags & PyCF_MASK);
if ((IS_TOP_LEVEL_AWAIT(c)) &&
ste->ste_coroutine &&
@ -9859,6 +9875,9 @@ duplicate_exits_without_lineno(cfg_builder *g)
/* Access to compiler optimizations for unit tests.
*
* _PyCompile_CodeGen takes and AST, applies code-gen and
* returns the unoptimized CFG as an instruction list.
*
* _PyCompile_OptimizeCfg takes an instruction list, constructs
* a CFG, optimizes it and converts back to an instruction list.
@ -9954,7 +9973,9 @@ cfg_to_instructions(cfg_builder *g)
for (int i = 0; i < b->b_iused; i++) {
struct instr *instr = &b->b_instr[i];
location loc = instr->i_loc;
int arg = HAS_TARGET(instr->i_opcode) ? instr->i_target->b_label : instr->i_oparg;
int arg = HAS_TARGET(instr->i_opcode) ?
instr->i_target->b_label : instr->i_oparg;
PyObject *inst_tuple = Py_BuildValue(
"(iiiiii)", instr->i_opcode, arg,
loc.lineno, loc.end_lineno,
@ -9977,6 +9998,52 @@ error:
return NULL;
}
PyObject *
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags,
int optimize)
{
PyObject *res = NULL;
if (!PyAST_Check(ast)) {
PyErr_SetString(PyExc_TypeError, "expected an AST");
return NULL;
}
PyArena *arena = _PyArena_New();
if (arena == NULL) {
return NULL;
}
mod_ty mod = PyAST_obj2mod(ast, arena, 0 /* exec */);
if (mod == NULL || !_PyAST_Validate(mod)) {
_PyArena_Free(arena);
return NULL;
}
struct compiler *c = new_compiler(mod, filename, pflags, optimize, arena);
if (c == NULL) {
_PyArena_Free(arena);
return NULL;
}
if (!compiler_codegen(c, mod)) {
goto finally;
}
cfg_builder *g = CFG_BUILDER(c);
if (translate_jump_labels_to_targets(g->g_entryblock) < 0) {
goto finally;
}
res = cfg_to_instructions(g);
finally:
compiler_exit_scope(c);
compiler_free(c);
_PyArena_Free(arena);
return res;
}
PyObject *
_PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts)