From e5fbe0cbd4be99ced5f000ad382208ad2a561c90 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 15 Sep 2020 18:03:34 +0200 Subject: [PATCH] bpo-41631: _ast module uses again a global state (#21961) Partially revert commit ac46eb4ad6662cf6d771b20d8963658b2186c48c: "bpo-38113: Update the Python-ast.c generator to PEP384 (gh-15957)". Using a module state per module instance is causing subtle practical problems. For example, the Mercurial project replaces the __import__() function to implement lazy import, whereas Python expected that "import _ast" always return a fully initialized _ast module. Add _PyAST_Fini() to clear the state at exit. The _ast module has no state (set _astmodule.m_size to 0). Remove astmodule_traverse(), astmodule_clear() and astmodule_free() functions. --- Include/internal/pycore_pylifecycle.h | 1 + Lib/ast.py | 37 ++- Lib/test/test_ast.py | 84 ++++++ .../2020-08-26-11-23-31.bpo-41631.3jZcd9.rst | 5 + Parser/asdl_c.py | 65 ++--- Python/Python-ast.c | 275 ++---------------- Python/pylifecycle.c | 6 + 7 files changed, 162 insertions(+), 311 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 22def3dbc8b..6d84e37232b 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -84,6 +84,7 @@ extern void _PyFaulthandler_Fini(void); extern void _PyHash_Fini(void); extern void _PyTraceMalloc_Fini(void); extern void _PyWarnings_Fini(PyInterpreterState *interp); +extern void _PyAST_Fini(PyThreadState *tstate); extern PyStatus _PyGILState_Init(PyThreadState *tstate); extern void _PyGILState_Fini(PyThreadState *tstate); diff --git a/Lib/ast.py b/Lib/ast.py index 65ebd0100de..d860917f4d0 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -497,18 +497,20 @@ class NodeTransformer(NodeVisitor): return node -# The following code is for backward compatibility. -# It will be removed in future. +# If the ast module is loaded more than once, only add deprecated methods once +if not hasattr(Constant, 'n'): + # The following code is for backward compatibility. + # It will be removed in future. -def _getter(self): - """Deprecated. Use value instead.""" - return self.value + def _getter(self): + """Deprecated. Use value instead.""" + return self.value -def _setter(self, value): - self.value = value + def _setter(self, value): + self.value = value -Constant.n = property(_getter, _setter) -Constant.s = property(_getter, _setter) + Constant.n = property(_getter, _setter) + Constant.s = property(_getter, _setter) class _ABC(type): @@ -600,14 +602,19 @@ class ExtSlice(slice): def __new__(cls, dims=(), **kwargs): return Tuple(list(dims), Load(), **kwargs) -def _dims_getter(self): - """Deprecated. Use elts instead.""" - return self.elts +# If the ast module is loaded more than once, only add deprecated methods once +if not hasattr(Tuple, 'dims'): + # The following code is for backward compatibility. + # It will be removed in future. -def _dims_setter(self, value): - self.elts = value + def _dims_getter(self): + """Deprecated. Use elts instead.""" + return self.elts -Tuple.dims = property(_dims_getter, _dims_setter) + def _dims_setter(self, value): + self.elts = value + + Tuple.dims = property(_dims_getter, _dims_setter) class Suite(mod): """Deprecated AST node class. Unused in Python 3.""" diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index f5aef61ec6f..5f57ce87244 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1,7 +1,9 @@ import ast +import builtins import dis import os import sys +import types import unittest import warnings import weakref @@ -1945,6 +1947,88 @@ class NodeVisitorTests(unittest.TestCase): ]) +@support.cpython_only +class ModuleStateTests(unittest.TestCase): + # bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state. + + def check_ast_module(self): + # Check that the _ast module still works as expected + code = 'x + 1' + filename = '' + mode = 'eval' + + # Create _ast.AST subclasses instances + ast_tree = compile(code, filename, mode, flags=ast.PyCF_ONLY_AST) + + # Call PyAST_Check() + code = compile(ast_tree, filename, mode) + self.assertIsInstance(code, types.CodeType) + + def test_reload_module(self): + # bpo-41194: Importing the _ast module twice must not crash. + with support.swap_item(sys.modules, '_ast', None): + del sys.modules['_ast'] + import _ast as ast1 + + del sys.modules['_ast'] + import _ast as ast2 + + self.check_ast_module() + + # Unloading the two _ast module instances must not crash. + del ast1 + del ast2 + support.gc_collect() + + self.check_ast_module() + + def test_sys_modules(self): + # bpo-41631: Test reproducing a Mercurial crash when PyAST_Check() + # imported the _ast module internally. + lazy_mod = object() + + def my_import(name, *args, **kw): + sys.modules[name] = lazy_mod + return lazy_mod + + with support.swap_item(sys.modules, '_ast', None): + del sys.modules['_ast'] + + with support.swap_attr(builtins, '__import__', my_import): + # Test that compile() does not import the _ast module + self.check_ast_module() + self.assertNotIn('_ast', sys.modules) + + # Sanity check of the test itself + import _ast + self.assertIs(_ast, lazy_mod) + + def test_subinterpreter(self): + # bpo-41631: Importing and using the _ast module in a subinterpreter + # must not crash. + code = dedent(''' + import _ast + import ast + import gc + import sys + import types + + # Create _ast.AST subclasses instances and call PyAST_Check() + ast_tree = compile('x+1', '', 'eval', + flags=ast.PyCF_ONLY_AST) + code = compile(ast_tree, 'string', 'eval') + if not isinstance(code, types.CodeType): + raise AssertionError + + # Unloading the _ast module must not crash. + del ast, _ast + del sys.modules['ast'], sys.modules['_ast'] + gc.collect() + ''') + res = support.run_in_subinterp(code) + self.assertEqual(res, 0) + + def main(): if __name__ != '__main__': return diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst b/Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst new file mode 100644 index 00000000000..68bb51024d9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-08-26-11-23-31.bpo-41631.3jZcd9.rst @@ -0,0 +1,5 @@ +The ``_ast`` module uses again a global state. Using a module state per module +instance is causing subtle practical problems. For example, the Mercurial +project replaces the ``__import__()`` function to implement lazy import, +whereas Python expected that ``import _ast`` always return a fully initialized +``_ast`` module. diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 6fe44b99f79..0c053393d68 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1089,11 +1089,9 @@ static PyModuleDef_Slot astmodule_slots[] = { static struct PyModuleDef _astmodule = { PyModuleDef_HEAD_INIT, .m_name = "_ast", - .m_size = sizeof(astmodulestate), + // The _ast module uses a global state (global_ast_state). + .m_size = 0, .m_slots = astmodule_slots, - .m_traverse = astmodule_traverse, - .m_clear = astmodule_clear, - .m_free = astmodule_free, }; PyMODINIT_FUNC @@ -1374,59 +1372,40 @@ def generate_module_def(f, mod): f.write(' PyObject *' + s + ';\n') f.write('} astmodulestate;\n\n') f.write(""" -static astmodulestate* -get_ast_state(PyObject *module) -{ - void *state = PyModule_GetState(module); - assert(state != NULL); - return (astmodulestate*)state; -} +// Forward declaration +static int init_types(astmodulestate *state); + +// bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state. +static astmodulestate global_ast_state = {0}; static astmodulestate* get_global_ast_state(void) { - _Py_IDENTIFIER(_ast); - PyObject *name = _PyUnicode_FromId(&PyId__ast); // borrowed reference - if (name == NULL) { + astmodulestate* state = &global_ast_state; + if (!init_types(state)) { return NULL; } - PyObject *module = PyImport_GetModule(name); - if (module == NULL) { - if (PyErr_Occurred()) { - return NULL; - } - module = PyImport_Import(name); - if (module == NULL) { - return NULL; - } - } - astmodulestate *state = get_ast_state(module); - Py_DECREF(module); return state; } -static int astmodule_clear(PyObject *module) +static astmodulestate* +get_ast_state(PyObject* Py_UNUSED(module)) { - astmodulestate *state = get_ast_state(module); + astmodulestate* state = get_global_ast_state(); + // get_ast_state() must only be called after _ast module is imported, + // and astmodule_exec() calls init_types() + assert(state != NULL); + return state; +} + +void _PyAST_Fini(PyThreadState *tstate) +{ + astmodulestate* state = &global_ast_state; """) for s in module_state: f.write(" Py_CLEAR(state->" + s + ');\n') f.write(""" - return 0; -} - -static int astmodule_traverse(PyObject *module, visitproc visit, void* arg) -{ - astmodulestate *state = get_ast_state(module); -""") - for s in module_state: - f.write(" Py_VISIT(state->" + s + ');\n') - f.write(""" - return 0; -} - -static void astmodule_free(void* module) { - astmodule_clear((PyObject*)module); + state->initialized = 0; } """) diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 396a6832702..094010e6c9d 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -224,40 +224,35 @@ typedef struct { } astmodulestate; -static astmodulestate* -get_ast_state(PyObject *module) -{ - void *state = PyModule_GetState(module); - assert(state != NULL); - return (astmodulestate*)state; -} +// Forward declaration +static int init_types(astmodulestate *state); + +// bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state. +static astmodulestate global_ast_state = {0}; static astmodulestate* get_global_ast_state(void) { - _Py_IDENTIFIER(_ast); - PyObject *name = _PyUnicode_FromId(&PyId__ast); // borrowed reference - if (name == NULL) { + astmodulestate* state = &global_ast_state; + if (!init_types(state)) { return NULL; } - PyObject *module = PyImport_GetModule(name); - if (module == NULL) { - if (PyErr_Occurred()) { - return NULL; - } - module = PyImport_Import(name); - if (module == NULL) { - return NULL; - } - } - astmodulestate *state = get_ast_state(module); - Py_DECREF(module); return state; } -static int astmodule_clear(PyObject *module) +static astmodulestate* +get_ast_state(PyObject* Py_UNUSED(module)) { - astmodulestate *state = get_ast_state(module); + astmodulestate* state = get_global_ast_state(); + // get_ast_state() must only be called after _ast module is imported, + // and astmodule_exec() calls init_types() + assert(state != NULL); + return state; +} + +void _PyAST_Fini(PyThreadState *tstate) +{ + astmodulestate* state = &global_ast_state; Py_CLEAR(state->AST_type); Py_CLEAR(state->Add_singleton); Py_CLEAR(state->Add_type); @@ -472,231 +467,7 @@ static int astmodule_clear(PyObject *module) Py_CLEAR(state->vararg); Py_CLEAR(state->withitem_type); - return 0; -} - -static int astmodule_traverse(PyObject *module, visitproc visit, void* arg) -{ - astmodulestate *state = get_ast_state(module); - Py_VISIT(state->AST_type); - Py_VISIT(state->Add_singleton); - Py_VISIT(state->Add_type); - Py_VISIT(state->And_singleton); - Py_VISIT(state->And_type); - Py_VISIT(state->AnnAssign_type); - Py_VISIT(state->Assert_type); - Py_VISIT(state->Assign_type); - Py_VISIT(state->AsyncFor_type); - Py_VISIT(state->AsyncFunctionDef_type); - Py_VISIT(state->AsyncWith_type); - Py_VISIT(state->Attribute_type); - Py_VISIT(state->AugAssign_type); - Py_VISIT(state->Await_type); - Py_VISIT(state->BinOp_type); - Py_VISIT(state->BitAnd_singleton); - Py_VISIT(state->BitAnd_type); - Py_VISIT(state->BitOr_singleton); - Py_VISIT(state->BitOr_type); - Py_VISIT(state->BitXor_singleton); - Py_VISIT(state->BitXor_type); - Py_VISIT(state->BoolOp_type); - Py_VISIT(state->Break_type); - Py_VISIT(state->Call_type); - Py_VISIT(state->ClassDef_type); - Py_VISIT(state->Compare_type); - Py_VISIT(state->Constant_type); - Py_VISIT(state->Continue_type); - Py_VISIT(state->Del_singleton); - Py_VISIT(state->Del_type); - Py_VISIT(state->Delete_type); - Py_VISIT(state->DictComp_type); - Py_VISIT(state->Dict_type); - Py_VISIT(state->Div_singleton); - Py_VISIT(state->Div_type); - Py_VISIT(state->Eq_singleton); - Py_VISIT(state->Eq_type); - Py_VISIT(state->ExceptHandler_type); - Py_VISIT(state->Expr_type); - Py_VISIT(state->Expression_type); - Py_VISIT(state->FloorDiv_singleton); - Py_VISIT(state->FloorDiv_type); - Py_VISIT(state->For_type); - Py_VISIT(state->FormattedValue_type); - Py_VISIT(state->FunctionDef_type); - Py_VISIT(state->FunctionType_type); - Py_VISIT(state->GeneratorExp_type); - Py_VISIT(state->Global_type); - Py_VISIT(state->GtE_singleton); - Py_VISIT(state->GtE_type); - Py_VISIT(state->Gt_singleton); - Py_VISIT(state->Gt_type); - Py_VISIT(state->IfExp_type); - Py_VISIT(state->If_type); - Py_VISIT(state->ImportFrom_type); - Py_VISIT(state->Import_type); - Py_VISIT(state->In_singleton); - Py_VISIT(state->In_type); - Py_VISIT(state->Interactive_type); - Py_VISIT(state->Invert_singleton); - Py_VISIT(state->Invert_type); - Py_VISIT(state->IsNot_singleton); - Py_VISIT(state->IsNot_type); - Py_VISIT(state->Is_singleton); - Py_VISIT(state->Is_type); - Py_VISIT(state->JoinedStr_type); - Py_VISIT(state->LShift_singleton); - Py_VISIT(state->LShift_type); - Py_VISIT(state->Lambda_type); - Py_VISIT(state->ListComp_type); - Py_VISIT(state->List_type); - Py_VISIT(state->Load_singleton); - Py_VISIT(state->Load_type); - Py_VISIT(state->LtE_singleton); - Py_VISIT(state->LtE_type); - Py_VISIT(state->Lt_singleton); - Py_VISIT(state->Lt_type); - Py_VISIT(state->MatMult_singleton); - Py_VISIT(state->MatMult_type); - Py_VISIT(state->Mod_singleton); - Py_VISIT(state->Mod_type); - Py_VISIT(state->Module_type); - Py_VISIT(state->Mult_singleton); - Py_VISIT(state->Mult_type); - Py_VISIT(state->Name_type); - Py_VISIT(state->NamedExpr_type); - Py_VISIT(state->Nonlocal_type); - Py_VISIT(state->NotEq_singleton); - Py_VISIT(state->NotEq_type); - Py_VISIT(state->NotIn_singleton); - Py_VISIT(state->NotIn_type); - Py_VISIT(state->Not_singleton); - Py_VISIT(state->Not_type); - Py_VISIT(state->Or_singleton); - Py_VISIT(state->Or_type); - Py_VISIT(state->Pass_type); - Py_VISIT(state->Pow_singleton); - Py_VISIT(state->Pow_type); - Py_VISIT(state->RShift_singleton); - Py_VISIT(state->RShift_type); - Py_VISIT(state->Raise_type); - Py_VISIT(state->Return_type); - Py_VISIT(state->SetComp_type); - Py_VISIT(state->Set_type); - Py_VISIT(state->Slice_type); - Py_VISIT(state->Starred_type); - Py_VISIT(state->Store_singleton); - Py_VISIT(state->Store_type); - Py_VISIT(state->Sub_singleton); - Py_VISIT(state->Sub_type); - Py_VISIT(state->Subscript_type); - Py_VISIT(state->Try_type); - Py_VISIT(state->Tuple_type); - Py_VISIT(state->TypeIgnore_type); - Py_VISIT(state->UAdd_singleton); - Py_VISIT(state->UAdd_type); - Py_VISIT(state->USub_singleton); - Py_VISIT(state->USub_type); - Py_VISIT(state->UnaryOp_type); - Py_VISIT(state->While_type); - Py_VISIT(state->With_type); - Py_VISIT(state->YieldFrom_type); - Py_VISIT(state->Yield_type); - Py_VISIT(state->__dict__); - Py_VISIT(state->__doc__); - Py_VISIT(state->__module__); - Py_VISIT(state->_attributes); - Py_VISIT(state->_fields); - Py_VISIT(state->alias_type); - Py_VISIT(state->annotation); - Py_VISIT(state->arg); - Py_VISIT(state->arg_type); - Py_VISIT(state->args); - Py_VISIT(state->argtypes); - Py_VISIT(state->arguments_type); - Py_VISIT(state->asname); - Py_VISIT(state->ast); - Py_VISIT(state->attr); - Py_VISIT(state->bases); - Py_VISIT(state->body); - Py_VISIT(state->boolop_type); - Py_VISIT(state->cause); - Py_VISIT(state->cmpop_type); - Py_VISIT(state->col_offset); - Py_VISIT(state->comparators); - Py_VISIT(state->comprehension_type); - Py_VISIT(state->context_expr); - Py_VISIT(state->conversion); - Py_VISIT(state->ctx); - Py_VISIT(state->decorator_list); - Py_VISIT(state->defaults); - Py_VISIT(state->elt); - Py_VISIT(state->elts); - Py_VISIT(state->end_col_offset); - Py_VISIT(state->end_lineno); - Py_VISIT(state->exc); - Py_VISIT(state->excepthandler_type); - Py_VISIT(state->expr_context_type); - Py_VISIT(state->expr_type); - Py_VISIT(state->finalbody); - Py_VISIT(state->format_spec); - Py_VISIT(state->func); - Py_VISIT(state->generators); - Py_VISIT(state->handlers); - Py_VISIT(state->id); - Py_VISIT(state->ifs); - Py_VISIT(state->is_async); - Py_VISIT(state->items); - Py_VISIT(state->iter); - Py_VISIT(state->key); - Py_VISIT(state->keys); - Py_VISIT(state->keyword_type); - Py_VISIT(state->keywords); - Py_VISIT(state->kind); - Py_VISIT(state->kw_defaults); - Py_VISIT(state->kwarg); - Py_VISIT(state->kwonlyargs); - Py_VISIT(state->left); - Py_VISIT(state->level); - Py_VISIT(state->lineno); - Py_VISIT(state->lower); - Py_VISIT(state->mod_type); - Py_VISIT(state->module); - Py_VISIT(state->msg); - Py_VISIT(state->name); - Py_VISIT(state->names); - Py_VISIT(state->op); - Py_VISIT(state->operand); - Py_VISIT(state->operator_type); - Py_VISIT(state->ops); - Py_VISIT(state->optional_vars); - Py_VISIT(state->orelse); - Py_VISIT(state->posonlyargs); - Py_VISIT(state->returns); - Py_VISIT(state->right); - Py_VISIT(state->simple); - Py_VISIT(state->slice); - Py_VISIT(state->step); - Py_VISIT(state->stmt_type); - Py_VISIT(state->tag); - Py_VISIT(state->target); - Py_VISIT(state->targets); - Py_VISIT(state->test); - Py_VISIT(state->type); - Py_VISIT(state->type_comment); - Py_VISIT(state->type_ignore_type); - Py_VISIT(state->type_ignores); - Py_VISIT(state->unaryop_type); - Py_VISIT(state->upper); - Py_VISIT(state->value); - Py_VISIT(state->values); - Py_VISIT(state->vararg); - Py_VISIT(state->withitem_type); - - return 0; -} - -static void astmodule_free(void* module) { - astmodule_clear((PyObject*)module); + state->initialized = 0; } static int init_identifiers(astmodulestate *state) @@ -10316,11 +10087,9 @@ static PyModuleDef_Slot astmodule_slots[] = { static struct PyModuleDef _astmodule = { PyModuleDef_HEAD_INIT, .m_name = "_ast", - .m_size = sizeof(astmodulestate), + // The _ast module uses a global state (global_ast_state). + .m_size = 0, .m_slots = astmodule_slots, - .m_traverse = astmodule_traverse, - .m_clear = astmodule_clear, - .m_free = astmodule_free, }; PyMODINIT_FUNC diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index ab5a6767864..75d57805c07 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1259,6 +1259,12 @@ flush_std_files(void) static void finalize_interp_types(PyThreadState *tstate) { + // The _ast module state is shared by all interpreters. + // The state must only be cleared by the main interpreter. + if (_Py_IsMainInterpreter(tstate)) { + _PyAST_Fini(tstate); + } + _PyExc_Fini(tstate); _PyFrame_Fini(tstate); _PyAsyncGen_Fini(tstate);