bpo-41631: _ast module uses again a global state (#21961)

Partially revert commit ac46eb4ad6662cf6d771b20d8963658b2186c48c:
"bpo-38113: Update the Python-ast.c generator to PEP384 (gh-15957)".

Using a module state per module instance is causing subtle practical
problems.

For example, the Mercurial project replaces the __import__() function
to implement lazy import, whereas Python expected that "import _ast"
always return a fully initialized _ast module.

Add _PyAST_Fini() to clear the state at exit.

The _ast module has no state (set _astmodule.m_size to 0). Remove
astmodule_traverse(), astmodule_clear() and astmodule_free()
functions.
This commit is contained in:
Victor Stinner 2020-09-15 18:03:34 +02:00 committed by GitHub
parent 7bcc6456ad
commit e5fbe0cbd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 162 additions and 311 deletions

View File

@ -84,6 +84,7 @@ extern void _PyFaulthandler_Fini(void);
extern void _PyHash_Fini(void); extern void _PyHash_Fini(void);
extern void _PyTraceMalloc_Fini(void); extern void _PyTraceMalloc_Fini(void);
extern void _PyWarnings_Fini(PyInterpreterState *interp); extern void _PyWarnings_Fini(PyInterpreterState *interp);
extern void _PyAST_Fini(PyThreadState *tstate);
extern PyStatus _PyGILState_Init(PyThreadState *tstate); extern PyStatus _PyGILState_Init(PyThreadState *tstate);
extern void _PyGILState_Fini(PyThreadState *tstate); extern void _PyGILState_Fini(PyThreadState *tstate);

View File

@ -497,18 +497,20 @@ class NodeTransformer(NodeVisitor):
return node return node
# The following code is for backward compatibility. # If the ast module is loaded more than once, only add deprecated methods once
# It will be removed in future. if not hasattr(Constant, 'n'):
# The following code is for backward compatibility.
# It will be removed in future.
def _getter(self): def _getter(self):
"""Deprecated. Use value instead.""" """Deprecated. Use value instead."""
return self.value return self.value
def _setter(self, value): def _setter(self, value):
self.value = value self.value = value
Constant.n = property(_getter, _setter) Constant.n = property(_getter, _setter)
Constant.s = property(_getter, _setter) Constant.s = property(_getter, _setter)
class _ABC(type): class _ABC(type):
@ -600,14 +602,19 @@ class ExtSlice(slice):
def __new__(cls, dims=(), **kwargs): def __new__(cls, dims=(), **kwargs):
return Tuple(list(dims), Load(), **kwargs) return Tuple(list(dims), Load(), **kwargs)
def _dims_getter(self): # If the ast module is loaded more than once, only add deprecated methods once
if not hasattr(Tuple, 'dims'):
# The following code is for backward compatibility.
# It will be removed in future.
def _dims_getter(self):
"""Deprecated. Use elts instead.""" """Deprecated. Use elts instead."""
return self.elts return self.elts
def _dims_setter(self, value): def _dims_setter(self, value):
self.elts = value self.elts = value
Tuple.dims = property(_dims_getter, _dims_setter) Tuple.dims = property(_dims_getter, _dims_setter)
class Suite(mod): class Suite(mod):
"""Deprecated AST node class. Unused in Python 3.""" """Deprecated AST node class. Unused in Python 3."""

View File

@ -1,7 +1,9 @@
import ast import ast
import builtins
import dis import dis
import os import os
import sys import sys
import types
import unittest import unittest
import warnings import warnings
import weakref import weakref
@ -1945,6 +1947,88 @@ class NodeVisitorTests(unittest.TestCase):
]) ])
@support.cpython_only
class ModuleStateTests(unittest.TestCase):
# bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
def check_ast_module(self):
# Check that the _ast module still works as expected
code = 'x + 1'
filename = '<string>'
mode = 'eval'
# Create _ast.AST subclasses instances
ast_tree = compile(code, filename, mode, flags=ast.PyCF_ONLY_AST)
# Call PyAST_Check()
code = compile(ast_tree, filename, mode)
self.assertIsInstance(code, types.CodeType)
def test_reload_module(self):
# bpo-41194: Importing the _ast module twice must not crash.
with support.swap_item(sys.modules, '_ast', None):
del sys.modules['_ast']
import _ast as ast1
del sys.modules['_ast']
import _ast as ast2
self.check_ast_module()
# Unloading the two _ast module instances must not crash.
del ast1
del ast2
support.gc_collect()
self.check_ast_module()
def test_sys_modules(self):
# bpo-41631: Test reproducing a Mercurial crash when PyAST_Check()
# imported the _ast module internally.
lazy_mod = object()
def my_import(name, *args, **kw):
sys.modules[name] = lazy_mod
return lazy_mod
with support.swap_item(sys.modules, '_ast', None):
del sys.modules['_ast']
with support.swap_attr(builtins, '__import__', my_import):
# Test that compile() does not import the _ast module
self.check_ast_module()
self.assertNotIn('_ast', sys.modules)
# Sanity check of the test itself
import _ast
self.assertIs(_ast, lazy_mod)
def test_subinterpreter(self):
# bpo-41631: Importing and using the _ast module in a subinterpreter
# must not crash.
code = dedent('''
import _ast
import ast
import gc
import sys
import types
# Create _ast.AST subclasses instances and call PyAST_Check()
ast_tree = compile('x+1', '<string>', 'eval',
flags=ast.PyCF_ONLY_AST)
code = compile(ast_tree, 'string', 'eval')
if not isinstance(code, types.CodeType):
raise AssertionError
# Unloading the _ast module must not crash.
del ast, _ast
del sys.modules['ast'], sys.modules['_ast']
gc.collect()
''')
res = support.run_in_subinterp(code)
self.assertEqual(res, 0)
def main(): def main():
if __name__ != '__main__': if __name__ != '__main__':
return return

View File

@ -0,0 +1,5 @@
The ``_ast`` module uses again a global state. Using a module state per module
instance is causing subtle practical problems. For example, the Mercurial
project replaces the ``__import__()`` function to implement lazy import,
whereas Python expected that ``import _ast`` always return a fully initialized
``_ast`` module.

View File

@ -1089,11 +1089,9 @@ static PyModuleDef_Slot astmodule_slots[] = {
static struct PyModuleDef _astmodule = { static struct PyModuleDef _astmodule = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
.m_name = "_ast", .m_name = "_ast",
.m_size = sizeof(astmodulestate), // The _ast module uses a global state (global_ast_state).
.m_size = 0,
.m_slots = astmodule_slots, .m_slots = astmodule_slots,
.m_traverse = astmodule_traverse,
.m_clear = astmodule_clear,
.m_free = astmodule_free,
}; };
PyMODINIT_FUNC PyMODINIT_FUNC
@ -1374,59 +1372,40 @@ def generate_module_def(f, mod):
f.write(' PyObject *' + s + ';\n') f.write(' PyObject *' + s + ';\n')
f.write('} astmodulestate;\n\n') f.write('} astmodulestate;\n\n')
f.write(""" f.write("""
static astmodulestate* // Forward declaration
get_ast_state(PyObject *module) static int init_types(astmodulestate *state);
{
void *state = PyModule_GetState(module); // bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
assert(state != NULL); static astmodulestate global_ast_state = {0};
return (astmodulestate*)state;
}
static astmodulestate* static astmodulestate*
get_global_ast_state(void) get_global_ast_state(void)
{ {
_Py_IDENTIFIER(_ast); astmodulestate* state = &global_ast_state;
PyObject *name = _PyUnicode_FromId(&PyId__ast); // borrowed reference if (!init_types(state)) {
if (name == NULL) {
return NULL; return NULL;
} }
PyObject *module = PyImport_GetModule(name);
if (module == NULL) {
if (PyErr_Occurred()) {
return NULL;
}
module = PyImport_Import(name);
if (module == NULL) {
return NULL;
}
}
astmodulestate *state = get_ast_state(module);
Py_DECREF(module);
return state; return state;
} }
static int astmodule_clear(PyObject *module) static astmodulestate*
get_ast_state(PyObject* Py_UNUSED(module))
{ {
astmodulestate *state = get_ast_state(module); astmodulestate* state = get_global_ast_state();
// get_ast_state() must only be called after _ast module is imported,
// and astmodule_exec() calls init_types()
assert(state != NULL);
return state;
}
void _PyAST_Fini(PyThreadState *tstate)
{
astmodulestate* state = &global_ast_state;
""") """)
for s in module_state: for s in module_state:
f.write(" Py_CLEAR(state->" + s + ');\n') f.write(" Py_CLEAR(state->" + s + ');\n')
f.write(""" f.write("""
return 0; state->initialized = 0;
}
static int astmodule_traverse(PyObject *module, visitproc visit, void* arg)
{
astmodulestate *state = get_ast_state(module);
""")
for s in module_state:
f.write(" Py_VISIT(state->" + s + ');\n')
f.write("""
return 0;
}
static void astmodule_free(void* module) {
astmodule_clear((PyObject*)module);
} }
""") """)

275
Python/Python-ast.c generated
View File

@ -224,40 +224,35 @@ typedef struct {
} astmodulestate; } astmodulestate;
static astmodulestate* // Forward declaration
get_ast_state(PyObject *module) static int init_types(astmodulestate *state);
{
void *state = PyModule_GetState(module); // bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
assert(state != NULL); static astmodulestate global_ast_state = {0};
return (astmodulestate*)state;
}
static astmodulestate* static astmodulestate*
get_global_ast_state(void) get_global_ast_state(void)
{ {
_Py_IDENTIFIER(_ast); astmodulestate* state = &global_ast_state;
PyObject *name = _PyUnicode_FromId(&PyId__ast); // borrowed reference if (!init_types(state)) {
if (name == NULL) {
return NULL; return NULL;
} }
PyObject *module = PyImport_GetModule(name);
if (module == NULL) {
if (PyErr_Occurred()) {
return NULL;
}
module = PyImport_Import(name);
if (module == NULL) {
return NULL;
}
}
astmodulestate *state = get_ast_state(module);
Py_DECREF(module);
return state; return state;
} }
static int astmodule_clear(PyObject *module) static astmodulestate*
get_ast_state(PyObject* Py_UNUSED(module))
{ {
astmodulestate *state = get_ast_state(module); astmodulestate* state = get_global_ast_state();
// get_ast_state() must only be called after _ast module is imported,
// and astmodule_exec() calls init_types()
assert(state != NULL);
return state;
}
void _PyAST_Fini(PyThreadState *tstate)
{
astmodulestate* state = &global_ast_state;
Py_CLEAR(state->AST_type); Py_CLEAR(state->AST_type);
Py_CLEAR(state->Add_singleton); Py_CLEAR(state->Add_singleton);
Py_CLEAR(state->Add_type); Py_CLEAR(state->Add_type);
@ -472,231 +467,7 @@ static int astmodule_clear(PyObject *module)
Py_CLEAR(state->vararg); Py_CLEAR(state->vararg);
Py_CLEAR(state->withitem_type); Py_CLEAR(state->withitem_type);
return 0; state->initialized = 0;
}
static int astmodule_traverse(PyObject *module, visitproc visit, void* arg)
{
astmodulestate *state = get_ast_state(module);
Py_VISIT(state->AST_type);
Py_VISIT(state->Add_singleton);
Py_VISIT(state->Add_type);
Py_VISIT(state->And_singleton);
Py_VISIT(state->And_type);
Py_VISIT(state->AnnAssign_type);
Py_VISIT(state->Assert_type);
Py_VISIT(state->Assign_type);
Py_VISIT(state->AsyncFor_type);
Py_VISIT(state->AsyncFunctionDef_type);
Py_VISIT(state->AsyncWith_type);
Py_VISIT(state->Attribute_type);
Py_VISIT(state->AugAssign_type);
Py_VISIT(state->Await_type);
Py_VISIT(state->BinOp_type);
Py_VISIT(state->BitAnd_singleton);
Py_VISIT(state->BitAnd_type);
Py_VISIT(state->BitOr_singleton);
Py_VISIT(state->BitOr_type);
Py_VISIT(state->BitXor_singleton);
Py_VISIT(state->BitXor_type);
Py_VISIT(state->BoolOp_type);
Py_VISIT(state->Break_type);
Py_VISIT(state->Call_type);
Py_VISIT(state->ClassDef_type);
Py_VISIT(state->Compare_type);
Py_VISIT(state->Constant_type);
Py_VISIT(state->Continue_type);
Py_VISIT(state->Del_singleton);
Py_VISIT(state->Del_type);
Py_VISIT(state->Delete_type);
Py_VISIT(state->DictComp_type);
Py_VISIT(state->Dict_type);
Py_VISIT(state->Div_singleton);
Py_VISIT(state->Div_type);
Py_VISIT(state->Eq_singleton);
Py_VISIT(state->Eq_type);
Py_VISIT(state->ExceptHandler_type);
Py_VISIT(state->Expr_type);
Py_VISIT(state->Expression_type);
Py_VISIT(state->FloorDiv_singleton);
Py_VISIT(state->FloorDiv_type);
Py_VISIT(state->For_type);
Py_VISIT(state->FormattedValue_type);
Py_VISIT(state->FunctionDef_type);
Py_VISIT(state->FunctionType_type);
Py_VISIT(state->GeneratorExp_type);
Py_VISIT(state->Global_type);
Py_VISIT(state->GtE_singleton);
Py_VISIT(state->GtE_type);
Py_VISIT(state->Gt_singleton);
Py_VISIT(state->Gt_type);
Py_VISIT(state->IfExp_type);
Py_VISIT(state->If_type);
Py_VISIT(state->ImportFrom_type);
Py_VISIT(state->Import_type);
Py_VISIT(state->In_singleton);
Py_VISIT(state->In_type);
Py_VISIT(state->Interactive_type);
Py_VISIT(state->Invert_singleton);
Py_VISIT(state->Invert_type);
Py_VISIT(state->IsNot_singleton);
Py_VISIT(state->IsNot_type);
Py_VISIT(state->Is_singleton);
Py_VISIT(state->Is_type);
Py_VISIT(state->JoinedStr_type);
Py_VISIT(state->LShift_singleton);
Py_VISIT(state->LShift_type);
Py_VISIT(state->Lambda_type);
Py_VISIT(state->ListComp_type);
Py_VISIT(state->List_type);
Py_VISIT(state->Load_singleton);
Py_VISIT(state->Load_type);
Py_VISIT(state->LtE_singleton);
Py_VISIT(state->LtE_type);
Py_VISIT(state->Lt_singleton);
Py_VISIT(state->Lt_type);
Py_VISIT(state->MatMult_singleton);
Py_VISIT(state->MatMult_type);
Py_VISIT(state->Mod_singleton);
Py_VISIT(state->Mod_type);
Py_VISIT(state->Module_type);
Py_VISIT(state->Mult_singleton);
Py_VISIT(state->Mult_type);
Py_VISIT(state->Name_type);
Py_VISIT(state->NamedExpr_type);
Py_VISIT(state->Nonlocal_type);
Py_VISIT(state->NotEq_singleton);
Py_VISIT(state->NotEq_type);
Py_VISIT(state->NotIn_singleton);
Py_VISIT(state->NotIn_type);
Py_VISIT(state->Not_singleton);
Py_VISIT(state->Not_type);
Py_VISIT(state->Or_singleton);
Py_VISIT(state->Or_type);
Py_VISIT(state->Pass_type);
Py_VISIT(state->Pow_singleton);
Py_VISIT(state->Pow_type);
Py_VISIT(state->RShift_singleton);
Py_VISIT(state->RShift_type);
Py_VISIT(state->Raise_type);
Py_VISIT(state->Return_type);
Py_VISIT(state->SetComp_type);
Py_VISIT(state->Set_type);
Py_VISIT(state->Slice_type);
Py_VISIT(state->Starred_type);
Py_VISIT(state->Store_singleton);
Py_VISIT(state->Store_type);
Py_VISIT(state->Sub_singleton);
Py_VISIT(state->Sub_type);
Py_VISIT(state->Subscript_type);
Py_VISIT(state->Try_type);
Py_VISIT(state->Tuple_type);
Py_VISIT(state->TypeIgnore_type);
Py_VISIT(state->UAdd_singleton);
Py_VISIT(state->UAdd_type);
Py_VISIT(state->USub_singleton);
Py_VISIT(state->USub_type);
Py_VISIT(state->UnaryOp_type);
Py_VISIT(state->While_type);
Py_VISIT(state->With_type);
Py_VISIT(state->YieldFrom_type);
Py_VISIT(state->Yield_type);
Py_VISIT(state->__dict__);
Py_VISIT(state->__doc__);
Py_VISIT(state->__module__);
Py_VISIT(state->_attributes);
Py_VISIT(state->_fields);
Py_VISIT(state->alias_type);
Py_VISIT(state->annotation);
Py_VISIT(state->arg);
Py_VISIT(state->arg_type);
Py_VISIT(state->args);
Py_VISIT(state->argtypes);
Py_VISIT(state->arguments_type);
Py_VISIT(state->asname);
Py_VISIT(state->ast);
Py_VISIT(state->attr);
Py_VISIT(state->bases);
Py_VISIT(state->body);
Py_VISIT(state->boolop_type);
Py_VISIT(state->cause);
Py_VISIT(state->cmpop_type);
Py_VISIT(state->col_offset);
Py_VISIT(state->comparators);
Py_VISIT(state->comprehension_type);
Py_VISIT(state->context_expr);
Py_VISIT(state->conversion);
Py_VISIT(state->ctx);
Py_VISIT(state->decorator_list);
Py_VISIT(state->defaults);
Py_VISIT(state->elt);
Py_VISIT(state->elts);
Py_VISIT(state->end_col_offset);
Py_VISIT(state->end_lineno);
Py_VISIT(state->exc);
Py_VISIT(state->excepthandler_type);
Py_VISIT(state->expr_context_type);
Py_VISIT(state->expr_type);
Py_VISIT(state->finalbody);
Py_VISIT(state->format_spec);
Py_VISIT(state->func);
Py_VISIT(state->generators);
Py_VISIT(state->handlers);
Py_VISIT(state->id);
Py_VISIT(state->ifs);
Py_VISIT(state->is_async);
Py_VISIT(state->items);
Py_VISIT(state->iter);
Py_VISIT(state->key);
Py_VISIT(state->keys);
Py_VISIT(state->keyword_type);
Py_VISIT(state->keywords);
Py_VISIT(state->kind);
Py_VISIT(state->kw_defaults);
Py_VISIT(state->kwarg);
Py_VISIT(state->kwonlyargs);
Py_VISIT(state->left);
Py_VISIT(state->level);
Py_VISIT(state->lineno);
Py_VISIT(state->lower);
Py_VISIT(state->mod_type);
Py_VISIT(state->module);
Py_VISIT(state->msg);
Py_VISIT(state->name);
Py_VISIT(state->names);
Py_VISIT(state->op);
Py_VISIT(state->operand);
Py_VISIT(state->operator_type);
Py_VISIT(state->ops);
Py_VISIT(state->optional_vars);
Py_VISIT(state->orelse);
Py_VISIT(state->posonlyargs);
Py_VISIT(state->returns);
Py_VISIT(state->right);
Py_VISIT(state->simple);
Py_VISIT(state->slice);
Py_VISIT(state->step);
Py_VISIT(state->stmt_type);
Py_VISIT(state->tag);
Py_VISIT(state->target);
Py_VISIT(state->targets);
Py_VISIT(state->test);
Py_VISIT(state->type);
Py_VISIT(state->type_comment);
Py_VISIT(state->type_ignore_type);
Py_VISIT(state->type_ignores);
Py_VISIT(state->unaryop_type);
Py_VISIT(state->upper);
Py_VISIT(state->value);
Py_VISIT(state->values);
Py_VISIT(state->vararg);
Py_VISIT(state->withitem_type);
return 0;
}
static void astmodule_free(void* module) {
astmodule_clear((PyObject*)module);
} }
static int init_identifiers(astmodulestate *state) static int init_identifiers(astmodulestate *state)
@ -10316,11 +10087,9 @@ static PyModuleDef_Slot astmodule_slots[] = {
static struct PyModuleDef _astmodule = { static struct PyModuleDef _astmodule = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
.m_name = "_ast", .m_name = "_ast",
.m_size = sizeof(astmodulestate), // The _ast module uses a global state (global_ast_state).
.m_size = 0,
.m_slots = astmodule_slots, .m_slots = astmodule_slots,
.m_traverse = astmodule_traverse,
.m_clear = astmodule_clear,
.m_free = astmodule_free,
}; };
PyMODINIT_FUNC PyMODINIT_FUNC

View File

@ -1259,6 +1259,12 @@ flush_std_files(void)
static void static void
finalize_interp_types(PyThreadState *tstate) finalize_interp_types(PyThreadState *tstate)
{ {
// The _ast module state is shared by all interpreters.
// The state must only be cleared by the main interpreter.
if (_Py_IsMainInterpreter(tstate)) {
_PyAST_Fini(tstate);
}
_PyExc_Fini(tstate); _PyExc_Fini(tstate);
_PyFrame_Fini(tstate); _PyFrame_Fini(tstate);
_PyAsyncGen_Fini(tstate); _PyAsyncGen_Fini(tstate);