bpo-41631: _ast module uses again a global state (#21961)

Partially revert commit ac46eb4ad6662cf6d771b20d8963658b2186c48c:
"bpo-38113: Update the Python-ast.c generator to PEP384 (gh-15957)".

Using a module state per module instance is causing subtle practical
problems.

For example, the Mercurial project replaces the __import__() function
to implement lazy import, whereas Python expected that "import _ast"
always return a fully initialized _ast module.

Add _PyAST_Fini() to clear the state at exit.

The _ast module has no state (set _astmodule.m_size to 0). Remove
astmodule_traverse(), astmodule_clear() and astmodule_free()
functions.
This commit is contained in:
Victor Stinner 2020-09-15 18:03:34 +02:00 committed by GitHub
parent 7bcc6456ad
commit e5fbe0cbd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 162 additions and 311 deletions

View File

@ -84,6 +84,7 @@ extern void _PyFaulthandler_Fini(void);
extern void _PyHash_Fini(void);
extern void _PyTraceMalloc_Fini(void);
extern void _PyWarnings_Fini(PyInterpreterState *interp);
extern void _PyAST_Fini(PyThreadState *tstate);
extern PyStatus _PyGILState_Init(PyThreadState *tstate);
extern void _PyGILState_Fini(PyThreadState *tstate);

View File

@ -497,6 +497,8 @@ class NodeTransformer(NodeVisitor):
return node
# If the ast module is loaded more than once, only add deprecated methods once
if not hasattr(Constant, 'n'):
# The following code is for backward compatibility.
# It will be removed in future.
@ -600,6 +602,11 @@ class ExtSlice(slice):
def __new__(cls, dims=(), **kwargs):
return Tuple(list(dims), Load(), **kwargs)
# If the ast module is loaded more than once, only add deprecated methods once
if not hasattr(Tuple, 'dims'):
# The following code is for backward compatibility.
# It will be removed in future.
def _dims_getter(self):
"""Deprecated. Use elts instead."""
return self.elts

View File

@ -1,7 +1,9 @@
import ast
import builtins
import dis
import os
import sys
import types
import unittest
import warnings
import weakref
@ -1945,6 +1947,88 @@ class NodeVisitorTests(unittest.TestCase):
])
@support.cpython_only
class ModuleStateTests(unittest.TestCase):
# bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
def check_ast_module(self):
# Check that the _ast module still works as expected
code = 'x + 1'
filename = '<string>'
mode = 'eval'
# Create _ast.AST subclasses instances
ast_tree = compile(code, filename, mode, flags=ast.PyCF_ONLY_AST)
# Call PyAST_Check()
code = compile(ast_tree, filename, mode)
self.assertIsInstance(code, types.CodeType)
def test_reload_module(self):
# bpo-41194: Importing the _ast module twice must not crash.
with support.swap_item(sys.modules, '_ast', None):
del sys.modules['_ast']
import _ast as ast1
del sys.modules['_ast']
import _ast as ast2
self.check_ast_module()
# Unloading the two _ast module instances must not crash.
del ast1
del ast2
support.gc_collect()
self.check_ast_module()
def test_sys_modules(self):
# bpo-41631: Test reproducing a Mercurial crash when PyAST_Check()
# imported the _ast module internally.
lazy_mod = object()
def my_import(name, *args, **kw):
sys.modules[name] = lazy_mod
return lazy_mod
with support.swap_item(sys.modules, '_ast', None):
del sys.modules['_ast']
with support.swap_attr(builtins, '__import__', my_import):
# Test that compile() does not import the _ast module
self.check_ast_module()
self.assertNotIn('_ast', sys.modules)
# Sanity check of the test itself
import _ast
self.assertIs(_ast, lazy_mod)
def test_subinterpreter(self):
# bpo-41631: Importing and using the _ast module in a subinterpreter
# must not crash.
code = dedent('''
import _ast
import ast
import gc
import sys
import types
# Create _ast.AST subclasses instances and call PyAST_Check()
ast_tree = compile('x+1', '<string>', 'eval',
flags=ast.PyCF_ONLY_AST)
code = compile(ast_tree, 'string', 'eval')
if not isinstance(code, types.CodeType):
raise AssertionError
# Unloading the _ast module must not crash.
del ast, _ast
del sys.modules['ast'], sys.modules['_ast']
gc.collect()
''')
res = support.run_in_subinterp(code)
self.assertEqual(res, 0)
def main():
if __name__ != '__main__':
return

View File

@ -0,0 +1,5 @@
The ``_ast`` module uses again a global state. Using a module state per module
instance is causing subtle practical problems. For example, the Mercurial
project replaces the ``__import__()`` function to implement lazy import,
whereas Python expected that ``import _ast`` always return a fully initialized
``_ast`` module.

View File

@ -1089,11 +1089,9 @@ static PyModuleDef_Slot astmodule_slots[] = {
static struct PyModuleDef _astmodule = {
PyModuleDef_HEAD_INIT,
.m_name = "_ast",
.m_size = sizeof(astmodulestate),
// The _ast module uses a global state (global_ast_state).
.m_size = 0,
.m_slots = astmodule_slots,
.m_traverse = astmodule_traverse,
.m_clear = astmodule_clear,
.m_free = astmodule_free,
};
PyMODINIT_FUNC
@ -1374,59 +1372,40 @@ def generate_module_def(f, mod):
f.write(' PyObject *' + s + ';\n')
f.write('} astmodulestate;\n\n')
f.write("""
static astmodulestate*
get_ast_state(PyObject *module)
{
void *state = PyModule_GetState(module);
assert(state != NULL);
return (astmodulestate*)state;
}
// Forward declaration
static int init_types(astmodulestate *state);
// bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
static astmodulestate global_ast_state = {0};
static astmodulestate*
get_global_ast_state(void)
{
_Py_IDENTIFIER(_ast);
PyObject *name = _PyUnicode_FromId(&PyId__ast); // borrowed reference
if (name == NULL) {
astmodulestate* state = &global_ast_state;
if (!init_types(state)) {
return NULL;
}
PyObject *module = PyImport_GetModule(name);
if (module == NULL) {
if (PyErr_Occurred()) {
return NULL;
}
module = PyImport_Import(name);
if (module == NULL) {
return NULL;
}
}
astmodulestate *state = get_ast_state(module);
Py_DECREF(module);
return state;
}
static int astmodule_clear(PyObject *module)
static astmodulestate*
get_ast_state(PyObject* Py_UNUSED(module))
{
astmodulestate *state = get_ast_state(module);
astmodulestate* state = get_global_ast_state();
// get_ast_state() must only be called after _ast module is imported,
// and astmodule_exec() calls init_types()
assert(state != NULL);
return state;
}
void _PyAST_Fini(PyThreadState *tstate)
{
astmodulestate* state = &global_ast_state;
""")
for s in module_state:
f.write(" Py_CLEAR(state->" + s + ');\n')
f.write("""
return 0;
}
static int astmodule_traverse(PyObject *module, visitproc visit, void* arg)
{
astmodulestate *state = get_ast_state(module);
""")
for s in module_state:
f.write(" Py_VISIT(state->" + s + ');\n')
f.write("""
return 0;
}
static void astmodule_free(void* module) {
astmodule_clear((PyObject*)module);
state->initialized = 0;
}
""")

275
Python/Python-ast.c generated
View File

@ -224,40 +224,35 @@ typedef struct {
} astmodulestate;
static astmodulestate*
get_ast_state(PyObject *module)
{
void *state = PyModule_GetState(module);
assert(state != NULL);
return (astmodulestate*)state;
}
// Forward declaration
static int init_types(astmodulestate *state);
// bpo-41194, bpo-41261, bpo-41631: The _ast module uses a global state.
static astmodulestate global_ast_state = {0};
static astmodulestate*
get_global_ast_state(void)
{
_Py_IDENTIFIER(_ast);
PyObject *name = _PyUnicode_FromId(&PyId__ast); // borrowed reference
if (name == NULL) {
astmodulestate* state = &global_ast_state;
if (!init_types(state)) {
return NULL;
}
PyObject *module = PyImport_GetModule(name);
if (module == NULL) {
if (PyErr_Occurred()) {
return NULL;
}
module = PyImport_Import(name);
if (module == NULL) {
return NULL;
}
}
astmodulestate *state = get_ast_state(module);
Py_DECREF(module);
return state;
}
static int astmodule_clear(PyObject *module)
static astmodulestate*
get_ast_state(PyObject* Py_UNUSED(module))
{
astmodulestate *state = get_ast_state(module);
astmodulestate* state = get_global_ast_state();
// get_ast_state() must only be called after _ast module is imported,
// and astmodule_exec() calls init_types()
assert(state != NULL);
return state;
}
void _PyAST_Fini(PyThreadState *tstate)
{
astmodulestate* state = &global_ast_state;
Py_CLEAR(state->AST_type);
Py_CLEAR(state->Add_singleton);
Py_CLEAR(state->Add_type);
@ -472,231 +467,7 @@ static int astmodule_clear(PyObject *module)
Py_CLEAR(state->vararg);
Py_CLEAR(state->withitem_type);
return 0;
}
static int astmodule_traverse(PyObject *module, visitproc visit, void* arg)
{
astmodulestate *state = get_ast_state(module);
Py_VISIT(state->AST_type);
Py_VISIT(state->Add_singleton);
Py_VISIT(state->Add_type);
Py_VISIT(state->And_singleton);
Py_VISIT(state->And_type);
Py_VISIT(state->AnnAssign_type);
Py_VISIT(state->Assert_type);
Py_VISIT(state->Assign_type);
Py_VISIT(state->AsyncFor_type);
Py_VISIT(state->AsyncFunctionDef_type);
Py_VISIT(state->AsyncWith_type);
Py_VISIT(state->Attribute_type);
Py_VISIT(state->AugAssign_type);
Py_VISIT(state->Await_type);
Py_VISIT(state->BinOp_type);
Py_VISIT(state->BitAnd_singleton);
Py_VISIT(state->BitAnd_type);
Py_VISIT(state->BitOr_singleton);
Py_VISIT(state->BitOr_type);
Py_VISIT(state->BitXor_singleton);
Py_VISIT(state->BitXor_type);
Py_VISIT(state->BoolOp_type);
Py_VISIT(state->Break_type);
Py_VISIT(state->Call_type);
Py_VISIT(state->ClassDef_type);
Py_VISIT(state->Compare_type);
Py_VISIT(state->Constant_type);
Py_VISIT(state->Continue_type);
Py_VISIT(state->Del_singleton);
Py_VISIT(state->Del_type);
Py_VISIT(state->Delete_type);
Py_VISIT(state->DictComp_type);
Py_VISIT(state->Dict_type);
Py_VISIT(state->Div_singleton);
Py_VISIT(state->Div_type);
Py_VISIT(state->Eq_singleton);
Py_VISIT(state->Eq_type);
Py_VISIT(state->ExceptHandler_type);
Py_VISIT(state->Expr_type);
Py_VISIT(state->Expression_type);
Py_VISIT(state->FloorDiv_singleton);
Py_VISIT(state->FloorDiv_type);
Py_VISIT(state->For_type);
Py_VISIT(state->FormattedValue_type);
Py_VISIT(state->FunctionDef_type);
Py_VISIT(state->FunctionType_type);
Py_VISIT(state->GeneratorExp_type);
Py_VISIT(state->Global_type);
Py_VISIT(state->GtE_singleton);
Py_VISIT(state->GtE_type);
Py_VISIT(state->Gt_singleton);
Py_VISIT(state->Gt_type);
Py_VISIT(state->IfExp_type);
Py_VISIT(state->If_type);
Py_VISIT(state->ImportFrom_type);
Py_VISIT(state->Import_type);
Py_VISIT(state->In_singleton);
Py_VISIT(state->In_type);
Py_VISIT(state->Interactive_type);
Py_VISIT(state->Invert_singleton);
Py_VISIT(state->Invert_type);
Py_VISIT(state->IsNot_singleton);
Py_VISIT(state->IsNot_type);
Py_VISIT(state->Is_singleton);
Py_VISIT(state->Is_type);
Py_VISIT(state->JoinedStr_type);
Py_VISIT(state->LShift_singleton);
Py_VISIT(state->LShift_type);
Py_VISIT(state->Lambda_type);
Py_VISIT(state->ListComp_type);
Py_VISIT(state->List_type);
Py_VISIT(state->Load_singleton);
Py_VISIT(state->Load_type);
Py_VISIT(state->LtE_singleton);
Py_VISIT(state->LtE_type);
Py_VISIT(state->Lt_singleton);
Py_VISIT(state->Lt_type);
Py_VISIT(state->MatMult_singleton);
Py_VISIT(state->MatMult_type);
Py_VISIT(state->Mod_singleton);
Py_VISIT(state->Mod_type);
Py_VISIT(state->Module_type);
Py_VISIT(state->Mult_singleton);
Py_VISIT(state->Mult_type);
Py_VISIT(state->Name_type);
Py_VISIT(state->NamedExpr_type);
Py_VISIT(state->Nonlocal_type);
Py_VISIT(state->NotEq_singleton);
Py_VISIT(state->NotEq_type);
Py_VISIT(state->NotIn_singleton);
Py_VISIT(state->NotIn_type);
Py_VISIT(state->Not_singleton);
Py_VISIT(state->Not_type);
Py_VISIT(state->Or_singleton);
Py_VISIT(state->Or_type);
Py_VISIT(state->Pass_type);
Py_VISIT(state->Pow_singleton);
Py_VISIT(state->Pow_type);
Py_VISIT(state->RShift_singleton);
Py_VISIT(state->RShift_type);
Py_VISIT(state->Raise_type);
Py_VISIT(state->Return_type);
Py_VISIT(state->SetComp_type);
Py_VISIT(state->Set_type);
Py_VISIT(state->Slice_type);
Py_VISIT(state->Starred_type);
Py_VISIT(state->Store_singleton);
Py_VISIT(state->Store_type);
Py_VISIT(state->Sub_singleton);
Py_VISIT(state->Sub_type);
Py_VISIT(state->Subscript_type);
Py_VISIT(state->Try_type);
Py_VISIT(state->Tuple_type);
Py_VISIT(state->TypeIgnore_type);
Py_VISIT(state->UAdd_singleton);
Py_VISIT(state->UAdd_type);
Py_VISIT(state->USub_singleton);
Py_VISIT(state->USub_type);
Py_VISIT(state->UnaryOp_type);
Py_VISIT(state->While_type);
Py_VISIT(state->With_type);
Py_VISIT(state->YieldFrom_type);
Py_VISIT(state->Yield_type);
Py_VISIT(state->__dict__);
Py_VISIT(state->__doc__);
Py_VISIT(state->__module__);
Py_VISIT(state->_attributes);
Py_VISIT(state->_fields);
Py_VISIT(state->alias_type);
Py_VISIT(state->annotation);
Py_VISIT(state->arg);
Py_VISIT(state->arg_type);
Py_VISIT(state->args);
Py_VISIT(state->argtypes);
Py_VISIT(state->arguments_type);
Py_VISIT(state->asname);
Py_VISIT(state->ast);
Py_VISIT(state->attr);
Py_VISIT(state->bases);
Py_VISIT(state->body);
Py_VISIT(state->boolop_type);
Py_VISIT(state->cause);
Py_VISIT(state->cmpop_type);
Py_VISIT(state->col_offset);
Py_VISIT(state->comparators);
Py_VISIT(state->comprehension_type);
Py_VISIT(state->context_expr);
Py_VISIT(state->conversion);
Py_VISIT(state->ctx);
Py_VISIT(state->decorator_list);
Py_VISIT(state->defaults);
Py_VISIT(state->elt);
Py_VISIT(state->elts);
Py_VISIT(state->end_col_offset);
Py_VISIT(state->end_lineno);
Py_VISIT(state->exc);
Py_VISIT(state->excepthandler_type);
Py_VISIT(state->expr_context_type);
Py_VISIT(state->expr_type);
Py_VISIT(state->finalbody);
Py_VISIT(state->format_spec);
Py_VISIT(state->func);
Py_VISIT(state->generators);
Py_VISIT(state->handlers);
Py_VISIT(state->id);
Py_VISIT(state->ifs);
Py_VISIT(state->is_async);
Py_VISIT(state->items);
Py_VISIT(state->iter);
Py_VISIT(state->key);
Py_VISIT(state->keys);
Py_VISIT(state->keyword_type);
Py_VISIT(state->keywords);
Py_VISIT(state->kind);
Py_VISIT(state->kw_defaults);
Py_VISIT(state->kwarg);
Py_VISIT(state->kwonlyargs);
Py_VISIT(state->left);
Py_VISIT(state->level);
Py_VISIT(state->lineno);
Py_VISIT(state->lower);
Py_VISIT(state->mod_type);
Py_VISIT(state->module);
Py_VISIT(state->msg);
Py_VISIT(state->name);
Py_VISIT(state->names);
Py_VISIT(state->op);
Py_VISIT(state->operand);
Py_VISIT(state->operator_type);
Py_VISIT(state->ops);
Py_VISIT(state->optional_vars);
Py_VISIT(state->orelse);
Py_VISIT(state->posonlyargs);
Py_VISIT(state->returns);
Py_VISIT(state->right);
Py_VISIT(state->simple);
Py_VISIT(state->slice);
Py_VISIT(state->step);
Py_VISIT(state->stmt_type);
Py_VISIT(state->tag);
Py_VISIT(state->target);
Py_VISIT(state->targets);
Py_VISIT(state->test);
Py_VISIT(state->type);
Py_VISIT(state->type_comment);
Py_VISIT(state->type_ignore_type);
Py_VISIT(state->type_ignores);
Py_VISIT(state->unaryop_type);
Py_VISIT(state->upper);
Py_VISIT(state->value);
Py_VISIT(state->values);
Py_VISIT(state->vararg);
Py_VISIT(state->withitem_type);
return 0;
}
static void astmodule_free(void* module) {
astmodule_clear((PyObject*)module);
state->initialized = 0;
}
static int init_identifiers(astmodulestate *state)
@ -10316,11 +10087,9 @@ static PyModuleDef_Slot astmodule_slots[] = {
static struct PyModuleDef _astmodule = {
PyModuleDef_HEAD_INIT,
.m_name = "_ast",
.m_size = sizeof(astmodulestate),
// The _ast module uses a global state (global_ast_state).
.m_size = 0,
.m_slots = astmodule_slots,
.m_traverse = astmodule_traverse,
.m_clear = astmodule_clear,
.m_free = astmodule_free,
};
PyMODINIT_FUNC

View File

@ -1259,6 +1259,12 @@ flush_std_files(void)
static void
finalize_interp_types(PyThreadState *tstate)
{
// The _ast module state is shared by all interpreters.
// The state must only be cleared by the main interpreter.
if (_Py_IsMainInterpreter(tstate)) {
_PyAST_Fini(tstate);
}
_PyExc_Fini(tstate);
_PyFrame_Fini(tstate);
_PyAsyncGen_Fini(tstate);