gh-120108: Fix deepcopying of AST trees with .parent attributes (#120114)

This commit is contained in:
Jelle Zijlstra 2024-06-25 08:12:11 -07:00 committed by GitHub
parent ead676516d
commit 42b2c9d78d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 105 additions and 44 deletions

View File

@ -1,5 +1,6 @@
import ast import ast
import builtins import builtins
import copy
import dis import dis
import enum import enum
import os import os
@ -20,7 +21,7 @@ from test.support import os_helper, script_helper
from test.support.ast_helper import ASTTestMixin from test.support.ast_helper import ASTTestMixin
def to_tuple(t): def to_tuple(t):
if t is None or isinstance(t, (str, int, complex)) or t is Ellipsis: if t is None or isinstance(t, (str, int, complex, float, bytes)) or t is Ellipsis:
return t return t
elif isinstance(t, list): elif isinstance(t, list):
return [to_tuple(e) for e in t] return [to_tuple(e) for e in t]
@ -775,15 +776,6 @@ class AST_Tests(unittest.TestCase):
x = ast.Sub() x = ast.Sub()
self.assertEqual(x._fields, ()) self.assertEqual(x._fields, ())
# Round-trip every exec_tests snippet through pickle, at every protocol,
# and check that the unpickled tree has the same to_tuple() form.
def test_pickling(self):
import pickle
for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
# NOTE(review): the loop variable `ast` shadows the imported `ast` module
# for the rest of this method.
# 0x400 is presumably ast.PyCF_ONLY_AST, so compile() yields an AST
# rather than a code object -- confirm against the ast docs.
for ast in (compile(i, "?", "exec", 0x400) for i in exec_tests):
with self.subTest(ast=ast, protocol=protocol):
ast2 = pickle.loads(pickle.dumps(ast, protocol))
self.assertEqual(to_tuple(ast2), to_tuple(ast))
def test_invalid_sum(self): def test_invalid_sum(self):
pos = dict(lineno=2, col_offset=3) pos = dict(lineno=2, col_offset=3)
m = ast.Module([ast.Expr(ast.expr(**pos), **pos)], []) m = ast.Module([ast.Expr(ast.expr(**pos), **pos)], [])
@ -1135,6 +1127,79 @@ class AST_Tests(unittest.TestCase):
self.assert_none_check(node, attr, source) self.assert_none_check(node, attr, source)
class CopyTests(unittest.TestCase):
"""Test copying and pickling AST nodes."""
# Round-trip every exec_tests snippet through pickle, at every protocol,
# and check that the unpickled tree has the same to_tuple() form.
def test_pickling(self):
import pickle
for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
for code in exec_tests:
with self.subTest(code=code, protocol=protocol):
# 0x400 is presumably ast.PyCF_ONLY_AST, so compile() yields an AST
# rather than a code object -- confirm against the ast docs.
tree = compile(code, "?", "exec", 0x400)
ast2 = pickle.loads(pickle.dumps(tree, protocol))
self.assertEqual(to_tuple(ast2), to_tuple(tree))
# Regression test: deep-copying a tree whose nodes carry a `.parent`
# back-reference must complete and preserve the parent links.
def test_copy_with_parents(self):
# gh-120108
# Sample source made of many if/elif/else statements; it is only parsed
# below, never executed.
code = """
('',)
while i < n:
if ch == '':
ch = format[i]
if ch == '':
if freplace is None:
'' % getattr(object)
elif ch == '':
if zreplace is None:
if hasattr:
offset = object.utcoffset()
if offset is not None:
if offset.days < 0:
offset = -offset
h = divmod(timedelta(hours=0))
if u:
zreplace = '' % (sign,)
elif s:
zreplace = '' % (sign,)
else:
zreplace = '' % (sign,)
elif ch == '':
if Zreplace is None:
Zreplace = ''
if hasattr(object):
s = object.tzname()
if s is not None:
Zreplace = s.replace('')
newformat.append(Zreplace)
else:
push('')
else:
push(ch)
"""
tree = ast.parse(textwrap.dedent(code))
# Give every child node a reference back to its parent; together with the
# parent's own reference to the child this makes the tree cyclic.
for node in ast.walk(tree):
for child in ast.iter_child_nodes(node):
child.parent = node
try:
# support.infinite_recursion(200) presumably lowers the recursion limit
# so runaway recursion in deepcopy fails fast -- see test.support.
with support.infinite_recursion(200):
tree2 = copy.deepcopy(tree)
finally:
# Singletons like ast.Load() are shared; make sure we don't
# leave them mutated after this test.
for node in ast.walk(tree):
if hasattr(node, "parent"):
del node.parent
# In the copy, each child's `.parent` must have the same to_tuple() form as
# the node that actually owns it. Context and operator nodes are excluded,
# presumably because they are the shared singletons mentioned above and so
# cannot point back at every owner.
for node in ast.walk(tree2):
for child in ast.iter_child_nodes(node):
if hasattr(child, "parent") and not isinstance(child, (
ast.expr_context, ast.boolop, ast.unaryop, ast.cmpop, ast.operator,
)):
self.assertEqual(to_tuple(child.parent), to_tuple(node))
class ASTHelpers_Test(unittest.TestCase): class ASTHelpers_Test(unittest.TestCase):
maxDiff = None maxDiff = None

View File

@ -0,0 +1,2 @@
Fix calling :func:`copy.deepcopy` on :mod:`ast` trees that have been
modified to have references to parent nodes. Patch by Jelle Zijlstra.

View File

@ -1064,17 +1064,22 @@ ast_type_reduce(PyObject *self, PyObject *unused)
return NULL; return NULL;
} }
PyObject *dict = NULL, *fields = NULL, *remaining_fields = NULL, PyObject *dict = NULL, *fields = NULL, *positional_args = NULL;
*remaining_dict = NULL, *positional_args = NULL;
if (PyObject_GetOptionalAttr(self, state->__dict__, &dict) < 0) { if (PyObject_GetOptionalAttr(self, state->__dict__, &dict) < 0) {
return NULL; return NULL;
} }
PyObject *result = NULL; PyObject *result = NULL;
if (dict) { if (dict) {
// Serialize the fields as positional args if possible, because if we // Unpickling (or copying) works as follows:
// serialize them as a dict, during unpickling they are set only *after* // - Construct the object with only positional arguments
// the object is constructed, which will now trigger a DeprecationWarning // - Set the fields from the dict
// if the AST type has required fields. // We have two constraints:
// - We must set all the required fields in the initial constructor call,
// or the unpickling or deepcopying of the object will trigger DeprecationWarnings.
// - We must not include child nodes in the positional args, because
// that may trigger runaway recursion during copying (gh-120108).
// To satisfy both constraints, we set all the fields to None in the
// initial list of positional args, and then set the fields from the dict.
if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), state->_fields, &fields) < 0) { if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), state->_fields, &fields) < 0) {
goto cleanup; goto cleanup;
} }
@ -1084,11 +1089,6 @@ ast_type_reduce(PyObject *self, PyObject *unused)
Py_DECREF(dict); Py_DECREF(dict);
goto cleanup; goto cleanup;
} }
remaining_dict = PyDict_Copy(dict);
Py_DECREF(dict);
if (!remaining_dict) {
goto cleanup;
}
positional_args = PyList_New(0); positional_args = PyList_New(0);
if (!positional_args) { if (!positional_args) {
goto cleanup; goto cleanup;
@ -1099,7 +1099,7 @@ ast_type_reduce(PyObject *self, PyObject *unused)
goto cleanup; goto cleanup;
} }
PyObject *value; PyObject *value;
int rc = PyDict_Pop(remaining_dict, name, &value); int rc = PyDict_GetItemRef(dict, name, &value);
Py_DECREF(name); Py_DECREF(name);
if (rc < 0) { if (rc < 0) {
goto cleanup; goto cleanup;
@ -1107,7 +1107,7 @@ ast_type_reduce(PyObject *self, PyObject *unused)
if (!value) { if (!value) {
break; break;
} }
rc = PyList_Append(positional_args, value); rc = PyList_Append(positional_args, Py_None);
Py_DECREF(value); Py_DECREF(value);
if (rc < 0) { if (rc < 0) {
goto cleanup; goto cleanup;
@ -1117,8 +1117,7 @@ ast_type_reduce(PyObject *self, PyObject *unused)
if (!args_tuple) { if (!args_tuple) {
goto cleanup; goto cleanup;
} }
result = Py_BuildValue("ONO", Py_TYPE(self), args_tuple, result = Py_BuildValue("ONN", Py_TYPE(self), args_tuple, dict);
remaining_dict);
} }
else { else {
result = Py_BuildValue("O()N", Py_TYPE(self), dict); result = Py_BuildValue("O()N", Py_TYPE(self), dict);
@ -1129,8 +1128,6 @@ ast_type_reduce(PyObject *self, PyObject *unused)
} }
cleanup: cleanup:
Py_XDECREF(fields); Py_XDECREF(fields);
Py_XDECREF(remaining_fields);
Py_XDECREF(remaining_dict);
Py_XDECREF(positional_args); Py_XDECREF(positional_args);
return result; return result;
} }

31
Python/Python-ast.c generated
View File

@ -5263,17 +5263,22 @@ ast_type_reduce(PyObject *self, PyObject *unused)
return NULL; return NULL;
} }
PyObject *dict = NULL, *fields = NULL, *remaining_fields = NULL, PyObject *dict = NULL, *fields = NULL, *positional_args = NULL;
*remaining_dict = NULL, *positional_args = NULL;
if (PyObject_GetOptionalAttr(self, state->__dict__, &dict) < 0) { if (PyObject_GetOptionalAttr(self, state->__dict__, &dict) < 0) {
return NULL; return NULL;
} }
PyObject *result = NULL; PyObject *result = NULL;
if (dict) { if (dict) {
// Serialize the fields as positional args if possible, because if we // Unpickling (or copying) works as follows:
// serialize them as a dict, during unpickling they are set only *after* // - Construct the object with only positional arguments
// the object is constructed, which will now trigger a DeprecationWarning // - Set the fields from the dict
// if the AST type has required fields. // We have two constraints:
// - We must set all the required fields in the initial constructor call,
// or the unpickling or deepcopying of the object will trigger DeprecationWarnings.
// - We must not include child nodes in the positional args, because
// that may trigger runaway recursion during copying (gh-120108).
// To satisfy both constraints, we set all the fields to None in the
// initial list of positional args, and then set the fields from the dict.
if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), state->_fields, &fields) < 0) { if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), state->_fields, &fields) < 0) {
goto cleanup; goto cleanup;
} }
@ -5283,11 +5288,6 @@ ast_type_reduce(PyObject *self, PyObject *unused)
Py_DECREF(dict); Py_DECREF(dict);
goto cleanup; goto cleanup;
} }
remaining_dict = PyDict_Copy(dict);
Py_DECREF(dict);
if (!remaining_dict) {
goto cleanup;
}
positional_args = PyList_New(0); positional_args = PyList_New(0);
if (!positional_args) { if (!positional_args) {
goto cleanup; goto cleanup;
@ -5298,7 +5298,7 @@ ast_type_reduce(PyObject *self, PyObject *unused)
goto cleanup; goto cleanup;
} }
PyObject *value; PyObject *value;
int rc = PyDict_Pop(remaining_dict, name, &value); int rc = PyDict_GetItemRef(dict, name, &value);
Py_DECREF(name); Py_DECREF(name);
if (rc < 0) { if (rc < 0) {
goto cleanup; goto cleanup;
@ -5306,7 +5306,7 @@ ast_type_reduce(PyObject *self, PyObject *unused)
if (!value) { if (!value) {
break; break;
} }
rc = PyList_Append(positional_args, value); rc = PyList_Append(positional_args, Py_None);
Py_DECREF(value); Py_DECREF(value);
if (rc < 0) { if (rc < 0) {
goto cleanup; goto cleanup;
@ -5316,8 +5316,7 @@ ast_type_reduce(PyObject *self, PyObject *unused)
if (!args_tuple) { if (!args_tuple) {
goto cleanup; goto cleanup;
} }
result = Py_BuildValue("ONO", Py_TYPE(self), args_tuple, result = Py_BuildValue("ONN", Py_TYPE(self), args_tuple, dict);
remaining_dict);
} }
else { else {
result = Py_BuildValue("O()N", Py_TYPE(self), dict); result = Py_BuildValue("O()N", Py_TYPE(self), dict);
@ -5328,8 +5327,6 @@ ast_type_reduce(PyObject *self, PyObject *unused)
} }
cleanup: cleanup:
Py_XDECREF(fields); Py_XDECREF(fields);
Py_XDECREF(remaining_fields);
Py_XDECREF(remaining_dict);
Py_XDECREF(positional_args); Py_XDECREF(positional_args);
return result; return result;
} }