From 7c797982383ebfd9cca39c480dcf6132979dd06f Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 7 Dec 2020 12:08:24 -0800 Subject: [PATCH] bpo-42536: GC track recycled tuples (GH-23623) (GH-23652) Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector: - collections.OrderedDict.items - dict.items - enumerate - functools.reduce - itertools.combinations - itertools.combinations_with_replacement - itertools.permutations - itertools.product - itertools.zip_longest - zip Previously, they could have become untracked by a prior garbage collection. (cherry picked from commit 226a012d1cd61f42ecd3056c554922f359a1a35d) --- Lib/test/test_builtin.py | 15 +++++- Lib/test/test_dict.py | 19 ++++++++ Lib/test/test_enumerate.py | 13 +++++ Lib/test/test_itertools.py | 47 +++++++++++++++++++ Lib/test/test_ordered_dict.py | 11 +++++ .../2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst | 26 ++++++++++ Modules/_functoolsmodule.c | 6 +++ Modules/itertoolsmodule.c | 26 ++++++++++ Objects/dictobject.c | 10 ++++ Objects/enumobject.c | 11 +++++ Objects/odictobject.c | 5 ++ Python/bltinmodule.c | 6 +++ 12 files changed, 194 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 4a498262ba6..729f15dc4b2 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -6,6 +6,7 @@ import builtins import collections import decimal import fractions +import gc import io import locale import os @@ -27,7 +28,7 @@ from types import AsyncGeneratorType, FunctionType from operator import neg from test.support import ( EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink, - maybe_get_event_loop_policy) + maybe_get_event_loop_policy, cpython_only) from test.support.script_helper import assert_python_ok from unittest.mock import MagicMock, patch try: @@ -1573,6 +1574,18 @@ class BuiltinTest(unittest.TestCase): self.assertIs(cm.exception, exception) + @cpython_only + def test_zip_result_gc(self): + # bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions + # about what can be untracked. Make sure we re-track result tuples + # whenever we reuse them. + it = zip([[]]) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None,). Make sure it's re-tracked when + # it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + def test_format(self): # Test the basic machinery of the format() builtin. Don't test # the specifics of the various formatters diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index de483ab5521..ce044c8684f 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1377,6 +1377,25 @@ class DictTest(unittest.TestCase): d = CustomReversedDict(pairs) self.assertEqual(pairs[::-1], list(dict(d).items())) + @support.cpython_only + def test_dict_items_result_gc(self): + # bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = iter({None: []}.items()) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_dict_items_result_gc(self): + # Same as test_dict_items_result_gc above, but reversed. + it = reversed({None: []}.items()) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + class CAPITest(unittest.TestCase): diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py index 5785cb46492..906bfc21a26 100644 --- a/Lib/test/test_enumerate.py +++ b/Lib/test/test_enumerate.py @@ -2,6 +2,7 @@ import unittest import operator import sys import pickle +import gc from test import support @@ -134,6 +135,18 @@ class EnumerateTestCase(unittest.TestCase, PickleTest): self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq)) self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq))) + @support.cpython_only + def test_enumerate_result_gc(self): + # bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = self.enum([[]]) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + class MyEnum(enumerate): pass diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index eaa6197bec3..d7c09222b70 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -12,6 +12,8 @@ from functools import reduce import sys import struct import threading +import gc + maxsize = support.MAX_Py_ssize_t minsize = -maxsize-1 @@ -1554,6 +1556,51 @@ class TestBasicOps(unittest.TestCase): self.assertRaises(StopIteration, next, f(lambda x:x, [])) self.assertRaises(StopIteration, next, f(lambda x:x, StopNow())) + @support.cpython_only + def test_combinations_result_gc(self): + # bpo-42536: combinations's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = combinations([None, []], 1) + next(it) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which has the value (None,). Make sure it's re-tracked when + # it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_combinations_with_replacement_result_gc(self): + # Ditto for combinations_with_replacement. + it = combinations_with_replacement([None, []], 1) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_permutations_result_gc(self): + # Ditto for permutations. + it = permutations([None, []], 1) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_product_result_gc(self): + # Ditto for product. + it = product([None, []]) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_zip_longest_result_gc(self): + # Ditto for zip_longest. + it = zip_longest([[]]) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + class TestExamples(unittest.TestCase): def test_accumulate(self): diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index 085e5f60ed9..bcda8f83cea 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -654,6 +654,17 @@ class OrderedDictTests: support.check_free_after_iterating(self, lambda d: iter(d.values()), self.OrderedDict) support.check_free_after_iterating(self, lambda d: iter(d.items()), self.OrderedDict) + @support.cpython_only + def test_ordered_dict_items_result_gc(self): + # bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = iter(self.OrderedDict({None: []}).items()) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst new file mode 100644 index 00000000000..6ccacab1f64 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst @@ -0,0 +1,26 @@ +Several built-in and standard library types now ensure that their internal +result tuples are always tracked by the :term:`garbage collector +`: + +- :meth:`collections.OrderedDict.items() ` + +- :meth:`dict.items` + +- :func:`enumerate` + +- :func:`functools.reduce` + +- :func:`itertools.combinations` + +- :func:`itertools.combinations_with_replacement` + +- :func:`itertools.permutations` + +- :func:`itertools.product` + +- :func:`itertools.zip_longest` + +- :func:`zip` + +Previously, they could have become untracked by a prior garbage collection. +Patch by Brandt Bucher. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index a101363bf02..148d051714f 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -3,6 +3,7 @@ #include "pycore_pystate.h" #include "pycore_tupleobject.h" #include "structmember.h" +#include "pycore_object.h" // _PyObject_GC_TRACK /* _functools module written and maintained by Hye-Shik Chang @@ -633,6 +634,11 @@ functools_reduce(PyObject *self, PyObject *args) if ((result = PyObject_Call(func, args, NULL)) == NULL) { goto Fail; } + // bpo-42536: The GC may have untracked this args tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(args)) { + _PyObject_GC_TRACK(args); + } } } diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index eba59ba1b88..eb04c26beb8 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_tupleobject.h" #include "structmember.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() /* Itertools module written and maintained by Raymond D. Hettinger @@ -2255,6 +2256,11 @@ product_next(productobject *lz) lz->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place */ assert (npools==0 || Py_REFCNT(result) == 1); @@ -2580,6 +2586,11 @@ combinations_next(combinationsobject *co) co->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place * CPython's empty tuple is a singleton and cached in * PyTuple's freelist. @@ -2916,6 +2927,11 @@ cwr_next(cwrobject *co) co->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place CPython's empty tuple is a singleton and cached in PyTuple's freelist. */ assert(r == 0 || Py_REFCNT(result) == 1); @@ -3259,6 +3275,11 @@ permutations_next(permutationsobject *po) po->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place */ assert(r == 0 || Py_REFCNT(result) == 1); @@ -4536,6 +4557,11 @@ zip_longest_next(ziplongestobject *lz) PyTuple_SET_ITEM(result, i, item); Py_DECREF(olditem); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(tuplesize); if (result == NULL) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3c56f4a515e..edc02372a1d 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3769,6 +3769,11 @@ dictiter_iternextitem(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); @@ -3884,6 +3889,11 @@ dictreviter_iternext(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since + // we're recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Objects/enumobject.c b/Objects/enumobject.c index 4786297c41a..478a9dd93a3 100644 --- a/Objects/enumobject.c +++ b/Objects/enumobject.c @@ -1,6 +1,7 @@ /* enumerate object */ #include "Python.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() #include "clinic/enumobject.c.h" @@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); @@ -175,6 +181,11 @@ enum_next(enumobject *en) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); diff --git a/Objects/odictobject.c b/Objects/odictobject.c index ac0da9bd5ba..6076b03455f 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1766,6 +1766,11 @@ odictiter_iternext(odictiterobject *di) Py_INCREF(result); Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */ Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */ + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index e42d5f246c3..3767f552bb1 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -4,6 +4,7 @@ #include #include "ast.h" #undef Yield /* undefine macro conflicting with */ +#include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_pystate.h" #include "pycore_tupleobject.h" @@ -2618,6 +2619,11 @@ zip_next(zipobject *lz) PyTuple_SET_ITEM(result, i, item); Py_DECREF(olditem); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(tuplesize); if (result == NULL)