From 226a012d1cd61f42ecd3056c554922f359a1a35d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 4 Dec 2020 19:45:57 -0800 Subject: [PATCH] bpo-42536: GC track recycled tuples (GH-23623) Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector: - collections.OrderedDict.items - dict.items - enumerate - functools.reduce - itertools.combinations - itertools.combinations_with_replacement - itertools.permutations - itertools.product - itertools.zip_longest - zip Previously, they could have become untracked by a prior garbage collection. --- Lib/test/test_builtin.py | 13 +++++ Lib/test/test_dict.py | 19 ++++++++ Lib/test/test_enumerate.py | 13 +++++ Lib/test/test_itertools.py | 47 +++++++++++++++++++ Lib/test/test_ordered_dict.py | 11 +++++ .../2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst | 26 ++++++++++ Modules/_functoolsmodule.c | 6 +++ Modules/itertoolsmodule.c | 26 ++++++++++ Objects/dictobject.c | 10 ++++ Objects/enumobject.c | 11 +++++ Objects/odictobject.c | 5 ++ Python/bltinmodule.c | 5 ++ 12 files changed, 192 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index edb4ec092e3..8c9573731ae 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -6,6 +6,7 @@ import builtins import collections import decimal import fractions +import gc import io import locale import os @@ -1756,6 +1757,18 @@ class BuiltinTest(unittest.TestCase): l8 = self.iter_error(zip(Iter(3), "AB", strict=True), ValueError) self.assertEqual(l8, [(2, "A"), (1, "B")]) + @support.cpython_only + def test_zip_result_gc(self): + # bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions + # about what can be untracked. Make sure we re-track result tuples + # whenever we reuse them. + it = zip([[]]) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None,). Make sure it's re-tracked when + # it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + def test_format(self): # Test the basic machinery of the format() builtin. Don't test # the specifics of the various formatters diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 9ff8b7d501a..4b31cdc7941 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1452,6 +1452,25 @@ class DictTest(unittest.TestCase): d = CustomReversedDict(pairs) self.assertEqual(pairs[::-1], list(dict(d).items())) + @support.cpython_only + def test_dict_items_result_gc(self): + # bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = iter({None: []}.items()) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_dict_items_result_gc(self): + # Same as test_dict_items_result_gc above, but reversed. + it = reversed({None: []}.items()) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + class CAPITest(unittest.TestCase): diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py index 5785cb46492..906bfc21a26 100644 --- a/Lib/test/test_enumerate.py +++ b/Lib/test/test_enumerate.py @@ -2,6 +2,7 @@ import unittest import operator import sys import pickle +import gc from test import support @@ -134,6 +135,18 @@ class EnumerateTestCase(unittest.TestCase, PickleTest): self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq)) self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq))) + @support.cpython_only + def test_enumerate_result_gc(self): + # bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = self.enum([[]]) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + class MyEnum(enumerate): pass diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index df2997e87d4..a99b5e2bb71 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -12,6 +12,8 @@ from functools import reduce import sys import struct import threading +import gc + maxsize = support.MAX_Py_ssize_t minsize = -maxsize-1 @@ -1573,6 +1575,51 @@ class TestBasicOps(unittest.TestCase): self.assertRaises(StopIteration, next, f(lambda x:x, [])) self.assertRaises(StopIteration, next, f(lambda x:x, StopNow())) + @support.cpython_only + def test_combinations_result_gc(self): + # bpo-42536: combinations's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = combinations([None, []], 1) + next(it) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which has the value (None,). Make sure it's re-tracked when + # it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_combinations_with_replacement_result_gc(self): + # Ditto for combinations_with_replacement. + it = combinations_with_replacement([None, []], 1) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_permutations_result_gc(self): + # Ditto for permutations. + it = permutations([None, []], 1) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_product_result_gc(self): + # Ditto for product. + it = product([None, []]) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_zip_longest_result_gc(self): + # Ditto for zip_longest. + it = zip_longest([[]]) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + class TestExamples(unittest.TestCase): def test_accumulate(self): diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index 31759f20d28..eb404463e92 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -700,6 +700,17 @@ class OrderedDictTests: with self.assertRaises(ValueError): a |= "BAD" + @support.cpython_only + def test_ordered_dict_items_result_gc(self): + # bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = iter(self.OrderedDict({None: []}).items()) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst new file mode 100644 index 00000000000..6ccacab1f64 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst @@ -0,0 +1,26 @@ +Several built-in and standard library types now ensure that their internal +result tuples are always tracked by the :term:`garbage collector +`: + +- :meth:`collections.OrderedDict.items() ` + +- :meth:`dict.items` + +- :func:`enumerate` + +- :func:`functools.reduce` + +- :func:`itertools.combinations` + +- :func:`itertools.combinations_with_replacement` + +- :func:`itertools.permutations` + +- :func:`itertools.product` + +- :func:`itertools.zip_longest` + +- :func:`zip` + +Previously, they could have become untracked by a prior garbage collection. +Patch by Brandt Bucher. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index ff03c334766..621b721d011 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -1,5 +1,6 @@ #include "Python.h" #include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "structmember.h" // PyMemberDef @@ -673,6 +674,11 @@ functools_reduce(PyObject *self, PyObject *args) if ((result = PyObject_Call(func, args, NULL)) == NULL) { goto Fail; } + // bpo-42536: The GC may have untracked this args tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(args)) { + _PyObject_GC_TRACK(args); + } } } diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 7144856c352..293735a8864 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -3,6 +3,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_tuple.h" // _PyTuple_ITEMS() #include // offsetof() @@ -2378,6 +2379,11 @@ product_next(productobject *lz) lz->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place */ assert (npools==0 || Py_REFCNT(result) == 1); @@ -2701,6 +2707,11 @@ combinations_next(combinationsobject *co) co->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place * CPython's empty tuple is a singleton and cached in * PyTuple's freelist. @@ -3035,6 +3046,11 @@ cwr_next(cwrobject *co) co->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place CPython's empty tuple is a singleton and cached in PyTuple's freelist. */ assert(r == 0 || Py_REFCNT(result) == 1); @@ -3379,6 +3395,11 @@ permutations_next(permutationsobject *po) po->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place */ assert(r == 0 || Py_REFCNT(result) == 1); @@ -4649,6 +4670,11 @@ zip_longest_next(ziplongestobject *lz) PyTuple_SET_ITEM(result, i, item); Py_DECREF(olditem); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(tuplesize); if (result == NULL) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 7a37313df8a..35e881fe272 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3989,6 +3989,11 @@ dictiter_iternextitem(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); @@ -4104,6 +4109,11 @@ dictreviter_iternext(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since + // we're recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Objects/enumobject.c b/Objects/enumobject.c index 8b5e7d3a3c6..98ece3f13fc 100644 --- a/Objects/enumobject.c +++ b/Objects/enumobject.c @@ -2,6 +2,7 @@ #include "Python.h" #include "pycore_long.h" // _PyLong_GetOne() +#include "pycore_object.h" // _PyObject_GC_TRACK() #include "clinic/enumobject.c.h" @@ -131,6 +132,11 @@ enum_next_long(enumobject *en, PyObject* next_item) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); @@ -176,6 +182,11 @@ enum_next(enumobject *en) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); diff --git a/Objects/odictobject.c b/Objects/odictobject.c index 4eb15f999bd..6c7f1175cd6 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1814,6 +1814,11 @@ odictiter_iternext(odictiterobject *di) Py_INCREF(result); Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */ Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */ + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index a73b8cb320e..352fb83d55e 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2636,6 +2636,11 @@ zip_next(zipobject *lz) PyTuple_SET_ITEM(result, i, item); Py_DECREF(olditem); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(tuplesize); if (result == NULL)