bpo-42536: GC track recycled tuples (GH-23623) (GH-23652)

Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector:

- collections.OrderedDict.items
- dict.items
- enumerate
- functools.reduce
- itertools.combinations
- itertools.combinations_with_replacement
- itertools.permutations
- itertools.product
- itertools.zip_longest
- zip

Previously, they could have become untracked by a prior garbage collection.
(cherry picked from commit 226a012d1c)
This commit is contained in:
Brandt Bucher 2020-12-07 12:08:24 -08:00 committed by GitHub
parent ca52aa3ddd
commit 7c79798238
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 194 additions and 1 deletions

View File

@ -6,6 +6,7 @@ import builtins
import collections import collections
import decimal import decimal
import fractions import fractions
import gc
import io import io
import locale import locale
import os import os
@ -27,7 +28,7 @@ from types import AsyncGeneratorType, FunctionType
from operator import neg from operator import neg
from test.support import ( from test.support import (
EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink, EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink,
maybe_get_event_loop_policy) maybe_get_event_loop_policy, cpython_only)
from test.support.script_helper import assert_python_ok from test.support.script_helper import assert_python_ok
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
try: try:
@ -1573,6 +1574,18 @@ class BuiltinTest(unittest.TestCase):
self.assertIs(cm.exception, exception) self.assertIs(cm.exception, exception)
@cpython_only
def test_zip_result_gc(self):
# bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
# about what can be untracked. Make sure we re-track result tuples
# whenever we reuse them.
it = zip([[]])
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None,). Make sure it's re-tracked when
# it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))
def test_format(self): def test_format(self):
# Test the basic machinery of the format() builtin. Don't test # Test the basic machinery of the format() builtin. Don't test
# the specifics of the various formatters # the specifics of the various formatters

View File

@ -1377,6 +1377,25 @@ class DictTest(unittest.TestCase):
d = CustomReversedDict(pairs) d = CustomReversedDict(pairs)
self.assertEqual(pairs[::-1], list(dict(d).items())) self.assertEqual(pairs[::-1], list(dict(d).items()))
@support.cpython_only
def test_dict_items_result_gc(self):
# bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = iter({None: []}.items())
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None, None). Make sure it's re-tracked
# when it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))
@support.cpython_only
def test_dict_items_result_gc(self):
# Same as test_dict_items_result_gc above, but reversed.
it = reversed({None: []}.items())
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))
class CAPITest(unittest.TestCase): class CAPITest(unittest.TestCase):

View File

@ -2,6 +2,7 @@ import unittest
import operator import operator
import sys import sys
import pickle import pickle
import gc
from test import support from test import support
@ -134,6 +135,18 @@ class EnumerateTestCase(unittest.TestCase, PickleTest):
self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq)) self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq))) self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
@support.cpython_only
def test_enumerate_result_gc(self):
# bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = self.enum([[]])
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None, None). Make sure it's re-tracked
# when it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))
class MyEnum(enumerate): class MyEnum(enumerate):
pass pass

View File

@ -12,6 +12,8 @@ from functools import reduce
import sys import sys
import struct import struct
import threading import threading
import gc
maxsize = support.MAX_Py_ssize_t maxsize = support.MAX_Py_ssize_t
minsize = -maxsize-1 minsize = -maxsize-1
@ -1554,6 +1556,51 @@ class TestBasicOps(unittest.TestCase):
self.assertRaises(StopIteration, next, f(lambda x:x, [])) self.assertRaises(StopIteration, next, f(lambda x:x, []))
self.assertRaises(StopIteration, next, f(lambda x:x, StopNow())) self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
@support.cpython_only
def test_combinations_result_gc(self):
# bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = combinations([None, []], 1)
next(it)
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which has the value (None,). Make sure it's re-tracked when
# it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))
@support.cpython_only
def test_combinations_with_replacement_result_gc(self):
# Ditto for combinations_with_replacement.
it = combinations_with_replacement([None, []], 1)
next(it)
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))
@support.cpython_only
def test_permutations_result_gc(self):
# Ditto for permutations.
it = permutations([None, []], 1)
next(it)
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))
@support.cpython_only
def test_product_result_gc(self):
# Ditto for product.
it = product([None, []])
next(it)
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))
@support.cpython_only
def test_zip_longest_result_gc(self):
# Ditto for zip_longest.
it = zip_longest([[]])
gc.collect()
self.assertTrue(gc.is_tracked(next(it)))
class TestExamples(unittest.TestCase): class TestExamples(unittest.TestCase):
def test_accumulate(self): def test_accumulate(self):

View File

@ -654,6 +654,17 @@ class OrderedDictTests:
support.check_free_after_iterating(self, lambda d: iter(d.values()), self.OrderedDict) support.check_free_after_iterating(self, lambda d: iter(d.values()), self.OrderedDict)
support.check_free_after_iterating(self, lambda d: iter(d.items()), self.OrderedDict) support.check_free_after_iterating(self, lambda d: iter(d.items()), self.OrderedDict)
@support.cpython_only
def test_ordered_dict_items_result_gc(self):
# bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
# assumptions about what can be untracked. Make sure we re-track result
# tuples whenever we reuse them.
it = iter(self.OrderedDict({None: []}).items())
gc.collect()
# That GC collection probably untracked the recycled internal result
# tuple, which is initialized to (None, None). Make sure it's re-tracked
# when it's mutated and returned from __next__:
self.assertTrue(gc.is_tracked(next(it)))
class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase): class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):

View File

@ -0,0 +1,26 @@
Several built-in and standard library types now ensure that their internal
result tuples are always tracked by the :term:`garbage collector
<garbage collection>`:
- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`
- :meth:`dict.items`
- :func:`enumerate`
- :func:`functools.reduce`
- :func:`itertools.combinations`
- :func:`itertools.combinations_with_replacement`
- :func:`itertools.permutations`
- :func:`itertools.product`
- :func:`itertools.zip_longest`
- :func:`zip`
Previously, they could have become untracked by a prior garbage collection.
Patch by Brandt Bucher.

View File

@ -3,6 +3,7 @@
#include "pycore_pystate.h" #include "pycore_pystate.h"
#include "pycore_tupleobject.h" #include "pycore_tupleobject.h"
#include "structmember.h" #include "structmember.h"
#include "pycore_object.h" // _PyObject_GC_TRACK
/* _functools module written and maintained /* _functools module written and maintained
by Hye-Shik Chang <perky@FreeBSD.org> by Hye-Shik Chang <perky@FreeBSD.org>
@ -633,6 +634,11 @@ functools_reduce(PyObject *self, PyObject *args)
if ((result = PyObject_Call(func, args, NULL)) == NULL) { if ((result = PyObject_Call(func, args, NULL)) == NULL) {
goto Fail; goto Fail;
} }
// bpo-42536: The GC may have untracked this args tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(args)) {
_PyObject_GC_TRACK(args);
}
} }
} }

View File

@ -3,6 +3,7 @@
#include "Python.h" #include "Python.h"
#include "pycore_tupleobject.h" #include "pycore_tupleobject.h"
#include "structmember.h" #include "structmember.h"
#include "pycore_object.h" // _PyObject_GC_TRACK()
/* Itertools module written and maintained /* Itertools module written and maintained
by Raymond D. Hettinger <python@rcn.com> by Raymond D. Hettinger <python@rcn.com>
@ -2255,6 +2256,11 @@ product_next(productobject *lz)
lz->result = result; lz->result = result;
Py_DECREF(old_result); Py_DECREF(old_result);
} }
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place */ /* Now, we've got the only copy so we can update it in-place */
assert (npools==0 || Py_REFCNT(result) == 1); assert (npools==0 || Py_REFCNT(result) == 1);
@ -2580,6 +2586,11 @@ combinations_next(combinationsobject *co)
co->result = result; co->result = result;
Py_DECREF(old_result); Py_DECREF(old_result);
} }
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place /* Now, we've got the only copy so we can update it in-place
* CPython's empty tuple is a singleton and cached in * CPython's empty tuple is a singleton and cached in
* PyTuple's freelist. * PyTuple's freelist.
@ -2916,6 +2927,11 @@ cwr_next(cwrobject *co)
co->result = result; co->result = result;
Py_DECREF(old_result); Py_DECREF(old_result);
} }
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place CPython's /* Now, we've got the only copy so we can update it in-place CPython's
empty tuple is a singleton and cached in PyTuple's freelist. */ empty tuple is a singleton and cached in PyTuple's freelist. */
assert(r == 0 || Py_REFCNT(result) == 1); assert(r == 0 || Py_REFCNT(result) == 1);
@ -3259,6 +3275,11 @@ permutations_next(permutationsobject *po)
po->result = result; po->result = result;
Py_DECREF(old_result); Py_DECREF(old_result);
} }
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
else if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
/* Now, we've got the only copy so we can update it in-place */ /* Now, we've got the only copy so we can update it in-place */
assert(r == 0 || Py_REFCNT(result) == 1); assert(r == 0 || Py_REFCNT(result) == 1);
@ -4536,6 +4557,11 @@ zip_longest_next(ziplongestobject *lz)
PyTuple_SET_ITEM(result, i, item); PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem); Py_DECREF(olditem);
} }
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} else { } else {
result = PyTuple_New(tuplesize); result = PyTuple_New(tuplesize);
if (result == NULL) if (result == NULL)

View File

@ -3769,6 +3769,11 @@ dictiter_iternextitem(dictiterobject *di)
Py_INCREF(result); Py_INCREF(result);
Py_DECREF(oldkey); Py_DECREF(oldkey);
Py_DECREF(oldvalue); Py_DECREF(oldvalue);
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} }
else { else {
result = PyTuple_New(2); result = PyTuple_New(2);
@ -3884,6 +3889,11 @@ dictreviter_iternext(dictiterobject *di)
Py_INCREF(result); Py_INCREF(result);
Py_DECREF(oldkey); Py_DECREF(oldkey);
Py_DECREF(oldvalue); Py_DECREF(oldvalue);
// bpo-42536: The GC may have untracked this result tuple. Since
// we're recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} }
else { else {
result = PyTuple_New(2); result = PyTuple_New(2);

View File

@ -1,6 +1,7 @@
/* enumerate object */ /* enumerate object */
#include "Python.h" #include "Python.h"
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "clinic/enumobject.c.h" #include "clinic/enumobject.c.h"
@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
PyTuple_SET_ITEM(result, 1, next_item); PyTuple_SET_ITEM(result, 1, next_item);
Py_DECREF(old_index); Py_DECREF(old_index);
Py_DECREF(old_item); Py_DECREF(old_item);
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
return result; return result;
} }
result = PyTuple_New(2); result = PyTuple_New(2);
@ -175,6 +181,11 @@ enum_next(enumobject *en)
PyTuple_SET_ITEM(result, 1, next_item); PyTuple_SET_ITEM(result, 1, next_item);
Py_DECREF(old_index); Py_DECREF(old_index);
Py_DECREF(old_item); Py_DECREF(old_item);
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
return result; return result;
} }
result = PyTuple_New(2); result = PyTuple_New(2);

View File

@ -1766,6 +1766,11 @@ odictiter_iternext(odictiterobject *di)
Py_INCREF(result); Py_INCREF(result);
Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */ Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */
Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */ Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} }
else { else {
result = PyTuple_New(2); result = PyTuple_New(2);

View File

@ -4,6 +4,7 @@
#include <ctype.h> #include <ctype.h>
#include "ast.h" #include "ast.h"
#undef Yield /* undefine macro conflicting with <winbase.h> */ #undef Yield /* undefine macro conflicting with <winbase.h> */
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pystate.h" #include "pycore_pystate.h"
#include "pycore_tupleobject.h" #include "pycore_tupleobject.h"
@ -2618,6 +2619,11 @@ zip_next(zipobject *lz)
PyTuple_SET_ITEM(result, i, item); PyTuple_SET_ITEM(result, i, item);
Py_DECREF(olditem); Py_DECREF(olditem);
} }
// bpo-42536: The GC may have untracked this result tuple. Since we're
// recycling it, make sure it's tracked again:
if (!_PyObject_GC_IS_TRACKED(result)) {
_PyObject_GC_TRACK(result);
}
} else { } else {
result = PyTuple_New(tuplesize); result = PyTuple_New(tuplesize);
if (result == NULL) if (result == NULL)