GH-97002: Prevent `_PyInterpreterFrame`s from backing more than one `PyFrameObject` (GH-97996)

This commit is contained in:
Brandt Bucher 2022-10-06 16:20:01 -07:00 committed by GitHub
parent cbf0afd8a1
commit 21a2d9ff55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 6 deletions

View File

@ -1,3 +1,4 @@
import gc
import re
import sys
import textwrap
@ -261,5 +262,69 @@ class TestIncompleteFrameAreInvisible(unittest.TestCase):
""")
assert_python_ok("-c", code)
@support.cpython_only
def test_sneaky_frame_object(self):
def trace(frame, event, arg):
"""
Don't actually do anything, just force a frame object to be created.
"""
def callback(phase, info):
"""
Yo dawg, I heard you like frames, so I'm allocating a frame while
you're allocating a frame, so you can have a frame while you have a
frame!
"""
nonlocal sneaky_frame_object
sneaky_frame_object = sys._getframe().f_back
# We're done here:
gc.callbacks.remove(callback)
def f():
while True:
yield
old_threshold = gc.get_threshold()
old_callbacks = gc.callbacks[:]
old_enabled = gc.isenabled()
old_trace = sys.gettrace()
try:
# Stop the GC for a second while we set things up:
gc.disable()
# Create a paused generator:
g = f()
next(g)
# Move all objects to the oldest generation, and tell the GC to run
# on the *very next* allocation:
gc.collect()
gc.set_threshold(1, 0, 0)
# Okay, so here's the nightmare scenario:
# - We're tracing the resumption of a generator, which creates a new
# frame object.
# - The allocation of this frame object triggers a collection
# *before* the frame object is actually created.
# - During the collection, we request the exact same frame object.
# This test does it with a GC callback, but in real code it would
# likely be a trace function, weakref callback, or finalizer.
# - The collection finishes, and the original frame object is
# created. We now have two frame objects fighting over ownership
# of the same interpreter frame!
sys.settrace(trace)
gc.callbacks.append(callback)
sneaky_frame_object = None
gc.enable()
next(g)
# g.gi_frame should be the the frame object from the callback (the
# one that was *requested* second, but *created* first):
self.assertIs(g.gi_frame, sneaky_frame_object)
finally:
gc.set_threshold(*old_threshold)
gc.callbacks[:] = old_callbacks
sys.settrace(old_trace)
if old_enabled:
gc.enable()
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,3 @@
Fix an issue where several frame objects could be backed by the same
interpreter frame, possibly leading to corrupted memory and hard crashes of
the interpreter.

View File

@ -37,14 +37,31 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame)
Py_XDECREF(error_type);
Py_XDECREF(error_value);
Py_XDECREF(error_traceback);
return NULL;
}
else {
assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
assert(frame->owner != FRAME_CLEARED);
f->f_frame = frame;
frame->frame_obj = f;
PyErr_Restore(error_type, error_value, error_traceback);
PyErr_Restore(error_type, error_value, error_traceback);
if (frame->frame_obj) {
// GH-97002: How did we get into this horrible situation? Most likely,
// allocating f triggered a GC collection, which ran some code that
// *also* created the same frame... while we were in the middle of
// creating it! See test_sneaky_frame_object in test_frame.py for a
// concrete example.
//
// Regardless, just throw f away and use that frame instead, since it's
// already been exposed to user code. It's actually a bit tricky to do
// this, since we aren't backed by a real _PyInterpreterFrame anymore.
// Just pretend that we have an owned, cleared frame so frame_dealloc
// doesn't make the situation worse:
f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data;
f->f_frame->owner = FRAME_CLEARED;
f->f_frame->frame_obj = f;
Py_DECREF(f);
return frame->frame_obj;
}
assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
assert(frame->owner != FRAME_CLEARED);
f->f_frame = frame;
frame->frame_obj = f;
return f;
}