bpo-33608: Factor out a private, per-interpreter _Py_AddPendingCall(). (gh-12360)

This is effectively an un-revert of #11617 and #12024 (reverted in #12159). Portions of those were merged in other PRs (with lower risk) and this represents the remainder. Note that I found 3 different bugs in the original PRs and have fixed them here.
Eric Snow, 2019-04-12 09:18:16 -06:00, committed by GitHub
parent 44235041f3
commit f13c5c8b94
11 changed files with 151 additions and 108 deletions
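
For orientation before the per-file hunks: the net effect of the patch is to split the eval-loop bookkeeping between the runtime and each interpreter. A condensed sketch of the resulting layout, paraphrased from the pycore_ceval.h hunks below (unrelated fields elided; not the verbatim structs):

    /* Per-interpreter (interp->ceval), moved out of _PyRuntime.ceval: */
    struct _ceval_interpreter_state {
        _Py_atomic_int eval_breaker;      /* "leave the fast path" flag */
        struct _pending_calls pending;    /* per-interpreter call queue */
    };

    /* Per-process (_PyRuntime.ceval), what remains: */
    struct _ceval_runtime_state {
        int tracing_possible;
        _Py_atomic_int gil_drop_request;  /* still runtime-wide */
        _Py_atomic_int signals_pending;   /* still runtime-wide */
        struct _gil_runtime_state gil;    /* the GIL itself stays shared */
    };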

Include/ceval.h

@@ -221,7 +221,7 @@ PyAPI_FUNC(Py_ssize_t) _PyEval_RequestCodeExtraIndex(freefunc);
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *);
PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *);
PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
PyAPI_FUNC(void) _PyEval_SignalAsyncExc(PyInterpreterState *);
#endif
/* Masks and values used by FORMAT_VALUE opcode. */

Include/internal/pycore_ceval.h

@@ -11,7 +11,11 @@ extern "C" {
#include "pycore_atomic.h"
#include "pythread.h"
PyAPI_FUNC(void) _Py_FinishPendingCalls(void);
struct _is; // See PyInterpreterState in cpython/pystate.h.
PyAPI_FUNC(int) _Py_AddPendingCall(struct _is*, unsigned long, int (*)(void *), void *);
PyAPI_FUNC(int) _Py_MakePendingCalls(struct _is*);
PyAPI_FUNC(void) _Py_FinishPendingCalls(struct _is*);
struct _pending_calls {
int finishing;
@@ -24,6 +28,7 @@ struct _pending_calls {
int async_exc;
#define NPENDINGCALLS 32
struct {
unsigned long thread_id;
int (*func)(void *);
void *arg;
} calls[NPENDINGCALLS];
@@ -31,6 +36,13 @@ struct _pending_calls {
int last;
};
struct _ceval_interpreter_state {
/* This single variable consolidates all requests to break out of
the fast path in the eval loop. */
_Py_atomic_int eval_breaker;
struct _pending_calls pending;
};
#include "pycore_gil.h"
struct _ceval_runtime_state {
@@ -41,12 +53,8 @@ struct _ceval_runtime_state {
c_tracefunc. This speeds up the if statement in
PyEval_EvalFrameEx() after fast_next_opcode. */
int tracing_possible;
/* This single variable consolidates all requests to break out of
the fast path in the eval loop. */
_Py_atomic_int eval_breaker;
/* Request for dropping the GIL */
_Py_atomic_int gil_drop_request;
struct _pending_calls pending;
/* Request for checking signals. */
_Py_atomic_int signals_pending;
struct _gil_runtime_state gil;
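
The queue these structs describe is a fixed-capacity ring buffer: first and last chase each other modulo NPENDINGCALLS, with one slot sacrificed so that "full" and "empty" are distinguishable. A self-contained miniature of the push invariant, including the per-slot thread_id this patch adds (illustration only, not the CPython code):

    #define NPENDINGCALLS 32

    struct mini_pending {
        int first;  /* index of the oldest queued call */
        int last;   /* index of the next free slot */
        struct {
            unsigned long thread_id;  /* 0 means "any thread" */
            int (*func)(void *);
            void *arg;
        } calls[NPENDINGCALLS];
    };

    static int
    mini_push(struct mini_pending *p, unsigned long tid,
              int (*func)(void *), void *arg)
    {
        int j = (p->last + 1) % NPENDINGCALLS;
        if (j == p->first) {
            return -1;  /* full: advancing last would collide with first */
        }
        p->calls[p->last].thread_id = tid;
        p->calls[p->last].func = func;
        p->calls[p->last].arg = arg;
        p->last = j;
        return 0;
    }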

Include/internal/pycore_pystate.h

@@ -12,6 +12,7 @@ extern "C" {
#include "pystate.h"
#include "pythread.h"
#include "pycore_atomic.h"
#include "pycore_ceval.h"
#include "pycore_pathconfig.h"
#include "pycore_pymem.h"
@@ -83,6 +84,8 @@ struct _is {
PyObject *pyexitmodule;
uint64_t tstate_next_unique_id;
struct _ceval_interpreter_state ceval;
};
PyAPI_FUNC(struct _is*) _PyInterpreterState_LookUpID(PY_INT64_T);

Lib/test/test_capi.py

@@ -373,7 +373,7 @@ class TestPendingCalls(unittest.TestCase):
def test_pendingcalls_threaded(self):
#do every callback on a separate thread
n = 32 #total callbacks
n = 32 #total callbacks (see NPENDINGCALLS in pycore_ceval.h)
threads = []
class foo(object):pass
context = foo()

Misc/NEWS.d/next/Core and Builtins/ (new NEWS entry for bpo-33608)

@@ -0,0 +1,5 @@
We added a new internal _Py_AddPendingCall() that operates relative to the
provided interpreter. This allows us to use the existing implementation to
ask another interpreter to do work that cannot be done in the current
interpreter, like decref an object the other interpreter owns. The existing
Py_AddPendingCall() only operates relative to the main interpreter.
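
As a usage sketch of what this entry describes (the helper below is hypothetical, not part of the patch, and assumes Python.h plus the internal pycore_ceval.h for _Py_AddPendingCall()): rather than decref'ing an object owned by another interpreter directly, queue the decref to run in that interpreter's eval loop:

    static int
    _decref_callback(void *arg)
    {
        Py_DECREF((PyObject *)arg);
        return 0;  /* nonzero would be reported as an internal error */
    }

    static int
    schedule_decref(PyInterpreterState *interp, PyObject *obj)
    {
        /* thread_id 0: any thread running `interp` may service this. */
        return _Py_AddPendingCall(interp, 0, _decref_callback, obj);
    }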

Modules/_testcapimodule.c

@@ -2445,6 +2445,7 @@ pending_threadfunc(PyObject *self, PyObject *arg)
Py_INCREF(callable);
Py_BEGIN_ALLOW_THREADS
/* XXX Use the internal _Py_AddPendingCall(). */
r = Py_AddPendingCall(&_pending_callback, callable);
Py_END_ALLOW_THREADS

Modules/signalmodule.c

@@ -19,6 +19,7 @@
#include <process.h>
#endif
#endif
#include "internal/pycore_pystate.h"
#ifdef HAVE_SIGNAL_H
#include <signal.h>
@@ -295,8 +296,10 @@ trip_signal(int sig_num)
{
/* Py_AddPendingCall() isn't signal-safe, but we
still use it for this exceptional case. */
Py_AddPendingCall(report_wakeup_send_error,
(void *)(intptr_t) last_error);
_Py_AddPendingCall(_PyRuntime.interpreters.main,
main_thread,
report_wakeup_send_error,
(void *)(intptr_t) last_error);
}
}
}
@@ -313,8 +316,10 @@ trip_signal(int sig_num)
{
/* Py_AddPendingCall() isn't signal-safe, but we
still use it for this exceptional case. */
Py_AddPendingCall(report_wakeup_write_error,
(void *)(intptr_t)errno);
_Py_AddPendingCall(_PyRuntime.interpreters.main,
main_thread,
report_wakeup_write_error,
(void *)(intptr_t)errno);
}
}
}
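
The callbacks scheduled here follow a common constraint: a signal handler can only smuggle a machine word through the void* argument, so the real reporting happens later with the GIL held. A sketch of the shape of such a callback (the actual report_wakeup_write_error elsewhere in this file differs in detail):

    #include <stdint.h>
    #include <stdio.h>

    /* Runs later in the main interpreter's eval loop, GIL held. */
    static int
    report_wakeup_error_sketch(void *arg)
    {
        int saved_errno = (int)(intptr_t)arg;  /* unpacked from the void* */
        fprintf(stderr, "wakeup fd write failed (errno %d)\n", saved_errno);
        return 0;
    }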

Python/ceval.c

@@ -96,61 +96,61 @@ static long dxp[256];
/* This can set eval_breaker to 0 even though gil_drop_request became
1. We believe this is all right because the eval loop will release
the GIL eventually anyway. */
#define COMPUTE_EVAL_BREAKER() \
#define COMPUTE_EVAL_BREAKER(interp) \
_Py_atomic_store_relaxed( \
&_PyRuntime.ceval.eval_breaker, \
&interp->ceval.eval_breaker, \
GIL_REQUEST | \
_Py_atomic_load_relaxed(&_PyRuntime.ceval.signals_pending) | \
_Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \
_PyRuntime.ceval.pending.async_exc)
_Py_atomic_load_relaxed(&interp->ceval.pending.calls_to_do) | \
interp->ceval.pending.async_exc)
#define SET_GIL_DROP_REQUEST() \
#define SET_GIL_DROP_REQUEST(interp) \
do { \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
_Py_atomic_store_relaxed(&interp->ceval.eval_breaker, 1); \
} while (0)
#define RESET_GIL_DROP_REQUEST() \
#define RESET_GIL_DROP_REQUEST(interp) \
do { \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \
COMPUTE_EVAL_BREAKER(); \
COMPUTE_EVAL_BREAKER(interp); \
} while (0)
/* Pending calls are only modified under pending_lock */
#define SIGNAL_PENDING_CALLS() \
#define SIGNAL_PENDING_CALLS(interp) \
do { \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
_Py_atomic_store_relaxed(&interp->ceval.pending.calls_to_do, 1); \
_Py_atomic_store_relaxed(&interp->ceval.eval_breaker, 1); \
} while (0)
#define UNSIGNAL_PENDING_CALLS() \
#define UNSIGNAL_PENDING_CALLS(interp) \
do { \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \
COMPUTE_EVAL_BREAKER(); \
_Py_atomic_store_relaxed(&interp->ceval.pending.calls_to_do, 0); \
COMPUTE_EVAL_BREAKER(interp); \
} while (0)
#define SIGNAL_PENDING_SIGNALS() \
do { \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.signals_pending, 1); \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
_Py_atomic_store_relaxed(&_PyRuntime.interpreters.main->ceval.eval_breaker, 1); \
} while (0)
#define UNSIGNAL_PENDING_SIGNALS() \
do { \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.signals_pending, 0); \
COMPUTE_EVAL_BREAKER(); \
COMPUTE_EVAL_BREAKER(_PyRuntime.interpreters.main); \
} while (0)
#define SIGNAL_ASYNC_EXC() \
#define SIGNAL_ASYNC_EXC(interp) \
do { \
_PyRuntime.ceval.pending.async_exc = 1; \
_Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
interp->ceval.pending.async_exc = 1; \
_Py_atomic_store_relaxed(&interp->ceval.eval_breaker, 1); \
} while (0)
#define UNSIGNAL_ASYNC_EXC() \
#define UNSIGNAL_ASYNC_EXC(interp) \
do { \
_PyRuntime.ceval.pending.async_exc = 0; \
COMPUTE_EVAL_BREAKER(); \
interp->ceval.pending.async_exc = 0; \
COMPUTE_EVAL_BREAKER(interp); \
} while (0)
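
Spelled out as a plain function, the consolidation COMPUTE_EVAL_BREAKER(interp) performs is roughly the following sketch (atomics and struct accesses replaced by plain ints). Note the asymmetry the macros above preserve: signals_pending stays in _PyRuntime.ceval, and SIGNAL_PENDING_SIGNALS() raises only the main interpreter's eval_breaker, since signals are handled only there.

    /* Sketch of the logic behind COMPUTE_EVAL_BREAKER(interp). */
    static int
    compute_eval_breaker_sketch(int gil_drop_request,  /* runtime-wide */
                                int signals_pending,   /* runtime-wide */
                                int calls_to_do,       /* per-interpreter */
                                int async_exc)         /* per-interpreter */
    {
        /* Nonzero: leave the eval loop's fast path and service requests. */
        return gil_drop_request | signals_pending | calls_to_do | async_exc;
    }
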
@@ -177,10 +177,7 @@ PyEval_InitThreads(void)
create_gil();
take_gil(_PyThreadState_GET());
_PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
if (_PyRuntime.ceval.pending.lock == NULL) {
Py_FatalError("Can't initialize threads for pending calls");
}
// The pending calls mutex is initialized in PyInterpreterState_New().
}
void
@@ -192,11 +189,6 @@ _PyEval_FiniThreads(void)
destroy_gil();
assert(!gil_created());
if (_PyRuntime.ceval.pending.lock != NULL) {
PyThread_free_lock(_PyRuntime.ceval.pending.lock);
_PyRuntime.ceval.pending.lock = NULL;
}
}
void
@@ -256,8 +248,10 @@ PyEval_ReInitThreads(void)
recreate_gil();
take_gil(current_tstate);
_PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
if (_PyRuntime.ceval.pending.lock == NULL) {
// Only the main interpreter remains, so ignore the rest.
PyInterpreterState *interp = _PyRuntime.interpreters.main;
interp->ceval.pending.lock = PyThread_allocate_lock();
if (interp->ceval.pending.lock == NULL) {
Py_FatalError("Can't initialize threads for pending calls");
}
@@ -269,9 +263,9 @@ PyEval_ReInitThreads(void)
raised. */
void
_PyEval_SignalAsyncExc(void)
_PyEval_SignalAsyncExc(PyInterpreterState *interp)
{
SIGNAL_ASYNC_EXC();
SIGNAL_ASYNC_EXC(interp);
}
PyThreadState *
@@ -339,7 +333,7 @@ _PyEval_SignalReceived(void)
/* Push one item onto the queue while holding the lock. */
static int
_push_pending_call(struct _pending_calls *pending,
_push_pending_call(struct _pending_calls *pending, unsigned long thread_id,
int (*func)(void *), void *arg)
{
int i = pending->last;
@@ -347,6 +341,7 @@ _push_pending_call(struct _pending_calls *pending,
if (j == pending->first) {
return -1; /* Queue full */
}
pending->calls[i].thread_id = thread_id;
pending->calls[i].func = func;
pending->calls[i].arg = arg;
pending->last = j;
@@ -355,7 +350,7 @@ _push_pending_call(struct _pending_calls *pending,
/* Pop one item off the queue while holding the lock. */
static void
_pop_pending_call(struct _pending_calls *pending,
_pop_pending_call(struct _pending_calls *pending, unsigned long *thread_id,
int (**func)(void *), void **arg)
{
int i = pending->first;
@@ -365,6 +360,7 @@ _pop_pending_call(struct _pending_calls *pending,
*func = pending->calls[i].func;
*arg = pending->calls[i].arg;
*thread_id = pending->calls[i].thread_id;
pending->first = (i + 1) % NPENDINGCALLS;
}
@@ -374,9 +370,10 @@ _pop_pending_call(struct _pending_calls *pending,
*/
int
Py_AddPendingCall(int (*func)(void *), void *arg)
_Py_AddPendingCall(PyInterpreterState *interp, unsigned long thread_id,
int (*func)(void *), void *arg)
{
struct _pending_calls *pending = &_PyRuntime.ceval.pending;
struct _pending_calls *pending = &interp->ceval.pending;
PyThread_acquire_lock(pending->lock, WAIT_LOCK);
if (pending->finishing) {
@@ -391,14 +388,23 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
PyErr_Restore(exc, val, tb);
return -1;
}
int result = _push_pending_call(pending, func, arg);
int result = _push_pending_call(pending, thread_id, func, arg);
/* signal main loop */
SIGNAL_PENDING_CALLS(interp);
PyThread_release_lock(pending->lock);
/* signal main loop */
SIGNAL_PENDING_CALLS();
return result;
}
/* Py_AddPendingCall() is a simple wrapper for the sake
of backward-compatibility. */
int
Py_AddPendingCall(int (*func)(void *), void *arg)
{
PyInterpreterState *interp = _PyRuntime.interpreters.main;
return _Py_AddPendingCall(interp, _PyRuntime.main_thread, func, arg);
}
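
For extension authors nothing changes at the public boundary: the traditional call still works, and now lands on the main interpreter's queue pinned to the main thread. A minimal usage sketch:

    static int
    my_callback(void *arg)
    {
        /* Runs later in the main interpreter's eval loop, GIL held. */
        return 0;  /* nonzero reports an error */
    }

    static void
    schedule_my_callback(void)
    {
        if (Py_AddPendingCall(my_callback, NULL) != 0) {
            /* Queue full (NPENDINGCALLS slots) or interpreter finishing. */
        }
    }
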
static int
handle_signals(void)
{
@@ -425,15 +431,11 @@ handle_signals(void)
}
static int
make_pending_calls(struct _pending_calls* pending)
make_pending_calls(PyInterpreterState *interp)
{
struct _pending_calls *pending = &interp->ceval.pending;
static int busy = 0;
/* only service pending calls on main thread */
if (PyThread_get_thread_ident() != _PyRuntime.main_thread) {
return 0;
}
/* don't perform recursive pending calls */
if (busy) {
return 0;
@@ -441,19 +443,27 @@ make_pending_calls(struct _pending_calls* pending)
busy = 1;
/* unsignal before starting to call callbacks, so that any callback
added in-between re-signals */
UNSIGNAL_PENDING_CALLS();
UNSIGNAL_PENDING_CALLS(interp);
int res = 0;
/* perform a bounded number of calls, in case of recursion */
unsigned long thread_id = 0;
for (int i=0; i<NPENDINGCALLS; i++) {
int (*func)(void *) = NULL;
void *arg = NULL;
/* pop one item off the queue while holding the lock */
PyThread_acquire_lock(pending->lock, WAIT_LOCK);
_pop_pending_call(pending, &func, &arg);
_pop_pending_call(pending, &thread_id, &func, &arg);
PyThread_release_lock(pending->lock);
if (thread_id && PyThread_get_thread_ident() != thread_id) {
// Thread mismatch, so move it to the end of the list
// and start over.
_Py_AddPendingCall(interp, thread_id, func, arg);
goto error;
}
/* having released the lock, perform the callback */
if (func == NULL) {
break;
@@ -469,14 +479,14 @@ make_pending_calls(struct _pending_calls* pending)
error:
busy = 0;
SIGNAL_PENDING_CALLS();
SIGNAL_PENDING_CALLS(interp); /* We're not done yet */
return res;
}
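
Two details of make_pending_calls() are worth spelling out. The old main-thread-only guard is gone: any thread may now drain its interpreter's queue, with per-call thread pinning instead, and a call pinned to a different thread is re-queued, after which the error path re-runs SIGNAL_PENDING_CALLS(interp) purely so the owning thread notices on its next eval-breaker check. The static busy flag, meanwhile, is a reentrancy guard (the GIL serializes entry here, so a plain int suffices); the pattern, as a sketch:

    static int busy = 0;

    static int
    guarded_drain(int (*drain_once)(void))
    {
        if (busy) {
            return 0;   /* nested call: let the outer pass finish up */
        }
        busy = 1;
        int res = drain_once();
        busy = 0;
        return res;
    }
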
void
_Py_FinishPendingCalls(void)
_Py_FinishPendingCalls(PyInterpreterState *interp)
{
struct _pending_calls *pending = &_PyRuntime.ceval.pending;
struct _pending_calls *pending = &interp->ceval.pending;
assert(PyGILState_Check());
@@ -488,7 +498,7 @@ _Py_FinishPendingCalls(void)
return;
}
if (make_pending_calls(pending) < 0) {
if (make_pending_calls(interp) < 0) {
PyObject *exc, *val, *tb;
PyErr_Fetch(&exc, &val, &tb);
PyErr_BadInternalCall();
@@ -497,6 +507,14 @@ _Py_FinishPendingCalls(void)
}
}
int
_Py_MakePendingCalls(PyInterpreterState *interp)
{
assert(PyGILState_Check());
return make_pending_calls(interp);
}
/* Py_MakePendingCalls() is a simple wrapper for the sake
of backward-compatibility. */
int
@@ -511,12 +529,8 @@ Py_MakePendingCalls(void)
return res;
}
res = make_pending_calls(&_PyRuntime.ceval.pending);
if (res != 0) {
return res;
}
return 0;
PyInterpreterState *interp = _PyRuntime.interpreters.main;
return make_pending_calls(interp);
}
/* The interpreter's recursion limit */
@@ -638,7 +652,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PyObject **fastlocals, **freevars;
PyObject *retval = NULL; /* Return value */
PyThreadState *tstate = _PyThreadState_GET();
_Py_atomic_int *eval_breaker = &_PyRuntime.ceval.eval_breaker;
_Py_atomic_int *eval_breaker = &tstate->interp->ceval.eval_breaker;
PyCodeObject *co;
/* when tracing we set things up so that
@@ -1059,9 +1073,9 @@ main_loop:
}
}
if (_Py_atomic_load_relaxed(
&_PyRuntime.ceval.pending.calls_to_do))
&tstate->interp->ceval.pending.calls_to_do))
{
if (make_pending_calls(&_PyRuntime.ceval.pending) != 0) {
if (make_pending_calls(tstate->interp) != 0) {
goto error;
}
}
@@ -1093,7 +1107,7 @@ main_loop:
if (tstate->async_exc != NULL) {
PyObject *exc = tstate->async_exc;
tstate->async_exc = NULL;
UNSIGNAL_ASYNC_EXC();
UNSIGNAL_ASYNC_EXC(tstate->interp);
PyErr_SetNone(exc);
Py_DECREF(exc);
goto error;

Python/ceval_gil.h

@@ -176,7 +176,7 @@ static void drop_gil(PyThreadState *tstate)
&_PyRuntime.ceval.gil.last_holder)
) == tstate)
{
RESET_GIL_DROP_REQUEST();
RESET_GIL_DROP_REQUEST(tstate->interp);
/* NOTE: if COND_WAIT does not atomically start waiting when
releasing the mutex, another thread can run through, take
the GIL and drop it again, and reset the condition
@@ -213,7 +213,7 @@ static void take_gil(PyThreadState *tstate)
if (timed_out &&
_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) &&
_PyRuntime.ceval.gil.switch_number == saved_switchnum) {
SET_GIL_DROP_REQUEST();
SET_GIL_DROP_REQUEST(tstate->interp);
}
}
_ready:
@@ -239,10 +239,10 @@ _ready:
MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
#endif
if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) {
RESET_GIL_DROP_REQUEST();
RESET_GIL_DROP_REQUEST(tstate->interp);
}
if (tstate->async_exc != NULL) {
_PyEval_SignalAsyncExc();
_PyEval_SignalAsyncExc(tstate->interp);
}
MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);

Python/pylifecycle.c

@@ -1146,7 +1146,7 @@ Py_FinalizeEx(void)
interp = tstate->interp;
// Make any remaining pending calls.
_Py_FinishPendingCalls();
_Py_FinishPendingCalls(interp);
/* The interpreter is still entirely intact at this point, and the
* exit funcs may be relying on that. In particular, if some thread
@@ -1552,6 +1552,9 @@ Py_EndInterpreter(PyThreadState *tstate)
// Wrap up existing "threading"-module-created, non-daemon threads.
wait_for_thread_shutdown();
// Make any remaining pending calls.
_Py_FinishPendingCalls(interp);
call_py_exitfuncs(interp);
if (tstate != interp->tstate_head || tstate->next != NULL)

Python/pystate.c

@@ -173,6 +173,14 @@ PyInterpreterState_New(void)
memset(interp, 0, sizeof(*interp));
interp->id_refcount = -1;
interp->check_interval = 100;
interp->ceval.pending.lock = PyThread_allocate_lock();
if (interp->ceval.pending.lock == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"failed to create interpreter ceval pending mutex");
return NULL;
}
interp->core_config = _PyCoreConfig_INIT;
interp->eval_frame = _PyEval_EvalFrameDefault;
#ifdef HAVE_DLOPEN
@@ -279,6 +287,9 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
if (interp->id_mutex != NULL) {
PyThread_free_lock(interp->id_mutex);
}
if (interp->ceval.pending.lock != NULL) {
PyThread_free_lock(interp->ceval.pending.lock);
}
PyMem_RawFree(interp);
}
@@ -928,7 +939,7 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
p->async_exc = exc;
HEAD_UNLOCK();
Py_XDECREF(old_exc);
_PyEval_SignalAsyncExc();
_PyEval_SignalAsyncExc(interp);
return 1;
}
}
@@ -1342,7 +1353,7 @@ _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data)
return 0;
}
static void
static int
_release_xidata(void *arg)
{
_PyCrossInterpreterData *data = (_PyCrossInterpreterData *)arg;
@@ -1350,30 +1361,8 @@ _release_xidata(void *arg)
data->free(data->data);
}
Py_XDECREF(data->obj);
}
static void
_call_in_interpreter(PyInterpreterState *interp,
void (*func)(void *), void *arg)
{
/* We would use Py_AddPendingCall() if it weren't specific to the
* main interpreter (see bpo-33608). In the meantime we take a
* naive approach.
*/
PyThreadState *save_tstate = NULL;
if (interp != _PyInterpreterState_Get()) {
// XXX Using the "head" thread isn't strictly correct.
PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
// XXX Possible GILState issues?
save_tstate = PyThreadState_Swap(tstate);
}
func(arg);
// Switch back.
if (save_tstate != NULL) {
PyThreadState_Swap(save_tstate);
}
PyMem_Free(data);
return 0;
}
void
@@ -1384,7 +1373,7 @@ _PyCrossInterpreterData_Release(_PyCrossInterpreterData *data)
return;
}
// Switch to the original interpreter.
// Get the original interpreter.
PyInterpreterState *interp = _PyInterpreterState_LookUpID(data->interp);
if (interp == NULL) {
// The interpreter was already destroyed.
@@ -1393,9 +1382,24 @@ _PyCrossInterpreterData_Release(_PyCrossInterpreterData *data)
}
return;
}
// XXX There's an ever-so-slight race here...
if (interp->finalizing) {
// XXX Someone leaked some memory...
return;
}
// "Release" the data and/or the object.
_call_in_interpreter(interp, _release_xidata, data);
_PyCrossInterpreterData *copied = PyMem_Malloc(sizeof(_PyCrossInterpreterData));
if (copied == NULL) {
PyErr_SetString(PyExc_MemoryError,
"Not enough memory to preserve cross-interpreter data");
PyErr_Print();
return;
}
memcpy(copied, data, sizeof(_PyCrossInterpreterData));
if (_Py_AddPendingCall(interp, 0, _release_xidata, copied) != 0) {
// XXX Queue full or couldn't get lock. Try again somehow?
}
}
PyObject *
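
The release path above replaces the old thread-swapping _call_in_interpreter() with a deferred pending call: the _PyCrossInterpreterData is copied to the heap (the caller's copy may be gone before the callback runs) and _release_xidata is queued on the owning interpreter with thread_id 0, so any of its threads may service it. The general pattern, as a hedged sketch (names hypothetical; assumes Python.h plus the internal pycore_ceval.h; unlike the XXX above, this version frees the copy when queueing fails):

    #include <string.h>

    /* The callback takes ownership of the heap copy and must free it. */
    static int
    defer_to_interp(PyInterpreterState *interp, int (*cb)(void *),
                    const void *payload, size_t size)
    {
        void *copied = PyMem_Malloc(size);
        if (copied == NULL) {
            return -1;
        }
        memcpy(copied, payload, size);
        if (_Py_AddPendingCall(interp, 0, cb, copied) != 0) {
            PyMem_Free(copied);
            return -1;
        }
        return 0;
    }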