bpo-30703: Improve signal delivery (#2415)
* Improve signal delivery Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-unsafe functions. * Remove unused function * Improve comments * Add stress test * Adapt for --without-threads * Add second stress test * Add NEWS blurb * Address comments @haypo
This commit is contained in:
parent
9f3bdcb643
commit
c08177a1cc
|
@ -54,6 +54,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf);
|
|||
#endif
|
||||
|
||||
PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
|
||||
PyAPI_FUNC(void) _PyEval_SignalReceived(void);
|
||||
PyAPI_FUNC(int) Py_MakePendingCalls(void);
|
||||
|
||||
/* Protection against deeply nested recursive calls
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import random
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
|
@ -941,6 +942,101 @@ class PendingSignalsTests(unittest.TestCase):
|
|||
(exitcode, stdout))
|
||||
|
||||
|
||||
class StressTest(unittest.TestCase):
|
||||
"""
|
||||
Stress signal delivery, especially when a signal arrives in
|
||||
the middle of recomputing the signal state or executing
|
||||
previously tripped signal handlers.
|
||||
"""
|
||||
|
||||
@unittest.skipUnless(hasattr(signal, "setitimer"),
|
||||
"test needs setitimer()")
|
||||
def test_stress_delivery_dependent(self):
|
||||
"""
|
||||
This test uses dependent signal handlers.
|
||||
"""
|
||||
N = 10000
|
||||
sigs = []
|
||||
|
||||
def first_handler(signum, frame):
|
||||
# 1e-6 is the minimum non-zero value for `setitimer()`.
|
||||
# Choose a random delay so as to improve chances of
|
||||
# triggering a race condition. Ideally the signal is received
|
||||
# when inside critical signal-handling routines such as
|
||||
# Py_MakePendingCalls().
|
||||
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
|
||||
|
||||
def second_handler(signum=None, frame=None):
|
||||
sigs.append(signum)
|
||||
|
||||
def setsig(signum, handler):
|
||||
old_handler = signal.signal(signum, handler)
|
||||
self.addCleanup(signal.signal, signum, old_handler)
|
||||
|
||||
# Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both
|
||||
# ascending and descending sequences (SIGUSR1 then SIGALRM,
|
||||
# SIGPROF then SIGALRM), we maximize chances of hitting a bug.
|
||||
setsig(signal.SIGPROF, first_handler)
|
||||
setsig(signal.SIGUSR1, first_handler)
|
||||
setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL
|
||||
|
||||
expected_sigs = 0
|
||||
deadline = time.time() + 15.0
|
||||
|
||||
while expected_sigs < N:
|
||||
os.kill(os.getpid(), signal.SIGPROF)
|
||||
expected_sigs += 1
|
||||
# Wait for handlers to run to avoid signal coalescing
|
||||
while len(sigs) < expected_sigs and time.time() < deadline:
|
||||
time.sleep(1e-5)
|
||||
|
||||
os.kill(os.getpid(), signal.SIGUSR1)
|
||||
expected_sigs += 1
|
||||
while len(sigs) < expected_sigs and time.time() < deadline:
|
||||
time.sleep(1e-5)
|
||||
|
||||
# All ITIMER_REAL signals should have been delivered to the
|
||||
# Python handler
|
||||
self.assertEqual(len(sigs), N, "Some signals were lost")
|
||||
|
||||
@unittest.skipUnless(hasattr(signal, "setitimer"),
|
||||
"test needs setitimer()")
|
||||
def test_stress_delivery_simultaneous(self):
|
||||
"""
|
||||
This test uses simultaneous signal handlers.
|
||||
"""
|
||||
N = 10000
|
||||
sigs = []
|
||||
|
||||
def handler(signum, frame):
|
||||
sigs.append(signum)
|
||||
|
||||
def setsig(signum, handler):
|
||||
old_handler = signal.signal(signum, handler)
|
||||
self.addCleanup(signal.signal, signum, old_handler)
|
||||
|
||||
setsig(signal.SIGUSR1, handler)
|
||||
setsig(signal.SIGALRM, handler) # for ITIMER_REAL
|
||||
|
||||
expected_sigs = 0
|
||||
deadline = time.time() + 15.0
|
||||
|
||||
while expected_sigs < N:
|
||||
# Hopefully the SIGALRM will be received somewhere during
|
||||
# initial processing of SIGUSR1.
|
||||
signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5)
|
||||
os.kill(os.getpid(), signal.SIGUSR1)
|
||||
|
||||
expected_sigs += 2
|
||||
# Wait for handlers to run to avoid signal coalescing
|
||||
while len(sigs) < expected_sigs and time.time() < deadline:
|
||||
time.sleep(1e-5)
|
||||
|
||||
# All ITIMER_REAL signals should have been delivered to the
|
||||
# Python handler
|
||||
self.assertEqual(len(sigs), N, "Some signals were lost")
|
||||
|
||||
|
||||
def tearDownModule():
|
||||
support.reap_children()
|
||||
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
Improve signal delivery.
|
||||
|
||||
Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-
|
||||
unsafe functions. The tests I'm adding here fail without the rest of the
|
||||
patch, on Linux and OS X. This means our signal delivery logic had defects
|
||||
(some signals could be lost).
|
|
@ -188,12 +188,6 @@ The default handler for SIGINT installed by Python.\n\
|
|||
It raises KeyboardInterrupt.");
|
||||
|
||||
|
||||
static int
|
||||
checksignals_witharg(void * unused)
|
||||
{
|
||||
return PyErr_CheckSignals();
|
||||
}
|
||||
|
||||
static int
|
||||
report_wakeup_write_error(void *data)
|
||||
{
|
||||
|
@ -244,17 +238,15 @@ trip_signal(int sig_num)
|
|||
|
||||
Handlers[sig_num].tripped = 1;
|
||||
|
||||
if (!is_tripped) {
|
||||
/* Set is_tripped after setting .tripped, as it gets
|
||||
cleared in PyErr_CheckSignals() before .tripped. */
|
||||
is_tripped = 1;
|
||||
Py_AddPendingCall(checksignals_witharg, NULL);
|
||||
}
|
||||
_PyEval_SignalReceived();
|
||||
|
||||
/* And then write to the wakeup fd *after* setting all the globals and
|
||||
doing the Py_AddPendingCall. We used to write to the wakeup fd and then
|
||||
set the flag, but this allowed the following sequence of events
|
||||
(especially on windows, where trip_signal runs in a new thread):
|
||||
doing the _PyEval_SignalReceived. We used to write to the wakeup fd
|
||||
and then set the flag, but this allowed the following sequence of events
|
||||
(especially on windows, where trip_signal may run in a new thread):
|
||||
|
||||
- main thread blocks on select([wakeup_fd], ...)
|
||||
- signal arrives
|
||||
|
@ -289,6 +281,8 @@ trip_signal(int sig_num)
|
|||
wakeup.send_err_set = 1;
|
||||
wakeup.send_errno = errno;
|
||||
wakeup.send_win_error = GetLastError();
|
||||
/* Py_AddPendingCall() isn't signal-safe, but we
|
||||
still use it for this exceptional case. */
|
||||
Py_AddPendingCall(report_wakeup_send_error, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -302,6 +296,8 @@ trip_signal(int sig_num)
|
|||
rc = _Py_write_noraise(fd, &byte, 1);
|
||||
|
||||
if (rc < 0) {
|
||||
/* Py_AddPendingCall() isn't signal-safe, but we
|
||||
still use it for this exceptional case. */
|
||||
Py_AddPendingCall(report_wakeup_write_error,
|
||||
(void *)(intptr_t)errno);
|
||||
}
|
||||
|
@ -1556,8 +1552,10 @@ PyErr_CheckSignals(void)
|
|||
arglist);
|
||||
Py_DECREF(arglist);
|
||||
}
|
||||
if (!result)
|
||||
if (!result) {
|
||||
is_tripped = 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_DECREF(result);
|
||||
}
|
||||
|
|
|
@ -140,6 +140,15 @@ static long dxp[256];
|
|||
do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
|
||||
|
||||
|
||||
/* This single variable consolidates all requests to break out of the fast path
|
||||
in the eval loop. */
|
||||
static _Py_atomic_int eval_breaker = {0};
|
||||
/* Request for running pending calls. */
|
||||
static _Py_atomic_int pendingcalls_to_do = {0};
|
||||
/* Request for looking at the `async_exc` field of the current thread state.
|
||||
Guarded by the GIL. */
|
||||
static int pending_async_exc = 0;
|
||||
|
||||
#ifdef WITH_THREAD
|
||||
|
||||
#ifdef HAVE_ERRNO_H
|
||||
|
@ -149,16 +158,8 @@ static long dxp[256];
|
|||
|
||||
static PyThread_type_lock pending_lock = 0; /* for pending calls */
|
||||
static unsigned long main_thread = 0;
|
||||
/* This single variable consolidates all requests to break out of the fast path
|
||||
in the eval loop. */
|
||||
static _Py_atomic_int eval_breaker = {0};
|
||||
/* Request for dropping the GIL */
|
||||
static _Py_atomic_int gil_drop_request = {0};
|
||||
/* Request for running pending calls. */
|
||||
static _Py_atomic_int pendingcalls_to_do = {0};
|
||||
/* Request for looking at the `async_exc` field of the current thread state.
|
||||
Guarded by the GIL. */
|
||||
static int pending_async_exc = 0;
|
||||
|
||||
#include "ceval_gil.h"
|
||||
|
||||
|
@ -253,9 +254,6 @@ PyEval_ReInitThreads(void)
|
|||
_PyThreadState_DeleteExcept(current_tstate);
|
||||
}
|
||||
|
||||
#else
|
||||
static _Py_atomic_int eval_breaker = {0};
|
||||
static int pending_async_exc = 0;
|
||||
#endif /* WITH_THREAD */
|
||||
|
||||
/* This function is used to signal that async exceptions are waiting to be
|
||||
|
@ -330,6 +328,15 @@ PyEval_RestoreThread(PyThreadState *tstate)
|
|||
#endif
|
||||
*/
|
||||
|
||||
void
|
||||
_PyEval_SignalReceived(void)
|
||||
{
|
||||
/* bpo-30703: Function called when the C signal handler of Python gets a
|
||||
signal. We cannot queue a callback using Py_AddPendingCall() since
|
||||
that function is not async-signal-safe. */
|
||||
SIGNAL_PENDING_CALLS();
|
||||
}
|
||||
|
||||
#ifdef WITH_THREAD
|
||||
|
||||
/* The WITH_THREAD implementation is thread-safe. It allows
|
||||
|
@ -394,6 +401,8 @@ Py_MakePendingCalls(void)
|
|||
int i;
|
||||
int r = 0;
|
||||
|
||||
assert(PyGILState_Check());
|
||||
|
||||
if (!pending_lock) {
|
||||
/* initial allocation of the lock */
|
||||
pending_lock = PyThread_allocate_lock();
|
||||
|
@ -408,6 +417,16 @@ Py_MakePendingCalls(void)
|
|||
if (busy)
|
||||
return 0;
|
||||
busy = 1;
|
||||
/* unsignal before starting to call callbacks, so that any callback
|
||||
added in-between re-signals */
|
||||
UNSIGNAL_PENDING_CALLS();
|
||||
|
||||
/* Python signal handler doesn't really queue a callback: it only signals
|
||||
that a signal was received, see _PyEval_SignalReceived(). */
|
||||
if (PyErr_CheckSignals() < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* perform a bounded number of calls, in case of recursion */
|
||||
for (i=0; i<NPENDINGCALLS; i++) {
|
||||
int j;
|
||||
|
@ -424,20 +443,23 @@ Py_MakePendingCalls(void)
|
|||
arg = pendingcalls[j].arg;
|
||||
pendingfirst = (j + 1) % NPENDINGCALLS;
|
||||
}
|
||||
if (pendingfirst != pendinglast)
|
||||
SIGNAL_PENDING_CALLS();
|
||||
else
|
||||
UNSIGNAL_PENDING_CALLS();
|
||||
PyThread_release_lock(pending_lock);
|
||||
/* having released the lock, perform the callback */
|
||||
if (func == NULL)
|
||||
break;
|
||||
r = func(arg);
|
||||
if (r)
|
||||
break;
|
||||
if (r) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
busy = 0;
|
||||
return r;
|
||||
|
||||
error:
|
||||
busy = 0;
|
||||
SIGNAL_PENDING_CALLS(); /* We're not done yet */
|
||||
return -1;
|
||||
}
|
||||
|
||||
#else /* if ! defined WITH_THREAD */
|
||||
|
@ -472,7 +494,6 @@ static struct {
|
|||
} pendingcalls[NPENDINGCALLS];
|
||||
static volatile int pendingfirst = 0;
|
||||
static volatile int pendinglast = 0;
|
||||
static _Py_atomic_int pendingcalls_to_do = {0};
|
||||
|
||||
int
|
||||
Py_AddPendingCall(int (*func)(void *), void *arg)
|
||||
|
@ -506,7 +527,16 @@ Py_MakePendingCalls(void)
|
|||
if (busy)
|
||||
return 0;
|
||||
busy = 1;
|
||||
|
||||
/* unsignal before starting to call callbacks, so that any callback
|
||||
added in-between re-signals */
|
||||
UNSIGNAL_PENDING_CALLS();
|
||||
/* Python signal handler doesn't really queue a callback: it only signals
|
||||
that a signal was received, see _PyEval_SignalReceived(). */
|
||||
if (PyErr_CheckSignals() < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
int i;
|
||||
int (*func)(void *);
|
||||
|
@ -518,13 +548,16 @@ Py_MakePendingCalls(void)
|
|||
arg = pendingcalls[i].arg;
|
||||
pendingfirst = (i + 1) % NPENDINGCALLS;
|
||||
if (func(arg) < 0) {
|
||||
busy = 0;
|
||||
SIGNAL_PENDING_CALLS(); /* We're not done yet */
|
||||
return -1;
|
||||
goto error:
|
||||
}
|
||||
}
|
||||
busy = 0;
|
||||
return 0;
|
||||
|
||||
error:
|
||||
busy = 0;
|
||||
SIGNAL_PENDING_CALLS(); /* We're not done yet */
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif /* WITH_THREAD */
|
||||
|
|
Loading…
Reference in New Issue