// C Extension module to test pycore_lock.h API

#include "parts.h"
#include "pycore_lock.h"
#include "pycore_pythread.h"      // PyThread_get_thread_ident_ex()

#include "clinic/test_lock.c.h"

#ifdef MS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>               // usleep()
#endif

/*[clinic input]
module _testinternalcapi
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/

static void
pysleep(int ms)
{
#ifdef MS_WINDOWS
    Sleep(ms);
#else
    usleep(ms * 1000);
#endif
}

static PyObject *
test_lock_basic(PyObject *self, PyObject *obj)
{
    PyMutex m = (PyMutex){0};

    // uncontended lock and unlock
    PyMutex_Lock(&m);
    assert(m._bits == 1);
    PyMutex_Unlock(&m);
    assert(m._bits == 0);

    Py_RETURN_NONE;
}

struct test_lock2_data {
    PyMutex m;
    PyEvent done;
    int started;
};

static void
lock_thread(void *arg)
{
    struct test_lock2_data *test_data = arg;
    PyMutex *m = &test_data->m;
    _Py_atomic_store_int(&test_data->started, 1);

    PyMutex_Lock(m);
    assert(m->_bits == 1);

    PyMutex_Unlock(m);
    assert(m->_bits == 0);

    _PyEvent_Notify(&test_data->done);
}

static PyObject *
test_lock_two_threads(PyObject *self, PyObject *obj)
{
    // lock attempt by two threads
    struct test_lock2_data test_data;
    memset(&test_data, 0, sizeof(test_data));

    PyMutex_Lock(&test_data.m);
    assert(test_data.m._bits == 1);

    PyThread_start_new_thread(lock_thread, &test_data);

    // wait up to two seconds for the lock_thread to attempt to lock "m"
    int iters = 0;
    uint8_t v;
    do {
        pysleep(10);  // allow some time for the other thread to try to lock
        v = _Py_atomic_load_uint8_relaxed(&test_data.m._bits);
        assert(v == 1 || v == 3);
        iters++;
    } while (v != 3 && iters < 200);

    // both the "locked" and the "has parked" bits should be set
    assert(test_data.m._bits == 3);

    PyMutex_Unlock(&test_data.m);
    PyEvent_Wait(&test_data.done);
    assert(test_data.m._bits == 0);

    Py_RETURN_NONE;
}

#define COUNTER_THREADS 5
#define COUNTER_ITERS 10000

struct test_data_counter {
    PyMutex m;
    Py_ssize_t counter;
};

struct thread_data_counter {
    struct test_data_counter *test_data;
    PyEvent done_event;
};

static void
counter_thread(void *arg)
{
    struct thread_data_counter *thread_data = arg;
    struct test_data_counter *test_data = thread_data->test_data;

    for (Py_ssize_t i = 0; i < COUNTER_ITERS; i++) {
        PyMutex_Lock(&test_data->m);
        test_data->counter++;
        PyMutex_Unlock(&test_data->m);
    }
    _PyEvent_Notify(&thread_data->done_event);
}

static PyObject *
test_lock_counter(PyObject *self, PyObject *obj)
{
    // Test with rapidly locking and unlocking mutex
    struct test_data_counter test_data;
    memset(&test_data, 0, sizeof(test_data));

    struct thread_data_counter thread_data[COUNTER_THREADS];
    memset(&thread_data, 0, sizeof(thread_data));

    for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
        thread_data[i].test_data = &test_data;
        PyThread_start_new_thread(counter_thread, &thread_data[i]);
    }

    for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
        PyEvent_Wait(&thread_data[i].done_event);
    }

    assert(test_data.counter == COUNTER_THREADS * COUNTER_ITERS);
    Py_RETURN_NONE;
}

#define SLOW_COUNTER_ITERS 100

static void
slow_counter_thread(void *arg)
{
    struct thread_data_counter *thread_data = arg;
    struct test_data_counter *test_data = thread_data->test_data;

    for (Py_ssize_t i = 0; i < SLOW_COUNTER_ITERS; i++) {
        PyMutex_Lock(&test_data->m);
        if (i % 7 == 0) {
            pysleep(2);
        }
        test_data->counter++;
        PyMutex_Unlock(&test_data->m);
    }
    _PyEvent_Notify(&thread_data->done_event);
}
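
// NOTE: holding the mutex across a sleep forces the other threads to park.
// On unlock, CPython's mutex implementation (Python/lock.c) may then hand
// the lock directly to a parked waiter instead of letting every thread race
// to reacquire it; that handoff path is what the test below exercises.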
static PyObject *
test_lock_counter_slow(PyObject *self, PyObject *obj)
{
    // Test lock/unlock with occasional "long" critical section, which will
    // trigger handoff of the lock.
    struct test_data_counter test_data;
    memset(&test_data, 0, sizeof(test_data));

    struct thread_data_counter thread_data[COUNTER_THREADS];
    memset(&thread_data, 0, sizeof(thread_data));

    for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
        thread_data[i].test_data = &test_data;
        PyThread_start_new_thread(slow_counter_thread, &thread_data[i]);
    }

    for (Py_ssize_t i = 0; i < COUNTER_THREADS; i++) {
        PyEvent_Wait(&thread_data[i].done_event);
    }

    assert(test_data.counter == COUNTER_THREADS * SLOW_COUNTER_ITERS);
    Py_RETURN_NONE;
}

struct bench_data_locks {
    int stop;
    int use_pymutex;
    int critical_section_length;
    char padding[200];
    PyThread_type_lock lock;
    PyMutex m;
    double value;
    Py_ssize_t total_iters;
};

struct bench_thread_data {
    struct bench_data_locks *bench_data;
    Py_ssize_t iters;
    PyEvent done;
};

static void
thread_benchmark_locks(void *arg)
{
    struct bench_thread_data *thread_data = arg;
    struct bench_data_locks *bench_data = thread_data->bench_data;
    int use_pymutex = bench_data->use_pymutex;
    int critical_section_length = bench_data->critical_section_length;

    double my_value = 1.0;
    Py_ssize_t iters = 0;
    while (!_Py_atomic_load_int_relaxed(&bench_data->stop)) {
        if (use_pymutex) {
            PyMutex_Lock(&bench_data->m);
            for (int i = 0; i < critical_section_length; i++) {
                bench_data->value += my_value;
                my_value = bench_data->value;
            }
            PyMutex_Unlock(&bench_data->m);
        }
        else {
            PyThread_acquire_lock(bench_data->lock, 1);
            for (int i = 0; i < critical_section_length; i++) {
                bench_data->value += my_value;
                my_value = bench_data->value;
            }
            PyThread_release_lock(bench_data->lock);
        }
        iters++;
    }

    thread_data->iters = iters;
    _Py_atomic_add_ssize(&bench_data->total_iters, iters);
    _PyEvent_Notify(&thread_data->done);
}

/*[clinic input]
_testinternalcapi.benchmark_locks

    num_threads: Py_ssize_t
    use_pymutex: bool = True
    critical_section_length: int = 1
    time_ms: int = 1000
    /

[clinic start generated code]*/

static PyObject *
_testinternalcapi_benchmark_locks_impl(PyObject *module,
                                       Py_ssize_t num_threads,
                                       int use_pymutex,
                                       int critical_section_length,
                                       int time_ms)
/*[clinic end generated code: output=381df8d7e9a74f18 input=f3aeaf688738c121]*/
{
    // Run from Tools/lockbench/lockbench.py
    // Based on the WebKit lock benchmarks:
    // https://github.com/WebKit/WebKit/blob/main/Source/WTF/benchmarks/LockSpeedTest.cpp
    // See also https://webkit.org/blog/6161/locking-in-webkit/
    PyObject *thread_iters = NULL;
    PyObject *res = NULL;

    struct bench_data_locks bench_data;
    memset(&bench_data, 0, sizeof(bench_data));
    bench_data.use_pymutex = use_pymutex;
    bench_data.critical_section_length = critical_section_length;

    bench_data.lock = PyThread_allocate_lock();
    if (bench_data.lock == NULL) {
        return PyErr_NoMemory();
    }

    struct bench_thread_data *thread_data = NULL;
    thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data));
    if (thread_data == NULL) {
        PyErr_NoMemory();
        goto exit;
    }

    thread_iters = PyList_New(num_threads);
    if (thread_iters == NULL) {
        goto exit;
    }

    PyTime_t start, end;
    if (PyTime_PerfCounter(&start) < 0) {
        goto exit;
    }

    for (Py_ssize_t i = 0; i < num_threads; i++) {
        thread_data[i].bench_data = &bench_data;
        PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]);
    }

    // Let the threads run for `time_ms` milliseconds
    pysleep(time_ms);
    _Py_atomic_store_int(&bench_data.stop, 1);

    // Wait for the threads to finish
    for (Py_ssize_t i = 0; i < num_threads; i++) {
        PyEvent_Wait(&thread_data[i].done);
    }

    Py_ssize_t total_iters = bench_data.total_iters;
    if (PyTime_PerfCounter(&end) < 0) {
        goto exit;
    }
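
    // PyTime_t values are in nanoseconds, so the rate computed below comes
    // out as lock acquisitions per second, summed across all threads.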
    // Return the total number of acquisitions and the number of acquisitions
    // for each thread.
    for (Py_ssize_t i = 0; i < num_threads; i++) {
        PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters);
        if (iter == NULL) {
            goto exit;
        }
        PyList_SET_ITEM(thread_iters, i, iter);
    }

    assert(end != start);
    double rate = total_iters * 1e9 / (end - start);
    res = Py_BuildValue("(dO)", rate, thread_iters);

exit:
    PyThread_free_lock(bench_data.lock);
    PyMem_Free(thread_data);
    Py_XDECREF(thread_iters);
    return res;
}

static PyObject *
test_lock_benchmark(PyObject *module, PyObject *obj)
{
    // Just make sure the benchmark runs without crashing
    PyObject *res = _testinternalcapi_benchmark_locks_impl(
        module, 1, 1, 1, 100);
    if (res == NULL) {
        return NULL;
    }
    Py_DECREF(res);
    Py_RETURN_NONE;
}

static int
init_maybe_fail(void *arg)
{
    int *counter = (int *)arg;
    (*counter)++;
    if (*counter < 5) {
        // failure
        return -1;
    }
    assert(*counter == 5);
    return 0;
}

static PyObject *
test_lock_once(PyObject *self, PyObject *obj)
{
    _PyOnceFlag once = {0};
    int counter = 0;
    for (int i = 0; i < 10; i++) {
        int res = _PyOnceFlag_CallOnce(&once, init_maybe_fail, &counter);
        if (i < 4) {
            assert(res == -1);
        }
        else {
            assert(res == 0);
            assert(counter == 5);
        }
    }
    Py_RETURN_NONE;
}

struct test_rwlock_data {
    Py_ssize_t nthreads;
    _PyRWMutex rw;
    PyEvent step1;
    PyEvent step2;
    PyEvent step3;
    PyEvent done;
};

static void
rdlock_thread(void *arg)
{
    struct test_rwlock_data *test_data = arg;

    // Acquire the lock in read mode
    _PyRWMutex_RLock(&test_data->rw);
    PyEvent_Wait(&test_data->step1);
    _PyRWMutex_RUnlock(&test_data->rw);

    _PyRWMutex_RLock(&test_data->rw);
    PyEvent_Wait(&test_data->step3);
    _PyRWMutex_RUnlock(&test_data->rw);

    if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) {
        _PyEvent_Notify(&test_data->done);
    }
}

static void
wrlock_thread(void *arg)
{
    struct test_rwlock_data *test_data = arg;

    // First acquire the lock in write mode
    _PyRWMutex_Lock(&test_data->rw);
    PyEvent_Wait(&test_data->step2);
    _PyRWMutex_Unlock(&test_data->rw);

    if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) {
        _PyEvent_Notify(&test_data->done);
    }
}

static void
wait_until(uintptr_t *ptr, uintptr_t value)
{
    // wait up to two seconds for *ptr == value
    int iters = 0;
    uintptr_t bits;
    do {
        pysleep(10);
        bits = _Py_atomic_load_uintptr(ptr);
        iters++;
    } while (bits != value && iters < 200);
}

// The values asserted below decode _PyRWMutex.bits: bit 0 is the
// write-locked flag, bit 1 is set while threads are parked waiting, and the
// reader count lives in the remaining upper bits (each reader adds 4).
static PyObject *
test_lock_rwlock(PyObject *self, PyObject *obj)
{
    struct test_rwlock_data test_data = {.nthreads = 3};

    _PyRWMutex_Lock(&test_data.rw);
    assert(test_data.rw.bits == 1);

    _PyRWMutex_Unlock(&test_data.rw);
    assert(test_data.rw.bits == 0);

    // Start two readers
    PyThread_start_new_thread(rdlock_thread, &test_data);
    PyThread_start_new_thread(rdlock_thread, &test_data);

    // wait up to two seconds for the threads to attempt to read-lock "rw"
    wait_until(&test_data.rw.bits, 8);
    assert(test_data.rw.bits == 8);

    // start writer (while readers hold lock)
    PyThread_start_new_thread(wrlock_thread, &test_data);
    wait_until(&test_data.rw.bits, 10);
    assert(test_data.rw.bits == 10);

    // readers release lock, writer should acquire it
    _PyEvent_Notify(&test_data.step1);
    wait_until(&test_data.rw.bits, 3);
    assert(test_data.rw.bits == 3);

    // writer releases lock, readers acquire it
    _PyEvent_Notify(&test_data.step2);
    wait_until(&test_data.rw.bits, 8);
    assert(test_data.rw.bits == 8);

    // readers release lock again
    _PyEvent_Notify(&test_data.step3);
    wait_until(&test_data.rw.bits, 0);
    assert(test_data.rw.bits == 0);

    PyEvent_Wait(&test_data.done);
    Py_RETURN_NONE;
}
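
// A _PyRecursiveMutex can be re-acquired by the thread that already holds
// it: "thread" records the owner's thread id and "level" counts the nested
// acquisitions beyond the first, as the assertions below check.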
static PyObject *
test_lock_recursive(PyObject *self, PyObject *obj)
{
    _PyRecursiveMutex m = (_PyRecursiveMutex){0};
    assert(!_PyRecursiveMutex_IsLockedByCurrentThread(&m));

    _PyRecursiveMutex_Lock(&m);
    assert(m.thread == PyThread_get_thread_ident_ex());
    assert(PyMutex_IsLocked(&m.mutex));
    assert(m.level == 0);

    _PyRecursiveMutex_Lock(&m);
    assert(m.level == 1);
    _PyRecursiveMutex_Unlock(&m);

    _PyRecursiveMutex_Unlock(&m);
    assert(m.thread == 0);
    assert(!PyMutex_IsLocked(&m.mutex));
    assert(m.level == 0);

    Py_RETURN_NONE;
}

static PyMethodDef test_methods[] = {
    {"test_lock_basic", test_lock_basic, METH_NOARGS},
    {"test_lock_two_threads", test_lock_two_threads, METH_NOARGS},
    {"test_lock_counter", test_lock_counter, METH_NOARGS},
    {"test_lock_counter_slow", test_lock_counter_slow, METH_NOARGS},
    _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF
    {"test_lock_benchmark", test_lock_benchmark, METH_NOARGS},
    {"test_lock_once", test_lock_once, METH_NOARGS},
    {"test_lock_rwlock", test_lock_rwlock, METH_NOARGS},
    {"test_lock_recursive", test_lock_recursive, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};

int
_PyTestInternalCapi_Init_Lock(PyObject *mod)
{
    if (PyModule_AddFunctions(mod, test_methods) < 0) {
        return -1;
    }
    return 0;
}
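
/* The tests above are exposed as no-argument functions on the
 * _testinternalcapi module, so in a build where that module is available
 * they can be driven from Python, for example:
 *
 *     import _testinternalcapi
 *     _testinternalcapi.test_lock_basic()
 *     _testinternalcapi.benchmark_locks(4)  # remaining args use defaults
 */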