diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h
index ed93e45062c..17e440848ec 100644
--- a/Include/internal/mimalloc/mimalloc/types.h
+++ b/Include/internal/mimalloc/mimalloc/types.h
@@ -311,6 +311,7 @@ typedef struct mi_page_s {
   uint32_t slice_offset;    // distance from the actual page data slice (0 if a page)
   uint8_t  is_committed : 1;  // `true` if the page virtual memory is committed
   uint8_t  is_zero_init : 1;  // `true` if the page was initially zero initialized
+  uint8_t  use_qsbr : 1;      // delay page freeing using qsbr
   uint8_t  tag : 4;           // tag from the owning heap
   uint8_t  debug_offset;      // number of bytes to preserve when filling freed or uninitialized memory
 
@@ -336,8 +337,13 @@ typedef struct mi_page_s {
   struct mi_page_s* next;   // next page owned by this thread with the same `block_size`
   struct mi_page_s* prev;   // previous page owned by this thread with the same `block_size`
 
+#ifdef Py_GIL_DISABLED
+  struct llist_node qsbr_node;
+  uint64_t qsbr_goal;
+#endif
+
   // 64-bit 9 words, 32-bit 12 words, (+2 for secure)
-  #if MI_INTPTR_SIZE==8
+  #if MI_INTPTR_SIZE==8 && !defined(Py_GIL_DISABLED)
   uintptr_t padding[1];
   #endif
 } mi_page_t;
@@ -555,6 +561,7 @@ struct mi_heap_s {
   bool    no_reclaim;       // `true` if this heap should not reclaim abandoned pages
   uint8_t tag;              // custom identifier for this heap
   uint8_t debug_offset;     // number of bytes to preserve when filling freed or uninitialized memory
+  bool    page_use_qsbr;    // should freeing pages be delayed using QSBR
 };
diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h
index 44c160b7ac1..3ef0154ba76 100644
--- a/Include/internal/pycore_mimalloc.h
+++ b/Include/internal/pycore_mimalloc.h
@@ -48,6 +48,7 @@ struct _mimalloc_thread_state {
     mi_heap_t *current_object_heap;
     mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
     mi_tld_t tld;
+    struct llist_node page_list;
 };
 #endif
diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h
index 475f00deedc..c3680a20554 100644
--- a/Include/internal/pycore_qsbr.h
+++ b/Include/internal/pycore_qsbr.h
@@ -29,6 +29,12 @@ extern "C" {
 #define QSBR_INITIAL 1
 #define QSBR_INCR 2
 
+// Wrap-around safe comparison. This is a holdover from the FreeBSD
+// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
+// sequence numbers, so wrap-around is unlikely.
+#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
+#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
+
 struct _qsbr_shared;
 struct _PyThreadStateImpl;  // forward declare to avoid circular dependency
@@ -89,6 +95,15 @@ _Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
 {
     _Py_atomic_store_uint64_release(&qsbr->seq, seq);
 }
 
+// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
+// but does not perform a scan of threads.
+static inline bool
+_Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
+{
+    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
+    return QSBR_LEQ(goal, rd_seq);
+}
+
 // Advance the write sequence and return the new goal. This should be called
 // after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
 // determine when it is safe to reclaim (free) the memory.
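Taken together, the new `mi_page_s` fields (`use_qsbr`, `qsbr_node`, `qsbr_goal`), the per-thread `page_list`, and the `_Py_qbsr_goal_reached()` helper support one pattern, implemented in `Objects/obmalloc.c` further down: an empty page is not freed immediately but tagged with a QSBR goal and queued, and only really freed once every thread has passed that goal. A condensed, illustrative sketch of that flow (not part of the patch; see `_PyMem_mi_page_maybe_free()` and `_PyMem_mi_heap_collect_qsbr()` below for the real logic, which also handles non-QSBR heaps, re-checks mi_page_all_free(), and stops at the first unreached goal):

    // Illustrative condensation only, assuming the CPython-internal headers.
    #include "pycore_llist.h"       // llist_* helpers
    #include "pycore_qsbr.h"        // _Py_qsbr_deferred_advance(), _Py_qbsr_goal_reached()
    #include "pycore_tstate.h"      // _PyThreadStateImpl
    #include "mimalloc/internal.h"  // mi_page_t, _mi_page_free(), mi_page_queue_of()

    static void
    delay_page_free(_PyThreadStateImpl *tstate, mi_page_t *page)
    {
        // The page just became empty, but lock-free readers may still hold
        // pointers into it: record a goal instead of freeing right away.
        page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
    }

    static void
    free_delayed_pages(_PyThreadStateImpl *tstate)
    {
        // Called during heap collection or when a fresh page is needed:
        // free every queued page whose goal all threads have now passed.
        struct llist_node *node;
        llist_for_each_safe(node, &tstate->mimalloc.page_list) {
            mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
            if (_Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
                llist_remove(&page->qsbr_node);
                page->qsbr_goal = 0;
                _mi_page_free(page, mi_page_queue_of(page), false);
            }
        }
    }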
diff --git a/Objects/mimalloc/heap.c b/Objects/mimalloc/heap.c
index 154dad0b128..26777f39fb6 100644
--- a/Objects/mimalloc/heap.c
+++ b/Objects/mimalloc/heap.c
@@ -98,7 +98,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
   if (mi_page_all_free(page)) {
     // no more used blocks, free the page.
     // note: this will free retired pages as well.
-    _mi_page_free(page, pq, collect >= MI_FORCE);
+    bool freed = _PyMem_mi_page_maybe_free(page, pq, collect >= MI_FORCE);
+    if (!freed && collect == MI_ABANDON) {
+      _mi_page_abandon(page, pq);
+    }
   }
   else if (collect == MI_ABANDON) {
     // still used blocks but the thread is done; abandon the page
@@ -153,6 +156,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // collect retired pages
   _mi_heap_collect_retired(heap, force);
 
+  // free pages that were delayed with QSBR
+  _PyMem_mi_heap_collect_qsbr(heap);
+
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
   mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
diff --git a/Objects/mimalloc/page.c b/Objects/mimalloc/page.c
index 63db893e494..25ecd6ec7d7 100644
--- a/Objects/mimalloc/page.c
+++ b/Objects/mimalloc/page.c
@@ -225,6 +225,9 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
 
   // and the local free list
   if (page->local_free != NULL) {
+    // any previous QSBR goals are no longer valid because we reused the page
+    _PyMem_mi_page_clear_qsbr(page);
+
     if mi_likely(page->free == NULL) {
       // usual case
       page->free = page->local_free;
@@ -267,6 +270,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
+  _PyMem_mi_page_reclaimed(page);
   mi_assert_expensive(_mi_page_is_valid(page));
 }
@@ -383,6 +387,13 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
   mi_heap_t* pheap = mi_page_heap(page);
 
+#ifdef Py_GIL_DISABLED
+  if (page->qsbr_node.next != NULL) {
+    // remove from QSBR queue, but keep the goal
+    llist_remove(&page->qsbr_node);
+  }
+#endif
+
   // remove from our page list
   mi_segments_tld_t* segments_tld = &pheap->tld->segments;
   mi_page_queue_remove(pq, page);
@@ -417,6 +428,11 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_heap_t* heap = mi_page_heap(page);
 
+#ifdef Py_GIL_DISABLED
+  mi_assert_internal(page->qsbr_goal == 0);
+  mi_assert_internal(page->qsbr_node.next == NULL);
+#endif
+
   // remove from the page list
   // (no need to do _mi_heap_delayed_free first as all blocks are already free)
   mi_segments_tld_t* segments_tld = &heap->tld->segments;
@@ -444,6 +460,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
   mi_page_set_has_aligned(page, false);
 
+  // any previous QSBR goals are no longer valid because we reused the page
+  _PyMem_mi_page_clear_qsbr(page);
+
   // don't retire too often..
   // (or we end up retiring and re-allocating most of the time)
   // NOTE: refine this more: we should not retire if this
@@ -465,7 +484,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
       return; // dont't free after all
     }
   }
-  _mi_page_free(page, pq, false);
+  _PyMem_mi_page_maybe_free(page, pq, false);
 }
 
 // free retired pages: we don't need to look at the entire queues
@@ -480,7 +499,10 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
     if (mi_page_all_free(page)) {
       page->retire_expire--;
       if (force || page->retire_expire == 0) {
-        _mi_page_free(pq->first, pq, force);
+#ifdef Py_GIL_DISABLED
+        mi_assert_internal(page->qsbr_goal == 0);
+#endif
+        _PyMem_mi_page_maybe_free(page, pq, force);
       }
       else {
         // keep retired, update min/max
@@ -661,6 +683,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   // set fields
   mi_page_set_heap(page, heap);
   page->tag = heap->tag;
+  page->use_qsbr = heap->page_use_qsbr;
   page->debug_offset = heap->debug_offset;
   page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
   size_t page_size;
@@ -691,6 +714,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->xthread_free == 0);
   mi_assert_internal(page->next == NULL);
   mi_assert_internal(page->prev == NULL);
+#ifdef Py_GIL_DISABLED
+  mi_assert_internal(page->qsbr_goal == 0);
+  mi_assert_internal(page->qsbr_node.next == NULL);
+#endif
   mi_assert_internal(page->retire_expire == 0);
   mi_assert_internal(!mi_page_has_aligned(page));
   #if (MI_PADDING || MI_ENCODE_FREELIST)
@@ -750,6 +777,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   mi_heap_stat_counter_increase(heap, searches, count);
 
   if (page == NULL) {
+    _PyMem_mi_heap_collect_qsbr(heap); // some pages might be safe to free now
     _mi_heap_collect_retired(heap, false); // perhaps make a page available?
     page = mi_page_fresh(heap, pq);
     if (page == NULL && first_try) {
@@ -760,6 +788,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   else {
     mi_assert(pq->first == page);
     page->retire_expire = 0;
+    _PyMem_mi_page_clear_qsbr(page);
   }
   mi_assert_internal(page == NULL || mi_page_immediate_available(page));
   return page;
@@ -785,6 +814,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
     if (mi_page_immediate_available(page)) {
       page->retire_expire = 0;
+      _PyMem_mi_page_clear_qsbr(page);
       return page; // fast path
     }
   }
@@ -878,6 +908,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
     return NULL;
   }
   else {
+    _PyMem_mi_heap_collect_qsbr(heap);
     return mi_large_huge_page_alloc(heap,size,huge_alignment);
   }
 }
diff --git a/Objects/mimalloc/segment.c b/Objects/mimalloc/segment.c
index 584233b8b57..08b15643365 100644
--- a/Objects/mimalloc/segment.c
+++ b/Objects/mimalloc/segment.c
@@ -982,6 +982,10 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
   mi_assert_internal(mi_page_all_free(page));
   mi_segment_t* segment = _mi_ptr_segment(page);
   mi_assert_internal(segment->used > 0);
+#ifdef Py_GIL_DISABLED
+  mi_assert_internal(page->qsbr_goal == 0);
+  mi_assert_internal(page->qsbr_node.next == NULL);
+#endif
   size_t inuse = page->capacity * mi_page_block_size(page);
   _mi_stat_decrease(&tld->stats->page_committed, inuse);
@@ -1270,10 +1274,13 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
       // ensure used count is up to date and collect potential concurrent frees
       mi_page_t* const page = mi_slice_to_page(slice);
       _mi_page_free_collect(page, false);
-      if (mi_page_all_free(page)) {
+      if (mi_page_all_free(page) && _PyMem_mi_page_is_safe_to_free(page)) {
        // if this page is all free now, free it without adding to any queues (yet)
         mi_assert_internal(page->next == NULL && page->prev==NULL);
         _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
+#ifdef Py_GIL_DISABLED
+        page->qsbr_goal = 0;
+#endif
         segment->abandoned--;
         slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
         mi_assert_internal(!mi_slice_is_used(slice));
@@ -1344,15 +1351,18 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       mi_page_set_heap(page, target_heap);
       _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
       _mi_page_free_collect(page, false); // ensure used count is up to date
-      if (mi_page_all_free(page)) {
+      if (mi_page_all_free(page) && _PyMem_mi_page_is_safe_to_free(page)) {
         // if everything free by now, free the page
+#ifdef Py_GIL_DISABLED
+        page->qsbr_goal = 0;
+#endif
         slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
       }
       else {
         // otherwise reclaim it into the heap
         _mi_page_reclaim(target_heap, page);
         if (requested_block_size == page->xblock_size && mi_page_has_any_available(page) &&
-            heap == target_heap) {
+            requested_block_size <= MI_MEDIUM_OBJ_SIZE_MAX && heap == target_heap) {
           if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
         }
       }
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index b2a2286ef22..e7813807674 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -12,6 +12,12 @@
 #include <stdlib.h>               // malloc()
 #include <stdbool.h>
 #ifdef WITH_MIMALLOC
+// Forward declarations of functions used in our mimalloc modifications
+static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
+static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
+static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
+static void _PyMem_mi_page_reclaimed(mi_page_t *page);
+static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
 #  include "pycore_mimalloc.h"
 #  include "mimalloc/static.c"
 #  include "mimalloc/internal.h"  // for stats
@@ -86,6 +92,113 @@ _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
 #ifdef WITH_MIMALLOC
 
+static void
+_PyMem_mi_page_clear_qsbr(mi_page_t *page)
+{
+#ifdef Py_GIL_DISABLED
+    // Clear the QSBR goal and remove the page from the QSBR linked list.
+    page->qsbr_goal = 0;
+    if (page->qsbr_node.next != NULL) {
+        llist_remove(&page->qsbr_node);
+    }
+#endif
+}
+
+// Check if an empty, newly reclaimed page is safe to free now.
+static bool
+_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
+{
+    assert(mi_page_all_free(page));
+#ifdef Py_GIL_DISABLED
+    assert(page->qsbr_node.next == NULL);
+    if (page->use_qsbr && page->qsbr_goal != 0) {
+        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+        if (tstate == NULL) {
+            return false;
+        }
+        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
+    }
+#endif
+    return true;
+
+}
+
+static bool
+_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
+{
+#ifdef Py_GIL_DISABLED
+    assert(mi_page_all_free(page));
+    if (page->use_qsbr) {
+        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
+        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
+            _PyMem_mi_page_clear_qsbr(page);
+            _mi_page_free(page, pq, force);
+            return true;
+        }
+
+        _PyMem_mi_page_clear_qsbr(page);
+        page->retire_expire = 0;
+        page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
+        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
+        return false;
+    }
+#endif
+    _mi_page_free(page, pq, force);
+    return true;
+}
+
+static void
+_PyMem_mi_page_reclaimed(mi_page_t *page)
+{
+#ifdef Py_GIL_DISABLED
+    assert(page->qsbr_node.next == NULL);
+    if (page->qsbr_goal != 0) {
+        if (mi_page_all_free(page)) {
+            assert(page->qsbr_node.next == NULL);
+            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
+            page->retire_expire = 0;
+            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
+        }
+        else {
+            page->qsbr_goal = 0;
+        }
+    }
+#endif
+}
+
+static void
+_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
+{
+#ifdef Py_GIL_DISABLED
+    if (!heap->page_use_qsbr) {
+        return;
+    }
+
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    struct llist_node *head = &tstate->mimalloc.page_list;
+    if (llist_empty(head)) {
+        return;
+    }
+
+    struct llist_node *node;
+    llist_for_each_safe(node, head) {
+        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
+        if (!mi_page_all_free(page)) {
+            // We allocated from this page some point after the delayed free
+            _PyMem_mi_page_clear_qsbr(page);
+            continue;
+        }
+
+        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
+            return;
+        }
+
+        _PyMem_mi_page_clear_qsbr(page);
+        _mi_page_free(page, mi_page_queue_of(page), false);
+    }
+#endif
+}
+
 void *
 _PyMem_MiMalloc(void *ctx, size_t size)
 {
diff --git a/Python/pystate.c b/Python/pystate.c
index 3d6394f81da..274aec8fec8 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -2839,6 +2839,7 @@ tstate_mimalloc_bind(PyThreadState *tstate)
     // the "backing" heap.
     mi_tld_t *tld = &mts->tld;
     _mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);
+    llist_init(&mts->page_list);
 
     // Exiting threads push any remaining in-use segments to the abandoned
     // pool to be re-claimed later by other threads. We use per-interpreter
@@ -2865,6 +2866,12 @@ tstate_mimalloc_bind(PyThreadState *tstate)
         mts->heaps[i].debug_offset = (uint8_t)debug_offsets[i];
     }
 
+    // Heaps that store Python objects should use QSBR to delay freeing
+    // mimalloc pages while there may be concurrent lock-free readers.
+    mts->heaps[_Py_MIMALLOC_HEAP_OBJECT].page_use_qsbr = true;
+    mts->heaps[_Py_MIMALLOC_HEAP_GC].page_use_qsbr = true;
+    mts->heaps[_Py_MIMALLOC_HEAP_GC_PRE].page_use_qsbr = true;
+
     // By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT.
     // _PyObject_GC_New() and similar functions temporarily override this to
     // use one of the GC heaps.
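For context on why empty pages cannot simply be freed on the spot: in the free-threaded build a reader may access an object with no lock and no strong reference, and it only promises to hold no such borrowed pointers once it reports a quiescent state. A hypothetical reader-side sketch of that contract (not part of the patch; the function and `slot` are invented names, only the QSBR calls come from the headers above):

    #include "Python.h"
    #include "pycore_pystate.h"   // _PyThreadState_GET()
    #include "pycore_tstate.h"    // _PyThreadStateImpl
    #include "pycore_qsbr.h"      // _Py_qsbr_quiescent_state()

    // Hypothetical reader: peek at an object published by another thread
    // without taking a strong reference or a lock.
    static void
    peek_slot_lock_free(PyObject **slot)
    {
        PyObject *op = _Py_atomic_load_ptr(slot);
        if (op != NULL) {
            // Even if another thread concurrently frees the object and the
            // page becomes empty, the page is only queued with a QSBR goal;
            // its memory is not unmapped or reused for a different block
            // size yet, so this speculative read cannot fault.
            Py_ssize_t refcnt = Py_REFCNT(op);
            (void)refcnt;
        }

        // When the thread holds no more borrowed pointers, it reports a
        // quiescent state; once every thread has done so, goals recorded
        // with _Py_qsbr_deferred_advance() are reached and delayed pages
        // can actually be freed.
        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
        _Py_qsbr_quiescent_state(tstate->qsbr);
    }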
diff --git a/Python/qsbr.c b/Python/qsbr.c
index 7f7ae03cf60..69f77f49082 100644
--- a/Python/qsbr.c
+++ b/Python/qsbr.c
@@ -38,12 +38,6 @@
 #include "pycore_pystate.h"       // _PyThreadState_GET()
 
-// Wrap-around safe comparison. This is a holdover from the FreeBSD
-// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
-// sequence numbers, so wrap-around is unlikely.
-#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
-#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
-
 // Starting size of the array of qsbr thread states
 #define MIN_ARRAY_SIZE 8
@@ -167,13 +161,11 @@ bool
 _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
 {
     assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);
-
-    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
-    if (QSBR_LEQ(goal, rd_seq)) {
+    if (_Py_qbsr_goal_reached(qsbr, goal)) {
        return true;
     }
 
-    rd_seq = qsbr_poll_scan(qsbr->shared);
+    uint64_t rd_seq = qsbr_poll_scan(qsbr->shared);
     return QSBR_LEQ(goal, rd_seq);
 }
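As an aside, the QSBR_LT/QSBR_LEQ macros moved into pycore_qsbr.h compute an unsigned difference and reinterpret it as a signed value, so the ordering stays correct even if the sequence counter were ever to wrap around. A standalone sanity check of that property (illustrative only, independent of CPython; relies on the usual two's-complement conversion):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    // Same definitions as in pycore_qsbr.h.
    #define QSBR_LT(a, b)  ((int64_t)((a)-(b)) < 0)
    #define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)

    int main(void)
    {
        // Ordinary case: goal 8 is reached once the shared read sequence is >= 8.
        assert(QSBR_LEQ(UINT64_C(8), UINT64_C(8)));
        assert(!QSBR_LEQ(UINT64_C(8), UINT64_C(6)));

        // Wrap-around case: a goal just past UINT64_MAX still compares as
        // "later" than a sequence just before the wrap, because the
        // subtraction is done modulo 2^64 and the result is interpreted as
        // a signed difference.
        uint64_t before_wrap = UINT64_MAX - 1;
        uint64_t after_wrap  = UINT64_C(2);   // counter has wrapped around
        assert(QSBR_LT(before_wrap, after_wrap));
        assert(!QSBR_LEQ(after_wrap, before_wrap));

        printf("QSBR comparison macros behave as expected\n");
        return 0;
    }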