mirror of https://github.com/python/cpython
gh-115103: Delay reuse of mimalloc pages that store PyObjects (#115435)
This implements delayed reuse of mimalloc pages that contain Python objects in the free-threaded build.

Allocations of the same size class are grouped in data structures called pages (these are distinct from operating system pages). For thread-safety, we want to ensure that memory used to store PyObjects remains valid as long as there may be concurrent lock-free readers; we want to delay using it for other size classes, in other heaps, or returning it to the operating system.

When a mimalloc page becomes empty, instead of immediately freeing it, we tag it with a QSBR goal and insert it into a linked list of to-be-freed pages in the per-thread state. When mimalloc needs a fresh page, we process that queue and free any still-empty pages whose goal has been reached and that are therefore safe to free. Pages waiting to be freed remain available for allocations of the same size class, and allocating from such a page prevents it from being freed. There is additional logic to handle abandoned pages when threads exit.
Parent: 02ee475ee3
Commit: c012c8ab7b
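As a rough mental model of the scheme described above, here is a self-contained toy sketch (not CPython code: every toy_*/seq_* name is made up for illustration; the real logic lives in _PyMem_mi_page_maybe_free and _PyMem_mi_heap_collect_qsbr further down in the diff). A shared write sequence is advanced when a page empties, the page records that value as its goal, and the page may only be freed once every reader has reported a sequence at least as new as that goal.

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct toy_page {
        uint64_t qsbr_goal;            /* 0 = no goal recorded */
        struct toy_page *next;         /* singly linked "to be freed" list */
    } toy_page;

    static uint64_t wr_seq = 1;        /* shared write sequence (cf. QSBR_INITIAL) */
    static uint64_t rd_seq = 1;        /* lowest sequence every reader has reached */
    static toy_page *pending = NULL;   /* pages waiting for their goal */

    /* Wrap-around safe "a <= b", mirroring QSBR_LEQ from the patch. */
    static bool seq_leq(uint64_t a, uint64_t b) { return (int64_t)(a - b) <= 0; }

    /* A page emptied out: record a goal and park it instead of freeing it. */
    static void page_became_empty(toy_page *p) {
        p->qsbr_goal = (wr_seq += 2);  /* advance by 2, cf. QSBR_INCR */
        p->next = pending;
        pending = p;
    }

    /* All threads have since passed a quiescent state, so no lock-free reader
     * can still hold a pointer into memory retired before the current goal. */
    static void readers_quiescent(void) { rd_seq = wr_seq; }

    /* Called when a fresh page is needed: free whatever is now safe to free. */
    static void collect_pending(void) {
        toy_page **pp = &pending;
        while (*pp != NULL) {
            toy_page *p = *pp;
            if (seq_leq(p->qsbr_goal, rd_seq)) {
                *pp = p->next;
                printf("freeing page with goal %" PRIu64 "\n", p->qsbr_goal);
                free(p);
            }
            else {
                pp = &p->next;
            }
        }
    }

    int main(void) {
        toy_page *p = calloc(1, sizeof(*p));
        page_became_empty(p);
        collect_pending();     /* goal not reached yet: page stays parked */
        readers_quiescent();
        collect_pending();     /* now safe: page is actually freed */
        return 0;
    }

The actual patch differs in the details: goals come from _Py_qsbr_deferred_advance() on the shared QSBR state, parked pages sit on a per-thread page_list, and a page that gets reused for new allocations simply drops its goal rather than staying queued.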
@@ -311,6 +311,7 @@ typedef struct mi_page_s {
   uint32_t          slice_offset;      // distance from the actual page data slice (0 if a page)
   uint8_t           is_committed : 1;  // `true` if the page virtual memory is committed
   uint8_t           is_zero_init : 1;  // `true` if the page was initially zero initialized
+  uint8_t           use_qsbr : 1;      // delay page freeing using qsbr
   uint8_t           tag : 4;           // tag from the owning heap
   uint8_t           debug_offset;      // number of bytes to preserve when filling freed or uninitialized memory
 
@@ -336,8 +337,13 @@ typedef struct mi_page_s {
   struct mi_page_s* next;              // next page owned by this thread with the same `block_size`
   struct mi_page_s* prev;              // previous page owned by this thread with the same `block_size`
 
+#ifdef Py_GIL_DISABLED
+  struct llist_node qsbr_node;
+  uint64_t          qsbr_goal;
+#endif
+
   // 64-bit 9 words, 32-bit 12 words, (+2 for secure)
-#if MI_INTPTR_SIZE==8
+#if MI_INTPTR_SIZE==8 && !defined(Py_GIL_DISABLED)
   uintptr_t padding[1];
 #endif
 } mi_page_t;
@@ -555,6 +561,7 @@ struct mi_heap_s {
   bool              no_reclaim;        // `true` if this heap should not reclaim abandoned pages
   uint8_t           tag;               // custom identifier for this heap
   uint8_t           debug_offset;      // number of bytes to preserve when filling freed or uninitialized memory
+  bool              page_use_qsbr;     // should freeing pages be delayed using QSBR
 };
 
 
@@ -48,6 +48,7 @@ struct _mimalloc_thread_state {
     mi_heap_t *current_object_heap;
     mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
     mi_tld_t tld;
+    struct llist_node page_list;
 };
 #endif
 
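Both qsbr_node (added to mi_page_t above) and page_list (added here) are nodes of CPython's intrusive linked list, manipulated in this patch through the llist_* helpers (llist_init, llist_insert_tail, llist_remove, llist_data, llist_for_each_safe). For readers unfamiliar with the pattern, the following minimal, self-contained sketch shows the same idea with hypothetical list_*/struct page names; it is not the real pycore_llist.h API:

    #include <stddef.h>
    #include <stdio.h>

    struct node { struct node *next, *prev; };

    static void list_init(struct node *head) { head->next = head->prev = head; }

    static void list_append(struct node *head, struct node *n) {
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
    }

    static void list_remove(struct node *n) {
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->next = n->prev = NULL;   /* as in the patch, "not linked" means next == NULL */
    }

    /* Recover the containing object from a pointer to its embedded node
     * (this is what llist_data(node, mi_page_t, qsbr_node) does). */
    #define list_data(n, type, member) \
        ((type *)((char *)(n) - offsetof(type, member)))

    struct page { int id; struct node qsbr_node; };   /* node embedded in the object */

    int main(void) {
        struct node page_list;      /* per-thread list head */
        list_init(&page_list);

        struct page a = { 1, {NULL, NULL} };
        struct page b = { 2, {NULL, NULL} };
        list_append(&page_list, &a.qsbr_node);
        list_append(&page_list, &b.qsbr_node);

        for (struct node *n = page_list.next; n != &page_list; n = n->next) {
            struct page *p = list_data(n, struct page, qsbr_node);
            printf("page %d is queued\n", p->id);
        }

        list_remove(&a.qsbr_node);  /* O(1) removal, no traversal needed */
        return 0;
    }

The point of embedding the node in the object is that a page can be queued and unqueued in constant time without allocating any auxiliary memory.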
@@ -29,6 +29,12 @@ extern "C" {
 #define QSBR_INITIAL 1
 #define QSBR_INCR 2
 
+// Wrap-around safe comparison. This is a holdover from the FreeBSD
+// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
+// sequence numbers, so wrap-around is unlikely.
+#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
+#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
+
 struct _qsbr_shared;
 struct _PyThreadStateImpl;  // forward declare to avoid circular dependency
 
@@ -89,6 +95,15 @@ _Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
     _Py_atomic_store_uint64_release(&qsbr->seq, seq);
 }
 
+// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
+// but does not perform a scan of threads.
+static inline bool
+_Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
+{
+    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
+    return QSBR_LEQ(goal, rd_seq);
+}
+
 // Advance the write sequence and return the new goal. This should be called
 // after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
 // determine when it is safe to reclaim (free) the memory.
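A quick standalone sanity check of the comparison macro added above; the sequence values are arbitrary examples and nothing here beyond the copied macro is CPython API:

    #include <assert.h>
    #include <stdint.h>

    #define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)

    int main(void) {
        uint64_t goal = 7, rd_seq = 9;
        assert(QSBR_LEQ(goal, rd_seq));     /* 7 <= 9: goal reached */

        goal = 9; rd_seq = 7;
        assert(!QSBR_LEQ(goal, rd_seq));    /* 9 <= 7 is false: keep waiting */

        /* Near wrap-around: rd_seq has wrapped past zero but is logically newer. */
        goal = UINT64_MAX - 1; rd_seq = 3;
        assert(QSBR_LEQ(goal, rd_seq));     /* (goal - rd_seq) is negative as int64_t */
        return 0;
    }

With 64-bit sequence numbers advancing by QSBR_INCR (2) per write, wrap-around is effectively unreachable in practice, which is what the comment above means by "unlikely".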
@@ -98,7 +98,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
   if (mi_page_all_free(page)) {
     // no more used blocks, free the page.
     // note: this will free retired pages as well.
-    _mi_page_free(page, pq, collect >= MI_FORCE);
+    bool freed = _PyMem_mi_page_maybe_free(page, pq, collect >= MI_FORCE);
+    if (!freed && collect == MI_ABANDON) {
+      _mi_page_abandon(page, pq);
+    }
   }
   else if (collect == MI_ABANDON) {
     // still used blocks but the thread is done; abandon the page
@@ -153,6 +156,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // collect retired pages
   _mi_heap_collect_retired(heap, force);
 
+  // free pages that were delayed with QSBR
+  _PyMem_mi_heap_collect_qsbr(heap);
+
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
   mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
@@ -225,6 +225,9 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
 
   // and the local free list
   if (page->local_free != NULL) {
+    // any previous QSBR goals are no longer valid because we reused the page
+    _PyMem_mi_page_clear_qsbr(page);
+
     if mi_likely(page->free == NULL) {
       // usual case
       page->free = page->local_free;
@@ -267,6 +270,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
+  _PyMem_mi_page_reclaimed(page);
   mi_assert_expensive(_mi_page_is_valid(page));
 }
 
@@ -383,6 +387,13 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
 
   mi_heap_t* pheap = mi_page_heap(page);
 
+#ifdef Py_GIL_DISABLED
+  if (page->qsbr_node.next != NULL) {
+    // remove from QSBR queue, but keep the goal
+    llist_remove(&page->qsbr_node);
+  }
+#endif
+
   // remove from our page list
   mi_segments_tld_t* segments_tld = &pheap->tld->segments;
   mi_page_queue_remove(pq, page);
@@ -417,6 +428,11 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
 
   mi_heap_t* heap = mi_page_heap(page);
 
+#ifdef Py_GIL_DISABLED
+  mi_assert_internal(page->qsbr_goal == 0);
+  mi_assert_internal(page->qsbr_node.next == NULL);
+#endif
+
   // remove from the page list
   // (no need to do _mi_heap_delayed_free first as all blocks are already free)
   mi_segments_tld_t* segments_tld = &heap->tld->segments;
@@ -444,6 +460,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
 
   mi_page_set_has_aligned(page, false);
 
+  // any previous QSBR goals are no longer valid because we reused the page
+  _PyMem_mi_page_clear_qsbr(page);
+
   // don't retire too often..
   // (or we end up retiring and re-allocating most of the time)
   // NOTE: refine this more: we should not retire if this
@@ -465,7 +484,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
       return; // dont't free after all
     }
   }
-  _mi_page_free(page, pq, false);
+  _PyMem_mi_page_maybe_free(page, pq, false);
 }
 
 // free retired pages: we don't need to look at the entire queues
@@ -480,7 +499,10 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
     if (mi_page_all_free(page)) {
       page->retire_expire--;
       if (force || page->retire_expire == 0) {
-        _mi_page_free(pq->first, pq, force);
+#ifdef Py_GIL_DISABLED
+        mi_assert_internal(page->qsbr_goal == 0);
+#endif
+        _PyMem_mi_page_maybe_free(page, pq, force);
       }
       else {
         // keep retired, update min/max
@@ -661,6 +683,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   // set fields
   mi_page_set_heap(page, heap);
   page->tag = heap->tag;
+  page->use_qsbr = heap->page_use_qsbr;
   page->debug_offset = heap->debug_offset;
   page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
   size_t page_size;
@@ -691,6 +714,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->xthread_free == 0);
   mi_assert_internal(page->next == NULL);
   mi_assert_internal(page->prev == NULL);
+#ifdef Py_GIL_DISABLED
+  mi_assert_internal(page->qsbr_goal == 0);
+  mi_assert_internal(page->qsbr_node.next == NULL);
+#endif
   mi_assert_internal(page->retire_expire == 0);
   mi_assert_internal(!mi_page_has_aligned(page));
   #if (MI_PADDING || MI_ENCODE_FREELIST)
@@ -750,6 +777,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   mi_heap_stat_counter_increase(heap, searches, count);
 
   if (page == NULL) {
+    _PyMem_mi_heap_collect_qsbr(heap); // some pages might be safe to free now
     _mi_heap_collect_retired(heap, false); // perhaps make a page available?
     page = mi_page_fresh(heap, pq);
     if (page == NULL && first_try) {
@@ -760,6 +788,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   else {
     mi_assert(pq->first == page);
     page->retire_expire = 0;
+    _PyMem_mi_page_clear_qsbr(page);
   }
   mi_assert_internal(page == NULL || mi_page_immediate_available(page));
   return page;
@@ -785,6 +814,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
 
     if (mi_page_immediate_available(page)) {
       page->retire_expire = 0;
+      _PyMem_mi_page_clear_qsbr(page);
      return page; // fast path
     }
   }
@@ -878,6 +908,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
       return NULL;
     }
     else {
+      _PyMem_mi_heap_collect_qsbr(heap);
       return mi_large_huge_page_alloc(heap,size,huge_alignment);
     }
   }
 
@@ -982,6 +982,10 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
   mi_assert_internal(mi_page_all_free(page));
   mi_segment_t* segment = _mi_ptr_segment(page);
   mi_assert_internal(segment->used > 0);
+#ifdef Py_GIL_DISABLED
+  mi_assert_internal(page->qsbr_goal == 0);
+  mi_assert_internal(page->qsbr_node.next == NULL);
+#endif
 
   size_t inuse = page->capacity * mi_page_block_size(page);
   _mi_stat_decrease(&tld->stats->page_committed, inuse);
@@ -1270,10 +1274,13 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
       // ensure used count is up to date and collect potential concurrent frees
       mi_page_t* const page = mi_slice_to_page(slice);
       _mi_page_free_collect(page, false);
-      if (mi_page_all_free(page)) {
+      if (mi_page_all_free(page) && _PyMem_mi_page_is_safe_to_free(page)) {
        // if this page is all free now, free it without adding to any queues (yet)
        mi_assert_internal(page->next == NULL && page->prev==NULL);
        _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
+#ifdef Py_GIL_DISABLED
+        page->qsbr_goal = 0;
+#endif
        segment->abandoned--;
        slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
        mi_assert_internal(!mi_slice_is_used(slice));
@@ -1344,15 +1351,18 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       mi_page_set_heap(page, target_heap);
       _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
       _mi_page_free_collect(page, false); // ensure used count is up to date
-      if (mi_page_all_free(page)) {
+      if (mi_page_all_free(page) && _PyMem_mi_page_is_safe_to_free(page)) {
        // if everything free by now, free the page
+#ifdef Py_GIL_DISABLED
+        page->qsbr_goal = 0;
+#endif
        slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
      }
      else {
        // otherwise reclaim it into the heap
        _mi_page_reclaim(target_heap, page);
        if (requested_block_size == page->xblock_size && mi_page_has_any_available(page) &&
-            heap == target_heap) {
+            requested_block_size <= MI_MEDIUM_OBJ_SIZE_MAX && heap == target_heap) {
          if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
        }
      }
@@ -12,6 +12,12 @@
 #include <stdlib.h>               // malloc()
 #include <stdbool.h>
 #ifdef WITH_MIMALLOC
+// Forward declarations of functions used in our mimalloc modifications
+static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
+static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
+static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
+static void _PyMem_mi_page_reclaimed(mi_page_t *page);
+static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
 # include "pycore_mimalloc.h"
 # include "mimalloc/static.c"
 # include "mimalloc/internal.h"   // for stats
@@ -86,6 +92,113 @@ _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
 
 #ifdef WITH_MIMALLOC
 
+static void
+_PyMem_mi_page_clear_qsbr(mi_page_t *page)
+{
+#ifdef Py_GIL_DISABLED
+    // Clear the QSBR goal and remove the page from the QSBR linked list.
+    page->qsbr_goal = 0;
+    if (page->qsbr_node.next != NULL) {
+        llist_remove(&page->qsbr_node);
+    }
+#endif
+}
+
+// Check if an empty, newly reclaimed page is safe to free now.
+static bool
+_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
+{
+    assert(mi_page_all_free(page));
+#ifdef Py_GIL_DISABLED
+    assert(page->qsbr_node.next == NULL);
+    if (page->use_qsbr && page->qsbr_goal != 0) {
+        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+        if (tstate == NULL) {
+            return false;
+        }
+        return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
+    }
+#endif
+    return true;
+
+}
+
+static bool
+_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
+{
+#ifdef Py_GIL_DISABLED
+    assert(mi_page_all_free(page));
+    if (page->use_qsbr) {
+        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
+        if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
+            _PyMem_mi_page_clear_qsbr(page);
+            _mi_page_free(page, pq, force);
+            return true;
+        }
+
+        _PyMem_mi_page_clear_qsbr(page);
+        page->retire_expire = 0;
+        page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
+        llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
+        return false;
+    }
+#endif
+    _mi_page_free(page, pq, force);
+    return true;
+}
+
+static void
+_PyMem_mi_page_reclaimed(mi_page_t *page)
+{
+#ifdef Py_GIL_DISABLED
+    assert(page->qsbr_node.next == NULL);
+    if (page->qsbr_goal != 0) {
+        if (mi_page_all_free(page)) {
+            assert(page->qsbr_node.next == NULL);
+            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
+            page->retire_expire = 0;
+            llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
+        }
+        else {
+            page->qsbr_goal = 0;
+        }
+    }
+#endif
+}
+
+static void
+_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
+{
+#ifdef Py_GIL_DISABLED
+    if (!heap->page_use_qsbr) {
+        return;
+    }
+
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    struct llist_node *head = &tstate->mimalloc.page_list;
+    if (llist_empty(head)) {
+        return;
+    }
+
+    struct llist_node *node;
+    llist_for_each_safe(node, head) {
+        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
+        if (!mi_page_all_free(page)) {
+            // We allocated from this page some point after the delayed free
+            _PyMem_mi_page_clear_qsbr(page);
+            continue;
+        }
+
+        if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
+            return;
+        }
+
+        _PyMem_mi_page_clear_qsbr(page);
+        _mi_page_free(page, mi_page_queue_of(page), false);
+    }
+#endif
+}
+
 void *
 _PyMem_MiMalloc(void *ctx, size_t size)
 {
@@ -2839,6 +2839,7 @@ tstate_mimalloc_bind(PyThreadState *tstate)
     // the "backing" heap.
     mi_tld_t *tld = &mts->tld;
     _mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);
+    llist_init(&mts->page_list);
 
     // Exiting threads push any remaining in-use segments to the abandoned
     // pool to be re-claimed later by other threads. We use per-interpreter
@@ -2865,6 +2866,12 @@ tstate_mimalloc_bind(PyThreadState *tstate)
         mts->heaps[i].debug_offset = (uint8_t)debug_offsets[i];
     }
 
+    // Heaps that store Python objects should use QSBR to delay freeing
+    // mimalloc pages while there may be concurrent lock-free readers.
+    mts->heaps[_Py_MIMALLOC_HEAP_OBJECT].page_use_qsbr = true;
+    mts->heaps[_Py_MIMALLOC_HEAP_GC].page_use_qsbr = true;
+    mts->heaps[_Py_MIMALLOC_HEAP_GC_PRE].page_use_qsbr = true;
+
     // By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT.
     // _PyObject_GC_New() and similar functions temporarily override this to
     // use one of the GC heaps.
@@ -38,12 +38,6 @@
 #include "pycore_pystate.h"       // _PyThreadState_GET()
 
 
-// Wrap-around safe comparison. This is a holdover from the FreeBSD
-// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
-// sequence numbers, so wrap-around is unlikely.
-#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
-#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
-
 // Starting size of the array of qsbr thread states
 #define MIN_ARRAY_SIZE 8
 
@@ -167,13 +161,11 @@ bool
 _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
 {
     assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);
-
-    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
-    if (QSBR_LEQ(goal, rd_seq)) {
+    if (_Py_qbsr_goal_reached(qsbr, goal)) {
         return true;
     }
 
-    rd_seq = qsbr_poll_scan(qsbr->shared);
+    uint64_t rd_seq = qsbr_poll_scan(qsbr->shared);
     return QSBR_LEQ(goal, rd_seq);
 }
 