mirror of https://github.com/python/cpython
bpo-37448: Use radix tree for pymalloc address_in_range(). (GH-14474)
The radix tree approach is a relatively simple and memory sanitary alternative to the old (slightly) unsanitary address_in_range(). To disable the radix tree map, set a preprocessor flag as follows: -DWITH_PYMALLOC_RADIX_TREE=0. Co-authored-by: Tim Peters <tim.peters@gmail.com>
This commit is contained in: parent a54fc683f2 · commit 85b6b70589
@@ -0,0 +1,15 @@
Add a radix tree based memory map to track in-use obmalloc arenas. It is
used to replace the old implementation of address_in_range(). The radix
tree approach makes it easy to increase pool sizes beyond the OS page size.
Boosting the pool and arena size allows obmalloc to handle a significantly
higher percentage of requests from its ultra-fast paths.

It also has the advantage of eliminating the memory unsanitary behavior of
the previous address_in_range(). The old address_in_range() was marked with
the annotations _Py_NO_SANITIZE_ADDRESS, _Py_NO_SANITIZE_THREAD, and
_Py_NO_SANITIZE_MEMORY. Those annotations are no longer needed.

To disable the radix tree map, set a preprocessor flag as follows:
`-DWITH_PYMALLOC_RADIX_TREE=0`.

Co-authored-by: Tim Peters <tim.peters@gmail.com>
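Editor's note: the following is an illustration added in editing, not part of
the commit. Since WITH_PYMALLOC_RADIX_TREE is an ordinary preprocessor define,
one plausible way to pass it when building CPython from source (assuming a
typical autoconf build) is:

    ./configure CFLAGS="-DWITH_PYMALLOC_RADIX_TREE=0"
    make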
@@ -894,6 +894,22 @@ static int running_on_valgrind = -1;
#endif
#endif

#if !defined(WITH_PYMALLOC_RADIX_TREE)
/* Use radix-tree to track arena memory regions, for address_in_range().
 * Enabled by default since it allows larger pool sizes.  Can be disabled
 * using -DWITH_PYMALLOC_RADIX_TREE=0 */
#define WITH_PYMALLOC_RADIX_TREE 1
#endif

#if SIZEOF_VOID_P > 4
/* on 64-bit platforms use larger pools and arenas if we can */
#define USE_LARGE_ARENAS
#if WITH_PYMALLOC_RADIX_TREE
/* large pools only supported if radix-tree is enabled */
#define USE_LARGE_POOLS
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary.  This is a reserved virtual address space for the
@@ -907,18 +923,34 @@ static int running_on_valgrind = -1;
 * Arenas are allocated with mmap() on systems supporting anonymous memory
 * mappings to reduce heap fragmentation.
 */
-#define ARENA_SIZE              (256 << 10)     /* 256KB */
+#ifdef USE_LARGE_ARENAS
+#define ARENA_BITS              20              /* 1 MiB */
+#else
+#define ARENA_BITS              18              /* 256 KiB */
+#endif
+#define ARENA_SIZE              (1 << ARENA_BITS)
+#define ARENA_SIZE_MASK         (ARENA_SIZE - 1)

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
- * Size of the pools used for small blocks. Should be a power of 2,
- * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
+ * Size of the pools used for small blocks.  Must be a power of 2.
 */
-#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
-#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
+#ifdef USE_LARGE_POOLS
+#define POOL_BITS               14              /* 16 KiB */
+#else
+#define POOL_BITS               12              /* 4 KiB */
+#endif
+#define POOL_SIZE               (1 << POOL_BITS)
+#define POOL_SIZE_MASK          (POOL_SIZE - 1)
+
+#if !WITH_PYMALLOC_RADIX_TREE
+#if POOL_SIZE != SYSTEM_PAGE_SIZE
+# error "pool size must be equal to system page size"
+#endif
+#endif

#define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
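/* Editor's note (an illustration added in editing, not part of the commit):
 * with the defaults above on a 64-bit build, ARENA_SIZE = 1 << 20 (1 MiB)
 * and POOL_SIZE = 1 << 14 (16 KiB), so MAX_POOLS_IN_ARENA =
 * 1048576 / 16384 = 64.  The old layout also had 64 pools per arena
 * (256 KiB / 4 KiB); the new one keeps the count while quadrupling both
 * the pool and the arena size. */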
@@ -1233,6 +1265,264 @@ _Py_GetAllocatedBlocks(void)
    return n;
}

#if WITH_PYMALLOC_RADIX_TREE
/*==========================================================================*/
/* radix tree for tracking arena usage

   bit allocation for keys

   64-bit pointers and 2^20 arena size:
     16 -> ignored (POINTER_BITS - ADDRESS_BITS)
     10 -> MAP_TOP
     10 -> MAP_MID
      8 -> MAP_BOT
     20 -> ideal aligned arena
   ----
     64

   32-bit pointers and 2^18 arena size:
     14 -> MAP_BOT
     18 -> ideal aligned arena
   ----
     32

*/
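/* Editor's note: a worked example added in editing, not part of the commit.
 * Assuming the 64-bit constants defined below (ADDRESS_BITS 48, ARENA_BITS
 * 20, so MAP_TOP/MAP_MID get 10 bits each and MAP_BOT gets 8), an address p
 * decomposes as:
 *
 *     top  = (p >> 38) & 0x3ff     -- index into MAP_TOP (10 bits)
 *     mid  = (p >> 28) & 0x3ff     -- index into MAP_MID (10 bits)
 *     bot  = (p >> 20) & 0xff      -- index into MAP_BOT (8 bits)
 *     tail = p & 0xfffff           -- offset within the ideal 1 MiB arena
 *
 * The top 16 bits (p >> 48) are ignored; they are asserted to be identical
 * for all valid pointers. */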
#if SIZEOF_VOID_P == 8

/* number of bits in a pointer */
#define POINTER_BITS 64

/* Current 64-bit processors are limited to 48-bit physical addresses.  For
 * now, the top 17 bits of addresses will all be equal to bit 2**47.  If that
 * changes in the future, this must be adjusted upwards.
 */
#define ADDRESS_BITS 48

/* use the top and mid layers of the radix tree */
#define USE_INTERIOR_NODES

#elif SIZEOF_VOID_P == 4

#define POINTER_BITS 32
#define ADDRESS_BITS 32

#else

/* Currently this code works for 64-bit or 32-bit pointers only. */
#error "obmalloc radix tree requires 64-bit or 32-bit pointers."

#endif /* SIZEOF_VOID_P */

/* arena_coverage_t members require this to be true */
#if ARENA_BITS >= 32
#  error "arena size must be < 2^32"
#endif
#ifdef USE_INTERIOR_NODES
/* number of bits used for MAP_TOP and MAP_MID nodes */
#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
#else
#define INTERIOR_BITS 0
#endif

#define MAP_TOP_BITS INTERIOR_BITS
#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
#define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)

#define MAP_MID_BITS INTERIOR_BITS
#define MAP_MID_LENGTH (1 << MAP_MID_BITS)
#define MAP_MID_MASK (MAP_MID_LENGTH - 1)

#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)

#define MAP_BOT_SHIFT ARENA_BITS
#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)

#define AS_UINT(p) ((uintptr_t)(p))
#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)

#if POINTER_BITS > ADDRESS_BITS
/* Return the non-physical-address bits of a pointer.  Those bits should be
 * the same for all valid pointers if ADDRESS_BITS is set correctly.  Linux
 * supports a 57-bit address space (Intel 5-level paging) but does not
 * currently give those addresses to user space.
 */
#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
#else
#define HIGH_BITS(p) 0
#endif
/* This is the leaf of the radix tree.  See arena_map_mark_used() for the
 * meaning of these members. */
typedef struct {
    int32_t tail_hi;
    int32_t tail_lo;
} arena_coverage_t;

typedef struct arena_map_bot {
    /* The members tail_hi and tail_lo are accessed together.  So, it is
     * better to have them as an array of structs, rather than two
     * arrays.
     */
    arena_coverage_t arenas[MAP_BOT_LENGTH];
} arena_map_bot_t;

#ifdef USE_INTERIOR_NODES
typedef struct arena_map_mid {
    struct arena_map_bot *ptrs[MAP_MID_LENGTH];
} arena_map_mid_t;

typedef struct arena_map_top {
    struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
} arena_map_top_t;
#endif

/* The root of the radix tree.  Note that by initializing it like this, the
 * memory should be in the BSS.  The OS will only memory-map pages as the
 * MAP_MID nodes get used (OS pages are demand loaded as needed).
 */
#ifdef USE_INTERIOR_NODES
static arena_map_top_t arena_map_root;
/* accounting for number of used interior nodes */
static int arena_map_mid_count;
static int arena_map_bot_count;
#else
static arena_map_bot_t arena_map_root;
#endif
/* Return a pointer to a bottom tree node; return NULL if it doesn't exist
 * or it cannot be created */
static arena_map_bot_t *
arena_map_get(block *p, int create)
{
#ifdef USE_INTERIOR_NODES
    /* sanity check that ADDRESS_BITS is correct */
    assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
    int i1 = MAP_TOP_INDEX(p);
    if (arena_map_root.ptrs[i1] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1] = n;
        arena_map_mid_count++;
    }
    int i2 = MAP_MID_INDEX(p);
    if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
        if (!create) {
            return NULL;
        }
        arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
        if (n == NULL) {
            return NULL;
        }
        arena_map_root.ptrs[i1]->ptrs[i2] = n;
        arena_map_bot_count++;
    }
    return arena_map_root.ptrs[i1]->ptrs[i2];
#else
    return &arena_map_root;
#endif
}
/* The radix tree only tracks arenas.  So, for the 1 MiB arenas configured
 * above on 64-bit builds, we throw away the low 20 bits of the address.
 * That reduces the space requirement of the tree compared to similar radix
 * tree page-map schemes.  In exchange for slashing the space requirement,
 * it needs more computation to check an address.
 *
 * Tracking coverage is done by "ideal" arena address.  It is easier to
 * explain in decimal so let's say that the arena size is 100 bytes.
 * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
 * pointer address is inside an actual arena, we have to check two ideal
 * arena addresses.  E.g. if the pointer is 357, we need to check 200 and
 * 300.  In the rare case that an arena is aligned in the ideal way
 * (e.g. the base address of the arena is 200) then we only have to check
 * one ideal address.
 *
 * The tree nodes for 200 and 300 both store the address of the arena.
 * There are two cases: the arena starts at a lower ideal arena and
 * extends to this one, or the arena starts in this arena and extends to
 * the next ideal arena.  The tail_lo and tail_hi members correspond to
 * these two cases.
 */
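/* Editor's note: a worked instance of the decimal analogy above, added in
 * editing and not part of the commit.  With an arena size of 100, an arena
 * based at 250 covers [250, 350).  arena_map_mark_used(250, 1) records:
 *
 *     ideal block [200, 300):  tail_hi = 50  (an arena starts at offset 50)
 *     ideal block [300, 400):  tail_lo = 50  (an arena spills in up to 50)
 *
 * Querying p = 270 (tail 70): 70 >= tail_hi (50) and tail_hi != 0, so p is
 * inside the arena.  Querying p = 357 (tail 57): 57 >= tail_lo (50) and the
 * tail_hi of its block is 0, so p is outside. */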
/* mark or unmark addresses covered by arena */
static int
arena_map_mark_used(uintptr_t arena_base, int is_used)
{
    /* sanity check that ADDRESS_BITS is correct */
    assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
    arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
    if (n_hi == NULL) {
        assert(is_used);  /* otherwise node should already exist */
        return 0;         /* failed to allocate space for node */
    }
    int i3 = MAP_BOT_INDEX((block *)arena_base);
    int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
    if (tail == 0) {
        /* is ideal arena address */
        n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
    }
    else {
        /* The arena_base address is not ideal (i.e. it is not aligned to
         * the arena size) and so it potentially covers two MAP_BOT nodes.
         * Get the MAP_BOT node for the next arena.  Note that it might be
         * in different MAP_TOP and MAP_MID nodes as well, so we need to
         * call arena_map_get() again (do the full tree traversal).
         */
        n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
        uintptr_t arena_base_next = arena_base + ARENA_SIZE;
        /* If arena_base is a legit arena address, so is arena_base_next - 1
         * (last address in arena).  If arena_base_next overflows then it
         * must overflow to 0.  However, that would mean arena_base was
         * "ideal" and we should not be in this case. */
        assert(arena_base < arena_base_next);
        arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
        if (n_lo == NULL) {
            assert(is_used);  /* otherwise should already exist */
            n_hi->arenas[i3].tail_hi = 0;
            return 0;         /* failed to allocate space for node */
        }
        int i3_next = MAP_BOT_INDEX(arena_base_next);
        n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
    }
    return 1;
}
/* Return true if 'p' is a pointer inside an obmalloc arena.
 * _PyObject_Free() calls this so it needs to be very fast. */
static int
arena_map_is_used(block *p)
{
    arena_map_bot_t *n = arena_map_get(p, 0);
    if (n == NULL) {
        return 0;
    }
    int i3 = MAP_BOT_INDEX(p);
    /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
    int32_t hi = n->arenas[i3].tail_hi;
    int32_t lo = n->arenas[i3].tail_lo;
    int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
    return (tail < lo) || (tail >= hi && hi != 0);
}

/* end of radix tree logic */
/*==========================================================================*/
#endif /* WITH_PYMALLOC_RADIX_TREE */
/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 * allocate a new arena, and return the address of an arena_object

@@ -1302,6 +1592,15 @@ new_arena(void)
    unused_arena_objects = arenaobj->nextarena;
    assert(arenaobj->address == 0);
    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
#if WITH_PYMALLOC_RADIX_TREE
    if (address != NULL) {
        if (!arena_map_mark_used((uintptr_t)address, 1)) {
            /* marking arena in radix tree failed, abort */
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
            address = NULL;
        }
    }
#endif
    if (address == NULL) {
        /* The allocation failed: return NULL after putting the
         * arenaobj back.
@@ -1332,6 +1631,17 @@ new_arena(void)
}


#if WITH_PYMALLOC_RADIX_TREE
/* Return true if and only if P is an address that was allocated by
 * pymalloc.  When the radix tree is used, the 'pool' argument is unused.
 */
static bool
address_in_range(void *p, poolp pool)
{
    return arena_map_is_used(p);
}
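/* Editor's note: a hedged sketch added in editing, not from the commit, of
 * how this predicate is typically consumed on the free path: pymalloc only
 * reclaims a block itself when the pointer lies inside an obmalloc arena;
 * otherwise it must defer to the raw allocator.  The helper name below is
 * hypothetical. */
static void
sketch_free_path(void *p)
{
    if (address_in_range(p, NULL)) {  /* pool argument unused in this build */
        /* ... return the block to its pool's free list ... */
    }
    else {
        /* ... not an obmalloc block; hand off to the raw allocator ... */
    }
}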
#else
/*
address_in_range(P, POOL)
@@ -1423,6 +1733,7 @@ address_in_range(void *p, poolp pool)
           arenas[arenaindex].address != 0;
}

#endif /* !WITH_PYMALLOC_RADIX_TREE */

/*==========================================================================*/
@@ -1768,6 +2079,11 @@ insert_to_freepool(poolp pool)
        ao->nextarena = unused_arena_objects;
        unused_arena_objects = ao;

#if WITH_PYMALLOC_RADIX_TREE
        /* mark arena region as not under control of obmalloc */
        arena_map_mark_used(ao->address, 0);
#endif

        /* Free the entire arena. */
        _PyObject_Arena.free(_PyObject_Arena.ctx,
                             (void *)ao->address, ARENA_SIZE);
@@ -2711,6 +3027,12 @@ _PyObject_DebugMallocStats(FILE *out)
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
@@ -2729,6 +3051,15 @@ _PyObject_DebugMallocStats(FILE *out)
    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
#ifdef WITH_PYMALLOC_RADIX_TREE
    total += printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#endif
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
#endif
    (void)printone(out, "Total", total);
    return 1;
}