gh-113055: Use pointer for interp->obmalloc state (gh-113412)

For interpreters that share state with the main interpreter, this points
to the same static memory structure.  For interpreters with their own
obmalloc state, it is heap allocated.  Add free_obmalloc_arenas() which
will free the obmalloc arenas and radix tree structures for interpreters
with their own obmalloc state.

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
Neil Schemenauer 2024-01-26 19:38:14 -08:00 committed by GitHub
parent 2d08af34b8
commit 7a7bce5a0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 157 additions and 24 deletions

View File

@ -203,7 +203,17 @@ struct _is {
struct _mimalloc_interp_state mimalloc;
#endif
struct _obmalloc_state obmalloc;
// Per-interpreter state for the obmalloc allocator. For the main
// interpreter and for all interpreters that don't have their
// own obmalloc state, this points to the static structure in
// obmalloc.c obmalloc_state_main. For other interpreters, it is
// heap allocated by _PyMem_init_obmalloc() and freed when the
// interpreter structure is freed. In the case of a heap allocated
// obmalloc state, it is not safe to hold on to or use memory after
// the interpreter is freed. The obmalloc state corresponding to
// that allocated memory is gone. See free_obmalloc_arenas() for
// more comments.
struct _obmalloc_state *obmalloc;
PyObject *audit_hooks;
PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS];

View File

@ -686,6 +686,8 @@ extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void);
_Py_GetGlobalAllocatedBlocks()
extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *);
extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *);
extern int _PyMem_init_obmalloc(PyInterpreterState *interp);
extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp);
#ifdef WITH_PYMALLOC

View File

@ -59,13 +59,6 @@ extern "C" {
.dump_debug_stats = -1, \
}
#define _obmalloc_state_INIT(obmalloc) \
{ \
.pools = { \
.used = _obmalloc_pools_INIT(obmalloc.pools), \
}, \
}
#ifdef __cplusplus
}

View File

@ -155,7 +155,6 @@ extern PyTypeObject _PyExc_MemoryError;
{ \
.id_refcount = -1, \
.imports = IMPORTS_INIT, \
.obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \
.ceval = { \
.recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
}, \

View File

@ -0,0 +1,5 @@
Make interp->obmalloc a pointer. For interpreters that share state with the
main interpreter, this points to the same static memory structure. For
interpreters with their own obmalloc state, it is heap allocated. Add
free_obmalloc_arenas() which will free the obmalloc arenas and radix tree
structures for interpreters with their own obmalloc state.

View File

@ -7,6 +7,7 @@
#include "pycore_pyerrors.h" // _Py_FatalErrorFormat()
#include "pycore_pymem.h"
#include "pycore_pystate.h" // _PyInterpreterState_GET
#include "pycore_obmalloc_init.h"
#include <stdlib.h> // malloc()
#include <stdbool.h>
@ -1016,6 +1017,13 @@ static int running_on_valgrind = -1;
typedef struct _obmalloc_state OMState;
/* obmalloc state for main interpreter and shared by all interpreters without
* their own obmalloc state. By not explicitly initalizing this structure, it
* will be allocated in the BSS which is a small performance win. The radix
* tree arrays are fairly large but are sparsely used. */
static struct _obmalloc_state obmalloc_state_main;
static bool obmalloc_state_initialized;
static inline int
has_own_state(PyInterpreterState *interp)
{
@ -1028,10 +1036,8 @@ static inline OMState *
get_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!has_own_state(interp)) {
interp = _PyInterpreterState_Main();
}
return &interp->obmalloc;
assert(interp->obmalloc != NULL); // otherwise not initialized or freed
return interp->obmalloc;
}
// These macros all rely on a local "state" variable.
@ -1094,7 +1100,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
"the interpreter doesn't have its own allocator");
}
#endif
OMState *state = &interp->obmalloc;
OMState *state = interp->obmalloc;
if (state == NULL) {
return 0;
}
Py_ssize_t n = raw_allocated_blocks;
/* add up allocated blocks for used pools */
@ -1116,6 +1126,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
return n;
}
static void free_obmalloc_arenas(PyInterpreterState *interp);
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
@ -1124,10 +1136,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
return;
}
#endif
if (has_own_state(interp)) {
if (has_own_state(interp) && interp->obmalloc != NULL) {
Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
assert(has_own_state(interp) || leaked == 0);
interp->runtime->obmalloc.interpreter_leaks += leaked;
if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
// free the obmalloc arenas and radix tree nodes. If leaked > 0
// then some of the memory allocated by obmalloc has not been
// freed. It might be safe to free the arenas in that case but
// it's possible that extension modules are still using that
// memory. So, it is safer to not free and to leak. Perhaps there
// should be warning when this happens. It should be possible to
// use a tool like "-fsanitize=address" to track down these leaks.
free_obmalloc_arenas(interp);
}
}
}
@ -2717,8 +2739,95 @@ _PyDebugAllocatorStats(FILE *out,
(void)printone(out, buf2, num_blocks * sizeof_block);
}
// Return true if the obmalloc state structure is heap allocated,
// by PyMem_RawCalloc(). For the main interpreter, this structure
// allocated in the BSS. Allocating that way gives some memory savings
// and a small performance win (at least on a demand paged OS). On
// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
// memory is for the arena_map_top array. Since normally only one entry
// of that array is used, only one page of resident memory is actually
// used, rather than the full 256 kB.
bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
{
#if WITH_PYMALLOC
return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
#else
return false;
#endif
}
#ifdef WITH_PYMALLOC
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
// initialize the obmalloc->pools structure. This must be done
// before the obmalloc alloc/free functions can be called.
poolp temp[OBMALLOC_USED_POOLS_SIZE] =
_obmalloc_pools_INIT(interp->obmalloc->pools);
memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
}
#endif /* WITH_PYMALLOC */
int _PyMem_init_obmalloc(PyInterpreterState *interp)
{
#ifdef WITH_PYMALLOC
/* Initialize obmalloc, but only for subinterpreters,
since the main interpreter is initialized statically. */
if (_Py_IsMainInterpreter(interp)
|| _PyInterpreterState_HasFeature(interp,
Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
interp->obmalloc = &obmalloc_state_main;
if (!obmalloc_state_initialized) {
init_obmalloc_pools(interp);
obmalloc_state_initialized = true;
}
} else {
interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
if (interp->obmalloc == NULL) {
return -1;
}
init_obmalloc_pools(interp);
}
#endif /* WITH_PYMALLOC */
return 0; // success
}
#ifdef WITH_PYMALLOC
static void
free_obmalloc_arenas(PyInterpreterState *interp)
{
OMState *state = interp->obmalloc;
for (uint i = 0; i < maxarenas; ++i) {
// free each obmalloc memory arena
struct arena_object *ao = &allarenas[i];
_PyObject_Arena.free(_PyObject_Arena.ctx,
(void *)ao->address, ARENA_SIZE);
}
// free the array containing pointers to all arenas
PyMem_RawFree(allarenas);
#if WITH_PYMALLOC_RADIX_TREE
#ifdef USE_INTERIOR_NODES
// Free the middle and bottom nodes of the radix tree. These are allocated
// by arena_map_mark_used() but not freed when arenas are freed.
for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
arena_map_mid_t *mid = arena_map_root.ptrs[i1];
if (mid == NULL) {
continue;
}
for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
if (bot == NULL) {
continue;
}
PyMem_RawFree(bot);
}
PyMem_RawFree(mid);
}
#endif
#endif
}
#ifdef Py_DEBUG
/* Is target in the list? The list is traversed via the nextpool pointers.

View File

@ -32,6 +32,7 @@
#include "pycore_typevarobject.h" // _Py_clear_generic_types()
#include "pycore_unicodeobject.h" // _PyUnicode_InitTypes()
#include "pycore_weakref.h" // _PyWeakref_GET_REF()
#include "pycore_obmalloc.h" // _PyMem_init_obmalloc()
#include "opcode.h"
@ -645,6 +646,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
return status;
}
// initialize the interp->obmalloc state. This must be done after
// the settings are loaded (so that feature_flags are set) but before
// any calls are made to obmalloc functions.
if (_PyMem_init_obmalloc(interp) < 0) {
return _PyStatus_NO_MEMORY();
}
PyThreadState *tstate = _PyThreadState_New(interp,
_PyThreadState_WHENCE_INTERP);
if (tstate == NULL) {
@ -2144,6 +2152,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config)
goto error;
}
// initialize the interp->obmalloc state. This must be done after
// the settings are loaded (so that feature_flags are set) but before
// any calls are made to obmalloc functions.
if (_PyMem_init_obmalloc(interp) < 0) {
status = _PyStatus_NO_MEMORY();
goto error;
}
tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP);
if (tstate == NULL) {
status = _PyStatus_NO_MEMORY();

View File

@ -18,6 +18,7 @@
#include "pycore_pystate.h"
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
#include "pycore_sysmodule.h" // _PySys_Audit()
#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap()
/* --------------------------------------------------------------------------
CAUTION
@ -553,6 +554,11 @@ free_interpreter(PyInterpreterState *interp)
// The main interpreter is statically allocated so
// should not be freed.
if (interp != &_PyRuntime._main_interpreter) {
if (_PyMem_obmalloc_state_on_heap(interp)) {
// interpreter has its own obmalloc state, free it
PyMem_RawFree(interp->obmalloc);
interp->obmalloc = NULL;
}
PyMem_RawFree(interp);
}
}
@ -595,14 +601,6 @@ init_interpreter(PyInterpreterState *interp,
assert(next != NULL || (interp == runtime->interpreters.main));
interp->next = next;
/* Initialize obmalloc, but only for subinterpreters,
since the main interpreter is initialized statically. */
if (interp != &runtime->_main_interpreter) {
poolp temp[OBMALLOC_USED_POOLS_SIZE] = \
_obmalloc_pools_INIT(interp->obmalloc.pools);
memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp));
}
PyStatus status = _PyObject_InitState(interp);
if (_PyStatus_EXCEPTION(status)) {
return status;

View File

@ -325,7 +325,8 @@ Objects/obmalloc.c - _PyMem_Debug -
Objects/obmalloc.c - _PyMem_Raw -
Objects/obmalloc.c - _PyObject -
Objects/obmalloc.c - last_final_leaks -
Objects/obmalloc.c - usedpools -
Objects/obmalloc.c - obmalloc_state_main -
Objects/obmalloc.c - obmalloc_state_initialized -
Objects/typeobject.c - name_op -
Objects/typeobject.c - slotdefs -
Objects/unicodeobject.c - stripfuncnames -

Can't render this file because it has a wrong number of fields in line 4.