gh-101659: Isolate "obmalloc" State to Each Interpreter (gh-101660)

This is strictly about moving the "obmalloc" runtime state from
`_PyRuntimeState` to `PyInterpreterState`.  Doing so improves isolation
between interpreters, specifically for most of the memory (incl. objects)
allocated for each interpreter's use.  This is important for a
per-interpreter GIL, but such isolation is valuable even without it.

FWIW, a per-interpreter obmalloc is the proverbial
canary-in-the-coalmine when it comes to the isolation of objects between
interpreters.  Any object that leaks (unintentionally) to another
interpreter is highly likely to cause a crash (on debug builds at
least).  That's a useful thing to know, relative to interpreter
isolation.
This commit is contained in:
Eric Snow 2023-04-24 17:23:57 -06:00 committed by GitHub
parent 01be52e42e
commit df3173d28e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 322 additions and 73 deletions

View File

@ -245,6 +245,8 @@ PyAPI_FUNC(PyStatus) PyConfig_SetWideStringList(PyConfig *config,
/* --- PyInterpreterConfig ------------------------------------ */
typedef struct {
// XXX "allow_object_sharing"? "own_objects"?
int use_main_obmalloc;
int allow_fork;
int allow_exec;
int allow_threads;
@ -254,6 +256,7 @@ typedef struct {
#define _PyInterpreterConfig_INIT \
{ \
.use_main_obmalloc = 0, \
.allow_fork = 0, \
.allow_exec = 0, \
.allow_threads = 1, \
@ -263,6 +266,7 @@ typedef struct {
#define _PyInterpreterConfig_LEGACY_INIT \
{ \
.use_main_obmalloc = 1, \
.allow_fork = 1, \
.allow_exec = 1, \
.allow_threads = 1, \

View File

@ -11,6 +11,10 @@ is available in a given context. For example, forking the process
might not be allowed in the current interpreter (i.e. os.fork() would fail).
*/
/* Set if the interpreter shares obmalloc runtime state
with the main interpreter. */
#define Py_RTFLAGS_USE_MAIN_OBMALLOC (1UL << 5)
/* Set if import should check a module for subinterpreter support. */
#define Py_RTFLAGS_MULTI_INTERP_EXTENSIONS (1UL << 8)

View File

@ -23,11 +23,12 @@ extern "C" {
#include "pycore_function.h" // FUNC_MAX_WATCHERS
#include "pycore_genobject.h" // struct _Py_async_gen_state
#include "pycore_gc.h" // struct _gc_runtime_state
#include "pycore_global_objects.h" // struct _Py_interp_static_objects
#include "pycore_import.h" // struct _import_state
#include "pycore_instruments.h" // PY_MONITORING_EVENTS
#include "pycore_list.h" // struct _Py_list_state
#include "pycore_global_objects.h" // struct _Py_interp_static_objects
#include "pycore_object_state.h" // struct _py_object_state
#include "pycore_obmalloc.h" // struct obmalloc_state
#include "pycore_tuple.h" // struct _Py_tuple_state
#include "pycore_typeobject.h" // struct type_cache
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
@ -82,6 +83,8 @@ struct _is {
int _initialized;
int finalizing;
struct _obmalloc_state obmalloc;
struct _ceval_state ceval;
struct _gc_runtime_state gc;

View File

@ -657,8 +657,12 @@ struct _obmalloc_usage {
#endif /* WITH_PYMALLOC_RADIX_TREE */
struct _obmalloc_state {
struct _obmalloc_global_state {
int dump_debug_stats;
Py_ssize_t interpreter_leaks;
};
struct _obmalloc_state {
struct _obmalloc_pools pools;
struct _obmalloc_mgmt mgmt;
struct _obmalloc_usage usage;
@ -675,7 +679,11 @@ void _PyObject_VirtualFree(void *, size_t size);
/* This function returns the number of allocated memory blocks, regardless of size */
PyAPI_FUNC(Py_ssize_t) _Py_GetAllocatedBlocks(void);
extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void);
#define _Py_GetAllocatedBlocks() \
_Py_GetGlobalAllocatedBlocks()
extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *);
extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *);
#ifdef WITH_PYMALLOC

View File

@ -54,9 +54,13 @@ extern "C" {
# error "NB_SMALL_SIZE_CLASSES should be less than 64"
#endif
#define _obmalloc_state_INIT(obmalloc) \
#define _obmalloc_global_state_INIT \
{ \
.dump_debug_stats = -1, \
}
#define _obmalloc_state_INIT(obmalloc) \
{ \
.pools = { \
.used = _obmalloc_pools_INIT(obmalloc.pools), \
}, \

View File

@ -64,6 +64,7 @@ extern void _PyAtExit_Fini(PyInterpreterState *interp);
extern void _PyThread_FiniType(PyInterpreterState *interp);
extern void _Py_Deepfreeze_Fini(void);
extern void _PyArg_Fini(void);
extern void _Py_FinalizeAllocatedBlocks(_PyRuntimeState *);
extern PyStatus _PyGILState_Init(PyInterpreterState *interp);
extern PyStatus _PyGILState_SetTstate(PyThreadState *tstate);

View File

@ -33,6 +33,13 @@ _Py_IsMainInterpreter(PyInterpreterState *interp)
return (interp == _PyInterpreterState_Main());
}
/* Return 1 if the runtime that owns "interp" has a finalizing thread set
   and "interp" is that runtime's statically allocated main interpreter;
   return 0 otherwise. */
static inline int
_Py_IsMainInterpreterFinalizing(PyInterpreterState *interp)
{
    if (_PyRuntimeState_GetFinalizing(interp->runtime) == NULL) {
        /* Runtime finalization has not started. */
        return 0;
    }
    /* Compare against the address of the embedded main interpreter rather
       than a pointer looked up through the runtime. */
    return interp == &interp->runtime->_main_interpreter;
}
static inline const PyConfig *
_Py_GetMainConfig(void)

View File

@ -21,7 +21,6 @@ extern "C" {
#include "pycore_pymem.h" // struct _pymem_allocators
#include "pycore_pyhash.h" // struct pyhash_runtime_state
#include "pycore_pythread.h" // struct _pythread_runtime_state
#include "pycore_obmalloc.h" // struct obmalloc_state
#include "pycore_signal.h" // struct _signals_runtime_state
#include "pycore_time.h" // struct _time_runtime_state
#include "pycore_tracemalloc.h" // struct _tracemalloc_runtime_state
@ -88,7 +87,7 @@ typedef struct pyruntimestate {
_Py_atomic_address _finalizing;
struct _pymem_allocators allocators;
struct _obmalloc_state obmalloc;
struct _obmalloc_global_state obmalloc;
struct pyhash_runtime_state pyhash_state;
struct _time_runtime_state time;
struct _pythread_runtime_state threads;

View File

@ -29,7 +29,7 @@ extern PyTypeObject _PyExc_MemoryError;
_pymem_allocators_debug_INIT, \
_pymem_allocators_obj_arena_INIT, \
}, \
.obmalloc = _obmalloc_state_INIT(runtime.obmalloc), \
.obmalloc = _obmalloc_global_state_INIT, \
.pyhash_state = pyhash_state_INIT, \
.signals = _signals_RUNTIME_INIT, \
.interpreters = { \
@ -93,6 +93,7 @@ extern PyTypeObject _PyExc_MemoryError;
{ \
.id_refcount = -1, \
.imports = IMPORTS_INIT, \
.obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \
.ceval = { \
.recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
}, \

View File

@ -1211,20 +1211,25 @@ class SubinterpreterTest(unittest.TestCase):
"""
import json
OBMALLOC = 1<<5
EXTENSIONS = 1<<8
THREADS = 1<<10
DAEMON_THREADS = 1<<11
FORK = 1<<15
EXEC = 1<<16
features = ['fork', 'exec', 'threads', 'daemon_threads', 'extensions']
features = ['obmalloc', 'fork', 'exec', 'threads', 'daemon_threads',
'extensions']
kwlist = [f'allow_{n}' for n in features]
kwlist[0] = 'use_main_obmalloc'
kwlist[-1] = 'check_multi_interp_extensions'
# expected to work
for config, expected in {
(True, True, True, True, True):
FORK | EXEC | THREADS | DAEMON_THREADS | EXTENSIONS,
(False, False, False, False, False): 0,
(False, False, True, False, True): THREADS | EXTENSIONS,
(True, True, True, True, True, True):
OBMALLOC | FORK | EXEC | THREADS | DAEMON_THREADS | EXTENSIONS,
(True, False, False, False, False, False): OBMALLOC,
(False, False, False, True, False, True): THREADS | EXTENSIONS,
}.items():
kwargs = dict(zip(kwlist, config))
expected = {
@ -1246,6 +1251,20 @@ class SubinterpreterTest(unittest.TestCase):
self.assertEqual(settings, expected)
# expected to fail
for config in [
(False, False, False, False, False, False),
]:
kwargs = dict(zip(kwlist, config))
with self.subTest(config):
script = textwrap.dedent(f'''
import _testinternalcapi
_testinternalcapi.get_interp_settings()
raise NotImplementedError('unreachable')
''')
with self.assertRaises(RuntimeError):
support.run_in_subinterp_with_config(script, **kwargs)
@unittest.skipIf(_testsinglephase is None, "test requires _testsinglephase module")
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def test_overridden_setting_extensions_subinterp_check(self):
@ -1257,13 +1276,15 @@ class SubinterpreterTest(unittest.TestCase):
"""
import json
OBMALLOC = 1<<5
EXTENSIONS = 1<<8
THREADS = 1<<10
DAEMON_THREADS = 1<<11
FORK = 1<<15
EXEC = 1<<16
BASE_FLAGS = FORK | EXEC | THREADS | DAEMON_THREADS
BASE_FLAGS = OBMALLOC | FORK | EXEC | THREADS | DAEMON_THREADS
base_kwargs = {
'use_main_obmalloc': True,
'allow_fork': True,
'allow_exec': True,
'allow_threads': True,

View File

@ -1656,6 +1656,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
api=API_PYTHON, env=env)
def test_init_main_interpreter_settings(self):
OBMALLOC = 1<<5
EXTENSIONS = 1<<8
THREADS = 1<<10
DAEMON_THREADS = 1<<11
@ -1664,7 +1665,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
expected = {
# All optional features should be enabled.
'feature_flags':
FORK | EXEC | THREADS | DAEMON_THREADS,
OBMALLOC | FORK | EXEC | THREADS | DAEMON_THREADS,
}
out, err = self.run_embedded_interpreter(
'test_init_main_interpreter_settings',

View File

@ -1636,7 +1636,12 @@ class SubinterpImportTests(unittest.TestCase):
allow_exec=False,
allow_threads=True,
allow_daemon_threads=False,
# Isolation-related config values aren't included here.
)
ISOLATED = dict(
use_main_obmalloc=False,
)
NOT_ISOLATED = {k: not v for k, v in ISOLATED.items()}
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def pipe(self):
@ -1669,6 +1674,7 @@ class SubinterpImportTests(unittest.TestCase):
def run_here(self, name, *,
check_singlephase_setting=False,
check_singlephase_override=None,
isolated=False,
):
"""
Try importing the named module in a subinterpreter.
@ -1689,6 +1695,7 @@ class SubinterpImportTests(unittest.TestCase):
kwargs = dict(
**self.RUN_KWARGS,
**(self.ISOLATED if isolated else self.NOT_ISOLATED),
check_multi_interp_extensions=check_singlephase_setting,
)
@ -1699,33 +1706,36 @@ class SubinterpImportTests(unittest.TestCase):
self.assertEqual(ret, 0)
return os.read(r, 100)
def check_compatible_here(self, name, *, strict=False):
def check_compatible_here(self, name, *, strict=False, isolated=False):
# Verify that the named module may be imported in a subinterpreter.
# (See run_here() for more info.)
out = self.run_here(name,
check_singlephase_setting=strict,
isolated=isolated,
)
self.assertEqual(out, b'okay')
def check_incompatible_here(self, name):
def check_incompatible_here(self, name, *, isolated=False):
# Differences from check_compatible_here():
# * verify that import fails
# * "strict" is always True
out = self.run_here(name,
check_singlephase_setting=True,
isolated=isolated,
)
self.assertEqual(
out.decode('utf-8'),
f'ImportError: module {name} does not support loading in subinterpreters',
)
def check_compatible_fresh(self, name, *, strict=False):
def check_compatible_fresh(self, name, *, strict=False, isolated=False):
# Differences from check_compatible_here():
# * subinterpreter in a new process
# * module has never been imported before in that process
# * this tests importing the module for the first time
kwargs = dict(
**self.RUN_KWARGS,
**(self.ISOLATED if isolated else self.NOT_ISOLATED),
check_multi_interp_extensions=strict,
)
_, out, err = script_helper.assert_python_ok('-c', textwrap.dedent(f'''
@ -1743,12 +1753,13 @@ class SubinterpImportTests(unittest.TestCase):
self.assertEqual(err, b'')
self.assertEqual(out, b'okay')
def check_incompatible_fresh(self, name):
def check_incompatible_fresh(self, name, *, isolated=False):
# Differences from check_compatible_fresh():
# * verify that import fails
# * "strict" is always True
kwargs = dict(
**self.RUN_KWARGS,
**(self.ISOLATED if isolated else self.NOT_ISOLATED),
check_multi_interp_extensions=True,
)
_, out, err = script_helper.assert_python_ok('-c', textwrap.dedent(f'''
@ -1854,6 +1865,14 @@ class SubinterpImportTests(unittest.TestCase):
with self.subTest('config: check disabled; override: disabled'):
check_compatible(False, -1)
def test_isolated_config(self):
module = 'threading'
require_pure_python(module)
with self.subTest(f'{module}: strict, not fresh'):
self.check_compatible_here(module, strict=True, isolated=True)
with self.subTest(f'{module}: strict, fresh'):
self.check_compatible_fresh(module, strict=True, isolated=True)
class TestSinglePhaseSnapshot(ModuleSnapshot):

View File

@ -1343,6 +1343,7 @@ class SubinterpThreadingTests(BaseTestCase):
import test.support
test.support.run_in_subinterp_with_config(
{subinterp_code!r},
use_main_obmalloc=True,
allow_fork=True,
allow_exec=True,
allow_threads={allowed},

View File

@ -1482,6 +1482,7 @@ static PyObject *
run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs)
{
const char *code;
int use_main_obmalloc = -1;
int allow_fork = -1;
int allow_exec = -1;
int allow_threads = -1;
@ -1493,6 +1494,7 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs)
PyCompilerFlags cflags = {0};
static char *kwlist[] = {"code",
"use_main_obmalloc",
"allow_fork",
"allow_exec",
"allow_threads",
@ -1500,12 +1502,17 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs)
"check_multi_interp_extensions",
NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
"s$ppppp:run_in_subinterp_with_config", kwlist,
&code, &allow_fork, &allow_exec,
"s$pppppp:run_in_subinterp_with_config", kwlist,
&code, &use_main_obmalloc,
&allow_fork, &allow_exec,
&allow_threads, &allow_daemon_threads,
&check_multi_interp_extensions)) {
return NULL;
}
if (use_main_obmalloc < 0) {
PyErr_SetString(PyExc_ValueError, "missing use_main_obmalloc");
return NULL;
}
if (allow_fork < 0) {
PyErr_SetString(PyExc_ValueError, "missing allow_fork");
return NULL;
@ -1532,6 +1539,7 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs)
PyThreadState_Swap(NULL);
const _PyInterpreterConfig config = {
.use_main_obmalloc = use_main_obmalloc,
.allow_fork = allow_fork,
.allow_exec = allow_exec,
.allow_threads = allow_threads,

View File

@ -145,7 +145,7 @@ _PyDebug_PrintTotalRefs(void) {
_PyRuntimeState *runtime = &_PyRuntime;
fprintf(stderr,
"[%zd refs, %zd blocks]\n",
get_global_reftotal(runtime), _Py_GetAllocatedBlocks());
get_global_reftotal(runtime), _Py_GetGlobalAllocatedBlocks());
/* It may be helpful to also print the "legacy" reftotal separately.
Likewise for the total for each interpreter. */
}

View File

@ -725,20 +725,51 @@ PyObject_Free(void *ptr)
static int running_on_valgrind = -1;
#endif
typedef struct _obmalloc_state OMState;
#define allarenas (_PyRuntime.obmalloc.mgmt.arenas)
#define maxarenas (_PyRuntime.obmalloc.mgmt.maxarenas)
#define unused_arena_objects (_PyRuntime.obmalloc.mgmt.unused_arena_objects)
#define usable_arenas (_PyRuntime.obmalloc.mgmt.usable_arenas)
#define nfp2lasta (_PyRuntime.obmalloc.mgmt.nfp2lasta)
#define narenas_currently_allocated (_PyRuntime.obmalloc.mgmt.narenas_currently_allocated)
#define ntimes_arena_allocated (_PyRuntime.obmalloc.mgmt.ntimes_arena_allocated)
#define narenas_highwater (_PyRuntime.obmalloc.mgmt.narenas_highwater)
#define raw_allocated_blocks (_PyRuntime.obmalloc.mgmt.raw_allocated_blocks)
/* Return non-zero if "interp" allocates from its own obmalloc state,
   and 0 if it borrows the main interpreter's state instead. */
static inline int
has_own_state(PyInterpreterState *interp)
{
    /* The main interpreter always owns its state. */
    if (_Py_IsMainInterpreter(interp)) {
        return 1;
    }
    /* So does any interpreter that was not configured to share
       the main interpreter's obmalloc. */
    if (!(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
        return 1;
    }
    /* Last resort: the main interpreter while the runtime is finalizing. */
    return _Py_IsMainInterpreterFinalizing(interp);
}
/* Return the obmalloc state the current interpreter allocates from:
   its own state if it has one, otherwise the main interpreter's. */
static inline OMState *
get_state(void)
{
    PyInterpreterState *current = _PyInterpreterState_GET();
    PyInterpreterState *owner = has_own_state(current)
        ? current
        : _PyInterpreterState_Main();
    return &owner->obmalloc;
}
// These macros all rely on a local "state" variable.
#define usedpools (state->pools.used)
#define allarenas (state->mgmt.arenas)
#define maxarenas (state->mgmt.maxarenas)
#define unused_arena_objects (state->mgmt.unused_arena_objects)
#define usable_arenas (state->mgmt.usable_arenas)
#define nfp2lasta (state->mgmt.nfp2lasta)
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
#define narenas_highwater (state->mgmt.narenas_highwater)
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
Py_ssize_t
_Py_GetAllocatedBlocks(void)
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
{
#ifdef Py_DEBUG
assert(has_own_state(interp));
#else
if (!has_own_state(interp)) {
_Py_FatalErrorFunc(__func__,
"the interpreter doesn't have its own allocator");
}
#endif
OMState *state = &interp->obmalloc;
Py_ssize_t n = raw_allocated_blocks;
/* add up allocated blocks for used pools */
for (uint i = 0; i < maxarenas; ++i) {
@ -759,20 +790,100 @@ _Py_GetAllocatedBlocks(void)
return n;
}
/* Fold the number of blocks still allocated by "interp" into the
   runtime-wide "interpreter_leaks" counter.

   Nothing is recorded for an interpreter that does not have its own
   obmalloc state: has_own_state() is false for it, and
   _PyInterpreterState_GetAllocatedBlocks() must not be called on such an
   interpreter (it asserts / raises a fatal error in that case). */
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
    if (!has_own_state(interp)) {
        return;
    }
    /* The former "assert(has_own_state(interp) || leaked == 0)" was dropped:
       it sat inside the has_own_state() branch, so it could never fail. */
    Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
    interp->runtime->obmalloc.interpreter_leaks += leaked;
}
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
/* We preserve the number of blocks leaked during runtime finalization,
so they can be reported if the runtime is initialized again. */
// XXX We don't lose any information by dropping this,
// so we should consider doing so.
static Py_ssize_t last_final_leaks = 0;
/* Snapshot the runtime-wide block count into "last_final_leaks" and
   reset the per-runtime "interpreter_leaks" counter. */
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
{
    /* The count must be taken before the reset below, because
       get_num_global_allocated_blocks() reads interpreter_leaks. */
    Py_ssize_t still_allocated = get_num_global_allocated_blocks(runtime);
    runtime->obmalloc.interpreter_leaks = 0;
    last_final_leaks = still_allocated;
}
/* Return the runtime-wide count of allocated blocks.

   The result is the sum of:
     * the per-interpreter counts reported by
       _PyInterpreterState_GetAllocatedBlocks() (see the two branches below
       for which interpreters are consulted),
     * runtime->obmalloc.interpreter_leaks, and
     * last_final_leaks. */
static Py_ssize_t
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
{
Py_ssize_t total = 0;
if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
/* The runtime is finalizing: only the main interpreter (if it is still
   registered) is counted, and the interpreter list is not locked here. */
PyInterpreterState *interp = _PyInterpreterState_Main();
if (interp == NULL) {
/* We are at the very end of runtime finalization.
We can't rely on finalizing->interp since that thread
state is probably already freed, so we don't worry
about it. */
assert(PyInterpreterState_Head() == NULL);
}
else {
assert(interp != NULL);
/* It is probably the last interpreter but not necessarily. */
/* NOTE(review): the assert below nevertheless requires that no
   interpreter follows this one in the list, which contradicts
   "not necessarily" above -- confirm which is intended. */
assert(PyInterpreterState_Next(interp) == NULL);
total += _PyInterpreterState_GetAllocatedBlocks(interp);
}
}
else {
/* Normal operation: walk every interpreter between HEAD_LOCK() and
   HEAD_UNLOCK(). */
HEAD_LOCK(runtime);
PyInterpreterState *interp = PyInterpreterState_Head();
assert(interp != NULL);
#ifdef Py_DEBUG
/* Debug-only bookkeeping: the main interpreter must be seen exactly once. */
int got_main = 0;
#endif
for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
#ifdef Py_DEBUG
if (_Py_IsMainInterpreter(interp)) {
assert(!got_main);
got_main = 1;
assert(has_own_state(interp));
}
#endif
/* Interpreters without their own state are skipped; get_state() directs
   their allocations to the main interpreter's state, which is counted
   when the main interpreter is visited. */
if (has_own_state(interp)) {
total += _PyInterpreterState_GetAllocatedBlocks(interp);
}
}
HEAD_UNLOCK(runtime);
#ifdef Py_DEBUG
assert(got_main);
#endif
}
/* Add the leak counts recorded by the finalization helpers. */
total += runtime->obmalloc.interpreter_leaks;
total += last_final_leaks;
return total;
}
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
return get_num_global_allocated_blocks(&_PyRuntime);
}
#if WITH_PYMALLOC_RADIX_TREE
/*==========================================================================*/
/* radix tree for tracking arena usage. */
#define arena_map_root (_PyRuntime.obmalloc.usage.arena_map_root)
#define arena_map_root (state->usage.arena_map_root)
#ifdef USE_INTERIOR_NODES
#define arena_map_mid_count (_PyRuntime.obmalloc.usage.arena_map_mid_count)
#define arena_map_bot_count (_PyRuntime.obmalloc.usage.arena_map_bot_count)
#define arena_map_mid_count (state->usage.arena_map_mid_count)
#define arena_map_bot_count (state->usage.arena_map_bot_count)
#endif
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
* it cannot be created */
static Py_ALWAYS_INLINE arena_map_bot_t *
arena_map_get(pymem_block *p, int create)
arena_map_get(OMState *state, pymem_block *p, int create)
{
#ifdef USE_INTERIOR_NODES
/* sanity check that IGNORE_BITS is correct */
@ -833,11 +944,12 @@ arena_map_get(pymem_block *p, int create)
/* mark or unmark addresses covered by arena */
static int
arena_map_mark_used(uintptr_t arena_base, int is_used)
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
{
/* sanity check that IGNORE_BITS is correct */
assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
arena_map_bot_t *n_hi = arena_map_get((pymem_block *)arena_base, is_used);
arena_map_bot_t *n_hi = arena_map_get(
state, (pymem_block *)arena_base, is_used);
if (n_hi == NULL) {
assert(is_used); /* otherwise node should already exist */
return 0; /* failed to allocate space for node */
@ -862,7 +974,8 @@ arena_map_mark_used(uintptr_t arena_base, int is_used)
* must overflow to 0. However, that would mean arena_base was
* "ideal" and we should not be in this case. */
assert(arena_base < arena_base_next);
arena_map_bot_t *n_lo = arena_map_get((pymem_block *)arena_base_next, is_used);
arena_map_bot_t *n_lo = arena_map_get(
state, (pymem_block *)arena_base_next, is_used);
if (n_lo == NULL) {
assert(is_used); /* otherwise should already exist */
n_hi->arenas[i3].tail_hi = 0;
@ -877,9 +990,9 @@ arena_map_mark_used(uintptr_t arena_base, int is_used)
/* Return true if 'p' is a pointer inside an obmalloc arena.
* _PyObject_Free() calls this so it needs to be very fast. */
static int
arena_map_is_used(pymem_block *p)
arena_map_is_used(OMState *state, pymem_block *p)
{
arena_map_bot_t *n = arena_map_get(p, 0);
arena_map_bot_t *n = arena_map_get(state, p, 0);
if (n == NULL) {
return 0;
}
@ -902,7 +1015,7 @@ arena_map_is_used(pymem_block *p)
* `usable_arenas` to the return value.
*/
static struct arena_object*
new_arena(void)
new_arena(OMState *state)
{
struct arena_object* arenaobj;
uint excess; /* number of bytes above pool alignment */
@ -968,7 +1081,7 @@ new_arena(void)
address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
#if WITH_PYMALLOC_RADIX_TREE
if (address != NULL) {
if (!arena_map_mark_used((uintptr_t)address, 1)) {
if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
/* marking arena in radix tree failed, abort */
_PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
address = NULL;
@ -1011,9 +1124,9 @@ new_arena(void)
pymalloc. When the radix tree is used, 'poolp' is unused.
*/
static bool
address_in_range(void *p, poolp Py_UNUSED(pool))
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
{
return arena_map_is_used(p);
return arena_map_is_used(state, p);
}
#else
/*
@ -1094,7 +1207,7 @@ extremely desirable that it be this fast.
static bool _Py_NO_SANITIZE_ADDRESS
_Py_NO_SANITIZE_THREAD
_Py_NO_SANITIZE_MEMORY
address_in_range(void *p, poolp pool)
address_in_range(OMState *state, void *p, poolp pool)
{
// Since address_in_range may be reading from memory which was not allocated
// by Python, it is important that pool->arenaindex is read only once, as
@ -1111,8 +1224,6 @@ address_in_range(void *p, poolp pool)
/*==========================================================================*/
#define usedpools (_PyRuntime.obmalloc.pools.used)
// Called when freelist is exhausted. Extend the freelist if there is
// space for a block. Otherwise, remove this pool from usedpools.
static void
@ -1138,7 +1249,7 @@ pymalloc_pool_extend(poolp pool, uint size)
* This function takes new pool and allocate a block from it.
*/
static void*
allocate_from_new_pool(uint size)
allocate_from_new_pool(OMState *state, uint size)
{
/* There isn't a pool of the right size class immediately
* available: use a free pool.
@ -1150,7 +1261,7 @@ allocate_from_new_pool(uint size)
return NULL;
}
#endif
usable_arenas = new_arena();
usable_arenas = new_arena(state);
if (usable_arenas == NULL) {
return NULL;
}
@ -1274,7 +1385,7 @@ allocate_from_new_pool(uint size)
or when the max memory limit has been reached.
*/
static inline void*
pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
{
#ifdef WITH_VALGRIND
if (UNLIKELY(running_on_valgrind == -1)) {
@ -1314,7 +1425,7 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
/* There isn't a pool of the right size class immediately
* available: use a free pool.
*/
bp = allocate_from_new_pool(size);
bp = allocate_from_new_pool(state, size);
}
return (void *)bp;
@ -1324,7 +1435,8 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
void* ptr = pymalloc_alloc(ctx, nbytes);
OMState *state = get_state();
void* ptr = pymalloc_alloc(state, ctx, nbytes);
if (LIKELY(ptr != NULL)) {
return ptr;
}
@ -1343,7 +1455,8 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
size_t nbytes = nelem * elsize;
void* ptr = pymalloc_alloc(ctx, nbytes);
OMState *state = get_state();
void* ptr = pymalloc_alloc(state, ctx, nbytes);
if (LIKELY(ptr != NULL)) {
memset(ptr, 0, nbytes);
return ptr;
@ -1358,7 +1471,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
static void
insert_to_usedpool(poolp pool)
insert_to_usedpool(OMState *state, poolp pool)
{
assert(pool->ref.count > 0); /* else the pool is empty */
@ -1374,7 +1487,7 @@ insert_to_usedpool(poolp pool)
}
static void
insert_to_freepool(poolp pool)
insert_to_freepool(OMState *state, poolp pool)
{
poolp next = pool->nextpool;
poolp prev = pool->prevpool;
@ -1457,7 +1570,7 @@ insert_to_freepool(poolp pool)
#if WITH_PYMALLOC_RADIX_TREE
/* mark arena region as not under control of obmalloc */
arena_map_mark_used(ao->address, 0);
arena_map_mark_used(state, ao->address, 0);
#endif
/* Free the entire arena. */
@ -1544,7 +1657,7 @@ insert_to_freepool(poolp pool)
Return 1 if it was freed.
Return 0 if the block was not allocated by pymalloc_alloc(). */
static inline int
pymalloc_free(void *Py_UNUSED(ctx), void *p)
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
{
assert(p != NULL);
@ -1555,7 +1668,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
#endif
poolp pool = POOL_ADDR(p);
if (UNLIKELY(!address_in_range(p, pool))) {
if (UNLIKELY(!address_in_range(state, p, pool))) {
return 0;
}
/* We allocated this address. */
@ -1579,7 +1692,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
* targets optimal filling when several pools contain
* blocks of the same size class.
*/
insert_to_usedpool(pool);
insert_to_usedpool(state, pool);
return 1;
}
@ -1596,7 +1709,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
* previously freed pools will be allocated later
* (being not referenced, they are perhaps paged out).
*/
insert_to_freepool(pool);
insert_to_freepool(state, pool);
return 1;
}
@ -1609,7 +1722,8 @@ _PyObject_Free(void *ctx, void *p)
return;
}
if (UNLIKELY(!pymalloc_free(ctx, p))) {
OMState *state = get_state();
if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
/* pymalloc didn't allocate this address */
PyMem_RawFree(p);
raw_allocated_blocks--;
@ -1627,7 +1741,8 @@ _PyObject_Free(void *ctx, void *p)
Return 0 if pymalloc didn't allocated p. */
static int
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
pymalloc_realloc(OMState *state, void *ctx,
void **newptr_p, void *p, size_t nbytes)
{
void *bp;
poolp pool;
@ -1643,7 +1758,7 @@ pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
#endif
pool = POOL_ADDR(p);
if (!address_in_range(p, pool)) {
if (!address_in_range(state, p, pool)) {
/* pymalloc is not managing this block.
If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
@ -1696,7 +1811,8 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
return _PyObject_Malloc(ctx, nbytes);
}
if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
OMState *state = get_state();
if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
return ptr2;
}
@ -1710,11 +1826,29 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
* only be used by extensions that are compiled with pymalloc enabled. */
Py_ssize_t
_Py_GetAllocatedBlocks(void)
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
return 0;
}
/* Stub used when pymalloc is not compiled in: no blocks are tracked,
   so the global count is always zero. */
Py_ssize_t
_Py_GetGlobalAllocatedBlocks(void)
{
    const Py_ssize_t no_blocks = 0;
    return no_blocks;
}
/* Stub used when pymalloc is not compiled in: there is nothing to record. */
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
{
    /* Intentionally empty. */
}
/* Stub used when pymalloc is not compiled in: there is nothing to record. */
void
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
{
    /* Intentionally empty. */
}
#endif /* WITH_PYMALLOC */
@ -2289,6 +2423,7 @@ _PyObject_DebugMallocStats(FILE *out)
if (!_PyMem_PymallocEnabled()) {
return 0;
}
OMState *state = get_state();
uint i;
const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;

View File

@ -547,11 +547,21 @@ pycore_init_runtime(_PyRuntimeState *runtime,
}
static void
static PyStatus
init_interp_settings(PyInterpreterState *interp, const _PyInterpreterConfig *config)
{
assert(interp->feature_flags == 0);
if (config->use_main_obmalloc) {
interp->feature_flags |= Py_RTFLAGS_USE_MAIN_OBMALLOC;
}
else if (!config->check_multi_interp_extensions) {
/* The reason: PyModuleDef.m_base.m_copy leaks objects between
interpreters. */
return _PyStatus_ERR("per-interpreter obmalloc does not support "
"single-phase init extension modules");
}
if (config->allow_fork) {
interp->feature_flags |= Py_RTFLAGS_FORK;
}
@ -570,6 +580,8 @@ init_interp_settings(PyInterpreterState *interp, const _PyInterpreterConfig *con
if (config->check_multi_interp_extensions) {
interp->feature_flags |= Py_RTFLAGS_MULTI_INTERP_EXTENSIONS;
}
return _PyStatus_OK();
}
@ -622,7 +634,10 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
}
const _PyInterpreterConfig config = _PyInterpreterConfig_LEGACY_INIT;
init_interp_settings(interp, &config);
status = init_interp_settings(interp, &config);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
PyThreadState *tstate = _PyThreadState_New(interp);
if (tstate == NULL) {
@ -1668,6 +1683,8 @@ finalize_interp_types(PyInterpreterState *interp)
_PyFloat_FiniType(interp);
_PyLong_FiniTypes(interp);
_PyThread_FiniType(interp);
// XXX fini collections module static types (_PyStaticType_Dealloc())
// XXX fini IO module static types (_PyStaticType_Dealloc())
_PyErr_FiniTypes(interp);
_PyTypes_FiniTypes(interp);
@ -1936,6 +1953,7 @@ Py_FinalizeEx(void)
}
_Py_FinalizeRefTotal(runtime);
#endif
_Py_FinalizeAllocatedBlocks(runtime);
#ifdef Py_TRACE_REFS
/* Display addresses (& refcnts) of all objects still alive.
@ -2036,7 +2054,10 @@ new_interpreter(PyThreadState **tstate_p, const _PyInterpreterConfig *config)
goto error;
}
init_interp_settings(interp, config);
status = init_interp_settings(interp, config);
if (_PyStatus_EXCEPTION(status)) {
goto error;
}
status = init_interp_create_gil(tstate);
if (_PyStatus_EXCEPTION(status)) {

View File

@ -671,6 +671,14 @@ init_interpreter(PyInterpreterState *interp,
assert(next != NULL || (interp == runtime->interpreters.main));
interp->next = next;
/* Initialize obmalloc, but only for subinterpreters,
since the main interpreter is initialized statically. */
if (interp != &runtime->_main_interpreter) {
poolp temp[OBMALLOC_USED_POOLS_SIZE] = \
_obmalloc_pools_INIT(interp->obmalloc.pools);
memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp));
}
_PyEval_InitState(&interp->ceval, pending_lock);
_PyGC_InitState(&interp->gc);
PyConfig_InitPythonConfig(&interp->config);
@ -941,11 +949,12 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
_PyEval_FiniState(&interp->ceval);
#ifdef Py_REF_DEBUG
// XXX This call should be done at the end of clear_interpreter(),
// XXX These two calls should be done at the end of clear_interpreter(),
// but currently some objects get decref'ed after that.
#ifdef Py_REF_DEBUG
_PyInterpreterState_FinalizeRefTotal(interp);
#endif
_PyInterpreterState_FinalizeAllocatedBlocks(interp);
HEAD_LOCK(runtime);
PyInterpreterState **p;
@ -2320,11 +2329,11 @@ _PyCrossInterpreterData_InitWithSize(_PyCrossInterpreterData *data,
// where it was allocated, so the interpreter is required.
assert(interp != NULL);
_PyCrossInterpreterData_Init(data, interp, NULL, obj, new_object);
data->data = PyMem_Malloc(size);
data->data = PyMem_RawMalloc(size);
if (data->data == NULL) {
return -1;
}
data->free = PyMem_Free;
data->free = PyMem_RawFree;
return 0;
}

View File

@ -1871,7 +1871,9 @@ static Py_ssize_t
sys_getallocatedblocks_impl(PyObject *module)
/*[clinic end generated code: output=f0c4e873f0b6dcf7 input=dab13ee346a0673e]*/
{
return _Py_GetAllocatedBlocks();
// It might make sense to return the count
// for just the current interpreter.
return _Py_GetGlobalAllocatedBlocks();
}
/*[clinic input]

View File

@ -309,6 +309,7 @@ Objects/obmalloc.c - _PyMem -
Objects/obmalloc.c - _PyMem_Debug -
Objects/obmalloc.c - _PyMem_Raw -
Objects/obmalloc.c - _PyObject -
Objects/obmalloc.c - last_final_leaks -
Objects/obmalloc.c - usedpools -
Objects/typeobject.c - name_op -
Objects/typeobject.c - slotdefs -

Can't render this file because it has a wrong number of fields in line 4.