From df3173d28ef25a0f97d2cca8cf4e64e062a08d06 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 24 Apr 2023 17:23:57 -0600 Subject: [PATCH] gh-101659: Isolate "obmalloc" State to Each Interpreter (gh-101660) This is strictly about moving the "obmalloc" runtime state from `_PyRuntimeState` to `PyInterpreterState`. Doing so improves isolation between interpreters, specifically most of the memory (incl. objects) allocated for each interpreter's use. This is important for a per-interpreter GIL, but such isolation is valuable even without it. FWIW, a per-interpreter obmalloc is the proverbial canary-in-the-coalmine when it comes to the isolation of objects between interpreters. Any object that leaks (unintentionally) to another interpreter is highly likely to cause a crash (on debug builds at least). That's a useful thing to know, relative to interpreter isolation. --- Include/cpython/initconfig.h | 4 + Include/cpython/pystate.h | 4 + Include/internal/pycore_interp.h | 5 +- Include/internal/pycore_obmalloc.h | 12 +- Include/internal/pycore_obmalloc_init.h | 6 +- Include/internal/pycore_pylifecycle.h | 1 + Include/internal/pycore_pystate.h | 7 + Include/internal/pycore_runtime.h | 3 +- Include/internal/pycore_runtime_init.h | 3 +- Lib/test/test_capi/test_misc.py | 33 +++- Lib/test/test_embed.py | 3 +- Lib/test/test_import/__init__.py | 27 ++- Lib/test/test_threading.py | 1 + Modules/_testcapimodule.c | 12 +- Objects/object.c | 2 +- Objects/obmalloc.c | 223 +++++++++++++++++++----- Python/pylifecycle.c | 27 ++- Python/pystate.c | 17 +- Python/sysmodule.c | 4 +- Tools/c-analyzer/cpython/ignored.tsv | 1 + 20 files changed, 322 insertions(+), 73 deletions(-) diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 8bc681b1a93..79c1023baa9 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -245,6 +245,8 @@ PyAPI_FUNC(PyStatus) PyConfig_SetWideStringList(PyConfig *config, /* --- PyInterpreterConfig ------------------------------------ */ typedef struct { + // XXX "allow_object_sharing"? "own_objects"? + int use_main_obmalloc; int allow_fork; int allow_exec; int allow_threads; @@ -254,6 +256,7 @@ typedef struct { #define _PyInterpreterConfig_INIT \ { \ + .use_main_obmalloc = 0, \ .allow_fork = 0, \ .allow_exec = 0, \ .allow_threads = 1, \ @@ -263,6 +266,7 @@ typedef struct { #define _PyInterpreterConfig_LEGACY_INIT \ { \ + .use_main_obmalloc = 1, \ .allow_fork = 1, \ .allow_exec = 1, \ .allow_threads = 1, \ diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index ea6ed8d2bc4..f33c72d4cf4 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -11,6 +11,10 @@ is available in a given context. For example, forking the process might not be allowed in the current interpreter (i.e. os.fork() would fail). */ +/* Set if the interpreter share obmalloc runtime state + with the main interpreter. */ +#define Py_RTFLAGS_USE_MAIN_OBMALLOC (1UL << 5) + /* Set if import should check a module for subinterpreter support. */ #define Py_RTFLAGS_MULTI_INTERP_EXTENSIONS (1UL << 8) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 86ae3d8dfc1..7276ce35ba6 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -23,11 +23,12 @@ extern "C" { #include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_genobject.h" // struct _Py_async_gen_state #include "pycore_gc.h" // struct _gc_runtime_state +#include "pycore_global_objects.h" // struct _Py_interp_static_objects #include "pycore_import.h" // struct _import_state #include "pycore_instruments.h" // PY_MONITORING_EVENTS #include "pycore_list.h" // struct _Py_list_state -#include "pycore_global_objects.h" // struct _Py_interp_static_objects #include "pycore_object_state.h" // struct _py_object_state +#include "pycore_obmalloc.h" // struct obmalloc_state #include "pycore_tuple.h" // struct _Py_tuple_state #include "pycore_typeobject.h" // struct type_cache #include "pycore_unicodeobject.h" // struct _Py_unicode_state @@ -82,6 +83,8 @@ struct _is { int _initialized; int finalizing; + struct _obmalloc_state obmalloc; + struct _ceval_state ceval; struct _gc_runtime_state gc; diff --git a/Include/internal/pycore_obmalloc.h b/Include/internal/pycore_obmalloc.h index a5c7f4528f9..ca2a0419b4f 100644 --- a/Include/internal/pycore_obmalloc.h +++ b/Include/internal/pycore_obmalloc.h @@ -657,8 +657,12 @@ struct _obmalloc_usage { #endif /* WITH_PYMALLOC_RADIX_TREE */ -struct _obmalloc_state { +struct _obmalloc_global_state { int dump_debug_stats; + Py_ssize_t interpreter_leaks; +}; + +struct _obmalloc_state { struct _obmalloc_pools pools; struct _obmalloc_mgmt mgmt; struct _obmalloc_usage usage; @@ -675,7 +679,11 @@ void _PyObject_VirtualFree(void *, size_t size); /* This function returns the number of allocated memory blocks, regardless of size */ -PyAPI_FUNC(Py_ssize_t) _Py_GetAllocatedBlocks(void); +extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void); +#define _Py_GetAllocatedBlocks() \ + _Py_GetGlobalAllocatedBlocks() +extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *); +extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *); #ifdef WITH_PYMALLOC diff --git a/Include/internal/pycore_obmalloc_init.h b/Include/internal/pycore_obmalloc_init.h index c9f197e72de..8ee72ff2d41 100644 --- a/Include/internal/pycore_obmalloc_init.h +++ b/Include/internal/pycore_obmalloc_init.h @@ -54,9 +54,13 @@ extern "C" { # error "NB_SMALL_SIZE_CLASSES should be less than 64" #endif -#define _obmalloc_state_INIT(obmalloc) \ +#define _obmalloc_global_state_INIT \ { \ .dump_debug_stats = -1, \ + } + +#define _obmalloc_state_INIT(obmalloc) \ + { \ .pools = { \ .used = _obmalloc_pools_INIT(obmalloc.pools), \ }, \ diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index a899e848bb8..f96261a650d 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -64,6 +64,7 @@ extern void _PyAtExit_Fini(PyInterpreterState *interp); extern void _PyThread_FiniType(PyInterpreterState *interp); extern void _Py_Deepfreeze_Fini(void); extern void _PyArg_Fini(void); +extern void _Py_FinalizeAllocatedBlocks(_PyRuntimeState *); extern PyStatus _PyGILState_Init(PyInterpreterState *interp); extern PyStatus _PyGILState_SetTstate(PyThreadState *tstate); diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index c40f9e7393a..180ea676bc2 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -33,6 +33,13 @@ _Py_IsMainInterpreter(PyInterpreterState *interp) return (interp == _PyInterpreterState_Main()); } +static inline int +_Py_IsMainInterpreterFinalizing(PyInterpreterState *interp) +{ + return (_PyRuntimeState_GetFinalizing(interp->runtime) != NULL && + interp == &interp->runtime->_main_interpreter); +} + static inline const PyConfig * _Py_GetMainConfig(void) diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 789a1fbc928..d1b165d0ab9 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -21,7 +21,6 @@ extern "C" { #include "pycore_pymem.h" // struct _pymem_allocators #include "pycore_pyhash.h" // struct pyhash_runtime_state #include "pycore_pythread.h" // struct _pythread_runtime_state -#include "pycore_obmalloc.h" // struct obmalloc_state #include "pycore_signal.h" // struct _signals_runtime_state #include "pycore_time.h" // struct _time_runtime_state #include "pycore_tracemalloc.h" // struct _tracemalloc_runtime_state @@ -88,7 +87,7 @@ typedef struct pyruntimestate { _Py_atomic_address _finalizing; struct _pymem_allocators allocators; - struct _obmalloc_state obmalloc; + struct _obmalloc_global_state obmalloc; struct pyhash_runtime_state pyhash_state; struct _time_runtime_state time; struct _pythread_runtime_state threads; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 843f8a34410..a48461c0742 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -29,7 +29,7 @@ extern PyTypeObject _PyExc_MemoryError; _pymem_allocators_debug_INIT, \ _pymem_allocators_obj_arena_INIT, \ }, \ - .obmalloc = _obmalloc_state_INIT(runtime.obmalloc), \ + .obmalloc = _obmalloc_global_state_INIT, \ .pyhash_state = pyhash_state_INIT, \ .signals = _signals_RUNTIME_INIT, \ .interpreters = { \ @@ -93,6 +93,7 @@ extern PyTypeObject _PyExc_MemoryError; { \ .id_refcount = -1, \ .imports = IMPORTS_INIT, \ + .obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \ .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ }, \ diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 637adc01a33..eab69300196 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -1211,20 +1211,25 @@ class SubinterpreterTest(unittest.TestCase): """ import json + OBMALLOC = 1<<5 EXTENSIONS = 1<<8 THREADS = 1<<10 DAEMON_THREADS = 1<<11 FORK = 1<<15 EXEC = 1<<16 - features = ['fork', 'exec', 'threads', 'daemon_threads', 'extensions'] + features = ['obmalloc', 'fork', 'exec', 'threads', 'daemon_threads', + 'extensions'] kwlist = [f'allow_{n}' for n in features] + kwlist[0] = 'use_main_obmalloc' kwlist[-1] = 'check_multi_interp_extensions' + + # expected to work for config, expected in { - (True, True, True, True, True): - FORK | EXEC | THREADS | DAEMON_THREADS | EXTENSIONS, - (False, False, False, False, False): 0, - (False, False, True, False, True): THREADS | EXTENSIONS, + (True, True, True, True, True, True): + OBMALLOC | FORK | EXEC | THREADS | DAEMON_THREADS | EXTENSIONS, + (True, False, False, False, False, False): OBMALLOC, + (False, False, False, True, False, True): THREADS | EXTENSIONS, }.items(): kwargs = dict(zip(kwlist, config)) expected = { @@ -1246,6 +1251,20 @@ class SubinterpreterTest(unittest.TestCase): self.assertEqual(settings, expected) + # expected to fail + for config in [ + (False, False, False, False, False, False), + ]: + kwargs = dict(zip(kwlist, config)) + with self.subTest(config): + script = textwrap.dedent(f''' + import _testinternalcapi + _testinternalcapi.get_interp_settings() + raise NotImplementedError('unreachable') + ''') + with self.assertRaises(RuntimeError): + support.run_in_subinterp_with_config(script, **kwargs) + @unittest.skipIf(_testsinglephase is None, "test requires _testsinglephase module") @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_overridden_setting_extensions_subinterp_check(self): @@ -1257,13 +1276,15 @@ class SubinterpreterTest(unittest.TestCase): """ import json + OBMALLOC = 1<<5 EXTENSIONS = 1<<8 THREADS = 1<<10 DAEMON_THREADS = 1<<11 FORK = 1<<15 EXEC = 1<<16 - BASE_FLAGS = FORK | EXEC | THREADS | DAEMON_THREADS + BASE_FLAGS = OBMALLOC | FORK | EXEC | THREADS | DAEMON_THREADS base_kwargs = { + 'use_main_obmalloc': True, 'allow_fork': True, 'allow_exec': True, 'allow_threads': True, diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index e56d0db8627..f702ffb9990 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1656,6 +1656,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): api=API_PYTHON, env=env) def test_init_main_interpreter_settings(self): + OBMALLOC = 1<<5 EXTENSIONS = 1<<8 THREADS = 1<<10 DAEMON_THREADS = 1<<11 @@ -1664,7 +1665,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): expected = { # All optional features should be enabled. 'feature_flags': - FORK | EXEC | THREADS | DAEMON_THREADS, + OBMALLOC | FORK | EXEC | THREADS | DAEMON_THREADS, } out, err = self.run_embedded_interpreter( 'test_init_main_interpreter_settings', diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 66ae554f984..f206e52faf8 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1636,7 +1636,12 @@ class SubinterpImportTests(unittest.TestCase): allow_exec=False, allow_threads=True, allow_daemon_threads=False, + # Isolation-related config values aren't included here. ) + ISOLATED = dict( + use_main_obmalloc=False, + ) + NOT_ISOLATED = {k: not v for k, v in ISOLATED.items()} @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def pipe(self): @@ -1669,6 +1674,7 @@ class SubinterpImportTests(unittest.TestCase): def run_here(self, name, *, check_singlephase_setting=False, check_singlephase_override=None, + isolated=False, ): """ Try importing the named module in a subinterpreter. @@ -1689,6 +1695,7 @@ class SubinterpImportTests(unittest.TestCase): kwargs = dict( **self.RUN_KWARGS, + **(self.ISOLATED if isolated else self.NOT_ISOLATED), check_multi_interp_extensions=check_singlephase_setting, ) @@ -1699,33 +1706,36 @@ class SubinterpImportTests(unittest.TestCase): self.assertEqual(ret, 0) return os.read(r, 100) - def check_compatible_here(self, name, *, strict=False): + def check_compatible_here(self, name, *, strict=False, isolated=False): # Verify that the named module may be imported in a subinterpreter. # (See run_here() for more info.) out = self.run_here(name, check_singlephase_setting=strict, + isolated=isolated, ) self.assertEqual(out, b'okay') - def check_incompatible_here(self, name): + def check_incompatible_here(self, name, *, isolated=False): # Differences from check_compatible_here(): # * verify that import fails # * "strict" is always True out = self.run_here(name, check_singlephase_setting=True, + isolated=isolated, ) self.assertEqual( out.decode('utf-8'), f'ImportError: module {name} does not support loading in subinterpreters', ) - def check_compatible_fresh(self, name, *, strict=False): + def check_compatible_fresh(self, name, *, strict=False, isolated=False): # Differences from check_compatible_here(): # * subinterpreter in a new process # * module has never been imported before in that process # * this tests importing the module for the first time kwargs = dict( **self.RUN_KWARGS, + **(self.ISOLATED if isolated else self.NOT_ISOLATED), check_multi_interp_extensions=strict, ) _, out, err = script_helper.assert_python_ok('-c', textwrap.dedent(f''' @@ -1743,12 +1753,13 @@ class SubinterpImportTests(unittest.TestCase): self.assertEqual(err, b'') self.assertEqual(out, b'okay') - def check_incompatible_fresh(self, name): + def check_incompatible_fresh(self, name, *, isolated=False): # Differences from check_compatible_fresh(): # * verify that import fails # * "strict" is always True kwargs = dict( **self.RUN_KWARGS, + **(self.ISOLATED if isolated else self.NOT_ISOLATED), check_multi_interp_extensions=True, ) _, out, err = script_helper.assert_python_ok('-c', textwrap.dedent(f''' @@ -1854,6 +1865,14 @@ class SubinterpImportTests(unittest.TestCase): with self.subTest('config: check disabled; override: disabled'): check_compatible(False, -1) + def test_isolated_config(self): + module = 'threading' + require_pure_python(module) + with self.subTest(f'{module}: strict, not fresh'): + self.check_compatible_here(module, strict=True, isolated=True) + with self.subTest(f'{module}: strict, fresh'): + self.check_compatible_fresh(module, strict=True, isolated=True) + class TestSinglePhaseSnapshot(ModuleSnapshot): diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index a39a267b403..fdd74c37e26 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1343,6 +1343,7 @@ class SubinterpThreadingTests(BaseTestCase): import test.support test.support.run_in_subinterp_with_config( {subinterp_code!r}, + use_main_obmalloc=True, allow_fork=True, allow_exec=True, allow_threads={allowed}, diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 30b2674d543..c1892f6fa0a 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1482,6 +1482,7 @@ static PyObject * run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) { const char *code; + int use_main_obmalloc = -1; int allow_fork = -1; int allow_exec = -1; int allow_threads = -1; @@ -1493,6 +1494,7 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) PyCompilerFlags cflags = {0}; static char *kwlist[] = {"code", + "use_main_obmalloc", "allow_fork", "allow_exec", "allow_threads", @@ -1500,12 +1502,17 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) "check_multi_interp_extensions", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "s$ppppp:run_in_subinterp_with_config", kwlist, - &code, &allow_fork, &allow_exec, + "s$pppppp:run_in_subinterp_with_config", kwlist, + &code, &use_main_obmalloc, + &allow_fork, &allow_exec, &allow_threads, &allow_daemon_threads, &check_multi_interp_extensions)) { return NULL; } + if (use_main_obmalloc < 0) { + PyErr_SetString(PyExc_ValueError, "missing use_main_obmalloc"); + return NULL; + } if (allow_fork < 0) { PyErr_SetString(PyExc_ValueError, "missing allow_fork"); return NULL; @@ -1532,6 +1539,7 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) PyThreadState_Swap(NULL); const _PyInterpreterConfig config = { + .use_main_obmalloc = use_main_obmalloc, .allow_fork = allow_fork, .allow_exec = allow_exec, .allow_threads = allow_threads, diff --git a/Objects/object.c b/Objects/object.c index a784e6bcbf9..65c296e9340 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -145,7 +145,7 @@ _PyDebug_PrintTotalRefs(void) { _PyRuntimeState *runtime = &_PyRuntime; fprintf(stderr, "[%zd refs, %zd blocks]\n", - get_global_reftotal(runtime), _Py_GetAllocatedBlocks()); + get_global_reftotal(runtime), _Py_GetGlobalAllocatedBlocks()); /* It may be helpful to also print the "legacy" reftotal separately. Likewise for the total for each interpreter. */ } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 5e1bcda1d97..de62aeb0446 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -725,20 +725,51 @@ PyObject_Free(void *ptr) static int running_on_valgrind = -1; #endif +typedef struct _obmalloc_state OMState; -#define allarenas (_PyRuntime.obmalloc.mgmt.arenas) -#define maxarenas (_PyRuntime.obmalloc.mgmt.maxarenas) -#define unused_arena_objects (_PyRuntime.obmalloc.mgmt.unused_arena_objects) -#define usable_arenas (_PyRuntime.obmalloc.mgmt.usable_arenas) -#define nfp2lasta (_PyRuntime.obmalloc.mgmt.nfp2lasta) -#define narenas_currently_allocated (_PyRuntime.obmalloc.mgmt.narenas_currently_allocated) -#define ntimes_arena_allocated (_PyRuntime.obmalloc.mgmt.ntimes_arena_allocated) -#define narenas_highwater (_PyRuntime.obmalloc.mgmt.narenas_highwater) -#define raw_allocated_blocks (_PyRuntime.obmalloc.mgmt.raw_allocated_blocks) +static inline int +has_own_state(PyInterpreterState *interp) +{ + return (_Py_IsMainInterpreter(interp) || + !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) || + _Py_IsMainInterpreterFinalizing(interp)); +} + +static inline OMState * +get_state(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!has_own_state(interp)) { + interp = _PyInterpreterState_Main(); + } + return &interp->obmalloc; +} + +// These macros all rely on a local "state" variable. +#define usedpools (state->pools.used) +#define allarenas (state->mgmt.arenas) +#define maxarenas (state->mgmt.maxarenas) +#define unused_arena_objects (state->mgmt.unused_arena_objects) +#define usable_arenas (state->mgmt.usable_arenas) +#define nfp2lasta (state->mgmt.nfp2lasta) +#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated) +#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated) +#define narenas_highwater (state->mgmt.narenas_highwater) +#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks) Py_ssize_t -_Py_GetAllocatedBlocks(void) +_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) { +#ifdef Py_DEBUG + assert(has_own_state(interp)); +#else + if (!has_own_state(interp)) { + _Py_FatalErrorFunc(__func__, + "the interpreter doesn't have its own allocator"); + } +#endif + OMState *state = &interp->obmalloc; + Py_ssize_t n = raw_allocated_blocks; /* add up allocated blocks for used pools */ for (uint i = 0; i < maxarenas; ++i) { @@ -759,20 +790,100 @@ _Py_GetAllocatedBlocks(void) return n; } +void +_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) +{ + if (has_own_state(interp)) { + Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp); + assert(has_own_state(interp) || leaked == 0); + interp->runtime->obmalloc.interpreter_leaks += leaked; + } +} + +static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *); + +/* We preserve the number of blockss leaked during runtime finalization, + so they can be reported if the runtime is initialized again. */ +// XXX We don't lose any information by dropping this, +// so we should consider doing so. +static Py_ssize_t last_final_leaks = 0; + +void +_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime) +{ + last_final_leaks = get_num_global_allocated_blocks(runtime); + runtime->obmalloc.interpreter_leaks = 0; +} + +static Py_ssize_t +get_num_global_allocated_blocks(_PyRuntimeState *runtime) +{ + Py_ssize_t total = 0; + if (_PyRuntimeState_GetFinalizing(runtime) != NULL) { + PyInterpreterState *interp = _PyInterpreterState_Main(); + if (interp == NULL) { + /* We are at the very end of runtime finalization. + We can't rely on finalizing->interp since that thread + state is probably already freed, so we don't worry + about it. */ + assert(PyInterpreterState_Head() == NULL); + } + else { + assert(interp != NULL); + /* It is probably the last interpreter but not necessarily. */ + assert(PyInterpreterState_Next(interp) == NULL); + total += _PyInterpreterState_GetAllocatedBlocks(interp); + } + } + else { + HEAD_LOCK(runtime); + PyInterpreterState *interp = PyInterpreterState_Head(); + assert(interp != NULL); +#ifdef Py_DEBUG + int got_main = 0; +#endif + for (; interp != NULL; interp = PyInterpreterState_Next(interp)) { +#ifdef Py_DEBUG + if (_Py_IsMainInterpreter(interp)) { + assert(!got_main); + got_main = 1; + assert(has_own_state(interp)); + } +#endif + if (has_own_state(interp)) { + total += _PyInterpreterState_GetAllocatedBlocks(interp); + } + } + HEAD_UNLOCK(runtime); +#ifdef Py_DEBUG + assert(got_main); +#endif + } + total += runtime->obmalloc.interpreter_leaks; + total += last_final_leaks; + return total; +} + +Py_ssize_t +_Py_GetGlobalAllocatedBlocks(void) +{ + return get_num_global_allocated_blocks(&_PyRuntime); +} + #if WITH_PYMALLOC_RADIX_TREE /*==========================================================================*/ /* radix tree for tracking arena usage. */ -#define arena_map_root (_PyRuntime.obmalloc.usage.arena_map_root) +#define arena_map_root (state->usage.arena_map_root) #ifdef USE_INTERIOR_NODES -#define arena_map_mid_count (_PyRuntime.obmalloc.usage.arena_map_mid_count) -#define arena_map_bot_count (_PyRuntime.obmalloc.usage.arena_map_bot_count) +#define arena_map_mid_count (state->usage.arena_map_mid_count) +#define arena_map_bot_count (state->usage.arena_map_bot_count) #endif /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or * it cannot be created */ static Py_ALWAYS_INLINE arena_map_bot_t * -arena_map_get(pymem_block *p, int create) +arena_map_get(OMState *state, pymem_block *p, int create) { #ifdef USE_INTERIOR_NODES /* sanity check that IGNORE_BITS is correct */ @@ -833,11 +944,12 @@ arena_map_get(pymem_block *p, int create) /* mark or unmark addresses covered by arena */ static int -arena_map_mark_used(uintptr_t arena_base, int is_used) +arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used) { /* sanity check that IGNORE_BITS is correct */ assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); - arena_map_bot_t *n_hi = arena_map_get((pymem_block *)arena_base, is_used); + arena_map_bot_t *n_hi = arena_map_get( + state, (pymem_block *)arena_base, is_used); if (n_hi == NULL) { assert(is_used); /* otherwise node should already exist */ return 0; /* failed to allocate space for node */ @@ -862,7 +974,8 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) * must overflow to 0. However, that would mean arena_base was * "ideal" and we should not be in this case. */ assert(arena_base < arena_base_next); - arena_map_bot_t *n_lo = arena_map_get((pymem_block *)arena_base_next, is_used); + arena_map_bot_t *n_lo = arena_map_get( + state, (pymem_block *)arena_base_next, is_used); if (n_lo == NULL) { assert(is_used); /* otherwise should already exist */ n_hi->arenas[i3].tail_hi = 0; @@ -877,9 +990,9 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) /* Return true if 'p' is a pointer inside an obmalloc arena. * _PyObject_Free() calls this so it needs to be very fast. */ static int -arena_map_is_used(pymem_block *p) +arena_map_is_used(OMState *state, pymem_block *p) { - arena_map_bot_t *n = arena_map_get(p, 0); + arena_map_bot_t *n = arena_map_get(state, p, 0); if (n == NULL) { return 0; } @@ -902,7 +1015,7 @@ arena_map_is_used(pymem_block *p) * `usable_arenas` to the return value. */ static struct arena_object* -new_arena(void) +new_arena(OMState *state) { struct arena_object* arenaobj; uint excess; /* number of bytes above pool alignment */ @@ -968,7 +1081,7 @@ new_arena(void) address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); #if WITH_PYMALLOC_RADIX_TREE if (address != NULL) { - if (!arena_map_mark_used((uintptr_t)address, 1)) { + if (!arena_map_mark_used(state, (uintptr_t)address, 1)) { /* marking arena in radix tree failed, abort */ _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE); address = NULL; @@ -1011,9 +1124,9 @@ new_arena(void) pymalloc. When the radix tree is used, 'poolp' is unused. */ static bool -address_in_range(void *p, poolp Py_UNUSED(pool)) +address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool)) { - return arena_map_is_used(p); + return arena_map_is_used(state, p); } #else /* @@ -1094,7 +1207,7 @@ extremely desirable that it be this fast. static bool _Py_NO_SANITIZE_ADDRESS _Py_NO_SANITIZE_THREAD _Py_NO_SANITIZE_MEMORY -address_in_range(void *p, poolp pool) +address_in_range(OMState *state, void *p, poolp pool) { // Since address_in_range may be reading from memory which was not allocated // by Python, it is important that pool->arenaindex is read only once, as @@ -1111,8 +1224,6 @@ address_in_range(void *p, poolp pool) /*==========================================================================*/ -#define usedpools (_PyRuntime.obmalloc.pools.used) - // Called when freelist is exhausted. Extend the freelist if there is // space for a block. Otherwise, remove this pool from usedpools. static void @@ -1138,7 +1249,7 @@ pymalloc_pool_extend(poolp pool, uint size) * This function takes new pool and allocate a block from it. */ static void* -allocate_from_new_pool(uint size) +allocate_from_new_pool(OMState *state, uint size) { /* There isn't a pool of the right size class immediately * available: use a free pool. @@ -1150,7 +1261,7 @@ allocate_from_new_pool(uint size) return NULL; } #endif - usable_arenas = new_arena(); + usable_arenas = new_arena(state); if (usable_arenas == NULL) { return NULL; } @@ -1274,7 +1385,7 @@ allocate_from_new_pool(uint size) or when the max memory limit has been reached. */ static inline void* -pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes) +pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes) { #ifdef WITH_VALGRIND if (UNLIKELY(running_on_valgrind == -1)) { @@ -1314,7 +1425,7 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes) /* There isn't a pool of the right size class immediately * available: use a free pool. */ - bp = allocate_from_new_pool(size); + bp = allocate_from_new_pool(state, size); } return (void *)bp; @@ -1324,7 +1435,8 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes) void * _PyObject_Malloc(void *ctx, size_t nbytes) { - void* ptr = pymalloc_alloc(ctx, nbytes); + OMState *state = get_state(); + void* ptr = pymalloc_alloc(state, ctx, nbytes); if (LIKELY(ptr != NULL)) { return ptr; } @@ -1343,7 +1455,8 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize) assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize); size_t nbytes = nelem * elsize; - void* ptr = pymalloc_alloc(ctx, nbytes); + OMState *state = get_state(); + void* ptr = pymalloc_alloc(state, ctx, nbytes); if (LIKELY(ptr != NULL)) { memset(ptr, 0, nbytes); return ptr; @@ -1358,7 +1471,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize) static void -insert_to_usedpool(poolp pool) +insert_to_usedpool(OMState *state, poolp pool) { assert(pool->ref.count > 0); /* else the pool is empty */ @@ -1374,7 +1487,7 @@ insert_to_usedpool(poolp pool) } static void -insert_to_freepool(poolp pool) +insert_to_freepool(OMState *state, poolp pool) { poolp next = pool->nextpool; poolp prev = pool->prevpool; @@ -1457,7 +1570,7 @@ insert_to_freepool(poolp pool) #if WITH_PYMALLOC_RADIX_TREE /* mark arena region as not under control of obmalloc */ - arena_map_mark_used(ao->address, 0); + arena_map_mark_used(state, ao->address, 0); #endif /* Free the entire arena. */ @@ -1544,7 +1657,7 @@ insert_to_freepool(poolp pool) Return 1 if it was freed. Return 0 if the block was not allocated by pymalloc_alloc(). */ static inline int -pymalloc_free(void *Py_UNUSED(ctx), void *p) +pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p) { assert(p != NULL); @@ -1555,7 +1668,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p) #endif poolp pool = POOL_ADDR(p); - if (UNLIKELY(!address_in_range(p, pool))) { + if (UNLIKELY(!address_in_range(state, p, pool))) { return 0; } /* We allocated this address. */ @@ -1579,7 +1692,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p) * targets optimal filling when several pools contain * blocks of the same size class. */ - insert_to_usedpool(pool); + insert_to_usedpool(state, pool); return 1; } @@ -1596,7 +1709,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p) * previously freed pools will be allocated later * (being not referenced, they are perhaps paged out). */ - insert_to_freepool(pool); + insert_to_freepool(state, pool); return 1; } @@ -1609,7 +1722,8 @@ _PyObject_Free(void *ctx, void *p) return; } - if (UNLIKELY(!pymalloc_free(ctx, p))) { + OMState *state = get_state(); + if (UNLIKELY(!pymalloc_free(state, ctx, p))) { /* pymalloc didn't allocate this address */ PyMem_RawFree(p); raw_allocated_blocks--; @@ -1627,7 +1741,8 @@ _PyObject_Free(void *ctx, void *p) Return 0 if pymalloc didn't allocated p. */ static int -pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes) +pymalloc_realloc(OMState *state, void *ctx, + void **newptr_p, void *p, size_t nbytes) { void *bp; poolp pool; @@ -1643,7 +1758,7 @@ pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes) #endif pool = POOL_ADDR(p); - if (!address_in_range(p, pool)) { + if (!address_in_range(state, p, pool)) { /* pymalloc is not managing this block. If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take @@ -1696,7 +1811,8 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) return _PyObject_Malloc(ctx, nbytes); } - if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) { + OMState *state = get_state(); + if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) { return ptr2; } @@ -1710,11 +1826,29 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) * only be used by extensions that are compiled with pymalloc enabled. */ Py_ssize_t -_Py_GetAllocatedBlocks(void) +_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp)) { return 0; } +Py_ssize_t +_Py_GetGlobalAllocatedBlocks(void) +{ + return 0; +} + +void +_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp)) +{ + return; +} + +void +_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime)) +{ + return; +} + #endif /* WITH_PYMALLOC */ @@ -2289,6 +2423,7 @@ _PyObject_DebugMallocStats(FILE *out) if (!_PyMem_PymallocEnabled()) { return 0; } + OMState *state = get_state(); uint i; const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index a510c9b2216..ebf1a0bff54 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -547,11 +547,21 @@ pycore_init_runtime(_PyRuntimeState *runtime, } -static void +static PyStatus init_interp_settings(PyInterpreterState *interp, const _PyInterpreterConfig *config) { assert(interp->feature_flags == 0); + if (config->use_main_obmalloc) { + interp->feature_flags |= Py_RTFLAGS_USE_MAIN_OBMALLOC; + } + else if (!config->check_multi_interp_extensions) { + /* The reason: PyModuleDef.m_base.m_copy leaks objects between + interpreters. */ + return _PyStatus_ERR("per-interpreter obmalloc does not support " + "single-phase init extension modules"); + } + if (config->allow_fork) { interp->feature_flags |= Py_RTFLAGS_FORK; } @@ -570,6 +580,8 @@ init_interp_settings(PyInterpreterState *interp, const _PyInterpreterConfig *con if (config->check_multi_interp_extensions) { interp->feature_flags |= Py_RTFLAGS_MULTI_INTERP_EXTENSIONS; } + + return _PyStatus_OK(); } @@ -622,7 +634,10 @@ pycore_create_interpreter(_PyRuntimeState *runtime, } const _PyInterpreterConfig config = _PyInterpreterConfig_LEGACY_INIT; - init_interp_settings(interp, &config); + status = init_interp_settings(interp, &config); + if (_PyStatus_EXCEPTION(status)) { + return status; + } PyThreadState *tstate = _PyThreadState_New(interp); if (tstate == NULL) { @@ -1668,6 +1683,8 @@ finalize_interp_types(PyInterpreterState *interp) _PyFloat_FiniType(interp); _PyLong_FiniTypes(interp); _PyThread_FiniType(interp); + // XXX fini collections module static types (_PyStaticType_Dealloc()) + // XXX fini IO module static types (_PyStaticType_Dealloc()) _PyErr_FiniTypes(interp); _PyTypes_FiniTypes(interp); @@ -1936,6 +1953,7 @@ Py_FinalizeEx(void) } _Py_FinalizeRefTotal(runtime); #endif + _Py_FinalizeAllocatedBlocks(runtime); #ifdef Py_TRACE_REFS /* Display addresses (& refcnts) of all objects still alive. @@ -2036,7 +2054,10 @@ new_interpreter(PyThreadState **tstate_p, const _PyInterpreterConfig *config) goto error; } - init_interp_settings(interp, config); + status = init_interp_settings(interp, config); + if (_PyStatus_EXCEPTION(status)) { + goto error; + } status = init_interp_create_gil(tstate); if (_PyStatus_EXCEPTION(status)) { diff --git a/Python/pystate.c b/Python/pystate.c index d108cfc7e50..b2ef7e2ddde 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -671,6 +671,14 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; + /* Initialize obmalloc, but only for subinterpreters, + since the main interpreter is initialized statically. */ + if (interp != &runtime->_main_interpreter) { + poolp temp[OBMALLOC_USED_POOLS_SIZE] = \ + _obmalloc_pools_INIT(interp->obmalloc.pools); + memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); + } + _PyEval_InitState(&interp->ceval, pending_lock); _PyGC_InitState(&interp->gc); PyConfig_InitPythonConfig(&interp->config); @@ -941,11 +949,12 @@ PyInterpreterState_Delete(PyInterpreterState *interp) _PyEval_FiniState(&interp->ceval); -#ifdef Py_REF_DEBUG - // XXX This call should be done at the end of clear_interpreter(), + // XXX These two calls should be done at the end of clear_interpreter(), // but currently some objects get decref'ed after that. +#ifdef Py_REF_DEBUG _PyInterpreterState_FinalizeRefTotal(interp); #endif + _PyInterpreterState_FinalizeAllocatedBlocks(interp); HEAD_LOCK(runtime); PyInterpreterState **p; @@ -2320,11 +2329,11 @@ _PyCrossInterpreterData_InitWithSize(_PyCrossInterpreterData *data, // where it was allocated, so the interpreter is required. assert(interp != NULL); _PyCrossInterpreterData_Init(data, interp, NULL, obj, new_object); - data->data = PyMem_Malloc(size); + data->data = PyMem_RawMalloc(size); if (data->data == NULL) { return -1; } - data->free = PyMem_Free; + data->free = PyMem_RawFree; return 0; } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 1e42e8dfceb..58ed48859b5 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1871,7 +1871,9 @@ static Py_ssize_t sys_getallocatedblocks_impl(PyObject *module) /*[clinic end generated code: output=f0c4e873f0b6dcf7 input=dab13ee346a0673e]*/ { - return _Py_GetAllocatedBlocks(); + // It might make sense to return the count + // for just the current interpreter. + return _Py_GetGlobalAllocatedBlocks(); } /*[clinic input] diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index a8ba88efc73..7a5d7d45f51 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -309,6 +309,7 @@ Objects/obmalloc.c - _PyMem - Objects/obmalloc.c - _PyMem_Debug - Objects/obmalloc.c - _PyMem_Raw - Objects/obmalloc.c - _PyObject - +Objects/obmalloc.c - last_final_leaks - Objects/obmalloc.c - usedpools - Objects/typeobject.c - name_op - Objects/typeobject.c - slotdefs -