From 2ebc5ce42a8a9e047e790aefbf9a94811569b2b6 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Thu, 7 Sep 2017 23:51:28 -0600
Subject: [PATCH] bpo-30860: Consolidate stateful runtime globals. (#3397)

* group the (stateful) runtime globals into various topical structs
* consolidate the topical structs under a single top-level _PyRuntimeState struct
* add a check-c-globals.py script that helps identify runtime globals

Other globals are excluded (see ignored-globals.txt and check-c-globals.py).
---
 Include/ceval.h                               |   5 +
 Include/internal/ceval.h                      |  53 ++
 Include/internal/condvar.h                    |  91 +++
 Include/internal/gil.h                        |  46 ++
 Include/internal/mem.h                        | 197 +++++
 Include/internal/pymalloc.h                   | 443 ++++++++++
 Include/internal/pystate.h                    |  92 +++
 Include/internal/warnings.h                   |  21 +
 Include/object.h                              |   2 -
 Include/pylifecycle.h                         |   2 +-
 Include/pystate.h                             |  30 +-
 Makefile.pre.in                               |   6 +
 .../2017-09-05-13-47-49.bpo-30860.MROpZw.rst  |   2 +
 Modules/_functoolsmodule.c                    |   2 +
 Modules/_io/bufferedio.c                      |   3 +-
 Modules/_json.c                               |   7 +
 Modules/_pickle.c                             |   7 +
 Modules/_threadmodule.c                       |  12 +-
 Modules/_winapi.c                             |   2 +-
 Modules/gcmodule.c                            | 309 +++----
 Modules/main.c                                |   8 +-
 Modules/zipimport.c                           |   1 +
 Objects/abstract.c                            |   1 +
 Objects/bytearrayobject.c                     |   2 +
 Objects/bytesobject.c                         |   2 +
 Objects/call.c                                |   1 +
 Objects/cellobject.c                          |   2 +
 Objects/classobject.c                         |   2 +
 Objects/descrobject.c                         |   1 +
 Objects/dictobject.c                          |   1 +
 Objects/exceptions.c                          |   2 +
 Objects/frameobject.c                         |   1 +
 Objects/funcobject.c                          |   2 +
 Objects/genobject.c                           |   1 +
 Objects/iterobject.c                          |   2 +
 Objects/listobject.c                          |   1 +
 Objects/memoryobject.c                        |   2 +
 Objects/methodobject.c                        |   2 +
 Objects/moduleobject.c                        |   1 +
 Objects/object.c                              |  23 +-
 Objects/obmalloc.c                            | 772 ++++--------------
 Objects/odictobject.c                         |   1 +
 Objects/setobject.c                           |   1 +
 Objects/sliceobject.c                         |   2 +
 Objects/tupleobject.c                         |   1 +
 Objects/typeobject.c                          |  11 +-
 Objects/unicodeobject.c                       |   1 +
 PC/pyconfig.h                                 |  26 +-
 PCbuild/pythoncore.vcxproj                    |   7 +
 PCbuild/pythoncore.vcxproj.filters            |  21 +
 Parser/myreadline.c                           |   1 +
 Parser/pgenmain.c                             |   5 +
 Python/_warnings.c                            |  80 +-
 Python/ceval.c                                | 156 ++--
 Python/ceval_gil.h                            | 157 ++--
 Python/codecs.c                               |   1 +
 Python/condvar.h                              |  75 +-
 Python/dynload_shlib.c                        |   1 +
 Python/errors.c                               |   1 +
 Python/import.c                               |   1 +
 Python/pylifecycle.c                          |  95 ++-
 Python/pystate.c                              | 179 ++--
 Python/pythonrun.c                            |   1 +
 Python/symtable.c                             |   4 +
 Python/sysmodule.c                            |  60 +-
 Python/thread.c                               |   8 +-
 Python/thread_nt.h                            |   9 +-
 Python/thread_pthread.h                       |   9 +-
 Python/traceback.c                            |   1 +
 Tools/c-globals/README                        |  41 +
 Tools/c-globals/check-c-globals.py            | 446 ++++++++++
 Tools/c-globals/ignored-globals.txt           | 494 +++++++++++
 72 files changed, 2746 insertions(+), 1312 deletions(-)
 create mode 100644 Include/internal/ceval.h
 create mode 100644 Include/internal/condvar.h
 create mode 100644 Include/internal/gil.h
 create mode 100644 Include/internal/mem.h
 create mode 100644 Include/internal/pymalloc.h
 create mode 100644 Include/internal/pystate.h
 create mode 100644 Include/internal/warnings.h
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
 create mode 100644 Tools/c-globals/README
 create mode 100644 Tools/c-globals/check-c-globals.py
 create mode 100644 Tools/c-globals/ignored-globals.txt

diff --git a/Include/ceval.h b/Include/ceval.h
index 2028a9ef5bc..c5ccf473c17 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -93,6 +93,11 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
       PyThreadState_GET()->overflowed = 0;  \
     } while(0)
 PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where);
+/* XXX _Py_CheckRecursionLimit should be changed to
+   _PyRuntime.ceval.check_recursion_limit.  However, due to the macros
+   in which it's used, _Py_CheckRecursionLimit is stuck in the stable
+   ABI.  It should be removed therefrom when possible.
+*/
 PyAPI_DATA(int) _Py_CheckRecursionLimit;
 
 #ifdef USE_STACKCHECK
diff --git a/Include/internal/ceval.h b/Include/internal/ceval.h
new file mode 100644
index 00000000000..57db9b1ebc7
--- /dev/null
+++ b/Include/internal/ceval.h
@@ -0,0 +1,53 @@
+#ifndef Py_INTERNAL_CEVAL_H
+#define Py_INTERNAL_CEVAL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "pyatomic.h"
+#include "pythread.h"
+
+struct _pending_calls {
+    unsigned long main_thread;
+    PyThread_type_lock lock;
+    /* Request for running pending calls. */
+    _Py_atomic_int calls_to_do;
+    /* Request for looking at the `async_exc` field of the current
+       thread state.
+       Guarded by the GIL. */
+    int async_exc;
+#define NPENDINGCALLS 32
+    struct {
+        int (*func)(void *);
+        void *arg;
+    } calls[NPENDINGCALLS];
+    int first;
+    int last;
+};
+
+#include "internal/gil.h"
+
+struct _ceval_runtime_state {
+    int recursion_limit;
+    int check_recursion_limit;
+    /* Records whether tracing is on for any thread.  Counts the number
+       of threads for which tstate->c_tracefunc is non-NULL, so if the
+       value is 0, we know we don't have to check this thread's
+       c_tracefunc.  This speeds up the if statement in
+       PyEval_EvalFrameEx() after fast_next_opcode. */
+    int tracing_possible;
+    /* This single variable consolidates all requests to break out of
+       the fast path in the eval loop. */
+    _Py_atomic_int eval_breaker;
+    /* Request for dropping the GIL */
+    _Py_atomic_int gil_drop_request;
+    struct _pending_calls pending;
+    struct _gil_runtime_state gil;
+};
+
+PyAPI_FUNC(void) _PyEval_Initialize(struct _ceval_runtime_state *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CEVAL_H */
diff --git a/Include/internal/condvar.h b/Include/internal/condvar.h
new file mode 100644
index 00000000000..f9330890d3e
--- /dev/null
+++ b/Include/internal/condvar.h
@@ -0,0 +1,91 @@
+#ifndef Py_INTERNAL_CONDVAR_H
+#define Py_INTERNAL_CONDVAR_H
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   not present in unistd.h. But they still can be implemented as an external
+   library (e.g. gnu pth in pthread emulation) */
+# ifdef HAVE_PTHREAD_H
+#  include <pthread.h> /* _POSIX_THREADS */
+# endif
+#endif
+
+#ifdef _POSIX_THREADS
+/*
+ * POSIX support
+ */
+#define Py_HAVE_CONDVAR
+
+#include <pthread.h>
+
+#define PyMUTEX_T pthread_mutex_t
+#define PyCOND_T pthread_cond_t
+
+#elif defined(NT_THREADS)
+/*
+ * Windows (XP, 2003 server and later, as well as (hopefully) CE) support
+ *
+ * Emulated condition variables that work with XP and later, plus
+ * example native support on VISTA and onwards.
+ */
+#define Py_HAVE_CONDVAR
+
+/* include windows if it hasn't been done before */
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* options */
+/* non-emulated condition variables are provided for those that want
+ * to target Windows Vista.  Modify this macro to enable them.
+ */
+#ifndef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
+#endif
+
+/* fall back to emulation if not targeting Vista */
+#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
+#undef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1
+#endif
+
+#if _PY_EMULATED_WIN_CV
+
+typedef CRITICAL_SECTION PyMUTEX_T;
+
+/* The ConditionVariable object.  From XP onwards it is easily emulated
+   with a Semaphore.
+   Semaphores are available on Windows XP (2003 server) and later.
+   We use a Semaphore rather than an auto-reset event, because although
+   an auto-reset event might appear to solve the lost-wakeup bug (race
+   condition between releasing the outer lock and waiting) because it
+   maintains state even though a wait hasn't happened, there is still
+   a lost wakeup problem if more than one thread is interrupted in the
+   critical place.  A semaphore solves that, because its state is
+   counted, not Boolean.
+   Because it is ok to signal a condition variable with no one
+   waiting, we need to keep track of the number of
+   waiting threads.  Otherwise, the semaphore's state could rise
+   without bound.  This also helps reduce the number of "spurious wakeups"
+   that would otherwise happen.
+ */
+
+typedef struct _PyCOND_T
+{
+    HANDLE sem;
+    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
+} PyCOND_T;
+
+#else /* !_PY_EMULATED_WIN_CV */
+
+/* Use native Vista primitives if build target is Vista or higher */
+
+/* SRWLOCK is faster and better than CriticalSection */
+typedef SRWLOCK PyMUTEX_T;
+
+typedef CONDITION_VARIABLE  PyCOND_T;
+
+#endif /* _PY_EMULATED_WIN_CV */
+
+#endif /* _POSIX_THREADS, NT_THREADS */
+
+#endif /* Py_INTERNAL_CONDVAR_H */
diff --git a/Include/internal/gil.h b/Include/internal/gil.h
new file mode 100644
index 00000000000..6139bd215c3
--- /dev/null
+++ b/Include/internal/gil.h
@@ -0,0 +1,46 @@
+#ifndef Py_INTERNAL_GIL_H
+#define Py_INTERNAL_GIL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "pyatomic.h"
+
+#include "internal/condvar.h"
+#ifndef Py_HAVE_CONDVAR
+#error You need either a POSIX-compatible or a Windows system!
+#endif
+
+/* Enable if you want to force the switching of threads at least
+   every `interval`. */
+#undef FORCE_SWITCHING
+#define FORCE_SWITCHING
+
+struct _gil_runtime_state {
+    /* microseconds (the Python API uses seconds, though) */
+    unsigned long interval;
+    /* Last PyThreadState holding / having held the GIL. This helps us
+       know whether anyone else was scheduled after we dropped the GIL. */
+    _Py_atomic_address last_holder;
+    /* Whether the GIL is already taken (-1 if uninitialized). This is
+       atomic because it can be read without any lock taken in ceval.c. */
+    _Py_atomic_int locked;
+    /* Number of GIL switches since the beginning. */
+    unsigned long switch_number;
+    /* This condition variable allows one or several threads to wait
+       until the GIL is released. In addition, the mutex also protects
+       the above variables. */
+    PyCOND_T cond;
+    PyMUTEX_T mutex;
+#ifdef FORCE_SWITCHING
+    /* This condition variable helps the GIL-releasing thread wait for
+       a GIL-awaiting thread to be scheduled and take the GIL. */
+    PyCOND_T switch_cond;
+    PyMUTEX_T switch_mutex;
+#endif
+};
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_GIL_H */
diff --git a/Include/internal/mem.h b/Include/internal/mem.h
new file mode 100644
index 00000000000..1624f378f67
--- /dev/null
+++ b/Include/internal/mem.h
@@ -0,0 +1,197 @@
+#ifndef Py_INTERNAL_MEM_H
+#define Py_INTERNAL_MEM_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "objimpl.h"
+#include "pymem.h"
+
+#ifdef WITH_PYMALLOC
+#include "internal/pymalloc.h"
+#endif
+
+/* Low-level memory runtime state */
+
+struct _pymem_runtime_state {
+    struct _allocator_runtime_state {
+        PyMemAllocatorEx mem;
+        PyMemAllocatorEx obj;
+        PyMemAllocatorEx raw;
+    } allocators;
+#ifdef WITH_PYMALLOC
+    /* Array of objects used to track chunks of memory (arenas). */
+    struct arena_object* arenas;
+    /* The head of the singly-linked, NULL-terminated list of available
+       arena_objects. */
+    struct arena_object* unused_arena_objects;
+    /* The head of the doubly-linked, NULL-terminated at each end,
+       list of arena_objects associated with arenas that have pools
+       available. */
+    struct arena_object* usable_arenas;
+    /* Number of slots currently allocated in the `arenas` vector. */
+    unsigned int maxarenas;
+    /* Number of arenas allocated that haven't been free()'d. */
+    size_t narenas_currently_allocated;
+    /* High water mark (max value ever seen) for
+     * narenas_currently_allocated. */
+    size_t narenas_highwater;
+    /* Total number of times malloc() called to allocate an arena. */
+    size_t ntimes_arena_allocated;
+    poolp usedpools[MAX_POOLS];
+    Py_ssize_t num_allocated_blocks;
+    size_t serialno;     /* incremented on each debug {m,re}alloc */
+#endif /* WITH_PYMALLOC */
+};
+
+PyAPI_FUNC(void) _PyMem_Initialize(struct _pymem_runtime_state *);
+
+
+/* High-level memory runtime state */
+
+struct _pyobj_runtime_state {
+    PyObjectArenaAllocator allocator_arenas;
+};
+
+PyAPI_FUNC(void) _PyObject_Initialize(struct _pyobj_runtime_state *);
+
+
+/* GC runtime state */
+
+/* If we change this, we need to change the default value in the
+   signature of gc.collect. */
+#define NUM_GENERATIONS 3
+
+/*
+   NOTE: about the counting of long-lived objects.
+
+   To limit the cost of garbage collection, there are two strategies:
+     - make each collection faster, e.g. by scanning fewer objects
+     - do fewer collections
+   This heuristic is about the latter strategy.
+
+   In addition to the various configurable thresholds, we only trigger a
+   full collection if the ratio
+    long_lived_pending / long_lived_total
+   is above a given value (hardwired to 25%).
+
+   The reason is that, while "non-full" collections (i.e., collections of
+   the young and middle generations) will always examine roughly the same
+   number of objects -- determined by the aforementioned thresholds --,
+   the cost of a full collection is proportional to the total number of
+   long-lived objects, which is virtually unbounded.
+
+   Indeed, it has been remarked that doing a full collection every
+   <constant number> of object creations entails a dramatic performance
+   degradation in workloads which consist in creating and storing lots of
+   long-lived objects (e.g. building a large list of GC-tracked objects would
+   show quadratic performance, instead of linear as expected: see issue #4074).
+
+   Using the above ratio, instead, yields amortized linear performance in
+   the total number of objects (the effect of which can be summarized
+   thusly: "each full garbage collection is more and more costly as the
+   number of objects grows, but we do fewer and fewer of them").
+
+   This heuristic was suggested by Martin von Löwis on python-dev in
+   June 2008. His original analysis and proposal can be found at:
+    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
+*/
+
+/*
+   NOTE: about untracking of mutable objects.
+
+   Certain types of container cannot participate in a reference cycle, and
+   so do not need to be tracked by the garbage collector. Untracking these
+   objects reduces the cost of garbage collections. However, determining
+   which objects may be untracked is not free, and the costs must be
+   weighed against the benefits for garbage collection.
+
+   There are two possible strategies for when to untrack a container:
+
+   i) When the container is created.
+   ii) When the container is examined by the garbage collector.
+
+   Tuples containing only immutable objects (integers, strings etc, and
+   recursively, tuples of immutable objects) do not need to be tracked.
+   The interpreter creates a large number of tuples, many of which will
+   not survive until garbage collection. It is therefore not worthwhile
+   to untrack eligible tuples at creation time.
+
+   Instead, all tuples except the empty tuple are tracked when created.
+   During garbage collection it is determined whether any surviving tuples
+   can be untracked. A tuple can be untracked if all of its contents are
+   already not tracked. Tuples are examined for untracking in all garbage
+   collection cycles. It may take more than one cycle to untrack a tuple.
+
+   Dictionaries containing only immutable objects also do not need to be
+   tracked. Dictionaries are untracked when created. If a tracked item is
+   inserted into a dictionary (either as a key or value), the dictionary
+   becomes tracked. During a full garbage collection (all generations),
+   the collector will untrack any dictionaries whose contents are not
+   tracked.
+
+   The module provides the python function is_tracked(obj), which returns
+   the CURRENT tracking status of the object. Subsequent garbage
+   collections may change the tracking status of the object.
+
+   Untracking of certain containers was introduced in issue #4688, and
+   the algorithm was refined in response to issue #14775.
+*/
+
+struct gc_generation {
+    PyGC_Head head;
+    int threshold; /* collection threshold */
+    int count; /* count of allocations or collections of younger
+                  generations */
+};
+
+/* Running stats per generation */
+struct gc_generation_stats {
+    /* total number of collections */
+    Py_ssize_t collections;
+    /* total number of collected objects */
+    Py_ssize_t collected;
+    /* total number of uncollectable objects (put into gc.garbage) */
+    Py_ssize_t uncollectable;
+};
+
+struct _gc_runtime_state {
+    /* List of objects that still need to be cleaned up, singly linked
+     * via their gc headers' gc_prev pointers.  */
+    PyObject *trash_delete_later;
+    /* Current call-stack depth of tp_dealloc calls. */
+    int trash_delete_nesting;
+
+    int enabled;
+    int debug;
+    /* linked lists of container objects */
+    struct gc_generation generations[NUM_GENERATIONS];
+    PyGC_Head *generation0;
+    struct gc_generation_stats generation_stats[NUM_GENERATIONS];
+    /* true if we are currently running the collector */
+    int collecting;
+    /* list of uncollectable objects */
+    PyObject *garbage;
+    /* a list of callbacks to be invoked when collection is performed */
+    PyObject *callbacks;
+    /* This is the number of objects that survived the last full
+       collection. It approximates the number of long lived objects
+       tracked by the GC.
+
+       (by "full collection", we mean a collection of the oldest
+       generation). */
+    Py_ssize_t long_lived_total;
+    /* This is the number of objects that survived all "non-full"
+       collections, and are awaiting to undergo a full collection for
+       the first time. */
+    Py_ssize_t long_lived_pending;
+};
+
+PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *);
+
+#define _PyGC_generation0 _PyRuntime.gc.generation0
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_MEM_H */
diff --git a/Include/internal/pymalloc.h b/Include/internal/pymalloc.h
new file mode 100644
index 00000000000..e9d6ab6a8b3
--- /dev/null
+++ b/Include/internal/pymalloc.h
@@ -0,0 +1,443 @@
+
+/* An object allocator for Python.
+
+   Here is an introduction to the layers of the Python memory architecture,
+   showing where the object allocator is actually used (layer +2). It is
+   called for every object allocation and deallocation (PyObject_New/Del),
+   unless the object-specific allocators implement a proprietary allocation
+   scheme (ex.: ints use a simple free list). This is also the place where
+   the cyclic garbage collector operates selectively on container objects.
+
+
+    Object-specific allocators
+    _____   ______   ______       ________
+   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
++3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
+    _______________________________       |                           |
+   [   Python's object allocator   ]      |                           |
++2 | ####### Object memory ####### | <------ Internal buffers ------> |
+    ______________________________________________________________    |
+   [          Python's raw memory allocator (PyMem_ API)          ]   |
++1 | <----- Python memory (under PyMem manager's control) ------> |   |
+    __________________________________________________________________
+   [    Underlying general-purpose allocator (ex: C library malloc)   ]
+ 0 | <------ Virtual memory allocated for the python process -------> |
+
+   =========================================================================
+    _______________________________________________________________________
+   [                OS-specific Virtual Memory Manager (VMM)               ]
+-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
+    __________________________________   __________________________________
+   [                                  ] [                                  ]
+-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
+
+*/
+/*==========================================================================*/
+
+/* A fast, special-purpose memory allocator for small blocks, to be used
+   on top of a general-purpose malloc -- heavily based on previous art. */
+
+/* Vladimir Marangozov -- August 2000 */
+
+/*
+ * "Memory management is where the rubber meets the road -- if we do the wrong
+ * thing at any level, the results will not be good. And if we don't make the
+ * levels work well together, we are in serious trouble." (1)
+ *
+ * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
+ *    "Dynamic Storage Allocation: A Survey and Critical Review",
+ *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
+ */
+
+#ifndef Py_INTERNAL_PYMALLOC_H
+#define Py_INTERNAL_PYMALLOC_H
+
+/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
+
+/*==========================================================================*/
+
+/*
+ * Allocation strategy abstract:
+ *
+ * For small requests, the allocator sub-allocates <Big> blocks of memory.
+ * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
+ * system's allocator.
+ *
+ * Small requests are grouped in size classes spaced 8 bytes apart, due
+ * to the required valid alignment of the returned address. Requests of
+ * a particular size are serviced from memory pools of 4K (one VMM page).
+ * Pools are fragmented on demand and contain free lists of blocks of one
+ * particular size class. In other words, there is a fixed-size allocator
+ * for each size class. Free pools are shared by the different allocators
+ * thus minimizing the space reserved for a particular size class.
+ *
+ * This allocation strategy is a variant of what is known as "simple
+ * segregated storage based on array of free lists". The main drawback of
+ * simple segregated storage is that we might end up with lot of reserved
+ * memory for the different free lists, which degenerate in time. To avoid
+ * this, we partition each free list in pools and we share dynamically the
+ * reserved space between all free lists. This technique is quite efficient
+ * for memory intensive programs which allocate mainly small-sized blocks.
+ *
+ * For small requests we have the following table:
+ *
+ * Request in bytes     Size of allocated block      Size class idx
+ * ----------------------------------------------------------------
+ *        1-8                     8                       0
+ *        9-16                   16                       1
+ *       17-24                   24                       2
+ *       25-32                   32                       3
+ *       33-40                   40                       4
+ *       41-48                   48                       5
+ *       49-56                   56                       6
+ *       57-64                   64                       7
+ *       65-72                   72                       8
+ *        ...                   ...                     ...
+ *      497-504                 504                      62
+ *      505-512                 512                      63
+ *
+ *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
+ *      allocator.
+ */
+
+/*==========================================================================*/
+
+/*
+ * -- Main tunable settings section --
+ */
+
+/*
+ * Alignment of addresses returned to the user. 8-bytes alignment works
+ * on most current architectures (with 32-bit or 64-bit address busses).
+ * The alignment value is also used for grouping small requests in size
+ * classes spaced ALIGNMENT bytes apart.
+ *
+ * You shouldn't change this unless you know what you are doing.
+ */
+#define ALIGNMENT               8               /* must be 2^N */
+#define ALIGNMENT_SHIFT         3
+
+/* Return the number of bytes in size class I, as a uint. */
+#define INDEX2SIZE(I) (((unsigned int)(I) + 1) << ALIGNMENT_SHIFT)
+
+/*
+ * Max size threshold below which malloc requests are considered to be
+ * small enough in order to use preallocated memory pools. You can tune
+ * this value according to your application behaviour and memory needs.
+ *
+ * Note: a size threshold of 512 guarantees that newly created dictionaries
+ * will be allocated from preallocated memory pools on 64-bit.
+ *
+ * The following invariants must hold:
+ *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
+ *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
+ *
+ * Although not required, for better performance and space efficiency,
+ * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
+ */
+#define SMALL_REQUEST_THRESHOLD 512
+#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
+
+#if NB_SMALL_SIZE_CLASSES > 64
+#error "NB_SMALL_SIZE_CLASSES must not exceed 64"
+#endif /* NB_SMALL_SIZE_CLASSES > 64 */
+
+/*
+ * The system's VMM page size can be obtained on most unices with a
+ * getpagesize() call or deduced from various header files. To make
+ * things simpler, we assume that it is 4K, which is OK for most systems.
+ * It is probably better if this is the native page size, but it doesn't
+ * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
+ * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
+ * violation fault.  4K is apparently OK for all the platforms that python
+ * currently targets.
+ */
+#define SYSTEM_PAGE_SIZE        (4 * 1024)
+#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
+
+/*
+ * Maximum amount of memory managed by the allocator for small requests.
+ */
+#ifdef WITH_MEMORY_LIMITS
+#ifndef SMALL_MEMORY_LIMIT
+#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
+#endif
+#endif
+
+/*
+ * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
+ * on a page boundary. This is a reserved virtual address space for the
+ * current process (obtained through a malloc()/mmap() call). In no way this
+ * means that the memory arenas will be used entirely. A malloc(<Big>) is
+ * usually an address range reservation for <Big> bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory". It's an addressable range
+ * wastage...
+ *
+ * Arenas are allocated with mmap() on systems supporting anonymous memory
+ * mappings to reduce heap fragmentation.
+ */
+#define ARENA_SIZE              (256 << 10)     /* 256KB */
+
+#ifdef WITH_MEMORY_LIMITS
+#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
+#endif
+
+/*
+ * Size of the pools used for small blocks. Should be a power of 2,
+ * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
+ */
+#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
+#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
+
+/*
+ * -- End of tunable settings section --
+ */
+
+/*==========================================================================*/
+
+/*
+ * Locking
+ *
+ * To reduce lock contention, it would probably be better to refine the
+ * crude function locking with per size class locking. I'm not positive
+ * however, whether it's worth switching to such locking policy because
+ * of the performance penalty it might introduce.
+ *
+ * The following macros describe the simplest (should also be the fastest)
+ * lock object on a particular platform and the init/fini/lock/unlock
+ * operations on it. The locks defined here are not expected to be recursive
+ * because it is assumed that they will always be called in the order:
+ * INIT, [LOCK, UNLOCK]*, FINI.
+ */
+
+/*
+ * Python's threads are serialized, so object malloc locking is disabled.
+ */
+#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
+#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
+#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
+#define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
+#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
+
+/* When you say memory, my mind reasons in terms of (pointers to) blocks */
+typedef uint8_t pyblock;
+
+/* Pool for small blocks. */
+struct pool_header {
+    union { pyblock *_padding;
+            unsigned int count; } ref;  /* number of allocated blocks    */
+    pyblock *freeblock;                 /* pool's free list head         */
+    struct pool_header *nextpool;       /* next pool of this size class  */
+    struct pool_header *prevpool;       /* previous pool       ""        */
+    unsigned int arenaindex;            /* index into arenas of base adr */
+    unsigned int szidx;                 /* block size class index        */
+    unsigned int nextoffset;            /* bytes to virgin block         */
+    unsigned int maxnextoffset;         /* largest valid nextoffset      */
+};
+
+typedef struct pool_header *poolp;
+
+/* Record keeping for arenas. */
+struct arena_object {
+    /* The address of the arena, as returned by malloc.  Note that 0
+     * will never be returned by a successful malloc, and is used
+     * here to mark an arena_object that doesn't correspond to an
+     * allocated arena.
+     */
+    uintptr_t address;
+
+    /* Pool-aligned pointer to the next pool to be carved off. */
+    pyblock* pool_address;
+
+    /* The number of available pools in the arena:  free pools + never-
+     * allocated pools.
+     */
+    unsigned int nfreepools;
+
+    /* The total number of pools in the arena, whether or not available. */
+    unsigned int ntotalpools;
+
+    /* Singly-linked list of available pools, chained via nextpool. */
+    struct pool_header* freepools;
+
+    /* Whenever this arena_object is not associated with an allocated
+     * arena, the nextarena member is used to link all unassociated
+     * arena_objects in the singly-linked `unused_arena_objects` list.
+     * The prevarena member is unused in this case.
+     *
+     * When this arena_object is associated with an allocated arena
+     * with at least one available pool, both members are used in the
+     * doubly-linked `usable_arenas` list, which is maintained in
+     * increasing order of `nfreepools` values.
+     *
+     * Else this arena_object is associated with an allocated arena
+     * all of whose pools are in use.  `nextarena` and `prevarena`
+     * are both meaningless in this case.
+     */
+    struct arena_object* nextarena;
+    struct arena_object* prevarena;
+};
+
+#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
+
+#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
+
+/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
+#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
+
+/* Return total number of blocks in pool of size index I, as a uint. */
+#define NUMBLOCKS(I) \
+    ((unsigned int)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
+
+/*==========================================================================*/
+
+/*
+ * This malloc lock
+ */
+SIMPLELOCK_DECL(_malloc_lock)
+#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
+#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
+#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
+#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
+
+/*
+ * Pool table -- headed, circular, doubly-linked lists of partially used pools.
+
+This is involved.  For an index i, usedpools[i+i] is the header for a list of
+all partially used pools holding small blocks with "size class idx" i. So
+usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
+16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
+
+Pools are carved off an arena's highwater mark (an arena_object's pool_address
+member) as needed.  Once carved off, a pool is in one of three states forever
+after:
+
+used == partially used, neither empty nor full
+    At least one block in the pool is currently allocated, and at least one
+    block in the pool is not currently allocated (note this implies a pool
+    has room for at least two blocks).
+    This is a pool's initial state, as a pool is created only when malloc
+    needs space.
+    The pool holds blocks of a fixed size, and is in the circular list headed
+    at usedpools[i] (see above).  It's linked to the other used pools of the
+    same size class via the pool_header's nextpool and prevpool members.
+    If all but one block is currently allocated, a malloc can cause a
+    transition to the full state.  If all but one block is not currently
+    allocated, a free can cause a transition to the empty state.
+
+full == all the pool's blocks are currently allocated
+    On transition to full, a pool is unlinked from its usedpools[] list.
+    It's not linked to from anything then anymore, and its nextpool and
+    prevpool members are meaningless until it transitions back to used.
+    A free of a block in a full pool puts the pool back in the used state.
+    Then it's linked in at the front of the appropriate usedpools[] list, so
+    that the next allocation for its size class will reuse the freed block.
+
+empty == all the pool's blocks are currently available for allocation
+    On transition to empty, a pool is unlinked from its usedpools[] list,
+    and linked to the front of its arena_object's singly-linked freepools list,
+    via its nextpool member.  The prevpool member has no meaning in this case.
+    Empty pools have no inherent size class:  the next time a malloc finds
+    an empty list in usedpools[], it takes the first pool off of freepools.
+    If the size class needed happens to be the same as the size class the pool
+    last had, some pool initialization can be skipped.
+
+
+Block Management
+
+Blocks within pools are again carved out as needed.  pool->freeblock points to
+the start of a singly-linked list of free blocks within the pool.  When a
+block is freed, it's inserted at the front of its pool's freeblock list.  Note
+that the available blocks in a pool are *not* linked all together when a pool
+is initialized.  Instead only "the first two" (lowest addresses) blocks are
+set up, returning the first such block, and setting pool->freeblock to a
+one-block list holding the second such block.  This is consistent with that
+pymalloc strives at all levels (arena, pool, and block) never to touch a piece
+of memory until it's actually needed.
+
+So long as a pool is in the used state, we're certain there *is* a block
+available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
+points to the end of the free list before we've carved the entire pool into
+blocks, that means we simply haven't yet gotten to one of the higher-address
+blocks.  The offset from the pool_header to the start of "the next" virgin
+block is stored in the pool_header nextoffset member, and the largest value
+of nextoffset that makes sense is stored in the maxnextoffset member when a
+pool is initialized.  All the blocks in a pool have been passed out at least
+once when and only when nextoffset > maxnextoffset.
+
+
+Major obscurity:  While the usedpools vector is declared to have poolp
+entries, it doesn't really.  It really contains two pointers per (conceptual)
+poolp entry, the nextpool and prevpool members of a pool_header.  The
+excruciating initialization code below fools C so that
+
+    usedpools[i+i]
+
+"acts like" a genuine poolp, but only so long as you only reference its
+nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
+compensating for that a pool_header's nextpool and prevpool members
+immediately follow a pool_header's first two members:
+
+    union { pyblock *_padding;
+            unsigned int count; } ref;
+    pyblock *freeblock;
+
+each of which consumes sizeof(pyblock *) bytes.  So what usedpools[i+i] really
+contains is a fudged-up pointer p such that *if* C believes it's a poolp
+pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
+circular list is empty).
+
+It's unclear why the usedpools setup is so convoluted.  It could be to
+minimize the amount of cache required to hold this heavily-referenced table
+(which only *needs* the two interpool pointer members of a pool_header). OTOH,
+referencing code has to remember to "double the index" and doing so isn't
+free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
+on that C doesn't insert any padding anywhere in a pool_header at or before
+the prevpool member.
+**************************************************************************** */
+
+#define MAX_POOLS  (2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8)
+
+/*==========================================================================
+Arena management.
+
+`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
+which may not be currently used (== they're arena_objects that aren't
+currently associated with an allocated arena).  Note that arenas proper are
+separately malloc'ed.
+
+Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
+we do try to free() arenas, and use some mild heuristic strategies to increase
+the likelihood that arenas eventually can be freed.
+
+unused_arena_objects
+
+    This is a singly-linked list of the arena_objects that are currently not
+    being used (no arena is associated with them).  Objects are taken off the
+    head of the list in new_arena(), and are pushed on the head of the list in
+    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
+    is on this list if and only if its .address member is 0.
+
+usable_arenas
+
+    This is a doubly-linked list of the arena_objects associated with arenas
+    that have pools available.  These pools are either waiting to be reused,
+    or have not been used before.  The list is sorted to have the most-
+    allocated arenas first (ascending order based on the nfreepools member).
+    This means that the next allocation will come from a heavily used arena,
+    which gives the nearly empty arenas a chance to be returned to the system.
+    In my unscientific tests this dramatically improved the number of arenas
+    that could be freed.
+
+Note that an arena_object associated with an arena all of whose pools are
+currently in use isn't on either list.
+*/
+
+/* How many arena_objects do we initially allocate?
+ * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
+ * `arenas` vector.
+ */
+#define INITIAL_ARENA_OBJECTS 16
+
+#endif /* Py_INTERNAL_PYMALLOC_H */
diff --git a/Include/internal/pystate.h b/Include/internal/pystate.h
new file mode 100644
index 00000000000..20c5946b14f
--- /dev/null
+++ b/Include/internal/pystate.h
@@ -0,0 +1,92 @@
+#ifndef Py_INTERNAL_PYSTATE_H
+#define Py_INTERNAL_PYSTATE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "pystate.h"
+#include "pyatomic.h"
+#include "pythread.h"
+
+#include "internal/mem.h"
+#include "internal/ceval.h"
+#include "internal/warnings.h"
+
+
+/* GIL state */
+
+struct _gilstate_runtime_state {
+    int check_enabled;  /* exposed as _PyGILState_check_enabled */
+    /* Assuming the current thread holds the GIL, this is the
+       PyThreadState for the current thread. */
+    _Py_atomic_address tstate_current;
+    PyThreadFrameGetter getframe;  /* hook for PyEval_GetFrame() */
+    /* The single PyInterpreterState used by this process'
+       GILState implementation
+    */
+    /* TODO: Given interp_main, it may be possible to kill this ref */
+    PyInterpreterState *autoInterpreterState;
+    int autoTLSkey;
+};
+
+/* hook for PyEval_GetFrame(), requested for Psyco */
+#define _PyThreadState_GetFrame _PyRuntime.gilstate.getframe
+
+/* Issue #26558: Flag to disable PyGILState_Check().
+   If set to non-zero, PyGILState_Check() always returns 1. */
+#define _PyGILState_check_enabled _PyRuntime.gilstate.check_enabled
+
+
+/* Full Python runtime state */
+
+typedef struct pyruntimestate {
+    int initialized;
+    int core_initialized;
+    PyThreadState *finalizing;  /* see _Py_CURRENTLY_FINALIZING() */
+
+    struct pyinterpreters {
+        PyThread_type_lock mutex;
+        PyInterpreterState *head;
+        PyInterpreterState *main;
+        /* next_id is an auto-numbered sequence of small integers.  It
+           gets initialized during Py_Initialize() (presumably now by
+           _PyInterpreterState_Enable() -- TODO confirm), and used in
+           PyInterpreterState_New().  A negative interpreter ID
+           indicates an error occurred.  The main interpreter will
+           always have an ID of 0.  Overflow results in a RuntimeError.
+           If that becomes a problem later then we can adjust, e.g. by
+           using a Python int. */
+        int64_t next_id;
+    } interpreters;
+
+#define NEXITFUNCS 32
+    void (*exitfuncs[NEXITFUNCS])(void);
+    int nexitfuncs;
+    void (*pyexitfunc)(void);
+
+    struct _pyobj_runtime_state obj;
+    struct _gc_runtime_state gc;
+    struct _pymem_runtime_state mem;
+    struct _warnings_runtime_state warnings;
+    struct _ceval_runtime_state ceval;
+    struct _gilstate_runtime_state gilstate;
+
+    // XXX Consolidate globals found via the check-c-globals script.
+} _PyRuntimeState;
+
+PyAPI_DATA(_PyRuntimeState) _PyRuntime;
+PyAPI_FUNC(void) _PyRuntimeState_Init(_PyRuntimeState *);
+PyAPI_FUNC(void) _PyRuntimeState_Fini(_PyRuntimeState *);
+
+#define _Py_CURRENTLY_FINALIZING(tstate) \
+    (_PyRuntime.finalizing == tstate)
+
+
+/* Other */
+
+PyAPI_FUNC(void) _PyInterpreterState_Enable(_PyRuntimeState *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_PYSTATE_H */
diff --git a/Include/internal/warnings.h b/Include/internal/warnings.h
new file mode 100644
index 00000000000..2878a28a2ee
--- /dev/null
+++ b/Include/internal/warnings.h
@@ -0,0 +1,21 @@
+#ifndef Py_INTERNAL_WARNINGS_H
+#define Py_INTERNAL_WARNINGS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "object.h"
+
+struct _warnings_runtime_state {
+    /* Both 'filters' and 'onceregistry' (once_registry below) can be set in
+       warnings.py; get_warnings_attr() will reset them accordingly. */
+    PyObject *filters;  /* List */
+    PyObject *once_registry;  /* Dict */
+    PyObject *default_action; /* String */
+    long filters_version;
+};
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_WARNINGS_H */
diff --git a/Include/object.h b/Include/object.h
index f5ed70b1129..b46d4c30e1e 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -1038,8 +1038,6 @@ with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL.
    Kept for binary compatibility of extensions using the stable ABI. */
 PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
 PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
-PyAPI_DATA(int) _PyTrash_delete_nesting;
-PyAPI_DATA(PyObject *) _PyTrash_delete_later;
 #endif /* !Py_LIMITED_API */
 
 /* The new thread-safe private API, invoked by the macros below. */
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
index 0d609ec2344..e1737b5972b 100644
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@@ -119,7 +119,7 @@ PyAPI_FUNC(void) _PyType_Fini(void);
 PyAPI_FUNC(void) _Py_HashRandomization_Fini(void);
 PyAPI_FUNC(void) PyAsyncGen_Fini(void);
 
-PyAPI_DATA(PyThreadState *) _Py_Finalizing;
+PyAPI_FUNC(int) _Py_IsFinalizing(void);
 #endif
 
 /* Signals */
diff --git a/Include/pystate.h b/Include/pystate.h
index f957d536dcd..d986e35ae3b 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -29,9 +29,10 @@ typedef struct {
     int use_hash_seed;
     unsigned long hash_seed;
     int _disable_importlib; /* Needed by freeze_importlib */
+    char *allocator;
 } _PyCoreConfig;
 
-#define _PyCoreConfig_INIT {0, -1, 0, 0}
+#define _PyCoreConfig_INIT {0, -1, 0, 0, NULL}
 
 /* Placeholders while working on the new configuration API
  *
@@ -57,6 +58,19 @@ typedef struct _is {
     PyObject *builtins;
     PyObject *importlib;
 
+    /* Used in Python/sysmodule.c. */
+    int check_interval;
+    PyObject *warnoptions;
+    PyObject *xoptions;
+
+    /* Used in Modules/_threadmodule.c. */
+    long num_threads;
+    /* Support for runtime thread stack size tuning.
+       A value of 0 means using the platform's default stack size
+       or the size specified by the THREAD_STACK_SIZE macro. */
+    /* Used in Python/thread.c. */
+    size_t pythread_stacksize;
+
     PyObject *codec_search_path;
     PyObject *codec_search_cache;
     PyObject *codec_error_registry;
@@ -190,9 +204,6 @@ typedef struct _ts {
 #endif
 
 
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(void) _PyInterpreterState_Init(void);
-#endif /* !Py_LIMITED_API */
 PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
 PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
 PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *);
@@ -249,7 +260,7 @@ PyAPI_FUNC(int) PyThreadState_SetAsyncExc(unsigned long, PyObject *);
 /* Assuming the current thread holds the GIL, this is the
    PyThreadState for the current thread. */
 #ifdef Py_BUILD_CORE
-PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current;
+#  define _PyThreadState_Current _PyRuntime.gilstate.tstate_current
 #  define PyThreadState_GET() \
              ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
 #else
@@ -303,10 +314,6 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE);
 PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void);
 
 #ifndef Py_LIMITED_API
-/* Issue #26558: Flag to disable PyGILState_Check().
-   If set to non-zero, PyGILState_Check() always return 1. */
-PyAPI_DATA(int) _PyGILState_check_enabled;
-
 /* Helper/diagnostic function - return 1 if the current thread
    currently holds the GIL, 0 otherwise.
 
@@ -341,11 +348,6 @@ PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *);
 typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_);
 #endif
 
-/* hook for PyEval_GetFrame(), requested for Psyco */
-#ifndef Py_LIMITED_API
-PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame;
-#endif
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 5f83e048760..0eb5c4dc501 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -984,6 +984,12 @@ PYTHON_HEADERS= \
 		pyconfig.h \
 		$(PARSER_HEADERS) \
 		$(srcdir)/Include/Python-ast.h \
+		$(srcdir)/Include/internal/ceval.h \
+		$(srcdir)/Include/internal/gil.h \
+		$(srcdir)/Include/internal/mem.h \
+		$(srcdir)/Include/internal/pymalloc.h \
+		$(srcdir)/Include/internal/pystate.h \
+		$(srcdir)/Include/internal/warnings.h \
 		$(DTRACE_HEADERS)
 
 $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
new file mode 100644
index 00000000000..d8e9d5eeea1
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst	
@@ -0,0 +1,2 @@
+Consolidate CPython's global runtime state under a single struct.  This
+improves discoverability of the runtime state.
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
index 33761a46667..e109b336466 100644
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -1,5 +1,7 @@
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 
 /* _functools module written and maintained
diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c
index ba0932c5d81..b2b9ade2c7c 100644
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@@ -9,6 +9,7 @@
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 #include "pythread.h"
 #include "_iomodule.h"
@@ -275,7 +276,7 @@ _enter_buffered_busy(buffered *self)
                      "reentrant call inside %R", self);
         return 0;
     }
-    relax_locking = (_Py_Finalizing != NULL);
+    relax_locking = _Py_IsFinalizing();
     Py_BEGIN_ALLOW_THREADS
     if (!relax_locking)
         st = PyThread_acquire_lock(self->lock, 1);
diff --git a/Modules/_json.c b/Modules/_json.c
index 54fc90cfece..f1da2302dd3 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -1,3 +1,10 @@
+
+/* Core extension modules are built-in on some platforms (e.g. Windows). */
+#ifdef Py_BUILD_CORE
+#define Py_BUILD_CORE_MODULE
+#undef Py_BUILD_CORE
+#endif
+
 #include "Python.h"
 #include "structmember.h"
 #include "accu.h"
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 25255368a10..5fbf0995f4c 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1,3 +1,10 @@
+
+/* Core extension modules are built-in on some platforms (e.g. Windows). */
+#ifdef Py_BUILD_CORE
+#define Py_BUILD_CORE_MODULE
+#undef Py_BUILD_CORE
+#endif
+
 #include "Python.h"
 #include "structmember.h"
 
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c
index d58ebc32926..2657a1fc68d 100644
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@@ -3,11 +3,11 @@
 /* Interface to Sjoerd's portable C thread library */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h" /* offsetof */
 #include "pythread.h"
 
 static PyObject *ThreadError;
-static long nb_threads = 0;
 static PyObject *str_dict;
 
 _Py_IDENTIFIER(stderr);
@@ -986,7 +986,7 @@ t_bootstrap(void *boot_raw)
     tstate->thread_id = PyThread_get_thread_ident();
     _PyThreadState_Init(tstate);
     PyEval_AcquireThread(tstate);
-    nb_threads++;
+    tstate->interp->num_threads++;
     res = PyObject_Call(boot->func, boot->args, boot->keyw);
     if (res == NULL) {
         if (PyErr_ExceptionMatches(PyExc_SystemExit))
@@ -1013,7 +1013,7 @@ t_bootstrap(void *boot_raw)
     Py_DECREF(boot->args);
     Py_XDECREF(boot->keyw);
     PyMem_DEL(boot_raw);
-    nb_threads--;
+    tstate->interp->num_threads--;
     PyThreadState_Clear(tstate);
     PyThreadState_DeleteCurrent();
     PyThread_exit_thread();
@@ -1152,7 +1152,8 @@ A thread's identity may be reused for another thread after it exits.");
 static PyObject *
 thread__count(PyObject *self)
 {
-    return PyLong_FromLong(nb_threads);
+    PyThreadState *tstate = PyThreadState_Get();
+    return PyLong_FromLong(tstate->interp->num_threads);
 }
 
 PyDoc_STRVAR(_count_doc,
@@ -1345,6 +1346,7 @@ PyInit__thread(void)
     PyObject *m, *d, *v;
     double time_max;
     double timeout_max;
+    PyThreadState *tstate = PyThreadState_Get();
 
     /* Initialize types: */
     if (PyType_Ready(&localdummytype) < 0)
@@ -1389,7 +1391,7 @@ PyInit__thread(void)
     if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0)
         return NULL;
 
-    nb_threads = 0;
+    tstate->interp->num_threads = 0;
 
     str_dict = PyUnicode_InternFromString("__dict__");
     if (str_dict == NULL)
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 682d0a3cdd8..a81ccafdda9 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -114,7 +114,7 @@ overlapped_dealloc(OverlappedObject *self)
         {
             /* The operation is no longer pending -- nothing to do. */
         }
-        else if (_Py_Finalizing == NULL)
+        else if (!_Py_IsFinalizing())
         {
             /* The operation is still pending -- give a warning.  This
                will probably only happen on Windows XP. */
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 4e5acf305b9..832e27ebe66 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -24,6 +24,8 @@
 */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "frameobject.h"        /* for PyFrame_ClearFreeList */
 #include "pydtrace.h"
 #include "pytime.h"             /* for _PyTime_GetMonotonicClock() */
@@ -39,133 +41,9 @@ module gc
 /* Get the object given the GC head */
 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
 
-/*** Global GC state ***/
-
-struct gc_generation {
-    PyGC_Head head;
-    int threshold; /* collection threshold */
-    int count; /* count of allocations or collections of younger
-                  generations */
-};
-
-/* If we change this, we need to change the default value in the signature of
-   gc.collect. */
-#define NUM_GENERATIONS 3
-#define GEN_HEAD(n) (&generations[n].head)
-
-/* linked lists of container objects */
-static struct gc_generation generations[NUM_GENERATIONS] = {
-    /* PyGC_Head,                               threshold,      count */
-    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
-    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
-    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
-};
-
-PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
-
-static int enabled = 1; /* automatic collection enabled? */
-
-/* true if we are currently running the collector */
-static int collecting = 0;
-
-/* list of uncollectable objects */
-static PyObject *garbage = NULL;
-
 /* Python string to use if unhandled exception occurs */
 static PyObject *gc_str = NULL;
 
-/* a list of callbacks to be invoked when collection is performed */
-static PyObject *callbacks = NULL;
-
-/* This is the number of objects that survived the last full collection. It
-   approximates the number of long lived objects tracked by the GC.
-
-   (by "full collection", we mean a collection of the oldest generation).
-*/
-static Py_ssize_t long_lived_total = 0;
-
-/* This is the number of objects that survived all "non-full" collections,
-   and are awaiting to undergo a full collection for the first time.
-
-*/
-static Py_ssize_t long_lived_pending = 0;
-
-/*
-   NOTE: about the counting of long-lived objects.
-
-   To limit the cost of garbage collection, there are two strategies;
-     - make each collection faster, e.g. by scanning fewer objects
-     - do less collections
-   This heuristic is about the latter strategy.
-
-   In addition to the various configurable thresholds, we only trigger a
-   full collection if the ratio
-    long_lived_pending / long_lived_total
-   is above a given value (hardwired to 25%).
-
-   The reason is that, while "non-full" collections (i.e., collections of
-   the young and middle generations) will always examine roughly the same
-   number of objects -- determined by the aforementioned thresholds --,
-   the cost of a full collection is proportional to the total number of
-   long-lived objects, which is virtually unbounded.
-
-   Indeed, it has been remarked that doing a full collection every
-   <constant number> of object creations entails a dramatic performance
-   degradation in workloads which consist in creating and storing lots of
-   long-lived objects (e.g. building a large list of GC-tracked objects would
-   show quadratic performance, instead of linear as expected: see issue #4074).
-
-   Using the above ratio, instead, yields amortized linear performance in
-   the total number of objects (the effect of which can be summarized
-   thusly: "each full garbage collection is more and more costly as the
-   number of objects grows, but we do fewer and fewer of them").
-
-   This heuristic was suggested by Martin von Löwis on python-dev in
-   June 2008. His original analysis and proposal can be found at:
-    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-*/
-
-/*
-   NOTE: about untracking of mutable objects.
-
-   Certain types of container cannot participate in a reference cycle, and
-   so do not need to be tracked by the garbage collector. Untracking these
-   objects reduces the cost of garbage collections. However, determining
-   which objects may be untracked is not free, and the costs must be
-   weighed against the benefits for garbage collection.
-
-   There are two possible strategies for when to untrack a container:
-
-   i) When the container is created.
-   ii) When the container is examined by the garbage collector.
-
-   Tuples containing only immutable objects (integers, strings etc, and
-   recursively, tuples of immutable objects) do not need to be tracked.
-   The interpreter creates a large number of tuples, many of which will
-   not survive until garbage collection. It is therefore not worthwhile
-   to untrack eligible tuples at creation time.
-
-   Instead, all tuples except the empty tuple are tracked when created.
-   During garbage collection it is determined whether any surviving tuples
-   can be untracked. A tuple can be untracked if all of its contents are
-   already not tracked. Tuples are examined for untracking in all garbage
-   collection cycles. It may take more than one cycle to untrack a tuple.
-
-   Dictionaries containing only immutable objects also do not need to be
-   tracked. Dictionaries are untracked when created. If a tracked item is
-   inserted into a dictionary (either as a key or value), the dictionary
-   becomes tracked. During a full garbage collection (all generations),
-   the collector will untrack any dictionaries whose contents are not
-   tracked.
-
-   The module provides the python function is_tracked(obj), which returns
-   the CURRENT tracking status of the object. Subsequent garbage
-   collections may change the tracking status of the object.
-
-   Untracking of certain containers was introduced in issue #4688, and
-   the algorithm was refined in response to issue #14775.
-*/
-
 /* set for debugging information */
 #define DEBUG_STATS             (1<<0) /* print collection statistics */
 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
@@ -174,19 +52,26 @@ static Py_ssize_t long_lived_pending = 0;
 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
                 DEBUG_UNCOLLECTABLE | \
                 DEBUG_SAVEALL
-static int debug;
 
-/* Running stats per generation */
-struct gc_generation_stats {
-    /* total number of collections */
-    Py_ssize_t collections;
-    /* total number of collected objects */
-    Py_ssize_t collected;
-    /* total number of uncollectable objects (put into gc.garbage) */
-    Py_ssize_t uncollectable;
-};
+#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head)
 
-static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
+void
+_PyGC_Initialize(struct _gc_runtime_state *state)
+{
+    /* Enable automatic collection; each generation head starts self-linked. */
+    state->enabled = 1; /* automatic collection enabled? */
+#define _GEN_HEAD(n) (&state->generations[n].head)
+    struct gc_generation generations[NUM_GENERATIONS] = {
+        /* PyGC_Head,                                 threshold,      count */
+        {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}},           700,            0},
+        {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}},           10,             0},
+        {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}},           10,             0},
+    };
+    for (int i = 0; i < NUM_GENERATIONS; i++) {
+        state->generations[i] = generations[i];
+    }
+    state->generation0 = _GEN_HEAD(0);  /* head in `state`, not _PyRuntime's */
+}
 
 /*--------------------------------------------------------------------------
 gc_refs values.
@@ -766,16 +651,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
 {
     PyGC_Head *gc = finalizers->gc.gc_next;
 
-    if (garbage == NULL) {
-        garbage = PyList_New(0);
-        if (garbage == NULL)
+    if (_PyRuntime.gc.garbage == NULL) {
+        _PyRuntime.gc.garbage = PyList_New(0);
+        if (_PyRuntime.gc.garbage == NULL)
             Py_FatalError("gc couldn't create gc.garbage list");
     }
     for (; gc != finalizers; gc = gc->gc.gc_next) {
         PyObject *op = FROM_GC(gc);
 
-        if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
-            if (PyList_Append(garbage, op) < 0)
+        if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
+            if (PyList_Append(_PyRuntime.gc.garbage, op) < 0)
                 return -1;
         }
     }
@@ -865,8 +750,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
         PyGC_Head *gc = collectable->gc.gc_next;
         PyObject *op = FROM_GC(gc);
 
-        if (debug & DEBUG_SAVEALL) {
-            PyList_Append(garbage, op);
+        if (_PyRuntime.gc.debug & DEBUG_SAVEALL) {
+            PyList_Append(_PyRuntime.gc.garbage, op);
         }
         else {
             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
@@ -919,9 +804,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     PyGC_Head *gc;
     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
 
-    struct gc_generation_stats *stats = &generation_stats[generation];
+    struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];
 
-    if (debug & DEBUG_STATS) {
+    if (_PyRuntime.gc.debug & DEBUG_STATS) {
         PySys_WriteStderr("gc: collecting generation %d...\n",
                           generation);
         PySys_WriteStderr("gc: objects in each generation:");
@@ -938,9 +823,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
 
     /* update collection and allocation counters */
     if (generation+1 < NUM_GENERATIONS)
-        generations[generation+1].count += 1;
+        _PyRuntime.gc.generations[generation+1].count += 1;
     for (i = 0; i <= generation; i++)
-        generations[i].count = 0;
+        _PyRuntime.gc.generations[i].count = 0;
 
     /* merge younger generations with one we are currently collecting */
     for (i = 0; i < generation; i++) {
@@ -974,7 +859,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     /* Move reachable objects to next generation. */
     if (young != old) {
         if (generation == NUM_GENERATIONS - 2) {
-            long_lived_pending += gc_list_size(young);
+            _PyRuntime.gc.long_lived_pending += gc_list_size(young);
         }
         gc_list_merge(young, old);
     }
@@ -982,8 +867,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
         /* We only untrack dicts in full collections, to avoid quadratic
            dict build-up. See issue #14775. */
         untrack_dicts(young);
-        long_lived_pending = 0;
-        long_lived_total = gc_list_size(young);
+        _PyRuntime.gc.long_lived_pending = 0;
+        _PyRuntime.gc.long_lived_total = gc_list_size(young);
     }
 
     /* All objects in unreachable are trash, but objects reachable from
@@ -1003,7 +888,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
     for (gc = unreachable.gc.gc_next; gc != &unreachable;
                     gc = gc->gc.gc_next) {
         m++;
-        if (debug & DEBUG_COLLECTABLE) {
+        if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
             debug_cycle("collectable", FROM_GC(gc));
         }
     }
@@ -1032,10 +917,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
          gc != &finalizers;
          gc = gc->gc.gc_next) {
         n++;
-        if (debug & DEBUG_UNCOLLECTABLE)
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
             debug_cycle("uncollectable", FROM_GC(gc));
     }
-    if (debug & DEBUG_STATS) {
+    if (_PyRuntime.gc.debug & DEBUG_STATS) {
         _PyTime_t t2 = _PyTime_GetMonotonicClock();
 
         if (m == 0 && n == 0)
@@ -1098,11 +983,11 @@ invoke_gc_callback(const char *phase, int generation,
     PyObject *info = NULL;
 
     /* we may get called very early */
-    if (callbacks == NULL)
+    if (_PyRuntime.gc.callbacks == NULL)
         return;
     /* The local variable cannot be rebound, check it for sanity */
-    assert(callbacks != NULL && PyList_CheckExact(callbacks));
-    if (PyList_GET_SIZE(callbacks) != 0) {
+    assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks));
+    if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) {
         info = Py_BuildValue("{sisnsn}",
             "generation", generation,
             "collected", collected,
@@ -1112,8 +997,8 @@ invoke_gc_callback(const char *phase, int generation,
             return;
         }
     }
-    for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
+    for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i);
         Py_INCREF(cb); /* make sure cb doesn't go away */
         r = PyObject_CallFunction(cb, "sO", phase, info);
         Py_XDECREF(r);
@@ -1147,13 +1032,13 @@ collect_generations(void)
      * exceeds the threshold.  Objects in the that generation and
      * generations younger than it will be collected. */
     for (i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (generations[i].count > generations[i].threshold) {
+        if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) {
             /* Avoid quadratic performance degradation in number
                of tracked objects. See comments at the beginning
                of this file, and issue #4074.
             */
             if (i == NUM_GENERATIONS - 1
-                && long_lived_pending < long_lived_total / 4)
+                && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4)
                 continue;
             n = collect_with_callback(i);
             break;
@@ -1174,7 +1059,7 @@ static PyObject *
 gc_enable_impl(PyObject *module)
 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
 {
-    enabled = 1;
+    _PyRuntime.gc.enabled = 1;
     Py_RETURN_NONE;
 }
 
@@ -1188,7 +1073,7 @@ static PyObject *
 gc_disable_impl(PyObject *module)
 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
 {
-    enabled = 0;
+    _PyRuntime.gc.enabled = 0;
     Py_RETURN_NONE;
 }
 
@@ -1202,7 +1087,7 @@ static int
 gc_isenabled_impl(PyObject *module)
 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
 {
-    return enabled;
+    return _PyRuntime.gc.enabled;
 }
 
 /*[clinic input]
@@ -1230,12 +1115,12 @@ gc_collect_impl(PyObject *module, int generation)
         return -1;
     }
 
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
         n = 0; /* already collecting, don't do anything */
     else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         n = collect_with_callback(generation);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
 
     return n;
@@ -1263,7 +1148,7 @@ static PyObject *
 gc_set_debug_impl(PyObject *module, int flags)
 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
 {
-    debug = flags;
+    _PyRuntime.gc.debug = flags;
 
     Py_RETURN_NONE;
 }
@@ -1278,7 +1163,7 @@ static int
 gc_get_debug_impl(PyObject *module)
 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
 {
-    return debug;
+    return _PyRuntime.gc.debug;
 }
 
 PyDoc_STRVAR(gc_set_thresh__doc__,
@@ -1292,13 +1177,13 @@ gc_set_thresh(PyObject *self, PyObject *args)
 {
     int i;
     if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
-                          &generations[0].threshold,
-                          &generations[1].threshold,
-                          &generations[2].threshold))
+                          &_PyRuntime.gc.generations[0].threshold,
+                          &_PyRuntime.gc.generations[1].threshold,
+                          &_PyRuntime.gc.generations[2].threshold))
         return NULL;
     for (i = 2; i < NUM_GENERATIONS; i++) {
         /* generations higher than 2 get the same threshold */
-        generations[i].threshold = generations[2].threshold;
+        _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold;
     }
 
     Py_RETURN_NONE;
@@ -1315,9 +1200,9 @@ gc_get_threshold_impl(PyObject *module)
 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
 {
     return Py_BuildValue("(iii)",
-                         generations[0].threshold,
-                         generations[1].threshold,
-                         generations[2].threshold);
+                         _PyRuntime.gc.generations[0].threshold,
+                         _PyRuntime.gc.generations[1].threshold,
+                         _PyRuntime.gc.generations[2].threshold);
 }
 
 /*[clinic input]
@@ -1331,9 +1216,9 @@ gc_get_count_impl(PyObject *module)
 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
 {
     return Py_BuildValue("(iii)",
-                         generations[0].count,
-                         generations[1].count,
-                         generations[2].count);
+                         _PyRuntime.gc.generations[0].count,
+                         _PyRuntime.gc.generations[1].count,
+                         _PyRuntime.gc.generations[2].count);
 }
 
 static int
@@ -1464,7 +1349,7 @@ gc_get_stats_impl(PyObject *module)
     /* To get consistent values despite allocations while constructing
        the result list, we use a snapshot of the running stats. */
     for (i = 0; i < NUM_GENERATIONS; i++) {
-        stats[i] = generation_stats[i];
+        stats[i] = _PyRuntime.gc.generation_stats[i];
     }
 
     result = PyList_New(0);
@@ -1581,22 +1466,22 @@ PyInit_gc(void)
     if (m == NULL)
         return NULL;
 
-    if (garbage == NULL) {
-        garbage = PyList_New(0);
-        if (garbage == NULL)
+    if (_PyRuntime.gc.garbage == NULL) {
+        _PyRuntime.gc.garbage = PyList_New(0);
+        if (_PyRuntime.gc.garbage == NULL)
             return NULL;
     }
-    Py_INCREF(garbage);
-    if (PyModule_AddObject(m, "garbage", garbage) < 0)
+    Py_INCREF(_PyRuntime.gc.garbage);
+    if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0)
         return NULL;
 
-    if (callbacks == NULL) {
-        callbacks = PyList_New(0);
-        if (callbacks == NULL)
+    if (_PyRuntime.gc.callbacks == NULL) {
+        _PyRuntime.gc.callbacks = PyList_New(0);
+        if (_PyRuntime.gc.callbacks == NULL)
             return NULL;
     }
-    Py_INCREF(callbacks);
-    if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
+    Py_INCREF(_PyRuntime.gc.callbacks);
+    if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0)
         return NULL;
 
 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
@@ -1615,12 +1500,12 @@ PyGC_Collect(void)
 {
     Py_ssize_t n;
 
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
         n = 0; /* already collecting, don't do anything */
     else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         n = collect_with_callback(NUM_GENERATIONS - 1);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
 
     return n;
@@ -1629,7 +1514,7 @@ PyGC_Collect(void)
 Py_ssize_t
 _PyGC_CollectIfEnabled(void)
 {
-    if (!enabled)
+    if (!_PyRuntime.gc.enabled)
         return 0;
 
     return PyGC_Collect();
@@ -1646,12 +1531,12 @@ _PyGC_CollectNoFail(void)
        during interpreter shutdown (and then never finish it).
        See http://bugs.python.org/issue8713#msg195178 for an example.
        */
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
         n = 0;
     else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
     return n;
 }
@@ -1659,10 +1544,10 @@ _PyGC_CollectNoFail(void)
 void
 _PyGC_DumpShutdownStats(void)
 {
-    if (!(debug & DEBUG_SAVEALL)
-        && garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
+    if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL)
+        && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) {
         char *message;
-        if (debug & DEBUG_UNCOLLECTABLE)
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
             message = "gc: %zd uncollectable objects at " \
                 "shutdown";
         else
@@ -1673,13 +1558,13 @@ _PyGC_DumpShutdownStats(void)
            already. */
         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                      "gc", NULL, message,
-                                     PyList_GET_SIZE(garbage)))
+                                     PyList_GET_SIZE(_PyRuntime.gc.garbage)))
             PyErr_WriteUnraisable(NULL);
-        if (debug & DEBUG_UNCOLLECTABLE) {
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) {
             PyObject *repr = NULL, *bytes = NULL;
-            repr = PyObject_Repr(garbage);
+            repr = PyObject_Repr(_PyRuntime.gc.garbage);
             if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
-                PyErr_WriteUnraisable(garbage);
+                PyErr_WriteUnraisable(_PyRuntime.gc.garbage);
             else {
                 PySys_WriteStderr(
                     "      %s\n",
@@ -1695,7 +1580,7 @@ _PyGC_DumpShutdownStats(void)
 void
 _PyGC_Fini(void)
 {
-    Py_CLEAR(callbacks);
+    Py_CLEAR(_PyRuntime.gc.callbacks);
 }
 
 /* for debugging */
@@ -1746,15 +1631,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
         return PyErr_NoMemory();
     g->gc.gc_refs = 0;
     _PyGCHead_SET_REFS(g, GC_UNTRACKED);
-    generations[0].count++; /* number of allocated GC objects */
-    if (generations[0].count > generations[0].threshold &&
-        enabled &&
-        generations[0].threshold &&
-        !collecting &&
+    _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */
+    if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold &&
+        _PyRuntime.gc.enabled &&
+        _PyRuntime.gc.generations[0].threshold &&
+        !_PyRuntime.gc.collecting &&
         !PyErr_Occurred()) {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         collect_generations();
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
     op = FROM_GC(g);
     return op;
@@ -1819,8 +1704,8 @@ PyObject_GC_Del(void *op)
     PyGC_Head *g = AS_GC(op);
     if (IS_TRACKED(op))
         gc_list_remove(g);
-    if (generations[0].count > 0) {
-        generations[0].count--;
+    if (_PyRuntime.gc.generations[0].count > 0) {
+        _PyRuntime.gc.generations[0].count--;
     }
     PyObject_FREE(g);
 }
diff --git a/Modules/main.c b/Modules/main.c
index 08b22760de1..3e347dc8e24 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -598,16 +598,10 @@ Py_Main(int argc, wchar_t **argv)
         }
     }
 
-    char *pymalloc = Py_GETENV("PYTHONMALLOC");
-    if (_PyMem_SetupAllocators(pymalloc) < 0) {
-        fprintf(stderr,
-            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", pymalloc);
-        exit(1);
-    }
-
     /* Initialize the core language runtime */
     Py_IgnoreEnvironmentFlag = core_config.ignore_environment;
     core_config._disable_importlib = 0;
+    core_config.allocator = Py_GETENV("PYTHONMALLOC");
     _Py_InitializeCore(&core_config);
 
     /* Reprocess the command line with the language runtime available */
diff --git a/Modules/zipimport.c b/Modules/zipimport.c
index d710360f95d..141ada5adc5 100644
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 #include "osdefs.h"
 #include "marshal.h"
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 66ac0e3ea73..998bcb12b80 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -1,6 +1,7 @@
 /* Abstract Object Interface (many thanks to Jim Fulton) */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include <ctype.h>
 #include "structmember.h" /* we need the offsetof() macro from there */
 #include "longintrepr.h"
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 857a1aa070e..d09b1f22b44 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -2,6 +2,8 @@
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 #include "bytes_methods.h"
 #include "bytesobject.h"
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index e95ab9c63b1..d91cb7d8724 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3,6 +3,8 @@
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 
 #include "bytes_methods.h"
 #include "pystrhex.h"
diff --git a/Objects/call.c b/Objects/call.c
index 92464327fbc..91e60783117 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "internal/pystate.h"
 #include "frameobject.h"
 
 
diff --git a/Objects/cellobject.c b/Objects/cellobject.c
index 6af93b00308..3f6389feaee 100644
--- a/Objects/cellobject.c
+++ b/Objects/cellobject.c
@@ -1,6 +1,8 @@
 /* Cell object implementation */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 
 PyObject *
 PyCell_New(PyObject *obj)
diff --git a/Objects/classobject.c b/Objects/classobject.c
index b0ed0230569..063c24a7171 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -1,6 +1,8 @@
 /* Class object implementation (dead now except for methods) */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 
 #define TP_DESCR_GET(t) ((t)->tp_descr_get)
diff --git a/Objects/descrobject.c b/Objects/descrobject.c
index c20ca9be4df..edead26a7d5 100644
--- a/Objects/descrobject.c
+++ b/Objects/descrobject.c
@@ -1,6 +1,7 @@
 /* Descriptors -- a new, flexible way to describe attributes */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h" /* Why is this not included in Python.h? */
 
 /*[clinic input]
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index f055170750d..81c7f7f2436 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -111,6 +111,7 @@ converting the dict to the combined table.
 #define PyDict_MINSIZE 8
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "dict-common.h"
 #include "stringlib/eq.h"    /* to get unicode_eq() */
 
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 13c1be90d88..1b70be786a4 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -6,6 +6,8 @@
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 #include "osdefs.h"
 
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index b3f0b65e51c..8739596bfc3 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -1,6 +1,7 @@
 /* Frame object implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 
 #include "code.h"
 #include "frameobject.h"
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index e440258d7de..d376f9cab90 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -2,6 +2,8 @@
 /* Function object implementation */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "code.h"
 #include "structmember.h"
 
diff --git a/Objects/genobject.c b/Objects/genobject.c
index 8c2213e5bf2..1a8c37abf23 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -1,6 +1,7 @@
 /* Generator object implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "frameobject.h"
 #include "structmember.h"
 #include "opcode.h"
diff --git a/Objects/iterobject.c b/Objects/iterobject.c
index 75b2fcbd411..252169acbfd 100644
--- a/Objects/iterobject.c
+++ b/Objects/iterobject.c
@@ -1,6 +1,8 @@
 /* Iterator objects */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 
 typedef struct {
     PyObject_HEAD
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 314a13c4c8f..858532252e9 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -1,6 +1,7 @@
 /* List object implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "accu.h"
 
 #ifdef STDC_HEADERS
diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c
index 1b95af2d4a8..ccf45ffc582 100644
--- a/Objects/memoryobject.c
+++ b/Objects/memoryobject.c
@@ -1,6 +1,8 @@
 /* Memoryview object implementation */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "pystrhex.h"
 #include <stddef.h>
 
diff --git a/Objects/methodobject.c b/Objects/methodobject.c
index 4050922e03c..2cf314660d0 100644
--- a/Objects/methodobject.c
+++ b/Objects/methodobject.c
@@ -2,6 +2,8 @@
 /* Method object implementation */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 
 /* Free list for method objects to safe malloc/free overhead
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 89afe290323..2be49fbda38 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -2,6 +2,7 @@
 /* Module object implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 
 static Py_ssize_t max_module_number;
diff --git a/Objects/object.c b/Objects/object.c
index 638f7e7c9ba..74893e38c71 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2,6 +2,7 @@
 /* Generic object operations; and implementation of None */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "frameobject.h"
 
 #ifdef __cplusplus
@@ -2022,14 +2023,6 @@ finally:
 
 /* Trashcan support. */
 
-/* Current call-stack depth of tp_dealloc calls. */
-int _PyTrash_delete_nesting = 0;
-
-/* List of objects that still need to be cleaned up, singly linked via their
- * gc headers' gc_prev pointers.
- */
-PyObject *_PyTrash_delete_later = NULL;
-
 /* Add op to the _PyTrash_delete_later list.  Called when the current
  * call-stack depth gets large.  op must be a currently untracked gc'ed
  * object, with refcount 0.  Py_DECREF must already have been called on it.
@@ -2040,8 +2033,8 @@ _PyTrash_deposit_object(PyObject *op)
     assert(PyObject_IS_GC(op));
     assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
     assert(op->ob_refcnt == 0);
-    _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later;
-    _PyTrash_delete_later = op;
+    _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyRuntime.gc.trash_delete_later;
+    _PyRuntime.gc.trash_delete_later = op;
 }
 
 /* The equivalent API, using per-thread state recursion info */
@@ -2062,11 +2055,11 @@ _PyTrash_thread_deposit_object(PyObject *op)
 void
 _PyTrash_destroy_chain(void)
 {
-    while (_PyTrash_delete_later) {
-        PyObject *op = _PyTrash_delete_later;
+    while (_PyRuntime.gc.trash_delete_later) {
+        PyObject *op = _PyRuntime.gc.trash_delete_later;
         destructor dealloc = Py_TYPE(op)->tp_dealloc;
 
-        _PyTrash_delete_later =
+        _PyRuntime.gc.trash_delete_later =
             (PyObject*) _Py_AS_GC(op)->gc.gc_prev;
 
         /* Call the deallocator directly.  This used to try to
@@ -2076,9 +2069,9 @@ _PyTrash_destroy_chain(void)
          * up distorting allocation statistics.
          */
         assert(op->ob_refcnt == 0);
-        ++_PyTrash_delete_nesting;
+        ++_PyRuntime.gc.trash_delete_nesting;
         (*dealloc)(op);
-        --_PyTrash_delete_nesting;
+        --_PyRuntime.gc.trash_delete_nesting;
     }
 }
 
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index af9cf7b92c0..57edf97e206 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1,4 +1,6 @@
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 
 #include <stdbool.h>
 
@@ -178,7 +180,9 @@ static struct {
 #define PYDBG_FUNCS \
     _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
 
-static PyMemAllocatorEx _PyMem_Raw = {
+
+#define _PyMem_Raw _PyRuntime.mem.allocators.raw
+static const PyMemAllocatorEx _pymem_raw = {
 #ifdef Py_DEBUG
     &_PyMem_Debug.raw, PYRAWDBG_FUNCS
 #else
@@ -186,7 +190,8 @@ static PyMemAllocatorEx _PyMem_Raw = {
 #endif
     };
 
-static PyMemAllocatorEx _PyMem = {
+#define _PyMem _PyRuntime.mem.allocators.mem
+static const PyMemAllocatorEx _pymem = {
 #ifdef Py_DEBUG
     &_PyMem_Debug.mem, PYDBG_FUNCS
 #else
@@ -194,7 +199,8 @@ static PyMemAllocatorEx _PyMem = {
 #endif
     };
 
-static PyMemAllocatorEx _PyObject = {
+#define _PyObject _PyRuntime.mem.allocators.obj
+static const PyMemAllocatorEx _pyobject = {
 #ifdef Py_DEBUG
     &_PyMem_Debug.obj, PYDBG_FUNCS
 #else
@@ -267,7 +273,7 @@ _PyMem_SetupAllocators(const char *opt)
 #undef PYRAWDBG_FUNCS
 #undef PYDBG_FUNCS
 
-static PyObjectArenaAllocator _PyObject_Arena = {NULL,
+static const PyObjectArenaAllocator _PyObject_Arena = {NULL,
 #ifdef MS_WINDOWS
     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
 #elif defined(ARENAS_USE_MMAP)
@@ -277,6 +283,34 @@ static PyObjectArenaAllocator _PyObject_Arena = {NULL,
 #endif
     };
 
+void
+_PyObject_Initialize(struct _pyobj_runtime_state *state)
+{
+    state->allocator_arenas = _PyObject_Arena;  /* file-level default */
+}
+
+void
+_PyMem_Initialize(struct _pymem_runtime_state *state)
+{
+    /* Install the file-level defaults into the raw, mem and obj slots. */
+    state->allocators.raw = _pymem_raw;
+    state->allocators.mem = _pymem;
+    state->allocators.obj = _pyobject;
+
+#ifdef WITH_PYMALLOC
+    /* Make both slots of each usedpools[] pair hold the address two pyblock
+       pointers before that pair; pairs are walked in groups of 8. */
+    for (int i = 0; i < 8 && i * 8 < NB_SMALL_SIZE_CLASSES; i++) {
+        for (int j = 0; j < 8; j++) {
+            poolp *pair = &state->usedpools[2 * (i * 8 + j)];
+            poolp val = (poolp)((uint8_t *)pair - 2*sizeof(pyblock *));
+            pair[0] = val;
+            pair[1] = val;
+        }
+    }
+#endif /* WITH_PYMALLOC */
+}
+
 #ifdef WITH_PYMALLOC
 static int
 _PyMem_DebugEnabled(void)
@@ -363,13 +397,13 @@ PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 void
 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
 {
-    *allocator = _PyObject_Arena;
+    *allocator = _PyRuntime.obj.allocator_arenas;
 }
 
 void
 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
 {
-    _PyObject_Arena = *allocator;
+    _PyRuntime.obj.allocator_arenas = *allocator;
 }
 
 void *
@@ -404,7 +438,8 @@ PyMem_RawRealloc(void *ptr, size_t new_size)
     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
 }
 
-void PyMem_RawFree(void *ptr)
+void
+PyMem_RawFree(void *ptr)
 {
     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
 }
@@ -521,497 +556,10 @@ PyObject_Free(void *ptr)
 static int running_on_valgrind = -1;
 #endif
 
-/* An object allocator for Python.
-
-   Here is an introduction to the layers of the Python memory architecture,
-   showing where the object allocator is actually used (layer +2), It is
-   called for every object allocation and deallocation (PyObject_New/Del),
-   unless the object-specific allocators implement a proprietary allocation
-   scheme (ex.: ints use a simple free list). This is also the place where
-   the cyclic garbage collector operates selectively on container objects.
-
-
-    Object-specific allocators
-    _____   ______   ______       ________
-   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
-+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
-    _______________________________       |                           |
-   [   Python's object allocator   ]      |                           |
-+2 | ####### Object memory ####### | <------ Internal buffers ------> |
-    ______________________________________________________________    |
-   [          Python's raw memory allocator (PyMem_ API)          ]   |
-+1 | <----- Python memory (under PyMem manager's control) ------> |   |
-    __________________________________________________________________
-   [    Underlying general-purpose allocator (ex: C library malloc)   ]
- 0 | <------ Virtual memory allocated for the python process -------> |
-
-   =========================================================================
-    _______________________________________________________________________
-   [                OS-specific Virtual Memory Manager (VMM)               ]
--1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
-    __________________________________   __________________________________
-   [                                  ] [                                  ]
--2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
-
-*/
-/*==========================================================================*/
-
-/* A fast, special-purpose memory allocator for small blocks, to be used
-   on top of a general-purpose malloc -- heavily based on previous art. */
-
-/* Vladimir Marangozov -- August 2000 */
-
-/*
- * "Memory management is where the rubber meets the road -- if we do the wrong
- * thing at any level, the results will not be good. And if we don't make the
- * levels work well together, we are in serious trouble." (1)
- *
- * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
- *    "Dynamic Storage Allocation: A Survey and Critical Review",
- *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
- */
-
-/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
-
-/*==========================================================================*/
-
-/*
- * Allocation strategy abstract:
- *
- * For small requests, the allocator sub-allocates <Big> blocks of memory.
- * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
- * system's allocator.
- *
- * Small requests are grouped in size classes spaced 8 bytes apart, due
- * to the required valid alignment of the returned address. Requests of
- * a particular size are serviced from memory pools of 4K (one VMM page).
- * Pools are fragmented on demand and contain free lists of blocks of one
- * particular size class. In other words, there is a fixed-size allocator
- * for each size class. Free pools are shared by the different allocators
- * thus minimizing the space reserved for a particular size class.
- *
- * This allocation strategy is a variant of what is known as "simple
- * segregated storage based on array of free lists". The main drawback of
- * simple segregated storage is that we might end up with lot of reserved
- * memory for the different free lists, which degenerate in time. To avoid
- * this, we partition each free list in pools and we share dynamically the
- * reserved space between all free lists. This technique is quite efficient
- * for memory intensive programs which allocate mainly small-sized blocks.
- *
- * For small requests we have the following table:
- *
- * Request in bytes     Size of allocated block      Size class idx
- * ----------------------------------------------------------------
- *        1-8                     8                       0
- *        9-16                   16                       1
- *       17-24                   24                       2
- *       25-32                   32                       3
- *       33-40                   40                       4
- *       41-48                   48                       5
- *       49-56                   56                       6
- *       57-64                   64                       7
- *       65-72                   72                       8
- *        ...                   ...                     ...
- *      497-504                 504                      62
- *      505-512                 512                      63
- *
- *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
- *      allocator.
- */
-
-/*==========================================================================*/
-
-/*
- * -- Main tunable settings section --
- */
-
-/*
- * Alignment of addresses returned to the user. 8-bytes alignment works
- * on most current architectures (with 32-bit or 64-bit address busses).
- * The alignment value is also used for grouping small requests in size
- * classes spaced ALIGNMENT bytes apart.
- *
- * You shouldn't change this unless you know what you are doing.
- */
-#define ALIGNMENT               8               /* must be 2^N */
-#define ALIGNMENT_SHIFT         3
-
-/* Return the number of bytes in size class I, as a uint. */
-#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
-
-/*
- * Max size threshold below which malloc requests are considered to be
- * small enough in order to use preallocated memory pools. You can tune
- * this value according to your application behaviour and memory needs.
- *
- * Note: a size threshold of 512 guarantees that newly created dictionaries
- * will be allocated from preallocated memory pools on 64-bit.
- *
- * The following invariants must hold:
- *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
- *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
- *
- * Although not required, for better performance and space efficiency,
- * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
- */
-#define SMALL_REQUEST_THRESHOLD 512
-#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
-
-/*
- * The system's VMM page size can be obtained on most unices with a
- * getpagesize() call or deduced from various header files. To make
- * things simpler, we assume that it is 4K, which is OK for most systems.
- * It is probably better if this is the native page size, but it doesn't
- * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
- * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
- * violation fault.  4K is apparently OK for all the platforms that python
- * currently targets.
- */
-#define SYSTEM_PAGE_SIZE        (4 * 1024)
-#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
-
-/*
- * Maximum amount of memory managed by the allocator for small requests.
- */
-#ifdef WITH_MEMORY_LIMITS
-#ifndef SMALL_MEMORY_LIMIT
-#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
-#endif
-#endif
-
-/*
- * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
- * on a page boundary. This is a reserved virtual address space for the
- * current process (obtained through a malloc()/mmap() call). In no way this
- * means that the memory arenas will be used entirely. A malloc(<Big>) is
- * usually an address range reservation for <Big> bytes, unless all pages within
- * this space are referenced subsequently. So malloc'ing big blocks and not
- * using them does not mean "wasting memory". It's an addressable range
- * wastage...
- *
- * Arenas are allocated with mmap() on systems supporting anonymous memory
- * mappings to reduce heap fragmentation.
- */
-#define ARENA_SIZE              (256 << 10)     /* 256KB */
-
-#ifdef WITH_MEMORY_LIMITS
-#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
-#endif
-
-/*
- * Size of the pools used for small blocks. Should be a power of 2,
- * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
- */
-#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
-#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
-
-/*
- * -- End of tunable settings section --
- */
-
-/*==========================================================================*/
-
-/*
- * Locking
- *
- * To reduce lock contention, it would probably be better to refine the
- * crude function locking with per size class locking. I'm not positive
- * however, whether it's worth switching to such locking policy because
- * of the performance penalty it might introduce.
- *
- * The following macros describe the simplest (should also be the fastest)
- * lock object on a particular platform and the init/fini/lock/unlock
- * operations on it. The locks defined here are not expected to be recursive
- * because it is assumed that they will always be called in the order:
- * INIT, [LOCK, UNLOCK]*, FINI.
- */
-
-/*
- * Python's threads are serialized, so object malloc locking is disabled.
- */
-#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
-#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
-#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
-#define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
-#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
-
-/* When you say memory, my mind reasons in terms of (pointers to) blocks */
-typedef uint8_t block;
-
-/* Pool for small blocks. */
-struct pool_header {
-    union { block *_padding;
-            uint count; } ref;          /* number of allocated blocks    */
-    block *freeblock;                   /* pool's free list head         */
-    struct pool_header *nextpool;       /* next pool of this size class  */
-    struct pool_header *prevpool;       /* previous pool       ""        */
-    uint arenaindex;                    /* index into arenas of base adr */
-    uint szidx;                         /* block size class index        */
-    uint nextoffset;                    /* bytes to virgin block         */
-    uint maxnextoffset;                 /* largest valid nextoffset      */
-};
-
-typedef struct pool_header *poolp;
-
-/* Record keeping for arenas. */
-struct arena_object {
-    /* The address of the arena, as returned by malloc.  Note that 0
-     * will never be returned by a successful malloc, and is used
-     * here to mark an arena_object that doesn't correspond to an
-     * allocated arena.
-     */
-    uintptr_t address;
-
-    /* Pool-aligned pointer to the next pool to be carved off. */
-    block* pool_address;
-
-    /* The number of available pools in the arena:  free pools + never-
-     * allocated pools.
-     */
-    uint nfreepools;
-
-    /* The total number of pools in the arena, whether or not available. */
-    uint ntotalpools;
-
-    /* Singly-linked list of available pools. */
-    struct pool_header* freepools;
-
-    /* Whenever this arena_object is not associated with an allocated
-     * arena, the nextarena member is used to link all unassociated
-     * arena_objects in the singly-linked `unused_arena_objects` list.
-     * The prevarena member is unused in this case.
-     *
-     * When this arena_object is associated with an allocated arena
-     * with at least one available pool, both members are used in the
-     * doubly-linked `usable_arenas` list, which is maintained in
-     * increasing order of `nfreepools` values.
-     *
-     * Else this arena_object is associated with an allocated arena
-     * all of whose pools are in use.  `nextarena` and `prevarena`
-     * are both meaningless in this case.
-     */
-    struct arena_object* nextarena;
-    struct arena_object* prevarena;
-};
-
-#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
-
-#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
-
-/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
-#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
-
-/* Return total number of blocks in pool of size index I, as a uint. */
-#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
-
-/*==========================================================================*/
-
-/*
- * This malloc lock
- */
-SIMPLELOCK_DECL(_malloc_lock)
-#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
-#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
-#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
-#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
-
-/*
- * Pool table -- headed, circular, doubly-linked lists of partially used pools.
-
-This is involved.  For an index i, usedpools[i+i] is the header for a list of
-all partially used pools holding small blocks with "size class idx" i. So
-usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
-16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
-
-Pools are carved off an arena's highwater mark (an arena_object's pool_address
-member) as needed.  Once carved off, a pool is in one of three states forever
-after:
-
-used == partially used, neither empty nor full
-    At least one block in the pool is currently allocated, and at least one
-    block in the pool is not currently allocated (note this implies a pool
-    has room for at least two blocks).
-    This is a pool's initial state, as a pool is created only when malloc
-    needs space.
-    The pool holds blocks of a fixed size, and is in the circular list headed
-    at usedpools[i] (see above).  It's linked to the other used pools of the
-    same size class via the pool_header's nextpool and prevpool members.
-    If all but one block is currently allocated, a malloc can cause a
-    transition to the full state.  If all but one block is not currently
-    allocated, a free can cause a transition to the empty state.
-
-full == all the pool's blocks are currently allocated
-    On transition to full, a pool is unlinked from its usedpools[] list.
-    It's not linked to from anything then anymore, and its nextpool and
-    prevpool members are meaningless until it transitions back to used.
-    A free of a block in a full pool puts the pool back in the used state.
-    Then it's linked in at the front of the appropriate usedpools[] list, so
-    that the next allocation for its size class will reuse the freed block.
-
-empty == all the pool's blocks are currently available for allocation
-    On transition to empty, a pool is unlinked from its usedpools[] list,
-    and linked to the front of its arena_object's singly-linked freepools list,
-    via its nextpool member.  The prevpool member has no meaning in this case.
-    Empty pools have no inherent size class:  the next time a malloc finds
-    an empty list in usedpools[], it takes the first pool off of freepools.
-    If the size class needed happens to be the same as the size class the pool
-    last had, some pool initialization can be skipped.
-
-
-Block Management
-
-Blocks within pools are again carved out as needed.  pool->freeblock points to
-the start of a singly-linked list of free blocks within the pool.  When a
-block is freed, it's inserted at the front of its pool's freeblock list.  Note
-that the available blocks in a pool are *not* linked all together when a pool
-is initialized.  Instead only "the first two" (lowest addresses) blocks are
-set up, returning the first such block, and setting pool->freeblock to a
-one-block list holding the second such block.  This is consistent with that
-pymalloc strives at all levels (arena, pool, and block) never to touch a piece
-of memory until it's actually needed.
-
-So long as a pool is in the used state, we're certain there *is* a block
-available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
-points to the end of the free list before we've carved the entire pool into
-blocks, that means we simply haven't yet gotten to one of the higher-address
-blocks.  The offset from the pool_header to the start of "the next" virgin
-block is stored in the pool_header nextoffset member, and the largest value
-of nextoffset that makes sense is stored in the maxnextoffset member when a
-pool is initialized.  All the blocks in a pool have been passed out at least
-once when and only when nextoffset > maxnextoffset.
-
-
-Major obscurity:  While the usedpools vector is declared to have poolp
-entries, it doesn't really.  It really contains two pointers per (conceptual)
-poolp entry, the nextpool and prevpool members of a pool_header.  The
-excruciating initialization code below fools C so that
-
-    usedpool[i+i]
-
-"acts like" a genuine poolp, but only so long as you only reference its
-nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
-compensating for that a pool_header's nextpool and prevpool members
-immediately follow a pool_header's first two members:
-
-    union { block *_padding;
-            uint count; } ref;
-    block *freeblock;
-
-each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
-contains is a fudged-up pointer p such that *if* C believes it's a poolp
-pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
-circular list is empty).
-
-It's unclear why the usedpools setup is so convoluted.  It could be to
-minimize the amount of cache required to hold this heavily-referenced table
-(which only *needs* the two interpool pointer members of a pool_header). OTOH,
-referencing code has to remember to "double the index" and doing so isn't
-free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
-on that C doesn't insert any padding anywhere in a pool_header at or before
-the prevpool member.
-**************************************************************************** */
-
-#define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
-#define PT(x)   PTA(x), PTA(x)
-
-static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
-    PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
-#if NB_SMALL_SIZE_CLASSES > 8
-    , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
-#if NB_SMALL_SIZE_CLASSES > 16
-    , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
-#if NB_SMALL_SIZE_CLASSES > 24
-    , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
-#if NB_SMALL_SIZE_CLASSES > 32
-    , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
-#if NB_SMALL_SIZE_CLASSES > 40
-    , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
-#if NB_SMALL_SIZE_CLASSES > 48
-    , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
-#if NB_SMALL_SIZE_CLASSES > 56
-    , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
-#if NB_SMALL_SIZE_CLASSES > 64
-#error "NB_SMALL_SIZE_CLASSES should be less than 64"
-#endif /* NB_SMALL_SIZE_CLASSES > 64 */
-#endif /* NB_SMALL_SIZE_CLASSES > 56 */
-#endif /* NB_SMALL_SIZE_CLASSES > 48 */
-#endif /* NB_SMALL_SIZE_CLASSES > 40 */
-#endif /* NB_SMALL_SIZE_CLASSES > 32 */
-#endif /* NB_SMALL_SIZE_CLASSES > 24 */
-#endif /* NB_SMALL_SIZE_CLASSES > 16 */
-#endif /* NB_SMALL_SIZE_CLASSES >  8 */
-};
-
-/*==========================================================================
-Arena management.
-
-`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
-which may not be currently used (== they're arena_objects that aren't
-currently associated with an allocated arena).  Note that arenas proper are
-separately malloc'ed.
-
-Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
-we do try to free() arenas, and use some mild heuristic strategies to increase
-the likelihood that arenas eventually can be freed.
-
-unused_arena_objects
-
-    This is a singly-linked list of the arena_objects that are currently not
-    being used (no arena is associated with them).  Objects are taken off the
-    head of the list in new_arena(), and are pushed on the head of the list in
-    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
-    is on this list if and only if its .address member is 0.
-
-usable_arenas
-
-    This is a doubly-linked list of the arena_objects associated with arenas
-    that have pools available.  These pools are either waiting to be reused,
-    or have not been used before.  The list is sorted to have the most-
-    allocated arenas first (ascending order based on the nfreepools member).
-    This means that the next allocation will come from a heavily used arena,
-    which gives the nearly empty arenas a chance to be returned to the system.
-    In my unscientific tests this dramatically improved the number of arenas
-    that could be freed.
-
-Note that an arena_object associated with an arena all of whose pools are
-currently in use isn't on either list.
-*/
-
-/* Array of objects used to track chunks of memory (arenas). */
-static struct arena_object* arenas = NULL;
-/* Number of slots currently allocated in the `arenas` vector. */
-static uint maxarenas = 0;
-
-/* The head of the singly-linked, NULL-terminated list of available
- * arena_objects.
- */
-static struct arena_object* unused_arena_objects = NULL;
-
-/* The head of the doubly-linked, NULL-terminated at each end, list of
- * arena_objects associated with arenas that have pools available.
- */
-static struct arena_object* usable_arenas = NULL;
-
-/* How many arena_objects do we initially allocate?
- * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
- * `arenas` vector.
- */
-#define INITIAL_ARENA_OBJECTS 16
-
-/* Number of arenas allocated that haven't been free()'d. */
-static size_t narenas_currently_allocated = 0;
-
-/* Total number of times malloc() called to allocate an arena. */
-static size_t ntimes_arena_allocated = 0;
-/* High water mark (max value ever seen) for narenas_currently_allocated. */
-static size_t narenas_highwater = 0;
-
-static Py_ssize_t _Py_AllocatedBlocks = 0;
-
 Py_ssize_t
 _Py_GetAllocatedBlocks(void)
 {
-    return _Py_AllocatedBlocks;
+    return _PyRuntime.mem.num_allocated_blocks;
 }
 
 
@@ -1035,7 +583,7 @@ new_arena(void)
     if (debug_stats)
         _PyObject_DebugMallocStats(stderr);
 
-    if (unused_arena_objects == NULL) {
+    if (_PyRuntime.mem.unused_arena_objects == NULL) {
         uint i;
         uint numarenas;
         size_t nbytes;
@@ -1043,18 +591,18 @@ new_arena(void)
         /* Double the number of arena objects on each allocation.
          * Note that it's possible for `numarenas` to overflow.
          */
-        numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
-        if (numarenas <= maxarenas)
+        numarenas = _PyRuntime.mem.maxarenas ? _PyRuntime.mem.maxarenas << 1 : INITIAL_ARENA_OBJECTS;
+        if (numarenas <= _PyRuntime.mem.maxarenas)
             return NULL;                /* overflow */
 #if SIZEOF_SIZE_T <= SIZEOF_INT
-        if (numarenas > SIZE_MAX / sizeof(*arenas))
+        if (numarenas > SIZE_MAX / sizeof(*_PyRuntime.mem.arenas))
             return NULL;                /* overflow */
 #endif
-        nbytes = numarenas * sizeof(*arenas);
-        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
+        nbytes = numarenas * sizeof(*_PyRuntime.mem.arenas);
+        arenaobj = (struct arena_object *)PyMem_RawRealloc(_PyRuntime.mem.arenas, nbytes);
         if (arenaobj == NULL)
             return NULL;
-        arenas = arenaobj;
+        _PyRuntime.mem.arenas = arenaobj;
 
         /* We might need to fix pointers that were copied.  However,
          * new_arena only gets called when all the pages in the
@@ -1062,45 +610,45 @@ new_arena(void)
          * into the old array. Thus, we don't have to worry about
          * invalid pointers.  Just to be sure, some asserts:
          */
-        assert(usable_arenas == NULL);
-        assert(unused_arena_objects == NULL);
+        assert(_PyRuntime.mem.usable_arenas == NULL);
+        assert(_PyRuntime.mem.unused_arena_objects == NULL);
 
         /* Put the new arenas on the unused_arena_objects list. */
-        for (i = maxarenas; i < numarenas; ++i) {
-            arenas[i].address = 0;              /* mark as unassociated */
-            arenas[i].nextarena = i < numarenas - 1 ?
-                                   &arenas[i+1] : NULL;
+        for (i = _PyRuntime.mem.maxarenas; i < numarenas; ++i) {
+            _PyRuntime.mem.arenas[i].address = 0;              /* mark as unassociated */
+            _PyRuntime.mem.arenas[i].nextarena = i < numarenas - 1 ?
+                                   &_PyRuntime.mem.arenas[i+1] : NULL;
         }
 
         /* Update globals. */
-        unused_arena_objects = &arenas[maxarenas];
-        maxarenas = numarenas;
+        _PyRuntime.mem.unused_arena_objects = &_PyRuntime.mem.arenas[_PyRuntime.mem.maxarenas];
+        _PyRuntime.mem.maxarenas = numarenas;
     }
 
     /* Take the next available arena object off the head of the list. */
-    assert(unused_arena_objects != NULL);
-    arenaobj = unused_arena_objects;
-    unused_arena_objects = arenaobj->nextarena;
+    assert(_PyRuntime.mem.unused_arena_objects != NULL);
+    arenaobj = _PyRuntime.mem.unused_arena_objects;
+    _PyRuntime.mem.unused_arena_objects = arenaobj->nextarena;
     assert(arenaobj->address == 0);
-    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
+    address = _PyRuntime.obj.allocator_arenas.alloc(_PyRuntime.obj.allocator_arenas.ctx, ARENA_SIZE);
     if (address == NULL) {
         /* The allocation failed: return NULL after putting the
          * arenaobj back.
          */
-        arenaobj->nextarena = unused_arena_objects;
-        unused_arena_objects = arenaobj;
+        arenaobj->nextarena = _PyRuntime.mem.unused_arena_objects;
+        _PyRuntime.mem.unused_arena_objects = arenaobj;
         return NULL;
     }
     arenaobj->address = (uintptr_t)address;
 
-    ++narenas_currently_allocated;
-    ++ntimes_arena_allocated;
-    if (narenas_currently_allocated > narenas_highwater)
-        narenas_highwater = narenas_currently_allocated;
+    ++_PyRuntime.mem.narenas_currently_allocated;
+    ++_PyRuntime.mem.ntimes_arena_allocated;
+    if (_PyRuntime.mem.narenas_currently_allocated > _PyRuntime.mem.narenas_highwater)
+        _PyRuntime.mem.narenas_highwater = _PyRuntime.mem.narenas_currently_allocated;
     arenaobj->freepools = NULL;
     /* pool_address <- first pool-aligned address in the arena
        nfreepools <- number of whole pools that fit after alignment */
-    arenaobj->pool_address = (block*)arenaobj->address;
+    arenaobj->pool_address = (pyblock*)arenaobj->address;
     arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
     assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
@@ -1197,9 +745,9 @@ address_in_range(void *p, poolp pool)
     // the GIL. The following dance forces the compiler to read pool->arenaindex
     // only once.
     uint arenaindex = *((volatile uint *)&pool->arenaindex);
-    return arenaindex < maxarenas &&
-        (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
-        arenas[arenaindex].address != 0;
+    return arenaindex < _PyRuntime.mem.maxarenas &&
+        (uintptr_t)p - _PyRuntime.mem.arenas[arenaindex].address < ARENA_SIZE &&
+        _PyRuntime.mem.arenas[arenaindex].address != 0;
 }
 
 /*==========================================================================*/
@@ -1220,12 +768,12 @@ static void *
 _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
 {
     size_t nbytes;
-    block *bp;
+    pyblock *bp;
     poolp pool;
     poolp next;
     uint size;
 
-    _Py_AllocatedBlocks++;
+    _PyRuntime.mem.num_allocated_blocks++;
 
     assert(elsize == 0 || nelem <= PY_SSIZE_T_MAX / elsize);
     nbytes = nelem * elsize;
@@ -1246,7 +794,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
          * Most frequent paths first
          */
         size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
-        pool = usedpools[size + size];
+        pool = _PyRuntime.mem.usedpools[size + size];
         if (pool != pool->nextpool) {
             /*
              * There is a used pool for this size class.
@@ -1255,7 +803,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
             ++pool->ref.count;
             bp = pool->freeblock;
             assert(bp != NULL);
-            if ((pool->freeblock = *(block **)bp) != NULL) {
+            if ((pool->freeblock = *(pyblock **)bp) != NULL) {
                 UNLOCK();
                 if (use_calloc)
                     memset(bp, 0, nbytes);
@@ -1266,10 +814,10 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
              */
             if (pool->nextoffset <= pool->maxnextoffset) {
                 /* There is room for another block. */
-                pool->freeblock = (block*)pool +
+                pool->freeblock = (pyblock*)pool +
                                   pool->nextoffset;
                 pool->nextoffset += INDEX2SIZE(size);
-                *(block **)(pool->freeblock) = NULL;
+                *(pyblock **)(pool->freeblock) = NULL;
                 UNLOCK();
                 if (use_calloc)
                     memset(bp, 0, nbytes);
@@ -1289,29 +837,29 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
         /* There isn't a pool of the right size class immediately
          * available:  use a free pool.
          */
-        if (usable_arenas == NULL) {
+        if (_PyRuntime.mem.usable_arenas == NULL) {
             /* No arena has a free pool:  allocate a new arena. */
 #ifdef WITH_MEMORY_LIMITS
-            if (narenas_currently_allocated >= MAX_ARENAS) {
+            if (_PyRuntime.mem.narenas_currently_allocated >= MAX_ARENAS) {
                 UNLOCK();
                 goto redirect;
             }
 #endif
-            usable_arenas = new_arena();
-            if (usable_arenas == NULL) {
+            _PyRuntime.mem.usable_arenas = new_arena();
+            if (_PyRuntime.mem.usable_arenas == NULL) {
                 UNLOCK();
                 goto redirect;
             }
-            usable_arenas->nextarena =
-                usable_arenas->prevarena = NULL;
+            _PyRuntime.mem.usable_arenas->nextarena =
+                _PyRuntime.mem.usable_arenas->prevarena = NULL;
         }
-        assert(usable_arenas->address != 0);
+        assert(_PyRuntime.mem.usable_arenas->address != 0);
 
         /* Try to get a cached free pool. */
-        pool = usable_arenas->freepools;
+        pool = _PyRuntime.mem.usable_arenas->freepools;
         if (pool != NULL) {
             /* Unlink from cached pools. */
-            usable_arenas->freepools = pool->nextpool;
+            _PyRuntime.mem.usable_arenas->freepools = pool->nextpool;
 
             /* This arena already had the smallest nfreepools
              * value, so decreasing nfreepools doesn't change
@@ -1320,18 +868,18 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
              * become wholly allocated, we need to remove its
              * arena_object from usable_arenas.
              */
-            --usable_arenas->nfreepools;
-            if (usable_arenas->nfreepools == 0) {
+            --_PyRuntime.mem.usable_arenas->nfreepools;
+            if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
                 /* Wholly allocated:  remove. */
-                assert(usable_arenas->freepools == NULL);
-                assert(usable_arenas->nextarena == NULL ||
-                       usable_arenas->nextarena->prevarena ==
-                       usable_arenas);
+                assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
+                assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
+                       _PyRuntime.mem.usable_arenas->nextarena->prevarena ==
+                       _PyRuntime.mem.usable_arenas);
 
-                usable_arenas = usable_arenas->nextarena;
-                if (usable_arenas != NULL) {
-                    usable_arenas->prevarena = NULL;
-                    assert(usable_arenas->address != 0);
+                _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
+                if (_PyRuntime.mem.usable_arenas != NULL) {
+                    _PyRuntime.mem.usable_arenas->prevarena = NULL;
+                    assert(_PyRuntime.mem.usable_arenas->address != 0);
                 }
             }
             else {
@@ -1340,14 +888,14 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
                  * off all the arena's pools for the first
                  * time.
                  */
-                assert(usable_arenas->freepools != NULL ||
-                       usable_arenas->pool_address <=
-                       (block*)usable_arenas->address +
+                assert(_PyRuntime.mem.usable_arenas->freepools != NULL ||
+                       _PyRuntime.mem.usable_arenas->pool_address <=
+                       (pyblock*)_PyRuntime.mem.usable_arenas->address +
                            ARENA_SIZE - POOL_SIZE);
             }
         init_pool:
             /* Frontlink to used pools. */
-            next = usedpools[size + size]; /* == prev */
+            next = _PyRuntime.mem.usedpools[size + size]; /* == prev */
             pool->nextpool = next;
             pool->prevpool = next;
             next->nextpool = pool;
@@ -1360,7 +908,7 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
                  */
                 bp = pool->freeblock;
                 assert(bp != NULL);
-                pool->freeblock = *(block **)bp;
+                pool->freeblock = *(pyblock **)bp;
                 UNLOCK();
                 if (use_calloc)
                     memset(bp, 0, nbytes);
@@ -1373,11 +921,11 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
              */
             pool->szidx = size;
             size = INDEX2SIZE(size);
-            bp = (block *)pool + POOL_OVERHEAD;
+            bp = (pyblock *)pool + POOL_OVERHEAD;
             pool->nextoffset = POOL_OVERHEAD + (size << 1);
             pool->maxnextoffset = POOL_SIZE - size;
             pool->freeblock = bp + size;
-            *(block **)(pool->freeblock) = NULL;
+            *(pyblock **)(pool->freeblock) = NULL;
             UNLOCK();
             if (use_calloc)
                 memset(bp, 0, nbytes);
@@ -1385,26 +933,26 @@ _PyObject_Alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize)
         }
 
         /* Carve off a new pool. */
-        assert(usable_arenas->nfreepools > 0);
-        assert(usable_arenas->freepools == NULL);
-        pool = (poolp)usable_arenas->pool_address;
-        assert((block*)pool <= (block*)usable_arenas->address +
-                               ARENA_SIZE - POOL_SIZE);
-        pool->arenaindex = (uint)(usable_arenas - arenas);
-        assert(&arenas[pool->arenaindex] == usable_arenas);
+        assert(_PyRuntime.mem.usable_arenas->nfreepools > 0);
+        assert(_PyRuntime.mem.usable_arenas->freepools == NULL);
+        pool = (poolp)_PyRuntime.mem.usable_arenas->pool_address;
+        assert((pyblock*)pool <= (pyblock*)_PyRuntime.mem.usable_arenas->address +
+                                 ARENA_SIZE - POOL_SIZE);
+        pool->arenaindex = (uint)(_PyRuntime.mem.usable_arenas - _PyRuntime.mem.arenas);
+        assert(&_PyRuntime.mem.arenas[pool->arenaindex] == _PyRuntime.mem.usable_arenas);
         pool->szidx = DUMMY_SIZE_IDX;
-        usable_arenas->pool_address += POOL_SIZE;
-        --usable_arenas->nfreepools;
+        _PyRuntime.mem.usable_arenas->pool_address += POOL_SIZE;
+        --_PyRuntime.mem.usable_arenas->nfreepools;
 
-        if (usable_arenas->nfreepools == 0) {
-            assert(usable_arenas->nextarena == NULL ||
-                   usable_arenas->nextarena->prevarena ==
-                   usable_arenas);
+        if (_PyRuntime.mem.usable_arenas->nfreepools == 0) {
+            assert(_PyRuntime.mem.usable_arenas->nextarena == NULL ||
+                   _PyRuntime.mem.usable_arenas->nextarena->prevarena ==
+                   _PyRuntime.mem.usable_arenas);
             /* Unlink the arena:  it is completely allocated. */
-            usable_arenas = usable_arenas->nextarena;
-            if (usable_arenas != NULL) {
-                usable_arenas->prevarena = NULL;
-                assert(usable_arenas->address != 0);
+            _PyRuntime.mem.usable_arenas = _PyRuntime.mem.usable_arenas->nextarena;
+            if (_PyRuntime.mem.usable_arenas != NULL) {
+                _PyRuntime.mem.usable_arenas->prevarena = NULL;
+                assert(_PyRuntime.mem.usable_arenas->address != 0);
             }
         }
 
@@ -1426,7 +974,7 @@ redirect:
         else
             result = PyMem_RawMalloc(nbytes);
         if (!result)
-            _Py_AllocatedBlocks--;
+            _PyRuntime.mem.num_allocated_blocks--;
         return result;
     }
 }
@@ -1449,14 +997,14 @@ static void
 _PyObject_Free(void *ctx, void *p)
 {
     poolp pool;
-    block *lastfree;
+    pyblock *lastfree;
     poolp next, prev;
     uint size;
 
     if (p == NULL)      /* free(NULL) has no effect */
         return;
 
-    _Py_AllocatedBlocks--;
+    _PyRuntime.mem.num_allocated_blocks--;
 
 #ifdef WITH_VALGRIND
     if (UNLIKELY(running_on_valgrind > 0))
@@ -1474,8 +1022,8 @@ _PyObject_Free(void *ctx, void *p)
          * list in any case).
          */
         assert(pool->ref.count > 0);            /* else it was empty */
-        *(block **)p = lastfree = pool->freeblock;
-        pool->freeblock = (block *)p;
+        *(pyblock **)p = lastfree = pool->freeblock;
+        pool->freeblock = (pyblock *)p;
         if (lastfree) {
             struct arena_object* ao;
             uint nf;  /* ao->nfreepools */
@@ -1501,7 +1049,7 @@ _PyObject_Free(void *ctx, void *p)
             /* Link the pool to freepools.  This is a singly-linked
              * list, and pool->prevpool isn't used there.
              */
-            ao = &arenas[pool->arenaindex];
+            ao = &_PyRuntime.mem.arenas[pool->arenaindex];
             pool->nextpool = ao->freepools;
             ao->freepools = pool;
             nf = ++ao->nfreepools;
@@ -1530,9 +1078,9 @@ _PyObject_Free(void *ctx, void *p)
                  * usable_arenas pointer.
                  */
                 if (ao->prevarena == NULL) {
-                    usable_arenas = ao->nextarena;
-                    assert(usable_arenas == NULL ||
-                           usable_arenas->address != 0);
+                    _PyRuntime.mem.usable_arenas = ao->nextarena;
+                    assert(_PyRuntime.mem.usable_arenas == NULL ||
+                           _PyRuntime.mem.usable_arenas->address != 0);
                 }
                 else {
                     assert(ao->prevarena->nextarena == ao);
@@ -1548,14 +1096,14 @@ _PyObject_Free(void *ctx, void *p)
                 /* Record that this arena_object slot is
                  * available to be reused.
                  */
-                ao->nextarena = unused_arena_objects;
-                unused_arena_objects = ao;
+                ao->nextarena = _PyRuntime.mem.unused_arena_objects;
+                _PyRuntime.mem.unused_arena_objects = ao;
 
                 /* Free the entire arena. */
-                _PyObject_Arena.free(_PyObject_Arena.ctx,
+                _PyRuntime.obj.allocator_arenas.free(_PyRuntime.obj.allocator_arenas.ctx,
                                      (void *)ao->address, ARENA_SIZE);
                 ao->address = 0;                        /* mark unassociated */
-                --narenas_currently_allocated;
+                --_PyRuntime.mem.narenas_currently_allocated;
 
                 UNLOCK();
                 return;
@@ -1566,12 +1114,12 @@ _PyObject_Free(void *ctx, void *p)
                  * ao->nfreepools was 0 before, ao isn't
                  * currently on the usable_arenas list.
                  */
-                ao->nextarena = usable_arenas;
+                ao->nextarena = _PyRuntime.mem.usable_arenas;
                 ao->prevarena = NULL;
-                if (usable_arenas)
-                    usable_arenas->prevarena = ao;
-                usable_arenas = ao;
-                assert(usable_arenas->address != 0);
+                if (_PyRuntime.mem.usable_arenas)
+                    _PyRuntime.mem.usable_arenas->prevarena = ao;
+                _PyRuntime.mem.usable_arenas = ao;
+                assert(_PyRuntime.mem.usable_arenas->address != 0);
 
                 UNLOCK();
                 return;
@@ -1601,8 +1149,8 @@ _PyObject_Free(void *ctx, void *p)
             }
             else {
                 /* ao is at the head of the list */
-                assert(usable_arenas == ao);
-                usable_arenas = ao->nextarena;
+                assert(_PyRuntime.mem.usable_arenas == ao);
+                _PyRuntime.mem.usable_arenas = ao->nextarena;
             }
             ao->nextarena->prevarena = ao->prevarena;
 
@@ -1631,7 +1179,7 @@ _PyObject_Free(void *ctx, void *p)
                       nf > ao->prevarena->nfreepools);
             assert(ao->nextarena == NULL ||
                 ao->nextarena->prevarena == ao);
-            assert((usable_arenas == ao &&
+            assert((_PyRuntime.mem.usable_arenas == ao &&
                 ao->prevarena == NULL) ||
                 ao->prevarena->nextarena == ao);
 
@@ -1647,7 +1195,7 @@ _PyObject_Free(void *ctx, void *p)
         --pool->ref.count;
         assert(pool->ref.count > 0);            /* else the pool is empty */
         size = pool->szidx;
-        next = usedpools[size + size];
+        next = _PyRuntime.mem.usedpools[size + size];
         prev = next->prevpool;
         /* insert pool before next:   prev <-> pool <-> next */
         pool->nextpool = next;
@@ -1769,15 +1317,13 @@ _Py_GetAllocatedBlocks(void)
 #define DEADBYTE       0xDB    /* dead (newly freed) memory */
 #define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
 
-static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
-
 /* serialno is always incremented via calling this routine.  The point is
  * to supply a single place to set a breakpoint.
  */
 static void
 bumpserialno(void)
 {
-    ++serialno;
+    ++_PyRuntime.mem.serialno;
 }
 
 #define SST SIZEOF_SIZE_T
@@ -1868,7 +1414,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
     tail = p + 2*SST + nbytes;
     memset(tail, FORBIDDENBYTE, SST);
-    write_size_t(tail + SST, serialno);
+    write_size_t(tail + SST, _PyRuntime.mem.serialno);
 
     return p + 2*SST;
 }
@@ -1953,7 +1499,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
 
     tail = q + nbytes;
     memset(tail, FORBIDDENBYTE, SST);
-    write_size_t(tail + SST, serialno);
+    write_size_t(tail + SST, _PyRuntime.mem.serialno);
 
     if (nbytes > original_nbytes) {
         /* growing:  mark new extra memory clean */
@@ -2283,16 +1829,16 @@ _PyObject_DebugMallocStats(FILE *out)
      * to march over all the arenas.  If we're lucky, most of the memory
      * will be living in full pools -- would be a shame to miss them.
      */
-    for (i = 0; i < maxarenas; ++i) {
+    for (i = 0; i < _PyRuntime.mem.maxarenas; ++i) {
         uint j;
-        uintptr_t base = arenas[i].address;
+        uintptr_t base = _PyRuntime.mem.arenas[i].address;
 
         /* Skip arenas which are not allocated. */
-        if (arenas[i].address == (uintptr_t)NULL)
+        if (_PyRuntime.mem.arenas[i].address == (uintptr_t)NULL)
             continue;
         narenas += 1;
 
-        numfreepools += arenas[i].nfreepools;
+        numfreepools += _PyRuntime.mem.arenas[i].nfreepools;
 
         /* round up to pool alignment */
         if (base & (uintptr_t)POOL_SIZE_MASK) {
@@ -2302,8 +1848,8 @@ _PyObject_DebugMallocStats(FILE *out)
         }
 
         /* visit every pool in the arena */
-        assert(base <= (uintptr_t) arenas[i].pool_address);
-        for (j = 0; base < (uintptr_t) arenas[i].pool_address;
+        assert(base <= (uintptr_t) _PyRuntime.mem.arenas[i].pool_address);
+        for (j = 0; base < (uintptr_t) _PyRuntime.mem.arenas[i].pool_address;
              ++j, base += POOL_SIZE) {
             poolp p = (poolp)base;
             const uint sz = p->szidx;
@@ -2312,7 +1858,7 @@ _PyObject_DebugMallocStats(FILE *out)
             if (p->ref.count == 0) {
                 /* currently unused */
 #ifdef Py_DEBUG
-                assert(pool_is_in_list(p, arenas[i].freepools));
+                assert(pool_is_in_list(p, _PyRuntime.mem.arenas[i].freepools));
 #endif
                 continue;
             }
@@ -2322,11 +1868,11 @@ _PyObject_DebugMallocStats(FILE *out)
             numfreeblocks[sz] += freeblocks;
 #ifdef Py_DEBUG
             if (freeblocks > 0)
-                assert(pool_is_in_list(p, usedpools[sz + sz]));
+                assert(pool_is_in_list(p, _PyRuntime.mem.usedpools[sz + sz]));
 #endif
         }
     }
-    assert(narenas == narenas_currently_allocated);
+    assert(narenas == _PyRuntime.mem.narenas_currently_allocated);
 
     fputc('\n', out);
     fputs("class   size   num pools   blocks in use  avail blocks\n"
@@ -2354,10 +1900,10 @@ _PyObject_DebugMallocStats(FILE *out)
     }
     fputc('\n', out);
     if (_PyMem_DebugEnabled())
-        (void)printone(out, "# times object malloc called", serialno);
-    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
-    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
-    (void)printone(out, "# arenas highwater mark", narenas_highwater);
+        (void)printone(out, "# times object malloc called", _PyRuntime.mem.serialno);
+    (void)printone(out, "# arenas allocated total", _PyRuntime.mem.ntimes_arena_allocated);
+    (void)printone(out, "# arenas reclaimed", _PyRuntime.mem.ntimes_arena_allocated - narenas);
+    (void)printone(out, "# arenas highwater mark", _PyRuntime.mem.narenas_highwater);
     (void)printone(out, "# arenas allocated current", narenas);
 
     PyOS_snprintf(buf, sizeof(buf),
diff --git a/Objects/odictobject.c b/Objects/odictobject.c
index e1ee53beae6..8ad8f384c41 100644
--- a/Objects/odictobject.c
+++ b/Objects/odictobject.c
@@ -470,6 +470,7 @@ later:
 */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 #include "dict-common.h"
 #include <stddef.h>
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 219e81d0baf..8772e58f0a0 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -32,6 +32,7 @@
 */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 
 /* Object used as dummy key to fill deleted entries */
diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c
index b5aabfd9d7a..59f084d1a61 100644
--- a/Objects/sliceobject.c
+++ b/Objects/sliceobject.c
@@ -14,6 +14,8 @@ this type and there is exactly one in existence.
 */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "structmember.h"
 
 static PyObject *
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 2a904904993..c150af8ed1a 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -2,6 +2,7 @@
 /* Tuple object implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "accu.h"
 
 /*[clinic input]
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 1d963aae3f8..b2154bbf690 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1,6 +1,7 @@
 /* Type object implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "frameobject.h"
 #include "structmember.h"
 
@@ -1157,10 +1158,10 @@ subtype_dealloc(PyObject *self)
     /* UnTrack and re-Track around the trashcan macro, alas */
     /* See explanation at end of function for full disclosure */
     PyObject_GC_UnTrack(self);
-    ++_PyTrash_delete_nesting;
+    ++_PyRuntime.gc.trash_delete_nesting;
     ++ tstate->trash_delete_nesting;
     Py_TRASHCAN_SAFE_BEGIN(self);
-    --_PyTrash_delete_nesting;
+    --_PyRuntime.gc.trash_delete_nesting;
     -- tstate->trash_delete_nesting;
 
     /* Find the nearest base with a different tp_dealloc */
@@ -1254,10 +1255,10 @@ subtype_dealloc(PyObject *self)
       Py_DECREF(type);
 
   endlabel:
-    ++_PyTrash_delete_nesting;
+    ++_PyRuntime.gc.trash_delete_nesting;
     ++ tstate->trash_delete_nesting;
     Py_TRASHCAN_SAFE_END(self);
-    --_PyTrash_delete_nesting;
+    --_PyRuntime.gc.trash_delete_nesting;
     -- tstate->trash_delete_nesting;
 
     /* Explanation of the weirdness around the trashcan macros:
@@ -1297,7 +1298,7 @@ subtype_dealloc(PyObject *self)
           a subtle disaster.
 
        Q. Why the bizarre (net-zero) manipulation of
-          _PyTrash_delete_nesting around the trashcan macros?
+          _PyRuntime.gc.trash_delete_nesting around the trashcan macros?
 
        A. Some base classes (e.g. list) also use the trashcan mechanism.
           The following scenario used to be possible:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f7b2aa6c3df..db1516ddf65 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -40,6 +40,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "internal/pystate.h"
 #include "ucnhash.h"
 #include "bytes_methods.h"
 #include "stringlib/eq.h"
diff --git a/PC/pyconfig.h b/PC/pyconfig.h
index 0da68418cc0..4e25fbd973d 100644
--- a/PC/pyconfig.h
+++ b/PC/pyconfig.h
@@ -278,18 +278,20 @@ Py_NO_ENABLE_SHARED to find out.  Also support MS_NO_COREDLL for b/w compat */
 /* For an MSVC DLL, we can nominate the .lib files used by extensions */
 #ifdef MS_COREDLL
 #       ifndef Py_BUILD_CORE /* not building the core - must be an ext */
-#               if defined(_MSC_VER)
-                        /* So MSVC users need not specify the .lib file in
-                        their Makefile (other compilers are generally
-                        taken care of by distutils.) */
-#                       if defined(_DEBUG)
-#                               pragma comment(lib,"python37_d.lib")
-#                       elif defined(Py_LIMITED_API)
-#                               pragma comment(lib,"python3.lib")
-#                       else
-#                               pragma comment(lib,"python37.lib")
-#                       endif /* _DEBUG */
-#               endif /* _MSC_VER */
+#               ifndef Py_BUILD_CORE_MODULE
+#                       if defined(_MSC_VER)
+                                /* So MSVC users need not specify the .lib
+                                file in their Makefile (other compilers are
+                                generally taken care of by distutils.) */
+#                               if defined(_DEBUG)
+#                                       pragma comment(lib,"python37_d.lib")
+#                               elif defined(Py_LIMITED_API)
+#                                       pragma comment(lib,"python3.lib")
+#                               else
+#                                       pragma comment(lib,"python37.lib")
+#                               endif /* _DEBUG */
+#                       endif /* _MSC_VER */
+#               endif /* Py_BUILD_CORE_MODULE */
 #       endif /* Py_BUILD_CORE */
 #endif /* MS_COREDLL */
 
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 59910951b62..246de78463d 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -112,6 +112,13 @@
     <ClInclude Include="..\Include\graminit.h" />
     <ClInclude Include="..\Include\grammar.h" />
     <ClInclude Include="..\Include\import.h" />
+    <ClInclude Include="..\Include\internal\ceval.h" />
+    <ClInclude Include="..\Include\internal\condvar.h" />
+    <ClInclude Include="..\Include\internal\gil.h" />
+    <ClInclude Include="..\Include\internal\mem.h" />
+    <ClInclude Include="..\Include\internal\pymalloc.h" />
+    <ClInclude Include="..\Include\internal\pystate.h" />
+    <ClInclude Include="..\Include\internal\warnings.h" />
     <ClInclude Include="..\Include\intrcheck.h" />
     <ClInclude Include="..\Include\iterobject.h" />
     <ClInclude Include="..\Include\listobject.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 115ce85edec..b37e9930e31 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -129,6 +129,27 @@
     <ClInclude Include="..\Include\import.h">
       <Filter>Include</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\internal\ceval.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\condvar.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\gil.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\mem.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\pymalloc.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\pystate.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\warnings.h">
+      <Filter>Include</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\intrcheck.h">
       <Filter>Include</Filter>
     </ClInclude>
diff --git a/Parser/myreadline.c b/Parser/myreadline.c
index 1b63f7863fa..7fe04a9d365 100644
--- a/Parser/myreadline.c
+++ b/Parser/myreadline.c
@@ -10,6 +10,7 @@
 */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #ifdef MS_WINDOWS
 #define WIN32_LEAN_AND_MEAN
 #include "windows.h"
diff --git a/Parser/pgenmain.c b/Parser/pgenmain.c
index ce66ebca0cc..8f3e2f7c4ee 100644
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@@ -16,6 +16,8 @@
 #define PGEN
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "pgenheaders.h"
 #include "grammar.h"
 #include "node.h"
@@ -25,6 +27,7 @@
 int Py_DebugFlag;
 int Py_VerboseFlag;
 int Py_IgnoreEnvironmentFlag;
+_PyRuntimeState _PyRuntime = {0, 0};
 
 /* Forward */
 grammar *getgrammar(const char *filename);
@@ -59,6 +62,8 @@ main(int argc, char **argv)
     filename = argv[1];
     graminit_h = argv[2];
     graminit_c = argv[3];
+    _PyObject_Initialize(&_PyRuntime.obj);
+    _PyMem_Initialize(&_PyRuntime.mem);
     g = getgrammar(filename);
     fp = fopen(graminit_c, "w");
     if (fp == NULL) {
diff --git a/Python/_warnings.c b/Python/_warnings.c
index 8616195c4e3..5230318788c 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "internal/pystate.h"
 #include "frameobject.h"
 #include "clinic/_warnings.c.h"
 
@@ -8,13 +9,6 @@ PyDoc_STRVAR(warnings__doc__,
 MODULE_NAME " provides basic warning filtering support.\n"
 "It is a helper module to speed up interpreter start-up.");
 
-/* Both 'filters' and 'onceregistry' can be set in warnings.py;
-   get_warnings_attr() will reset these variables accordingly. */
-static PyObject *_filters;  /* List */
-static PyObject *_once_registry;  /* Dict */
-static PyObject *_default_action; /* String */
-static long _filters_version;
-
 _Py_IDENTIFIER(argv);
 _Py_IDENTIFIER(stderr);
 
@@ -53,7 +47,7 @@ get_warnings_attr(const char *attr, int try_import)
     }
 
     /* don't try to import after the start of the Python finallization */
-    if (try_import && _Py_Finalizing == NULL) {
+    if (try_import && !_Py_IsFinalizing()) {
         warnings_module = PyImport_Import(warnings_str);
         if (warnings_module == NULL) {
             /* Fallback to the C implementation if we cannot get
@@ -90,10 +84,10 @@ get_once_registry(void)
     if (registry == NULL) {
         if (PyErr_Occurred())
             return NULL;
-        return _once_registry;
+        return _PyRuntime.warnings.once_registry;
     }
-    Py_DECREF(_once_registry);
-    _once_registry = registry;
+    Py_DECREF(_PyRuntime.warnings.once_registry);
+    _PyRuntime.warnings.once_registry = registry;
     return registry;
 }
 
@@ -108,11 +102,11 @@ get_default_action(void)
         if (PyErr_Occurred()) {
             return NULL;
         }
-        return _default_action;
+        return _PyRuntime.warnings.default_action;
     }
 
-    Py_DECREF(_default_action);
-    _default_action = default_action;
+    Py_DECREF(_PyRuntime.warnings.default_action);
+    _PyRuntime.warnings.default_action = default_action;
     return default_action;
 }
 
@@ -132,23 +126,24 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno,
             return NULL;
     }
     else {
-        Py_DECREF(_filters);
-        _filters = warnings_filters;
+        Py_DECREF(_PyRuntime.warnings.filters);
+        _PyRuntime.warnings.filters = warnings_filters;
     }
 
-    if (_filters == NULL || !PyList_Check(_filters)) {
+    PyObject *filters = _PyRuntime.warnings.filters;
+    if (filters == NULL || !PyList_Check(filters)) {
         PyErr_SetString(PyExc_ValueError,
                         MODULE_NAME ".filters must be a list");
         return NULL;
     }
 
-    /* _filters could change while we are iterating over it. */
-    for (i = 0; i < PyList_GET_SIZE(_filters); i++) {
+    /* _PyRuntime.warnings.filters could change while we are iterating over it. */
+    for (i = 0; i < PyList_GET_SIZE(filters); i++) {
         PyObject *tmp_item, *action, *msg, *cat, *mod, *ln_obj;
         Py_ssize_t ln;
         int is_subclass, good_msg, good_mod;
 
-        tmp_item = PyList_GET_ITEM(_filters, i);
+        tmp_item = PyList_GET_ITEM(filters, i);
         if (!PyTuple_Check(tmp_item) || PyTuple_GET_SIZE(tmp_item) != 5) {
             PyErr_Format(PyExc_ValueError,
                          MODULE_NAME ".filters item %zd isn't a 5-tuple", i);
@@ -220,9 +215,9 @@ already_warned(PyObject *registry, PyObject *key, int should_set)
     version_obj = _PyDict_GetItemId(registry, &PyId_version);
     if (version_obj == NULL
         || !PyLong_CheckExact(version_obj)
-        || PyLong_AsLong(version_obj) != _filters_version) {
+        || PyLong_AsLong(version_obj) != _PyRuntime.warnings.filters_version) {
         PyDict_Clear(registry);
-        version_obj = PyLong_FromLong(_filters_version);
+        version_obj = PyLong_FromLong(_PyRuntime.warnings.filters_version);
         if (version_obj == NULL)
             return -1;
         if (_PyDict_SetItemId(registry, &PyId_version, version_obj) < 0) {
@@ -520,7 +515,7 @@ warn_explicit(PyObject *category, PyObject *message,
                 if (registry == NULL)
                     goto cleanup;
             }
-            /* _once_registry[(text, category)] = 1 */
+            /* _PyRuntime.warnings.once_registry[(text, category)] = 1 */
             rc = update_registry(registry, text, category, 0);
         }
         else if (_PyUnicode_EqualToASCIIString(action, "module")) {
@@ -910,7 +905,7 @@ warnings_warn_explicit(PyObject *self, PyObject *args, PyObject *kwds)
 static PyObject *
 warnings_filters_mutated(PyObject *self, PyObject *args)
 {
-    _filters_version++;
+    _PyRuntime.warnings.filters_version++;
     Py_RETURN_NONE;
 }
 
@@ -1160,7 +1155,8 @@ create_filter(PyObject *category, const char *action)
     }
 
     /* This assumes the line number is zero for now. */
-    return PyTuple_Pack(5, action_obj, Py_None, category, Py_None, _PyLong_Zero);
+    return PyTuple_Pack(5, action_obj, Py_None,
+                        category, Py_None, _PyLong_Zero);
 }
 
 static PyObject *
@@ -1228,33 +1224,35 @@ _PyWarnings_Init(void)
     if (m == NULL)
         return NULL;
 
-    if (_filters == NULL) {
-        _filters = init_filters();
-        if (_filters == NULL)
+    if (_PyRuntime.warnings.filters == NULL) {
+        _PyRuntime.warnings.filters = init_filters();
+        if (_PyRuntime.warnings.filters == NULL)
             return NULL;
     }
-    Py_INCREF(_filters);
-    if (PyModule_AddObject(m, "filters", _filters) < 0)
+    Py_INCREF(_PyRuntime.warnings.filters);
+    if (PyModule_AddObject(m, "filters", _PyRuntime.warnings.filters) < 0)
         return NULL;
 
-    if (_once_registry == NULL) {
-        _once_registry = PyDict_New();
-        if (_once_registry == NULL)
+    if (_PyRuntime.warnings.once_registry == NULL) {
+        _PyRuntime.warnings.once_registry = PyDict_New();
+        if (_PyRuntime.warnings.once_registry == NULL)
             return NULL;
     }
-    Py_INCREF(_once_registry);
-    if (PyModule_AddObject(m, "_onceregistry", _once_registry) < 0)
+    Py_INCREF(_PyRuntime.warnings.once_registry);
+    if (PyModule_AddObject(m, "_onceregistry",
+                           _PyRuntime.warnings.once_registry) < 0)
         return NULL;
 
-    if (_default_action == NULL) {
-        _default_action = PyUnicode_FromString("default");
-        if (_default_action == NULL)
+    if (_PyRuntime.warnings.default_action == NULL) {
+        _PyRuntime.warnings.default_action = PyUnicode_FromString("default");
+        if (_PyRuntime.warnings.default_action == NULL)
             return NULL;
     }
-    Py_INCREF(_default_action);
-    if (PyModule_AddObject(m, "_defaultaction", _default_action) < 0)
+    Py_INCREF(_PyRuntime.warnings.default_action);
+    if (PyModule_AddObject(m, "_defaultaction",
+                           _PyRuntime.warnings.default_action) < 0)
         return NULL;
 
-    _filters_version = 0;
+    _PyRuntime.warnings.filters_version = 0;
     return m;
 }
diff --git a/Python/ceval.c b/Python/ceval.c
index a072a5fe81e..e583e272d72 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -10,6 +10,7 @@
 #define PY_LOCAL_AGGRESSIVE
 
 #include "Python.h"
+#include "internal/pystate.h"
 
 #include "code.h"
 #include "dictobject.h"
@@ -36,7 +37,8 @@ extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
 typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
 
 /* Forward declarations */
-Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, PyObject *);
+Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t,
+                                          PyObject *);
 static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);
 
 #ifdef LLTRACE
@@ -52,13 +54,15 @@ static int call_trace_protected(Py_tracefunc, PyObject *,
 static void call_exc_trace(Py_tracefunc, PyObject *,
                            PyThreadState *, PyFrameObject *);
 static int maybe_call_line_trace(Py_tracefunc, PyObject *,
-                                 PyThreadState *, PyFrameObject *, int *, int *, int *);
+                                 PyThreadState *, PyFrameObject *,
+                                 int *, int *, int *);
 static void maybe_dtrace_line(PyFrameObject *, int *, int *, int *);
 static void dtrace_function_entry(PyFrameObject *);
 static void dtrace_function_return(PyFrameObject *);
 
 static PyObject * cmp_outcome(int, PyObject *, PyObject *);
-static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, PyObject *);
+static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *,
+                              PyObject *);
 static PyObject * import_from(PyObject *, PyObject *);
 static int import_all_from(PyObject *, PyObject *);
 static void format_exc_check_arg(PyObject *, const char *, PyObject *);
@@ -87,72 +91,60 @@ static long dxp[256];
 #endif
 #endif
 
-#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request)
+#define GIL_REQUEST _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)
 
 /* This can set eval_breaker to 0 even though gil_drop_request became
    1.  We believe this is all right because the eval loop will release
    the GIL eventually anyway. */
 #define COMPUTE_EVAL_BREAKER() \
     _Py_atomic_store_relaxed( \
-        &eval_breaker, \
+        &_PyRuntime.ceval.eval_breaker, \
         GIL_REQUEST | \
-        _Py_atomic_load_relaxed(&pendingcalls_to_do) | \
-        pending_async_exc)
+        _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \
+        _PyRuntime.ceval.pending.async_exc)
 
 #define SET_GIL_DROP_REQUEST() \
     do { \
-        _Py_atomic_store_relaxed(&gil_drop_request, 1); \
-        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
     } while (0)
 
 #define RESET_GIL_DROP_REQUEST() \
     do { \
-        _Py_atomic_store_relaxed(&gil_drop_request, 0); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \
         COMPUTE_EVAL_BREAKER(); \
     } while (0)
 
 /* Pending calls are only modified under pending_lock */
 #define SIGNAL_PENDING_CALLS() \
     do { \
-        _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \
-        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_PENDING_CALLS() \
     do { \
-        _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \
         COMPUTE_EVAL_BREAKER(); \
     } while (0)
 
 #define SIGNAL_ASYNC_EXC() \
     do { \
-        pending_async_exc = 1; \
-        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+        _PyRuntime.ceval.pending.async_exc = 1; \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_ASYNC_EXC() \
-    do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+    do { \
+        _PyRuntime.ceval.pending.async_exc = 0; \
+        COMPUTE_EVAL_BREAKER(); \
+    } while (0)
 
 
-/* This single variable consolidates all requests to break out of the fast path
-   in the eval loop. */
-static _Py_atomic_int eval_breaker = {0};
-/* Request for running pending calls. */
-static _Py_atomic_int pendingcalls_to_do = {0};
-/* Request for looking at the `async_exc` field of the current thread state.
-   Guarded by the GIL. */
-static int pending_async_exc = 0;
-
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 #include "pythread.h"
-
-static PyThread_type_lock pending_lock = 0; /* for pending calls */
-static unsigned long main_thread = 0;
-/* Request for dropping the GIL */
-static _Py_atomic_int gil_drop_request = {0};
-
 #include "ceval_gil.h"
 
 int
@@ -168,9 +160,9 @@ PyEval_InitThreads(void)
         return;
     create_gil();
     take_gil(PyThreadState_GET());
-    main_thread = PyThread_get_thread_ident();
-    if (!pending_lock)
-        pending_lock = PyThread_allocate_lock();
+    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
+    if (!_PyRuntime.ceval.pending.lock)
+        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
 }
 
 void
@@ -238,9 +230,9 @@ PyEval_ReInitThreads(void)
     if (!gil_created())
         return;
     recreate_gil();
-    pending_lock = PyThread_allocate_lock();
+    _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
     take_gil(current_tstate);
-    main_thread = PyThread_get_thread_ident();
+    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
 
     /* Destroy all threads except the current one */
     _PyThreadState_DeleteExcept(current_tstate);
@@ -279,7 +271,7 @@ PyEval_RestoreThread(PyThreadState *tstate)
         int err = errno;
         take_gil(tstate);
         /* _Py_Finalizing is protected by the GIL */
-        if (_Py_Finalizing && tstate != _Py_Finalizing) {
+        if (_Py_IsFinalizing() && !_Py_CURRENTLY_FINALIZING(tstate)) {
             drop_gil(tstate);
             PyThread_exit_thread();
             assert(0);  /* unreachable */
@@ -326,19 +318,11 @@ _PyEval_SignalReceived(void)
    callback.
  */
 
-#define NPENDINGCALLS 32
-static struct {
-    int (*func)(void *);
-    void *arg;
-} pendingcalls[NPENDINGCALLS];
-static int pendingfirst = 0;
-static int pendinglast = 0;
-
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
     int i, j, result=0;
-    PyThread_type_lock lock = pending_lock;
+    PyThread_type_lock lock = _PyRuntime.ceval.pending.lock;
 
     /* try a few times for the lock.  Since this mechanism is used
      * for signal handling (on the main thread), there is a (slim)
@@ -360,14 +344,14 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
             return -1;
     }
 
-    i = pendinglast;
+    i = _PyRuntime.ceval.pending.last;
     j = (i + 1) % NPENDINGCALLS;
-    if (j == pendingfirst) {
+    if (j == _PyRuntime.ceval.pending.first) {
         result = -1; /* Queue full */
     } else {
-        pendingcalls[i].func = func;
-        pendingcalls[i].arg = arg;
-        pendinglast = j;
+        _PyRuntime.ceval.pending.calls[i].func = func;
+        _PyRuntime.ceval.pending.calls[i].arg = arg;
+        _PyRuntime.ceval.pending.last = j;
     }
     /* signal main loop */
     SIGNAL_PENDING_CALLS();
@@ -385,16 +369,19 @@ Py_MakePendingCalls(void)
 
     assert(PyGILState_Check());
 
-    if (!pending_lock) {
+    if (!_PyRuntime.ceval.pending.lock) {
         /* initial allocation of the lock */
-        pending_lock = PyThread_allocate_lock();
-        if (pending_lock == NULL)
+        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+        if (_PyRuntime.ceval.pending.lock == NULL)
             return -1;
     }
 
     /* only service pending calls on main thread */
-    if (main_thread && PyThread_get_thread_ident() != main_thread)
+    if (_PyRuntime.ceval.pending.main_thread &&
+        PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread)
+    {
         return 0;
+    }
     /* don't perform recursive pending calls */
     if (busy)
         return 0;
@@ -416,16 +403,16 @@ Py_MakePendingCalls(void)
         void *arg = NULL;
 
         /* pop one item off the queue while holding the lock */
-        PyThread_acquire_lock(pending_lock, WAIT_LOCK);
-        j = pendingfirst;
-        if (j == pendinglast) {
+        PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK);
+        j = _PyRuntime.ceval.pending.first;
+        if (j == _PyRuntime.ceval.pending.last) {
             func = NULL; /* Queue empty */
         } else {
-            func = pendingcalls[j].func;
-            arg = pendingcalls[j].arg;
-            pendingfirst = (j + 1) % NPENDINGCALLS;
+            func = _PyRuntime.ceval.pending.calls[j].func;
+            arg = _PyRuntime.ceval.pending.calls[j].arg;
+            _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS;
         }
-        PyThread_release_lock(pending_lock);
+        PyThread_release_lock(_PyRuntime.ceval.pending.lock);
         /* having released the lock, perform the callback */
         if (func == NULL)
             break;
@@ -444,26 +431,33 @@ error:
     return -1;
 }
 
-
 /* The interpreter's recursion limit */
 
 #ifndef Py_DEFAULT_RECURSION_LIMIT
 #define Py_DEFAULT_RECURSION_LIMIT 1000
 #endif
-static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
+
 int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT;
 
+void
+_PyEval_Initialize(struct _ceval_runtime_state *state)
+{
+    state->recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
+    _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT;
+    _gil_initialize(&state->gil);
+}
+
 int
 Py_GetRecursionLimit(void)
 {
-    return recursion_limit;
+    return _PyRuntime.ceval.recursion_limit;
 }
 
 void
 Py_SetRecursionLimit(int new_limit)
 {
-    recursion_limit = new_limit;
-    _Py_CheckRecursionLimit = recursion_limit;
+    _PyRuntime.ceval.recursion_limit = new_limit;
+    _Py_CheckRecursionLimit = _PyRuntime.ceval.recursion_limit;
 }
 
 /* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall()
@@ -475,6 +469,7 @@ int
 _Py_CheckRecursiveCall(const char *where)
 {
     PyThreadState *tstate = PyThreadState_GET();
+    int recursion_limit = _PyRuntime.ceval.recursion_limit;
 
 #ifdef USE_STACKCHECK
     if (PyOS_CheckStack()) {
@@ -522,13 +517,7 @@ static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *);
 static int do_raise(PyObject *, PyObject *);
 static int unpack_iterable(PyObject *, int, int, PyObject **);
 
-/* Records whether tracing is on for any thread.  Counts the number of
-   threads for which tstate->c_tracefunc is non-NULL, so if the value
-   is 0, we know we don't have to check this thread's c_tracefunc.
-   This speeds up the if statement in PyEval_EvalFrameEx() after
-   fast_next_opcode*/
-static int _Py_TracingPossible = 0;
-
+#define _Py_TracingPossible _PyRuntime.ceval.tracing_possible
 
 
 PyObject *
@@ -659,7 +648,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
 
 #define DISPATCH() \
     { \
-        if (!_Py_atomic_load_relaxed(&eval_breaker)) {      \
+        if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \
                     FAST_DISPATCH(); \
         } \
         continue; \
@@ -707,7 +696,8 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
 /* Code access macros */
 
 /* The integer overflow is checked by an assertion below. */
-#define INSTR_OFFSET()  (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
+#define INSTR_OFFSET()  \
+    (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
 #define NEXTOPARG()  do { \
         _Py_CODEUNIT word = *next_instr; \
         opcode = _Py_OPCODE(word); \
@@ -960,7 +950,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above. */
 
-        if (_Py_atomic_load_relaxed(&eval_breaker)) {
+        if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) {
             if (_Py_OPCODE(*next_instr) == SETUP_FINALLY ||
                 _Py_OPCODE(*next_instr) == YIELD_FROM) {
                 /* Two cases where we skip running signal handlers and other
@@ -977,11 +967,15 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
                 */
                 goto fast_next_opcode;
             }
-            if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) {
+            if (_Py_atomic_load_relaxed(
+                        &_PyRuntime.ceval.pending.calls_to_do))
+            {
                 if (Py_MakePendingCalls() < 0)
                     goto error;
             }
-            if (_Py_atomic_load_relaxed(&gil_drop_request)) {
+            if (_Py_atomic_load_relaxed(
+                        &_PyRuntime.ceval.gil_drop_request))
+            {
                 /* Give another thread a chance */
                 if (PyThreadState_Swap(NULL) != tstate)
                     Py_FatalError("ceval: tstate mix-up");
@@ -992,7 +986,9 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
                 take_gil(tstate);
 
                 /* Check if we should make a quick exit. */
-                if (_Py_Finalizing && _Py_Finalizing != tstate) {
+                if (_Py_IsFinalizing() &&
+                    !_Py_CURRENTLY_FINALIZING(tstate))
+                {
                     drop_gil(tstate);
                     PyThread_exit_thread();
                 }
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index a3b450bd5c4..ef5189068e0 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -8,20 +8,13 @@
 
 /* First some general settings */
 
-/* microseconds (the Python API uses seconds, though) */
-#define DEFAULT_INTERVAL 5000
-static unsigned long gil_interval = DEFAULT_INTERVAL;
-#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)
-
-/* Enable if you want to force the switching of threads at least every `gil_interval` */
-#undef FORCE_SWITCHING
-#define FORCE_SWITCHING
+#define INTERVAL (_PyRuntime.ceval.gil.interval >= 1 ? _PyRuntime.ceval.gil.interval : 1)
 
 
 /*
    Notes about the implementation:
 
-   - The GIL is just a boolean variable (gil_locked) whose access is protected
+   - The GIL is just a boolean variable (locked) whose access is protected
      by a mutex (gil_mutex), and whose changes are signalled by a condition
      variable (gil_cond). gil_mutex is taken for short periods of time,
      and therefore mostly uncontended.
@@ -48,7 +41,7 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
    - When a thread releases the GIL and gil_drop_request is set, that thread
      ensures that another GIL-awaiting thread gets scheduled.
      It does so by waiting on a condition variable (switch_cond) until
-     the value of gil_last_holder is changed to something else than its
+     the value of last_holder is changed to something other than its
      own thread state pointer, indicating that another thread was able to
      take the GIL.
 
@@ -60,11 +53,7 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
 */
 
 #include "condvar.h"
-#ifndef Py_HAVE_CONDVAR
-#error You need either a POSIX-compatible or a Windows system!
-#endif
 
-#define MUTEX_T PyMUTEX_T
 #define MUTEX_INIT(mut) \
     if (PyMUTEX_INIT(&(mut))) { \
         Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); };
@@ -78,7 +67,6 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
     if (PyMUTEX_UNLOCK(&(mut))) { \
         Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); };
 
-#define COND_T PyCOND_T
 #define COND_INIT(cond) \
     if (PyCOND_INIT(&(cond))) { \
         Py_FatalError("PyCOND_INIT(" #cond ") failed"); };
@@ -103,48 +91,36 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
     } \
 
 
+#define DEFAULT_INTERVAL 5000
 
-/* Whether the GIL is already taken (-1 if uninitialized). This is atomic
-   because it can be read without any lock taken in ceval.c. */
-static _Py_atomic_int gil_locked = {-1};
-/* Number of GIL switches since the beginning. */
-static unsigned long gil_switch_number = 0;
-/* Last PyThreadState holding / having held the GIL. This helps us know
-   whether anyone else was scheduled after we dropped the GIL. */
-static _Py_atomic_address gil_last_holder = {0};
-
-/* This condition variable allows one or several threads to wait until
-   the GIL is released. In addition, the mutex also protects the above
-   variables. */
-static COND_T gil_cond;
-static MUTEX_T gil_mutex;
-
-#ifdef FORCE_SWITCHING
-/* This condition variable helps the GIL-releasing thread wait for
-   a GIL-awaiting thread to be scheduled and take the GIL. */
-static COND_T switch_cond;
-static MUTEX_T switch_mutex;
-#endif
-
+static void _gil_initialize(struct _gil_runtime_state *state)
+{
+    _Py_atomic_int uninitialized = {-1};
+    state->locked = uninitialized;
+    state->interval = DEFAULT_INTERVAL;
+}
 
 static int gil_created(void)
 {
-    return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0;
+    return (_Py_atomic_load_explicit(&_PyRuntime.ceval.gil.locked,
+                                     _Py_memory_order_acquire)
+            ) >= 0;
 }
 
 static void create_gil(void)
 {
-    MUTEX_INIT(gil_mutex);
+    MUTEX_INIT(_PyRuntime.ceval.gil.mutex);
 #ifdef FORCE_SWITCHING
-    MUTEX_INIT(switch_mutex);
+    MUTEX_INIT(_PyRuntime.ceval.gil.switch_mutex);
 #endif
-    COND_INIT(gil_cond);
+    COND_INIT(_PyRuntime.ceval.gil.cond);
 #ifdef FORCE_SWITCHING
-    COND_INIT(switch_cond);
+    COND_INIT(_PyRuntime.ceval.gil.switch_cond);
 #endif
-    _Py_atomic_store_relaxed(&gil_last_holder, 0);
-    _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
-    _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
+    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, 0);
+    _Py_ANNOTATE_RWLOCK_CREATE(&_PyRuntime.ceval.gil.locked);
+    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, 0,
+                              _Py_memory_order_release);
 }
 
 static void destroy_gil(void)
@@ -152,54 +128,62 @@ static void destroy_gil(void)
     /* some pthread-like implementations tie the mutex to the cond
      * and must have the cond destroyed first.
      */
-    COND_FINI(gil_cond);
-    MUTEX_FINI(gil_mutex);
+    COND_FINI(_PyRuntime.ceval.gil.cond);
+    MUTEX_FINI(_PyRuntime.ceval.gil.mutex);
 #ifdef FORCE_SWITCHING
-    COND_FINI(switch_cond);
-    MUTEX_FINI(switch_mutex);
+    COND_FINI(_PyRuntime.ceval.gil.switch_cond);
+    MUTEX_FINI(_PyRuntime.ceval.gil.switch_mutex);
 #endif
-    _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release);
-    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
+    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, -1,
+                              _Py_memory_order_release);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
 }
 
 static void recreate_gil(void)
 {
-    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
     /* XXX should we destroy the old OS resources here? */
     create_gil();
 }
 
 static void drop_gil(PyThreadState *tstate)
 {
-    if (!_Py_atomic_load_relaxed(&gil_locked))
+    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
         Py_FatalError("drop_gil: GIL is not locked");
     /* tstate is allowed to be NULL (early interpreter init) */
     if (tstate != NULL) {
         /* Sub-interpreter support: threads might have been switched
            under our feet using PyThreadState_Swap(). Fix the GIL last
            holder variable so that our heuristics work. */
-        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
+                                 (uintptr_t)tstate);
     }
 
-    MUTEX_LOCK(gil_mutex);
-    _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);
-    _Py_atomic_store_relaxed(&gil_locked, 0);
-    COND_SIGNAL(gil_cond);
-    MUTEX_UNLOCK(gil_mutex);
+    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
+    _Py_ANNOTATE_RWLOCK_RELEASED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 0);
+    COND_SIGNAL(_PyRuntime.ceval.gil.cond);
+    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
 
 #ifdef FORCE_SWITCHING
-    if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) {
-        MUTEX_LOCK(switch_mutex);
+    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) &&
+        tstate != NULL)
+    {
+        MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
         /* Not switched yet => wait */
-        if ((PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder) == tstate) {
+        if (((PyThreadState*)_Py_atomic_load_relaxed(
+                    &_PyRuntime.ceval.gil.last_holder)
+            ) == tstate)
+        {
         RESET_GIL_DROP_REQUEST();
             /* NOTE: if COND_WAIT does not atomically start waiting when
                releasing the mutex, another thread can run through, take
                the GIL and drop it again, and reset the condition
                before we even had a chance to wait for it. */
-            COND_WAIT(switch_cond, switch_mutex);
+            COND_WAIT(_PyRuntime.ceval.gil.switch_cond,
+                      _PyRuntime.ceval.gil.switch_mutex);
     }
-        MUTEX_UNLOCK(switch_mutex);
+        MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
     }
 #endif
 }
@@ -211,60 +195,65 @@ static void take_gil(PyThreadState *tstate)
         Py_FatalError("take_gil: NULL tstate");
 
     err = errno;
-    MUTEX_LOCK(gil_mutex);
+    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
 
-    if (!_Py_atomic_load_relaxed(&gil_locked))
+    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
         goto _ready;
 
-    while (_Py_atomic_load_relaxed(&gil_locked)) {
+    while (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) {
         int timed_out = 0;
         unsigned long saved_switchnum;
 
-        saved_switchnum = gil_switch_number;
-        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
+        saved_switchnum = _PyRuntime.ceval.gil.switch_number;
+        COND_TIMED_WAIT(_PyRuntime.ceval.gil.cond, _PyRuntime.ceval.gil.mutex,
+                        INTERVAL, timed_out);
         /* If we timed out and no switch occurred in the meantime, it is time
            to ask the GIL-holding thread to drop it. */
         if (timed_out &&
-            _Py_atomic_load_relaxed(&gil_locked) &&
-            gil_switch_number == saved_switchnum) {
+            _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) &&
+            _PyRuntime.ceval.gil.switch_number == saved_switchnum) {
             SET_GIL_DROP_REQUEST();
         }
     }
 _ready:
 #ifdef FORCE_SWITCHING
-    /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */
-    MUTEX_LOCK(switch_mutex);
+    /* This mutex must be taken before modifying
+       _PyRuntime.ceval.gil.last_holder (see drop_gil()). */
+    MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
 #endif
     /* We now hold the GIL */
-    _Py_atomic_store_relaxed(&gil_locked, 1);
-    _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 1);
+    _Py_ANNOTATE_RWLOCK_ACQUIRED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
 
-    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) {
-        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
-        ++gil_switch_number;
+    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(
+                    &_PyRuntime.ceval.gil.last_holder))
+    {
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
+                                 (uintptr_t)tstate);
+        ++_PyRuntime.ceval.gil.switch_number;
     }
 
 #ifdef FORCE_SWITCHING
-    COND_SIGNAL(switch_cond);
-    MUTEX_UNLOCK(switch_mutex);
+    COND_SIGNAL(_PyRuntime.ceval.gil.switch_cond);
+    MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
 #endif
-    if (_Py_atomic_load_relaxed(&gil_drop_request)) {
+    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) {
         RESET_GIL_DROP_REQUEST();
     }
     if (tstate->async_exc != NULL) {
         _PyEval_SignalAsyncExc();
     }
 
-    MUTEX_UNLOCK(gil_mutex);
+    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
     errno = err;
 }
 
 void _PyEval_SetSwitchInterval(unsigned long microseconds)
 {
-    gil_interval = microseconds;
+    _PyRuntime.ceval.gil.interval = microseconds;
 }
 
 unsigned long _PyEval_GetSwitchInterval()
 {
-    return gil_interval;
+    return _PyRuntime.ceval.gil.interval;
 }
diff --git a/Python/codecs.c b/Python/codecs.c
index 688a40bd6ff..18edfbdab94 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -9,6 +9,7 @@ Copyright (c) Corporation for National Research Initiatives.
    ------------------------------------------------------------------------ */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "ucnhash.h"
 #include <ctype.h>
 
diff --git a/Python/condvar.h b/Python/condvar.h
index 9a71b17738f..951b8ad47f0 100644
--- a/Python/condvar.h
+++ b/Python/condvar.h
@@ -37,27 +37,16 @@
  *    Condition Variable.
  */
 
-#ifndef _CONDVAR_H_
-#define _CONDVAR_H_
+#ifndef _CONDVAR_IMPL_H_
+#define _CONDVAR_IMPL_H_
 
 #include "Python.h"
-
-#ifndef _POSIX_THREADS
-/* This means pthreads are not implemented in libc headers, hence the macro
-   not present in unistd.h. But they still can be implemented as an external
-   library (e.g. gnu pth in pthread emulation) */
-# ifdef HAVE_PTHREAD_H
-#  include <pthread.h> /* _POSIX_THREADS */
-# endif
-#endif
+#include "internal/condvar.h"
 
 #ifdef _POSIX_THREADS
 /*
  * POSIX support
  */
-#define Py_HAVE_CONDVAR
-
-#include <pthread.h>
 
 #define PyCOND_ADD_MICROSECONDS(tv, interval) \
 do { /* TODO: add overflow and truncation checks */ \
@@ -74,13 +63,11 @@ do { /* TODO: add overflow and truncation checks */ \
 #endif
 
 /* The following functions return 0 on success, nonzero on error */
-#define PyMUTEX_T pthread_mutex_t
 #define PyMUTEX_INIT(mut)       pthread_mutex_init((mut), NULL)
 #define PyMUTEX_FINI(mut)       pthread_mutex_destroy(mut)
 #define PyMUTEX_LOCK(mut)       pthread_mutex_lock(mut)
 #define PyMUTEX_UNLOCK(mut)     pthread_mutex_unlock(mut)
 
-#define PyCOND_T pthread_cond_t
 #define PyCOND_INIT(cond)       pthread_cond_init((cond), NULL)
 #define PyCOND_FINI(cond)       pthread_cond_destroy(cond)
 #define PyCOND_SIGNAL(cond)     pthread_cond_signal(cond)
@@ -116,45 +103,11 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
  * Emulated condition variables ones that work with XP and later, plus
  * example native support on VISTA and onwards.
  */
-#define Py_HAVE_CONDVAR
-
-
-/* include windows if it hasn't been done before */
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* options */
-/* non-emulated condition variables are provided for those that want
- * to target Windows Vista.  Modify this macro to enable them.
- */
-#ifndef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
-#endif
-
-/* fall back to emulation if not targeting Vista */
-#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
-#undef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1
-#endif
-
 
 #if _PY_EMULATED_WIN_CV
 
 /* The mutex is a CriticalSection object and
    The condition variables is emulated with the help of a semaphore.
-   Semaphores are available on Windows XP (2003 server) and later.
-   We use a Semaphore rather than an auto-reset event, because although
-   an auto-resent event might appear to solve the lost-wakeup bug (race
-   condition between releasing the outer lock and waiting) because it
-   maintains state even though a wait hasn't happened, there is still
-   a lost wakeup problem if more than one thread are interrupted in the
-   critical place.  A semaphore solves that, because its state is counted,
-   not Boolean.
-   Because it is ok to signal a condition variable with no one
-   waiting, we need to keep track of the number of
-   waiting threads.  Otherwise, the semaphore's state could rise
-   without bound.  This also helps reduce the number of "spurious wakeups"
-   that would otherwise happen.
 
    This implementation still has the problem that the threads woken
    with a "signal" aren't necessarily those that are already
@@ -168,8 +121,6 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
    http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
 */
 
-typedef CRITICAL_SECTION PyMUTEX_T;
-
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
 {
@@ -198,15 +149,6 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs)
     return 0;
 }
 
-/* The ConditionVariable object.  From XP onwards it is easily emulated with
- * a Semaphore
- */
-
-typedef struct _PyCOND_T
-{
-    HANDLE sem;
-    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
-} PyCOND_T;
 
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
@@ -304,12 +246,7 @@ PyCOND_BROADCAST(PyCOND_T *cv)
     return 0;
 }
 
-#else
-
-/* Use native Win7 primitives if build target is Win7 or higher */
-
-/* SRWLOCK is faster and better than CriticalSection */
-typedef SRWLOCK PyMUTEX_T;
+#else /* !_PY_EMULATED_WIN_CV */
 
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
@@ -339,8 +276,6 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs)
 }
 
 
-typedef CONDITION_VARIABLE  PyCOND_T;
-
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
 {
@@ -387,4 +322,4 @@ PyCOND_BROADCAST(PyCOND_T *cv)
 
 #endif /* _POSIX_THREADS, NT_THREADS */
 
-#endif /* _CONDVAR_H_ */
+#endif /* _CONDVAR_IMPL_H_ */
diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c
index 7f8f134d60b..f2711931908 100644
--- a/Python/dynload_shlib.c
+++ b/Python/dynload_shlib.c
@@ -2,6 +2,7 @@
 /* Support for dynamic loading of extension modules */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "importdl.h"
 
 #include <sys/types.h>
diff --git a/Python/errors.c b/Python/errors.c
index 261dd7b27cb..3cb6d5306f8 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -2,6 +2,7 @@
 /* Error handling */
 
 #include "Python.h"
+#include "internal/pystate.h"
 
 #ifndef __STDC__
 #ifndef MS_WINDOWS
diff --git a/Python/import.c b/Python/import.c
index a406db31032..6b2634c3497 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -5,6 +5,7 @@
 
 #include "Python-ast.h"
 #undef Yield /* undefine macro conflicting with winbase.h */
+#include "internal/pystate.h"
 #include "errcode.h"
 #include "marshal.h"
 #include "code.h"
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 7798dfe9c2c..b9f916bf39f 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -4,6 +4,7 @@
 
 #include "Python-ast.h"
 #undef Yield /* undefine macro conflicting with winbase.h */
+#include "internal/pystate.h"
 #include "grammar.h"
 #include "node.h"
 #include "token.h"
@@ -75,6 +76,36 @@ extern void _Py_ReadyTypes(void);
 extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
 extern void _PyGILState_Fini(void);
 
+_PyRuntimeState _PyRuntime = {0, 0};
+
+void
+_PyRuntime_Initialize(void)
+{
+    /* XXX We only initialize once per process, which matches the
+       static initialization of the former globals now found in
+       _PyRuntime.  _PyRuntime *should* instead be initialized on
+       every Py_Initialize() call, but doing so currently breaks the
+       runtime because the runtime state is not yet properly
+       finalized. */
+    static int initialized = 0;
+    if (initialized)
+        return;
+    initialized = 1;
+    _PyRuntimeState_Init(&_PyRuntime);
+}
+
+void
+_PyRuntime_Finalize(void)
+{
+    _PyRuntimeState_Fini(&_PyRuntime);
+}
+
+int
+_Py_IsFinalizing(void)
+{
+    return _PyRuntime.finalizing != NULL;
+}
+
 /* Global configuration variable declarations are in pydebug.h */
 /* XXX (ncoghlan): move those declarations to pylifecycle.h? */
 int Py_DebugFlag; /* Needed by parser.c */
@@ -98,8 +129,6 @@ int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */
 int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */
 #endif
 
-PyThreadState *_Py_Finalizing = NULL;
-
 /* Hack to force loading of object files */
 int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \
     PyOS_mystrnicmp; /* Python/pystrcmp.o */
@@ -117,19 +146,17 @@ PyModule_GetWarningsModule(void)
  *
  * Can be called prior to Py_Initialize.
  */
-int _Py_CoreInitialized = 0;
-int _Py_Initialized = 0;
 
 int
 _Py_IsCoreInitialized(void)
 {
-    return _Py_CoreInitialized;
+    return _PyRuntime.core_initialized;
 }
 
 int
 Py_IsInitialized(void)
 {
-    return _Py_Initialized;
+    return _PyRuntime.initialized;
 }
 
 /* Helper to allow an embedding application to override the normal
@@ -542,14 +569,16 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
     _PyCoreConfig core_config = _PyCoreConfig_INIT;
     _PyMainInterpreterConfig preinit_config = _PyMainInterpreterConfig_INIT;
 
+    _PyRuntime_Initialize();
+
     if (config != NULL) {
         core_config = *config;
     }
 
-    if (_Py_Initialized) {
+    if (_PyRuntime.initialized) {
         Py_FatalError("Py_InitializeCore: main interpreter already initialized");
     }
-    if (_Py_CoreInitialized) {
+    if (_PyRuntime.core_initialized) {
         Py_FatalError("Py_InitializeCore: runtime core already initialized");
     }
 
@@ -562,7 +591,14 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
      * threads still hanging around from a previous Py_Initialize/Finalize
      * pair :(
      */
-    _Py_Finalizing = NULL;
+    _PyRuntime.finalizing = NULL;
+
+    if (_PyMem_SetupAllocators(core_config.allocator) < 0) {
+        fprintf(stderr,
+            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n",
+            core_config.allocator);
+        exit(1);
+    }
 
 #ifdef __ANDROID__
     /* Passing "" to setlocale() on Android requests the C locale rather
@@ -604,7 +640,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
         Py_HashRandomizationFlag = 1;
     }
 
-    _PyInterpreterState_Init();
+    _PyInterpreterState_Enable(&_PyRuntime);
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_InitializeCore: can't make main interpreter");
@@ -694,7 +730,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
     }
 
     /* Only when we get here is the runtime core fully initialized */
-    _Py_CoreInitialized = 1;
+    _PyRuntime.core_initialized = 1;
 }
 
 /* Read configuration settings from standard locations
@@ -735,10 +771,10 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
     PyInterpreterState *interp;
     PyThreadState *tstate;
 
-    if (!_Py_CoreInitialized) {
+    if (!_PyRuntime.core_initialized) {
         Py_FatalError("Py_InitializeMainInterpreter: runtime core not initialized");
     }
-    if (_Py_Initialized) {
+    if (_PyRuntime.initialized) {
         Py_FatalError("Py_InitializeMainInterpreter: main interpreter already initialized");
     }
 
@@ -759,7 +795,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
          * This means anything which needs support from extension modules
          * or pure Python code in the standard library won't work.
          */
-        _Py_Initialized = 1;
+        _PyRuntime.initialized = 1;
         return 0;
     }
     /* TODO: Report exceptions rather than fatal errors below here */
@@ -804,7 +840,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
         Py_XDECREF(warnings_module);
     }
 
-    _Py_Initialized = 1;
+    _PyRuntime.initialized = 1;
 
     if (!Py_NoSiteFlag)
         initsite(); /* Module site */
@@ -920,7 +956,7 @@ Py_FinalizeEx(void)
     PyThreadState *tstate;
     int status = 0;
 
-    if (!_Py_Initialized)
+    if (!_PyRuntime.initialized)
         return status;
 
     wait_for_thread_shutdown();
@@ -942,9 +978,9 @@ Py_FinalizeEx(void)
 
     /* Remaining threads (e.g. daemon threads) will automatically exit
        after taking the GIL (in PyEval_RestoreThread()). */
-    _Py_Finalizing = tstate;
-    _Py_Initialized = 0;
-    _Py_CoreInitialized = 0;
+    _PyRuntime.finalizing = tstate;
+    _PyRuntime.initialized = 0;
+    _PyRuntime.core_initialized = 0;
 
     /* Flush sys.stdout and sys.stderr */
     if (flush_std_files() < 0) {
@@ -1104,6 +1140,7 @@ Py_FinalizeEx(void)
 #endif
 
     call_ll_exitfuncs();
+    _PyRuntime_Finalize();
     return status;
 }
 
@@ -1133,7 +1170,7 @@ Py_NewInterpreter(void)
     PyThreadState *tstate, *save_tstate;
     PyObject *bimod, *sysmod;
 
-    if (!_Py_Initialized)
+    if (!_PyRuntime.initialized)
         Py_FatalError("Py_NewInterpreter: call Py_Initialize first");
 
     /* Issue #10915, #15751: The GIL API doesn't work with multiple
@@ -1844,20 +1881,19 @@ exit:
 
 #  include "pythread.h"
 
-static void (*pyexitfunc)(void) = NULL;
 /* For the atexit module. */
 void _Py_PyAtExit(void (*func)(void))
 {
-    pyexitfunc = func;
+    _PyRuntime.pyexitfunc = func;
 }
 
 static void
 call_py_exitfuncs(void)
 {
-    if (pyexitfunc == NULL)
+    if (_PyRuntime.pyexitfunc == NULL)
         return;
 
-    (*pyexitfunc)();
+    (*_PyRuntime.pyexitfunc)();
     PyErr_Clear();
 }
 
@@ -1888,22 +1924,19 @@ wait_for_thread_shutdown(void)
 }
 
 #define NEXITFUNCS 32
-static void (*exitfuncs[NEXITFUNCS])(void);
-static int nexitfuncs = 0;
-
 int Py_AtExit(void (*func)(void))
 {
-    if (nexitfuncs >= NEXITFUNCS)
+    if (_PyRuntime.nexitfuncs >= NEXITFUNCS)
         return -1;
-    exitfuncs[nexitfuncs++] = func;
+    _PyRuntime.exitfuncs[_PyRuntime.nexitfuncs++] = func;
     return 0;
 }
 
 static void
 call_ll_exitfuncs(void)
 {
-    while (nexitfuncs > 0)
-        (*exitfuncs[--nexitfuncs])();
+    while (_PyRuntime.nexitfuncs > 0)
+        (*_PyRuntime.exitfuncs[--_PyRuntime.nexitfuncs])();
 
     fflush(stdout);
     fflush(stderr);
diff --git a/Python/pystate.c b/Python/pystate.c
index 379e5a39c5a..be012d7d46a 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -2,6 +2,7 @@
 /* Thread and interpreter state structures and their interfaces */
 
 #include "Python.h"
+#include "internal/pystate.h"
 
 #define GET_TSTATE() \
     ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
@@ -34,47 +35,52 @@ to avoid the expense of doing their own locking).
 extern "C" {
 #endif
 
-int _PyGILState_check_enabled = 1;
+void
+_PyRuntimeState_Init(_PyRuntimeState *runtime)
+{
+    memset(runtime, 0, sizeof(*runtime));
 
-#include "pythread.h"
-static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */
-#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock()))
-#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK)
-#define HEAD_UNLOCK() PyThread_release_lock(head_mutex)
+    _PyObject_Initialize(&runtime->obj);
+    _PyMem_Initialize(&runtime->mem);
+    _PyGC_Initialize(&runtime->gc);
+    _PyEval_Initialize(&runtime->ceval);
 
-/* The single PyInterpreterState used by this process'
-   GILState implementation
-*/
-/* TODO: Given interp_main, it may be possible to kill this ref */
-static PyInterpreterState *autoInterpreterState = NULL;
-static int autoTLSkey = -1;
+    runtime->gilstate.check_enabled = 1;
+    runtime->gilstate.autoTLSkey = -1;
 
-static PyInterpreterState *interp_head = NULL;
-static PyInterpreterState *interp_main = NULL;
+    runtime->interpreters.mutex = PyThread_allocate_lock();
+    if (runtime->interpreters.mutex == NULL)
+        Py_FatalError("Can't initialize threads for interpreter");
+    runtime->interpreters.next_id = -1;
+}
 
-/* Assuming the current thread holds the GIL, this is the
-   PyThreadState for the current thread. */
-_Py_atomic_address _PyThreadState_Current = {0};
-PyThreadFrameGetter _PyThreadState_GetFrame = NULL;
+void
+_PyRuntimeState_Fini(_PyRuntimeState *runtime)
+{
+    if (runtime->interpreters.mutex != NULL) {
+        PyThread_free_lock(runtime->interpreters.mutex);
+        runtime->interpreters.mutex = NULL;
+    }
+}
+
+#define HEAD_LOCK() PyThread_acquire_lock(_PyRuntime.interpreters.mutex, \
+                                          WAIT_LOCK)
+#define HEAD_UNLOCK() PyThread_release_lock(_PyRuntime.interpreters.mutex)
 
 static void _PyGILState_NoteThreadState(PyThreadState* tstate);
 
-/* _next_interp_id is an auto-numbered sequence of small integers.
-   It gets initialized in _PyInterpreterState_Init(), which is called
-   in Py_Initialize(), and used in PyInterpreterState_New().  A negative
-   interpreter ID indicates an error occurred.  The main interpreter
-   will always have an ID of 0.  Overflow results in a RuntimeError.
-   If that becomes a problem later then we can adjust, e.g. by using
-   a Python int.
-
-   We initialize this to -1 so that the pre-Py_Initialize() value
-   results in an error. */
-static int64_t _next_interp_id = -1;
-
 void
-_PyInterpreterState_Init(void)
+_PyInterpreterState_Enable(_PyRuntimeState *runtime)
 {
-    _next_interp_id = 0;
+    runtime->interpreters.next_id = 0;
+    /* Since we only call _PyRuntimeState_Init() once per process
+       (see _PyRuntime_Initialize()), we make sure the mutex is
+       initialized here. */
+    if (runtime->interpreters.mutex == NULL) {
+        runtime->interpreters.mutex = PyThread_allocate_lock();
+        if (runtime->interpreters.mutex == NULL)
+            Py_FatalError("Can't initialize threads for interpreter");
+    }
 }
 
 PyInterpreterState *
@@ -84,14 +90,16 @@ PyInterpreterState_New(void)
                                  PyMem_RawMalloc(sizeof(PyInterpreterState));
 
     if (interp != NULL) {
-        HEAD_INIT();
-        if (head_mutex == NULL)
-            Py_FatalError("Can't initialize threads for interpreter");
         interp->modules_by_index = NULL;
         interp->sysdict = NULL;
         interp->builtins = NULL;
         interp->builtins_copy = NULL;
         interp->tstate_head = NULL;
+        interp->check_interval = 100;
+        interp->warnoptions = NULL;
+        interp->xoptions = NULL;
+        interp->num_threads = 0;
+        interp->pythread_stacksize = 0;
         interp->codec_search_path = NULL;
         interp->codec_search_cache = NULL;
         interp->codec_error_registry = NULL;
@@ -115,19 +123,19 @@ PyInterpreterState_New(void)
 #endif
 
         HEAD_LOCK();
-        interp->next = interp_head;
-        if (interp_main == NULL) {
-            interp_main = interp;
+        interp->next = _PyRuntime.interpreters.head;
+        if (_PyRuntime.interpreters.main == NULL) {
+            _PyRuntime.interpreters.main = interp;
         }
-        interp_head = interp;
-        if (_next_interp_id < 0) {
+        _PyRuntime.interpreters.head = interp;
+        if (_PyRuntime.interpreters.next_id < 0) {
             /* overflow or Py_Initialize() not called! */
             PyErr_SetString(PyExc_RuntimeError,
                             "failed to get an interpreter ID");
             interp = NULL;
         } else {
-            interp->id = _next_interp_id;
-            _next_interp_id += 1;
+            interp->id = _PyRuntime.interpreters.next_id;
+            _PyRuntime.interpreters.next_id += 1;
         }
         HEAD_UNLOCK();
     }
@@ -179,7 +187,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     PyInterpreterState **p;
     zapthreads(interp);
     HEAD_LOCK();
-    for (p = &interp_head; ; p = &(*p)->next) {
+    for (p = &_PyRuntime.interpreters.head; ; p = &(*p)->next) {
         if (*p == NULL)
             Py_FatalError(
                 "PyInterpreterState_Delete: invalid interp");
@@ -189,17 +197,13 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     if (interp->tstate_head != NULL)
         Py_FatalError("PyInterpreterState_Delete: remaining threads");
     *p = interp->next;
-    if (interp_main == interp) {
-        interp_main = NULL;
-        if (interp_head != NULL)
+    if (_PyRuntime.interpreters.main == interp) {
+        _PyRuntime.interpreters.main = NULL;
+        if (_PyRuntime.interpreters.head != NULL)
             Py_FatalError("PyInterpreterState_Delete: remaining subinterpreters");
     }
     HEAD_UNLOCK();
     PyMem_RawFree(interp);
-    if (interp_head == NULL && head_mutex != NULL) {
-        PyThread_free_lock(head_mutex);
-        head_mutex = NULL;
-    }
 }
 
 
@@ -480,8 +484,11 @@ PyThreadState_Delete(PyThreadState *tstate)
 {
     if (tstate == GET_TSTATE())
         Py_FatalError("PyThreadState_Delete: tstate is still current");
-    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
-        PyThread_delete_key_value(autoTLSkey);
+    if (_PyRuntime.gilstate.autoInterpreterState &&
+        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
+    {
+        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
+    }
     tstate_delete_common(tstate);
 }
 
@@ -494,8 +501,11 @@ PyThreadState_DeleteCurrent()
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
     tstate_delete_common(tstate);
-    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
-        PyThread_delete_key_value(autoTLSkey);
+    if (_PyRuntime.gilstate.autoInterpreterState &&
+        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
+    {
+        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
+    }
     SET_TSTATE(NULL);
     PyEval_ReleaseLock();
 }
@@ -654,13 +664,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
 PyInterpreterState *
 PyInterpreterState_Head(void)
 {
-    return interp_head;
+    return _PyRuntime.interpreters.head;
 }
 
 PyInterpreterState *
 PyInterpreterState_Main(void)
 {
-    return interp_main;
+    return _PyRuntime.interpreters.main;
 }
 
 PyInterpreterState *
@@ -700,7 +710,7 @@ _PyThread_CurrentFrames(void)
      * need to grab head_mutex for the duration.
      */
     HEAD_LOCK();
-    for (i = interp_head; i != NULL; i = i->next) {
+    for (i = _PyRuntime.interpreters.head; i != NULL; i = i->next) {
         PyThreadState *t;
         for (t = i->tstate_head; t != NULL; t = t->next) {
             PyObject *id;
@@ -751,11 +761,11 @@ void
 _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 {
     assert(i && t); /* must init with valid states */
-    autoTLSkey = PyThread_create_key();
-    if (autoTLSkey == -1)
+    _PyRuntime.gilstate.autoTLSkey = PyThread_create_key();
+    if (_PyRuntime.gilstate.autoTLSkey == -1)
         Py_FatalError("Could not allocate TLS entry");
-    autoInterpreterState = i;
-    assert(PyThread_get_key_value(autoTLSkey) == NULL);
+    _PyRuntime.gilstate.autoInterpreterState = i;
+    assert(PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL);
     assert(t->gilstate_counter == 0);
 
     _PyGILState_NoteThreadState(t);
@@ -764,15 +774,15 @@ _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 PyInterpreterState *
 _PyGILState_GetInterpreterStateUnsafe(void)
 {
-    return autoInterpreterState;
+    return _PyRuntime.gilstate.autoInterpreterState;
 }
 
 void
 _PyGILState_Fini(void)
 {
-    PyThread_delete_key(autoTLSkey);
-    autoTLSkey = -1;
-    autoInterpreterState = NULL;
+    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
+    _PyRuntime.gilstate.autoTLSkey = -1;
+    _PyRuntime.gilstate.autoInterpreterState = NULL;
 }
 
 /* Reset the TLS key - called by PyOS_AfterFork_Child().
@@ -782,16 +792,18 @@ _PyGILState_Fini(void)
 void
 _PyGILState_Reinit(void)
 {
-    head_mutex = NULL;
-    HEAD_INIT();
+    _PyRuntime.interpreters.mutex = PyThread_allocate_lock();
+    if (_PyRuntime.interpreters.mutex == NULL)
+        Py_FatalError("Can't initialize threads for interpreter");
     PyThreadState *tstate = PyGILState_GetThisThreadState();
-    PyThread_delete_key(autoTLSkey);
-    if ((autoTLSkey = PyThread_create_key()) == -1)
+    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
+    if ((_PyRuntime.gilstate.autoTLSkey = PyThread_create_key()) == -1)
         Py_FatalError("Could not allocate TLS entry");
 
     /* If the thread had an associated auto thread state, reassociate it with
      * the new key. */
-    if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
+    if (tstate && PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
+                                         (void *)tstate) < 0)
         Py_FatalError("Couldn't create autoTLSkey mapping");
 }
 
@@ -806,7 +818,7 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
     /* If autoTLSkey isn't initialized, this must be the very first
        threadstate created in Py_Initialize().  Don't do anything for now
        (we'll be back here when _PyGILState_Init is called). */
-    if (!autoInterpreterState)
+    if (!_PyRuntime.gilstate.autoInterpreterState)
         return;
 
     /* Stick the thread state for this thread in thread local storage.
@@ -821,9 +833,13 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
        The first thread state created for that given OS level thread will
        "win", which seems reasonable behaviour.
     */
-    if (PyThread_get_key_value(autoTLSkey) == NULL) {
-        if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
+    if (PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL) {
+        if ((PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
+                                    (void *)tstate)
+             ) < 0)
+        {
             Py_FatalError("Couldn't create autoTLSkey mapping");
+        }
     }
 
     /* PyGILState_Release must not try to delete this thread state. */
@@ -834,9 +850,10 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
 PyThreadState *
 PyGILState_GetThisThreadState(void)
 {
-    if (autoInterpreterState == NULL)
+    if (_PyRuntime.gilstate.autoInterpreterState == NULL)
         return NULL;
-    return (PyThreadState *)PyThread_get_key_value(autoTLSkey);
+    return (PyThreadState *)PyThread_get_key_value(
+                _PyRuntime.gilstate.autoTLSkey);
 }
 
 int
@@ -847,7 +864,7 @@ PyGILState_Check(void)
     if (!_PyGILState_check_enabled)
         return 1;
 
-    if (autoTLSkey == -1)
+    if (_PyRuntime.gilstate.autoTLSkey == -1)
         return 1;
 
     tstate = GET_TSTATE();
@@ -867,8 +884,10 @@ PyGILState_Ensure(void)
        spells out other issues.  Embedders are expected to have
        called Py_Initialize() and usually PyEval_InitThreads().
     */
-    assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */
-    tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey);
+    /* Py_Initialize() hasn't been called! */
+    assert(_PyRuntime.gilstate.autoInterpreterState);
+    tcur = (PyThreadState *)PyThread_get_key_value(
+                _PyRuntime.gilstate.autoTLSkey);
     if (tcur == NULL) {
         /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
            called from a new thread for the first time, we need the create the
@@ -876,7 +895,7 @@ PyGILState_Ensure(void)
         PyEval_InitThreads();
 
         /* Create a new thread state for this thread */
-        tcur = PyThreadState_New(autoInterpreterState);
+        tcur = PyThreadState_New(_PyRuntime.gilstate.autoInterpreterState);
         if (tcur == NULL)
             Py_FatalError("Couldn't create thread-state for new thread");
         /* This is our thread state!  We'll need to delete it in the
@@ -901,7 +920,7 @@ void
 PyGILState_Release(PyGILState_STATE oldstate)
 {
     PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value(
-                                                            autoTLSkey);
+                                _PyRuntime.gilstate.autoTLSkey);
     if (tcur == NULL)
         Py_FatalError("auto-releasing thread-state, "
                       "but no thread-state for this thread");
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index f31b3ee5a5d..d1d4a69a8dd 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -12,6 +12,7 @@
 
 #include "Python-ast.h"
 #undef Yield /* undefine macro conflicting with winbase.h */
+#include "internal/pystate.h"
 #include "grammar.h"
 #include "node.h"
 #include "token.h"
diff --git a/Python/symtable.c b/Python/symtable.c
index 6165cfe162e..2658b917d7a 100644
--- a/Python/symtable.c
+++ b/Python/symtable.c
@@ -1,4 +1,8 @@
 #include "Python.h"
+#include "internal/pystate.h"
+#ifdef Yield
+#undef Yield /* undefine conflicting macro from winbase.h */
+#endif
 #include "Python-ast.h"
 #include "code.h"
 #include "symtable.h"
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 021b95d2c3d..3ecd7fca54a 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -15,6 +15,7 @@ Data members:
 */
 
 #include "Python.h"
+#include "internal/pystate.h"
 #include "code.h"
 #include "frameobject.h"
 #include "pythread.h"
@@ -520,8 +521,6 @@ Return the profiling function set with sys.setprofile.\n\
 See the profiler chapter in the library manual."
 );
 
-static int _check_interval = 100;
-
 static PyObject *
 sys_setcheckinterval(PyObject *self, PyObject *args)
 {
@@ -530,7 +529,8 @@ sys_setcheckinterval(PyObject *self, PyObject *args)
                      "are deprecated.  Use sys.setswitchinterval() "
                      "instead.", 1) < 0)
         return NULL;
-    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval))
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &interp->check_interval))
         return NULL;
     Py_RETURN_NONE;
 }
@@ -550,7 +550,8 @@ sys_getcheckinterval(PyObject *self, PyObject *args)
                      "are deprecated.  Use sys.getswitchinterval() "
                      "instead.", 1) < 0)
         return NULL;
-    return PyLong_FromLong(_check_interval);
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    return PyLong_FromLong(interp->check_interval);
 }
 
 PyDoc_STRVAR(getcheckinterval_doc,
@@ -1337,7 +1338,7 @@ Clear the internal type lookup cache.");
 static PyObject *
 sys_is_finalizing(PyObject* self, PyObject* args)
 {
-    return PyBool_FromLong(_Py_Finalizing != NULL);
+    return PyBool_FromLong(_Py_IsFinalizing());
 }
 
 PyDoc_STRVAR(is_finalizing_doc,
@@ -1475,11 +1476,24 @@ list_builtin_module_names(void)
     return list;
 }
 
-static PyObject *warnoptions = NULL;
+static PyObject *
+get_warnoptions(void)
+{
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    if (interp->warnoptions == NULL || !PyList_Check(interp->warnoptions)) {
+        /* Allocate first: on failure the stored pointer is left untouched. */
+        PyObject *fresh = PyList_New(0);
+        if (fresh == NULL)
+            return NULL;
+        Py_XSETREF(interp->warnoptions, fresh);
+    }
+    return interp->warnoptions;
+}
 
 void
 PySys_ResetWarnOptions(void)
 {
+    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
     if (warnoptions == NULL || !PyList_Check(warnoptions))
         return;
     PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL);
@@ -1488,12 +1502,9 @@ PySys_ResetWarnOptions(void)
 void
 PySys_AddWarnOptionUnicode(PyObject *unicode)
 {
-    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
-        Py_XDECREF(warnoptions);
-        warnoptions = PyList_New(0);
-        if (warnoptions == NULL)
-            return;
-    }
+    PyObject *warnoptions = get_warnoptions();
+    if (warnoptions == NULL)
+        return;
     PyList_Append(warnoptions, unicode);
 }
 
@@ -1511,17 +1522,20 @@ PySys_AddWarnOption(const wchar_t *s)
 int
 PySys_HasWarnOptions(void)
 {
+    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
     return (warnoptions != NULL && (PyList_Size(warnoptions) > 0)) ? 1 : 0;
 }
 
-static PyObject *xoptions = NULL;
-
 static PyObject *
 get_xoptions(void)
 {
+    PyObject *xoptions = PyThreadState_GET()->interp->xoptions;
     if (xoptions == NULL || !PyDict_Check(xoptions)) {
         Py_XDECREF(xoptions);
         xoptions = PyDict_New();
+        if (xoptions == NULL)
+            return NULL;
+        PyThreadState_GET()->interp->xoptions = xoptions;
     }
     return xoptions;
 }
@@ -2124,17 +2138,15 @@ _PySys_EndInit(PyObject *sysdict)
     SET_SYS_FROM_STRING_INT_RESULT("base_exec_prefix",
                         PyUnicode_FromWideChar(Py_GetExecPrefix(), -1));
 
-    if (warnoptions == NULL) {
-        warnoptions = PyList_New(0);
-        if (warnoptions == NULL)
-            return -1;
-    }
+    PyObject *warnoptions = get_warnoptions();
+    if (warnoptions == NULL)
+        return -1;
+    SET_SYS_FROM_STRING_BORROW_INT_RESULT("warnoptions", warnoptions);
 
-    SET_SYS_FROM_STRING_INT_RESULT("warnoptions",
-                                   PyList_GetSlice(warnoptions,
-                                                   0, Py_SIZE(warnoptions)));
-
-    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", get_xoptions());
+    PyObject *xoptions = get_xoptions();
+    if (xoptions == NULL)
+        return -1;
+    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", xoptions);
 
     if (PyErr_Occurred())
         return -1;
diff --git a/Python/thread.c b/Python/thread.c
index 4d2f2c32a19..f742d0521e1 100644
--- a/Python/thread.c
+++ b/Python/thread.c
@@ -6,6 +6,7 @@
    Stuff shared by all thread_*.h files is collected here. */
 
 #include "Python.h"
+#include "internal/pystate.h"
 
 #ifndef _POSIX_THREADS
 /* This means pthreads are not implemented in libc headers, hence the macro
@@ -76,11 +77,6 @@ PyThread_init_thread(void)
     PyThread__init_thread();
 }
 
-/* Support for runtime thread stack size tuning.
-   A value of 0 means using the platform's default stack size
-   or the size specified by the THREAD_STACK_SIZE macro. */
-static size_t _pythread_stacksize = 0;
-
 #if defined(_POSIX_THREADS)
 #   define PYTHREAD_NAME "pthread"
 #   include "thread_pthread.h"
@@ -96,7 +92,7 @@ static size_t _pythread_stacksize = 0;
 size_t
 PyThread_get_stacksize(void)
 {
-    return _pythread_stacksize;
+    return PyThreadState_GET()->interp->pythread_stacksize;
 }
 
 /* Only platforms defining a THREAD_SET_STACKSIZE() macro
diff --git a/Python/thread_nt.h b/Python/thread_nt.h
index 47eb4b6e94c..2f3a71b86ad 100644
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@@ -189,9 +189,10 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
         return PYTHREAD_INVALID_THREAD_ID;
     obj->func = func;
     obj->arg = arg;
+    PyThreadState *tstate = PyThreadState_GET();
+    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
     hThread = (HANDLE)_beginthreadex(0,
-                      Py_SAFE_DOWNCAST(_pythread_stacksize,
-                                       Py_ssize_t, unsigned int),
+                      Py_SAFE_DOWNCAST(stacksize, Py_ssize_t, unsigned int),
                       bootstrap, obj,
                       0, &threadID);
     if (hThread == 0) {
@@ -332,13 +333,13 @@ _pythread_nt_set_stacksize(size_t size)
 {
     /* set to default */
     if (size == 0) {
-        _pythread_stacksize = 0;
+        PyThreadState_GET()->interp->pythread_stacksize = 0;
         return 0;
     }
 
     /* valid range? */
     if (size >= THREAD_MIN_STACKSIZE && size < THREAD_MAX_STACKSIZE) {
-        _pythread_stacksize = size;
+        PyThreadState_GET()->interp->pythread_stacksize = size;
         return 0;
     }
 
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index 268dec41168..ea05b6fbcfe 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -205,8 +205,9 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
         return PYTHREAD_INVALID_THREAD_ID;
 #endif
 #if defined(THREAD_STACK_SIZE)
-    tss = (_pythread_stacksize != 0) ? _pythread_stacksize
-                                     : THREAD_STACK_SIZE;
+    PyThreadState *tstate = PyThreadState_GET();
+    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
+    tss = (stacksize != 0) ? stacksize : THREAD_STACK_SIZE;
     if (tss != 0) {
         if (pthread_attr_setstacksize(&attrs, tss) != 0) {
             pthread_attr_destroy(&attrs);
@@ -578,7 +579,7 @@ _pythread_pthread_set_stacksize(size_t size)
 
     /* set to default */
     if (size == 0) {
-        _pythread_stacksize = 0;
+        PyThreadState_GET()->interp->pythread_stacksize = 0;
         return 0;
     }
 
@@ -595,7 +596,7 @@ _pythread_pthread_set_stacksize(size_t size)
             rc = pthread_attr_setstacksize(&attrs, size);
             pthread_attr_destroy(&attrs);
             if (rc == 0) {
-                _pythread_stacksize = size;
+                PyThreadState_GET()->interp->pythread_stacksize = size;
                 return 0;
             }
         }
diff --git a/Python/traceback.c b/Python/traceback.c
index cd30d56b94a..ba979aad8fa 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -2,6 +2,7 @@
 /* Traceback implementation */
 
 #include "Python.h"
+#include "internal/pystate.h"
 
 #include "code.h"
 #include "frameobject.h"
diff --git a/Tools/c-globals/README b/Tools/c-globals/README
new file mode 100644
index 00000000000..d0e6e8eba06
--- /dev/null
+++ b/Tools/c-globals/README
@@ -0,0 +1,41 @@
+#######################################
+# C Globals and CPython Runtime State.
+
+CPython's C code makes extensive use of global variables.  Each global
+falls into one of several categories:
+
+* (effectively) constants (incl. static types)
+* globals used exclusively in main or in the REPL
+* freelists, caches, and counters
+* process-global state
+* module state
+* Python runtime state
+
+The ignored-globals.txt file is organized similarly.  Of the different
+categories, the last two are problematic and generally should not exist
+in the codebase.
+
+Globals that hold module state (i.e. in Modules/*.c) cause problems
+when multiple interpreters are in use.  For more info, see PEP 3121,
+which addresses the situation for extension modules in general.
+
+Globals in the last category should be avoided as well.  The problem
+isn't with the Python runtime having state.  Rather, the problem is with
+that state being spread throughout the codebase in dozens of individual
+globals.  Unlike the other globals, the runtime state represents a set
+of values that are constantly shifting in a complex way.  When they are
+spread out it's harder to get a clear picture of what the runtime
+involves.  Furthermore, when they are spread out it complicates efforts
+that change the runtime.
+
+Consequently, the globals for Python's runtime state have been
+consolidated under a single top-level _PyRuntime global. No new globals
+should be added for runtime state.  Instead, they should be added to
+_PyRuntimeState or one of its sub-structs.  The check-c-globals script
+should be run to ensure that no new globals have been added:
+
+  python3 Tools/c-globals/check-c-globals.py
+
+If it reports any globals then they should be resolved.  If the globals
+are runtime state then they should be folded into _PyRuntimeState.
+Otherwise they should be added to ignored-globals.txt.
diff --git a/Tools/c-globals/check-c-globals.py b/Tools/c-globals/check-c-globals.py
new file mode 100644
index 00000000000..1de69a8751c
--- /dev/null
+++ b/Tools/c-globals/check-c-globals.py
@@ -0,0 +1,446 @@
+
+from collections import namedtuple
+import glob
+import os.path
+import re
+import shutil
+import sys
+import subprocess
+
+
+VERBOSITY = 2  # default level; main() prints its report only at >= 2
+# The paths below are derived from the location of this script.
+C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
+TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
+ROOT_DIR = os.path.dirname(TOOLS_DIR)
+GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')
+# Directories (relative to the root) that find_capi_vars() scans.
+SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']
+# A one-line PyAPI_DATA(...) declaration; group 1 is the list of names.
+CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
+
+
+IGNORED_VARS = {  # symbol names that find_vars() never reports
+        '_DYNAMIC',
+        '_GLOBAL_OFFSET_TABLE_',
+        '__JCR_LIST__',
+        '__JCR_END__',
+        '__TMC_END__',
+        '__bss_start',
+        '__data_start',
+        '__dso_handle',
+        '_edata',
+        '_end',
+        }
+
+
+def find_capi_vars(root):
+    capi_vars = {}  # PyAPI_DATA variable name -> file declaring it
+    for dirname in SOURCE_DIRS:
+        pattern = os.path.join(root, dirname, '**/*.[hc]')  # honor root
+        for filename in glob.glob(pattern, recursive=True):
+            with open(filename) as file:
+                for name in _find_capi_vars(file):
+                    if name in capi_vars:
+                        assert not filename.endswith('.c')
+                        assert capi_vars[name].endswith('.c')
+                    capi_vars[name] = filename
+    return capi_vars
+
+
+def _find_capi_vars(lines):
+    """Yield each variable name from lines that start with PyAPI_DATA."""
+    for text in lines:
+        if text.startswith('PyAPI_DATA'):
+            # The regex only understands plain one-line declarations.
+            assert '{' not in text
+            found = CAPI_REGEX.match(text)
+            assert found
+            (declared,) = found.groups()
+            yield from declared.split(', ')
+
+
+def _read_global_names(filename):
+    """Return the names in filename, ignoring blanks and "#" comments."""
+    # These variables are shared between all interpreters in the process.
+    with open(filename) as file:
+        rows = [row for row in file if row.strip() and row[:1] != '#']
+    return {row.split('#', 1)[0].strip() for row in rows}
+
+
+def _is_global_var(name, globalnames):
+    """Return True if name is an accepted (ignorable) global.
+
+    A name is accepted when one of the naming heuristics recognizes it
+    or when it is listed explicitly in globalnames.
+    """
+    heuristics = (_is_autogen_var, _is_type_var, _is_module,
+                  _is_exception, _is_compiler)
+    for matches in heuristics:
+        if matches(name):
+            return True
+    return name in globalnames
+
+
+def _is_autogen_var(name):
+    """Return True for names that belong to generated variables."""
+    if '.' in name:
+        # Covers the "op_id." and "rop_id." statics in
+        # Objects/typeobject.c (and any other dotted name).
+        return True
+    return name.startswith((
+        'PyId_',
+        # Python/graminit.c
+        'arcs_', 'states_',
+        ))
+
+
+def _is_type_var(name):
+    """Return True if name looks like part of a static type definition."""
+    if name.startswith('doc_'):
+        return True
+    return name.endswith((
+        'Type', '_Type', '_type',  # XXX Always a static type?
+        '_desc',  # for structseq types
+        '_doc', '__doc__', '_docstring',
+        '_methods',
+        '_fields',
+        '_memberlist', '_members',
+        '_slots',
+        '_getset', '_getsets', '_getsetlist',
+        '_as_mapping',
+        '_as_number',
+        '_as_sequence',
+        '_as_buffer',
+        '_as_async',
+        ))
+
+
+def _is_module(name):
+    """Return True if name is a generic name used by builtin modules."""
+    if name in ('module_def', 'initialized'):
+        return True
+    suffixes = (
+        '_functions', 'Methods', '_Methods',
+        'module', '_Module')
+    return name.endswith(suffixes)
+
+
+def _is_exception(name):
+    """Return True for PyExc_*/_PyExc_* names ending in Error or Warning."""
+    # The remaining exception vars are enumerated in ignored-globals.txt.
+    is_exc = name.startswith(('PyExc_', '_PyExc_'))
+    return is_exc and name.endswith(('Error', 'Warning'))
+
+
+def _is_compiler(name):
+    """Return True if name has a suffix used in Python/Python-ast.c."""
+    ast_suffixes = (
+        '_type',
+        '_singleton',
+        '_attributes')
+    return name.endswith(ast_suffixes)
+
+
+class Var(namedtuple('Var', 'name kind scope capi filename')):
+    """A variable symbol parsed from one line of nm output."""
+
+    @classmethod
+    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
+        """Return the Var for one nm line, or None if it is skipped."""
+        _, _, line = line.partition(' ')  # strip off the address
+        kind, _, line = line.strip().partition(' ')
+        # Parenthesized so that "or ()" really is the fallback for None.
+        if kind in (ignored or ()):
+            return None
+        if kind not in (expected or ()):
+            raise RuntimeError('unsupported NM type {!r}'.format(kind))
+
+        name, _, filename = line.partition('\t')
+        name = name.strip()
+        if _is_autogen_var(name):
+            return None
+        scope = 'global' if _is_global_var(name, globalnames) else None
+        capi = name in (capi_vars or ())
+        if filename:
+            filename = os.path.relpath(filename.partition(':')[0])
+        return cls(name, kind, scope, capi, filename or '~???~')
+
+    @property
+    def external(self):
+        """True when nm reported the kind as an uppercase letter."""
+        return self.kind.isupper()
+
+
+def find_vars(root, globals_filename=GLOBALS_FILE):
+    """Yield each Var found in root's python binary, minus IGNORED_VARS."""
+    python = os.path.join(root, 'python')
+    if not os.path.exists(python):
+        raise RuntimeError('python binary missing (need to build it first?)')
+    capi_vars = find_capi_vars(root)
+    globalnames = _read_global_names(globals_filename)
+
+    nm = shutil.which('nm')
+    if nm is None:
+        # XXX Use dumpbin.exe /SYMBOLS on Windows.
+        raise NotImplementedError
+    symbols = _find_var_symbols(python, nm, capi_vars, globalnames)
+    for var in symbols:
+        if var.name not in IGNORED_VARS:
+            yield var
+
+
+NM_FUNCS = set('Tt')  # nm(1): symbols in the text (code) section
+NM_PUBLIC_VARS = set('BD')  # nm(1): global BSS / initialized data
+NM_PRIVATE_VARS = set('bd')  # nm(1): local BSS / initialized data
+NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS  # kinds reported as Vars
+NM_DATA = set('Rr')  # nm(1): read-only data
+NM_OTHER = set('ACGgiINpSsuUVvWw-?')
+NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER  # kinds skipped silently
+
+
+def _find_var_symbols(python, nm, capi_vars, globalnames):
+    """Run nm on the python binary and yield a Var per variable symbol."""
+    # --line-numbers makes nm append the source location of each symbol.
+    command = [nm, '--line-numbers', python]
+    output = subprocess.check_output(command).decode('utf-8')
+    for row in output.splitlines():
+        parsed = Var.parse_nm(row, NM_VARS, NM_IGNORED,
+                              capi_vars, globalnames)
+        if parsed is not None:
+            yield parsed
+
+
+#######################################
+
+class Filter(namedtuple('Filter', 'name op value action')):
+    """One "[+|-]<name>[=<value>]" rule for selecting Var rows."""
+
+    @classmethod
+    def parse(cls, raw):
+        """Build a Filter from raw; the action defaults to "+"."""
+        if raw[:1] in ('+', '-'):
+            action, raw = raw[0], raw[1:]
+        else:
+            action = '+'
+        # XXX Support < and >?
+        name, op, value = raw.partition('=')
+        return cls(name, op, value, action)
+
+    def check(self, var):
+        """Return True if var passes ("-" inverts the match)."""
+        actual = getattr(var, self.name, None)
+        if self.op == '=':
+            matched = (actual == self.value)
+        elif self.op:
+            raise NotImplementedError
+        else:
+            matched = bool(actual)
+
+        if self.action not in ('+', '-'):
+            raise NotImplementedError
+        return matched if self.action == '+' else not matched
+
+
+def filter_var(var, filters):
+    """Return True only if var passes every one of the filters."""
+    # all() stops at the first rejection, like an explicit loop would.
+    verdicts = (rule.check(var) for rule in filters)
+    return all(verdicts)
+
+
+def make_sort_key(spec):
+    """Return a key func; a "_" before a column ignores leading "_"s."""
+    plan = []
+    for col in spec:
+        plan.append((col.strip('_'), '_' if col[:1] == '_' else ''))
+    return lambda var: tuple(getattr(var, attr).lstrip(chars)
+                             for attr, chars in plan)
+
+
+def make_groups(allvars, spec):
+    """Bucket the vars by the value of one column.
+
+    spec is the column (attribute) name.  The result maps a label of
+    the form "<column>: <value>" to the list of vars with that value,
+    in their original order.
+    """
+    buckets = {}
+    for var in allvars:
+        label = '{}: {}'.format(spec, getattr(var, spec))
+        buckets.setdefault(label, []).append(var)
+    return buckets
+
+
+def format_groups(groups, columns, fmts, widths):
+    """Yield (line, count) pairs: a heading and a table for each group."""
+    for label, members in sorted(groups.items()):
+        heading = ('', 0), ('  # {}'.format(label), 0)
+        yield from heading
+        yield from format_vars(members, columns, fmts, widths)
+
+
+def format_vars(allvars, columns, fmts, widths):
+    """Yield (line, count) pairs for a table; count is 1 only for rows."""
+    row_fmt = ' '.join(fmts[col] for col in columns)
+    row_fmt = ' ' + row_fmt.replace(' ', '   ') + ' '  # for div margin
+    titles = [col.upper() for col in columns]
+    yield row_fmt.replace(':', ':^').format(*titles), 0
+    rule = ' '.join('-' * (widths[col] + 2) for col in columns)
+    yield rule, 0
+    for var in allvars:
+        cells = [getattr(var, col) for col in columns]
+        cells = ['X' if cell is True else cell or '' for cell in cells]
+        yield row_fmt.format(*cells), 1
+    yield rule, 0
+
+
+#######################################
+
+COLUMNS = 'name,external,capi,scope,filename'
+COLUMN_NAMES = COLUMNS.split(',')
+# Columns default to the width of their own name; the wide ones are below.
+COLUMN_WIDTHS = {col: len(col)
+                 for col in COLUMN_NAMES}
+COLUMN_WIDTHS.update({
+        'name': 50,
+        'scope': 7,
+        'filename': 40,
+        })
+COLUMN_FORMATS = {col: '{:%s}' % width
+                  for col, width in COLUMN_WIDTHS.items()}
+for col in COLUMN_FORMATS:
+    if COLUMN_WIDTHS[col] == len(col):  # i.e. external, capi: center them
+        COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')
+
+
+def _parse_filters_arg(raw, error):
+    """Parse "f1,f2,..." into Filters; error(msg) reports each bad item."""
+    filters = []
+    for value in raw.split(','):
+        value = value.strip()
+        if not value:
+            continue
+        parsed = Filter.parse(value)
+        if parsed.name in COLUMN_NAMES:
+            filters.append(parsed)
+        else:  # rejected filters are reported and never appended
+            reason = 'unsupported column {!r}'.format(parsed.name)
+            error('bad filter {!r}: {}'.format(raw, reason))
+    return filters
+
+
+def _parse_columns_arg(raw, error):
+    """Split raw on commas; error(msg) is called per unknown column."""
+    columns = raw.split(',')
+    for unknown in (col for col in columns if col not in COLUMN_NAMES):
+        error('unsupported column {!r}'.format(unknown))
+    return columns
+
+
+def _parse_sort_arg(raw, error):
+    """Split raw on commas, validating names with leading "_" removed."""
+    sort = raw.split(',')
+    for column in (c for c in sort if c.lstrip('_') not in COLUMN_NAMES):
+        error('unsupported column {!r}'.format(column))
+    return sort
+
+
+def _parse_group_arg(raw, error):
+    """Validate the --group column; "-" (or nothing) means no grouping."""
+    if not raw or raw == '-':
+        return None if raw == '-' else raw  # "-" is documented in --help
+    if raw not in COLUMN_NAMES:
+        error('unsupported column {!r}'.format(raw))
+    if raw != 'filename':
+        error('unsupported group {!r}'.format(raw))
+    return raw
+
+
+def parse_args(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]
+
+    import argparse
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-v', '--verbose', action='count', default=0)
+    parser.add_argument('-q', '--quiet', action='count', default=0)
+    # The default "-scope" filter keeps only vars whose scope is unset.
+    parser.add_argument('--filters', default='-scope',
+                        help='[[-]<COLUMN>[=<GLOB>]] ...')
+
+    parser.add_argument('--columns', default=COLUMNS,
+                        help='a comma-separated list of columns to show')
+    parser.add_argument('--sort', default='filename,_name',
+                        help='a comma-separated list of columns to sort')
+    parser.add_argument('--group',
+                        help='group by the given column name (- to not group)')
+
+    parser.add_argument('--rc-on-match', dest='rc', type=int)
+    # Optional positional: the file listing the accepted global names.
+    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)
+
+    args = parser.parse_args(argv)
+    # Fold -v/-q into one verbosity level relative to VERBOSITY.
+    verbose = vars(args).pop('verbose', 0)
+    quiet = vars(args).pop('quiet', 0)
+    args.verbosity = max(0, VERBOSITY + verbose - quiet)
+    # Sorting by filename first implies grouping by filename.
+    if args.sort.startswith('filename') and not args.group:
+        args.group = 'filename'
+    # Without --rc-on-match, pick the exit code to use when vars match.
+    if args.rc is None:
+        if '-scope=core' in args.filters or 'core' not in args.filters:
+            args.rc = 0
+        else:
+            args.rc = 1
+    # Convert the raw option strings, reporting bad ones via parser.error.
+    args.filters = _parse_filters_arg(args.filters, parser.error)
+    args.columns = _parse_columns_arg(args.columns, parser.error)
+    args.sort = _parse_sort_arg(args.sort, parser.error)
+    args.group = _parse_group_arg(args.group, parser.error)
+    # Every remaining attribute is passed to main() as a keyword argument.
+    return args
+
+
+def main(root=ROOT_DIR, filename=GLOBALS_FILE,
+         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
+         verbosity=VERBOSITY, rc=1):
+    """Report globals that pass the filters; return rc if any did, else 0."""
+    log = lambda msg: ...
+    if verbosity >= 2:
+        log = lambda msg: print(msg)
+
+    allvars = (var
+               for var in find_vars(root, filename)
+               if filter_var(var, filters or ()))  # None: no filtering
+    if sort:
+        allvars = sorted(allvars, key=make_sort_key(sort))
+
+    if group:
+        # Work on a copy so that the caller's list is never mutated.
+        columns = list(columns)  # may be the shared COLUMN_NAMES default
+        if group in columns:
+            columns.remove(group)
+        grouped = make_groups(allvars, group)
+        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
+    else:
+        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
+
+    total = 0
+    for line, count in lines:
+        total += count
+        log(line)
+    log('\ntotal: {}'.format(total))
+
+    if total and rc:
+        print('ERROR: found unsafe globals', file=sys.stderr)
+        return rc
+    return 0
+
+
+if __name__ == '__main__':
+    # parse_args() yields exactly the keyword arguments main() accepts.
+    args = parse_args()
+    sys.exit(main(**vars(args)))
diff --git a/Tools/c-globals/ignored-globals.txt b/Tools/c-globals/ignored-globals.txt
new file mode 100644
index 00000000000..4fafba6eefa
--- /dev/null
+++ b/Tools/c-globals/ignored-globals.txt
@@ -0,0 +1,494 @@
+# All variables declared here are shared between all interpreters
+# in a single process.  That means that they must not be changed
+# unless that change should apply to all interpreters.
+#
+# See check-c-globals.py.
+#
+# Many generic names are handled via the script:
+#
+# * most exceptions and all warnings handled via _is_exception()
+# * for builtin modules, generic names are handled via _is_module()
+# * generic names for static types handled via _is_type_var()
+# * AST vars handled via _is_compiler()
+
+
+#######################################
+# main
+
+# Modules/getpath.c
+exec_prefix
+module_search_path
+prefix
+progpath
+
+# Modules/main.c
+orig_argc
+orig_argv
+
+# Python/getopt.c
+opt_ptr
+_PyOS_optarg
+_PyOS_opterr
+_PyOS_optind
+
+
+#######################################
+# REPL
+
+# Parser/myreadline.c
+PyOS_InputHook
+PyOS_ReadlineFunctionPointer
+_PyOS_ReadlineLock
+_PyOS_ReadlineTState
+
+
+#######################################
+# state
+
+# Python/dtoa.c
+p5s
+pmem_next  # very slight race
+private_mem  # very slight race
+
+# Python/import.c
+# For the moment the import lock stays global.  Ultimately there should
+# be a global lock for extension modules and a per-interpreter lock.
+import_lock
+import_lock_level
+import_lock_thread
+
+# Python/pylifecycle.c
+_PyRuntime
+
+
+#---------------------------------
+# module globals (PyObject)
+
+# Modules/_functoolsmodule.c
+kwd_mark
+
+# Modules/_localemodule.c
+Error
+
+# Modules/_threadmodule.c
+ThreadError
+
+# Modules/_tracemalloc.c
+unknown_filename
+
+# Modules/gcmodule.c
+gc_str
+
+# Modules/posixmodule.c
+billion
+posix_putenv_garbage
+
+# Modules/signalmodule.c
+DefaultHandler
+IgnoreHandler
+IntHandler
+ItimerError
+
+# Modules/zipimport.c
+ZipImportError
+zip_directory_cache
+
+
+#---------------------------------
+# module globals (other)
+
+# Modules/_tracemalloc.c
+allocators
+tables_lock
+tracemalloc_config
+tracemalloc_empty_traceback
+tracemalloc_filenames
+tracemalloc_peak_traced_memory
+tracemalloc_reentrant_key
+tracemalloc_traceback
+tracemalloc_tracebacks
+tracemalloc_traced_memory
+tracemalloc_traces
+
+# Modules/faulthandler.c
+fatal_error
+faulthandler_handlers
+old_stack
+stack
+thread
+user_signals
+
+# Modules/posixmodule.c
+posix_constants_confstr
+posix_constants_pathconf
+posix_constants_sysconf
+_stat_float_times  # deprecated, __main__-only
+structseq_new
+ticks_per_second
+
+# Modules/signalmodule.c
+Handlers  # main thread only
+is_tripped  # main thread only
+main_pid
+main_thread
+old_siginthandler
+wakeup_fd  # main thread only
+
+# Modules/zipimport.c
+zip_searchorder
+
+# Python/bltinmodule.c
+Py_FileSystemDefaultEncodeErrors
+Py_FileSystemDefaultEncoding
+Py_HasFileSystemDefaultEncoding
+
+# Python/sysmodule.c
+_PySys_ImplCacheTag
+_PySys_ImplName
+
+
+#---------------------------------
+# freelists
+
+# Modules/_collectionsmodule.c
+freeblocks
+numfreeblocks
+
+# Objects/classobject.c
+free_list
+numfree
+
+# Objects/dictobject.c
+free_list
+keys_free_list
+numfree
+numfreekeys
+
+# Objects/exceptions.c
+memerrors_freelist
+memerrors_numfree
+
+# Objects/floatobject.c
+free_list
+numfree
+
+# Objects/frameobject.c
+free_list
+numfree
+
+# Objects/genobject.c
+ag_asend_freelist
+ag_asend_freelist_free
+ag_value_freelist
+ag_value_freelist_free
+
+# Objects/listobject.c
+free_list
+numfree
+
+# Objects/methodobject.c
+free_list
+numfree
+
+# Objects/sliceobject.c
+slice_cache  # slight race
+
+# Objects/tupleobject.c
+free_list
+numfree
+
+# Python/dtoa.c
+freelist  # very slight race
+
+
+#---------------------------------
+# caches (PyObject)
+
+# Objects/typeobject.c
+method_cache  # only for static types
+next_version_tag  # only for static types
+
+# Python/dynload_shlib.c
+handles  # slight race during import
+nhandles  # slight race during import
+
+# Python/import.c
+extensions  # slight race on init during import
+
+
+#---------------------------------
+# caches (other)
+
+# Python/bootstrap_hash.c
+urandom_cache
+
+# Python/modsupport.c
+_Py_PackageContext  # Slight race during import!  Move to PyThreadState?
+
+
+#---------------------------------
+# counters
+
+# Objects/bytesobject.c
+null_strings
+one_strings
+
+# Objects/dictobject.c
+pydict_global_version
+
+# Objects/moduleobject.c
+max_module_number  # slight race during import
+
+
+#######################################
+# constants
+
+#---------------------------------
+# singletons
+
+# Objects/boolobject.c
+_Py_FalseStruct
+_Py_TrueStruct
+
+# Objects/object.c
+_Py_NoneStruct
+_Py_NotImplementedStruct
+
+# Objects/sliceobject.c
+_Py_EllipsisObject
+
+
+#---------------------------------
+# constants (other)
+
+# Modules/config.c
+_PyImport_Inittab
+
+# Objects/bytearrayobject.c
+_PyByteArray_empty_string
+
+# Objects/dictobject.c
+empty_keys_struct
+empty_values
+
+# Objects/floatobject.c
+detected_double_format
+detected_float_format
+double_format
+float_format
+
+# Objects/longobject.c
+_PyLong_DigitValue
+
+# Objects/object.c
+_Py_SwappedOp
+
+# Objects/obmalloc.c
+_PyMem_Debug
+
+# Objects/setobject.c
+_dummy_struct
+
+# Objects/structseq.c
+PyStructSequence_UnnamedField
+
+# Objects/typeobject.c
+name_op
+slotdefs  # almost
+slotdefs_initialized  # almost
+subtype_getsets_dict_only
+subtype_getsets_full
+subtype_getsets_weakref_only
+tp_new_methoddef
+
+# Objects/unicodeobject.c
+bloom_linebreak
+static_strings  # slight race
+
+# Parser/tokenizer.c
+_PyParser_TokenNames
+
+# Python/Python-ast.c
+alias_fields
+
+# Python/codecs.c
+Py_hexdigits
+ucnhash_CAPI  # slight performance-only race
+
+# Python/dynload_shlib.c
+_PyImport_DynLoadFiletab
+
+# Python/fileutils.c
+_Py_open_cloexec_works
+force_ascii
+
+# Python/frozen.c
+M___hello__
+PyImport_FrozenModules
+
+# Python/graminit.c
+_PyParser_Grammar
+dfas
+labels
+
+# Python/import.c
+PyImport_Inittab
+
+# Python/pylifecycle.c
+_TARGET_LOCALES
+
+
+#---------------------------------
+# initialized (PyObject)
+
+# Objects/bytesobject.c
+characters
+nullstring
+
+# Objects/exceptions.c
+PyExc_RecursionErrorInst
+errnomap
+
+# Objects/longobject.c
+_PyLong_One
+_PyLong_Zero
+small_ints
+
+# Objects/setobject.c
+emptyfrozenset
+
+# Objects/unicodeobject.c
+interned  # slight race on init in PyUnicode_InternInPlace()
+unicode_empty
+unicode_latin1
+
+
+#---------------------------------
+# initialized (other)
+
+# Python/getargs.c
+static_arg_parsers
+
+# Python/pyhash.c
+PyHash_Func
+_Py_HashSecret
+_Py_HashSecret_Initialized
+
+# Python/pylifecycle.c
+_Py_StandardStreamEncoding
+_Py_StandardStreamErrors
+default_home
+env_home
+progname
+Py_BytesWarningFlag
+Py_DebugFlag
+Py_DontWriteBytecodeFlag
+Py_FrozenFlag
+Py_HashRandomizationFlag
+Py_IgnoreEnvironmentFlag
+Py_InspectFlag
+Py_InteractiveFlag
+Py_IsolatedFlag
+Py_NoSiteFlag
+Py_NoUserSiteDirectory
+Py_OptimizeFlag
+Py_QuietFlag
+Py_UnbufferedStdioFlag
+Py_UseClassExceptionsFlag
+Py_VerboseFlag
+
+
+#---------------------------------
+# types
+
+# Modules/_threadmodule.c
+Locktype
+RLocktype
+localdummytype
+localtype
+
+# Objects/exceptions.c
+PyExc_BaseException
+PyExc_Exception
+PyExc_GeneratorExit
+PyExc_KeyboardInterrupt
+PyExc_StopAsyncIteration
+PyExc_StopIteration
+PyExc_SystemExit
+_PyExc_BaseException
+_PyExc_Exception
+_PyExc_GeneratorExit
+_PyExc_KeyboardInterrupt
+_PyExc_StopAsyncIteration
+_PyExc_StopIteration
+_PyExc_SystemExit
+
+# Objects/structseq.c
+_struct_sequence_template
+
+
+#---------------------------------
+# interned strings/bytes
+
+# Modules/_io/_iomodule.c
+_PyIO_empty_bytes
+_PyIO_empty_str
+_PyIO_str_close
+_PyIO_str_closed
+_PyIO_str_decode
+_PyIO_str_encode
+_PyIO_str_fileno
+_PyIO_str_flush
+_PyIO_str_getstate
+_PyIO_str_isatty
+_PyIO_str_newlines
+_PyIO_str_nl
+_PyIO_str_read
+_PyIO_str_read1
+_PyIO_str_readable
+_PyIO_str_readall
+_PyIO_str_readinto
+_PyIO_str_readline
+_PyIO_str_reset
+_PyIO_str_seek
+_PyIO_str_seekable
+_PyIO_str_setstate
+_PyIO_str_tell
+_PyIO_str_truncate
+_PyIO_str_writable
+_PyIO_str_write
+
+# Modules/_threadmodule.c
+str_dict
+
+# Objects/boolobject.c
+false_str
+true_str
+
+# Objects/listobject.c
+indexerr
+
+# Python/symtable.c
+__class__
+dictcomp
+genexpr
+lambda
+listcomp
+setcomp
+top
+
+# Python/sysmodule.c
+whatstrings
+
+
+#######################################
+# hacks
+
+# Objects/object.c
+_Py_abstract_hack
+
+# Objects/setobject.c
+_PySet_Dummy
+
+# Python/pylifecycle.c
+_PyOS_mystrnicmp_hack