bpo-30860: Consolidate stateful runtime globals. (#2594)

* group the (stateful) runtime globals into various topical structs * consolidate the topical structs under a single top-level _PyRuntimeState struct * add a check-c-globals.py script that helps identify runtime globals Other globals are excluded (see globals.txt and check-c-globals.py).
2024-11-27 20:04:41 +08:00 · 2017-09-05 18:26:16 -07:00 · 2017-09-05 18:26:16 -07:00 · 76d5abc868
commit 76d5abc868
parent 501b324d3a
40 changed files with 2727 additions and 1327 deletions
--- a/Include/Python.h
+++ b/Include/Python.h
@ -133,4 +133,8 @@
 #include "fileutils.h"
 #include "pyfpe.h"

+#ifdef Py_BUILD_CORE
+#include "internal/_Python.h"
+#endif
+
 #endif /* !Py_PYTHON_H */
--- a/Include/ceval.h
+++ b/Include/ceval.h
@ -93,7 +93,12 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
      PyThreadState_GET()->overflowed = 0;  \
    } while(0)
 PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where);
-PyAPI_DATA(int) _Py_CheckRecursionLimit;
+#ifdef Py_BUILD_CORE
+#define _Py_CheckRecursionLimit _PyRuntime.ceval.check_recursion_limit
+#else
+PyAPI_FUNC(int) _PyEval_CheckRecursionLimit(void);
+#define _Py_CheckRecursionLimit _PyEval_CheckRecursionLimit()
+#endif

 #ifdef USE_STACKCHECK
 /* With USE_STACKCHECK, we artificially decrement the recursion limit in order
--- a/Include/internal/_Python.h
+++ b/Include/internal/_Python.h
@ -0,0 +1,16 @@
+#ifndef _Py_PYTHON_H
+#define _Py_PYTHON_H
+/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */
+
+/* Include all internal Python header files */
+
+#ifndef Py_BUILD_CORE
+#error "Internal headers are not available externally."
+#endif
+
+#include "_mem.h"
+#include "_ceval.h"
+#include "_warnings.h"
+#include "_pystate.h"
+
+#endif /* !_Py_PYTHON_H */
--- a/Include/internal/_ceval.h
+++ b/Include/internal/_ceval.h
@ -0,0 +1,71 @@
+#ifndef _Py_CEVAL_H
+#define _Py_CEVAL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "ceval.h"
+#include "compile.h"
+#include "pyatomic.h"
+
+#ifdef WITH_THREAD
+#include "pythread.h"
+#endif
+
+struct _pending_calls {  /* embedded as `pending` in struct _ceval_runtime_state */
+    unsigned long main_thread;  /* NOTE(review): presumably the main thread's id -- confirm */
+#ifdef WITH_THREAD
+    PyThread_type_lock lock;  /* NOTE(review): presumably guards calls[]/first/last -- confirm */
+    /* Request for running pending calls. */
+    _Py_atomic_int calls_to_do;
+    /* Request for looking at the `async_exc` field of the current
+       thread state.
+       Guarded by the GIL. */
+    int async_exc;
+#define NPENDINGCALLS 32  /* number of slots in calls[] */
+    struct {
+        int (*func)(void *);  /* NOTE(review): presumably invoked with `arg` -- confirm */
+        void *arg;
+    } calls[NPENDINGCALLS];
+    int first;  /* NOTE(review): first/last look like indexes into calls[] -- confirm */
+    int last;
+#else /* ! WITH_THREAD: no lock and no async_exc field in this variant */
+    _Py_atomic_int calls_to_do;
+#define NPENDINGCALLS 32  /* number of slots in calls[] */
+    struct {
+        int (*func)(void *);
+        void *arg;
+    } calls[NPENDINGCALLS];
+    volatile int first;  /* volatile here, unlike the WITH_THREAD variant */
+    volatile int last;
+#endif /* WITH_THREAD */
+};
+
+#include "_gil.h"
+
+struct _ceval_runtime_state {  /* reached as _PyRuntime.ceval (see ceval.h) */
+    int recursion_limit;  /* NOTE(review): presumably the value behind Py_GetRecursionLimit() -- confirm */
+    int check_recursion_limit;  /* what _Py_CheckRecursionLimit expands to under Py_BUILD_CORE */
+    /* Records whether tracing is on for any thread.  Counts the number
+       of threads for which tstate->c_tracefunc is non-NULL, so if the
+       value is 0, we know we don't have to check this thread's
+       c_tracefunc.  This speeds up the if statement in
+       PyEval_EvalFrameEx() after fast_next_opcode. */
+    int tracing_possible;
+    /* This single variable consolidates all requests to break out of
+       the fast path in the eval loop. */
+    _Py_atomic_int eval_breaker;
+#ifdef WITH_THREAD
+    /* Request for dropping the GIL */
+    _Py_atomic_int gil_drop_request;
+#endif
+    struct _pending_calls pending;  /* pending-call state, declared earlier in this header */
+    struct _gil_runtime_state gil;  /* GIL state, declared in _gil.h */
+};
+
+PyAPI_FUNC(void) _PyEval_Initialize(struct _ceval_runtime_state *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !_Py_CEVAL_H */
--- a/Include/internal/_condvar.h
+++ b/Include/internal/_condvar.h
@ -0,0 +1,91 @@
+#ifndef _CONDVAR_H_
+#define _CONDVAR_H_
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   not present in unistd.h. But they still can be implemented as an external
+   library (e.g. gnu pth in pthread emulation) */
+# ifdef HAVE_PTHREAD_H
+#  include <pthread.h> /* _POSIX_THREADS */
+# endif
+#endif
+
+#ifdef _POSIX_THREADS
+/*
+ * POSIX support
+ */
+#define Py_HAVE_CONDVAR
+
+#include <pthread.h>
+
+#define PyMUTEX_T pthread_mutex_t
+#define PyCOND_T pthread_cond_t
+
+#elif defined(NT_THREADS)
+/*
+ * Windows (XP, 2003 server and later, as well as (hopefully) CE) support
+ *
+ * Emulated condition variables that work with XP and later, plus
+ * example native support on VISTA and onwards.
+ */
+#define Py_HAVE_CONDVAR
+
+/* include windows if it hasn't been done before */
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* options */
+/* non-emulated condition variables are provided for those that want
+ * to target Windows Vista.  Modify this macro to enable them.
+ */
+#ifndef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
+#endif
+
+/* fall back to emulation if not targeting Vista */
+#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
+#undef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1
+#endif
+
+#if _PY_EMULATED_WIN_CV
+
+typedef CRITICAL_SECTION PyMUTEX_T;
+
+/* The ConditionVariable object.  From XP onwards it is easily emulated
+   with a Semaphore.
+   Semaphores are available on Windows XP (2003 server) and later.
+   We use a Semaphore rather than an auto-reset event, because although
+   an auto-reset event might appear to solve the lost-wakeup bug (race
+   condition between releasing the outer lock and waiting) because it
+   maintains state even though a wait hasn't happened, there is still
+   a lost wakeup problem if more than one thread is interrupted in the
+   critical place.  A semaphore solves that, because its state is
+   counted, not Boolean.
+   Because it is ok to signal a condition variable with no one
+   waiting, we need to keep track of the number of
+   waiting threads.  Otherwise, the semaphore's state could rise
+   without bound.  This also helps reduce the number of "spurious wakeups"
+   that would otherwise happen.
+ */
+
+typedef struct _PyCOND_T
+{
+    HANDLE sem;  /* semaphore the condition variable is emulated with */
+    int waiting; /* number of waiting threads; to allow PyCOND_SIGNAL to be a no-op */
+} PyCOND_T;
+
+#else /* !_PY_EMULATED_WIN_CV */
+
+/* Use native Vista primitives if build target is Vista or higher */
+
+/* SRWLOCK is faster and better than CriticalSection */
+typedef SRWLOCK PyMUTEX_T;
+
+typedef CONDITION_VARIABLE  PyCOND_T;
+
+#endif /* _PY_EMULATED_WIN_CV */
+
+#endif /* _POSIX_THREADS, NT_THREADS */
+
+#endif /* _CONDVAR_H_ */
--- a/Include/internal/_gil.h
+++ b/Include/internal/_gil.h
@ -0,0 +1,48 @@
+#ifndef _Py_GIL_H
+#define _Py_GIL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "pyatomic.h"
+
+#include "internal/_condvar.h"
+#ifndef Py_HAVE_CONDVAR
+#error You need either a POSIX-compatible or a Windows system!
+#endif
+
+/* Enable if you want to force the switching of threads at least
+   every `interval`. */
+#undef FORCE_SWITCHING
+#define FORCE_SWITCHING
+
+struct _gil_runtime_state {  /* embedded as `gil` in struct _ceval_runtime_state */
+    /* microseconds (the Python API uses seconds, though) */
+    unsigned long interval;
+    /* Last PyThreadState holding / having held the GIL. This helps us
+       know whether anyone else was scheduled after we dropped the GIL. */
+    _Py_atomic_address last_holder;
+    /* Whether the GIL is already taken (-1 if uninitialized). This is
+       atomic because it can be read without any lock taken in ceval.c. */
+    _Py_atomic_int locked;
+    /* Number of GIL switches since the beginning. */
+    unsigned long switch_number;
+#ifdef WITH_THREAD
+    /* This condition variable allows one or several threads to wait
+       until the GIL is released. In addition, the mutex also protects
+       the above variables. */
+    PyCOND_T cond;  /* PyCOND_T and PyMUTEX_T are provided by _condvar.h */
+    PyMUTEX_T mutex;
+#ifdef FORCE_SWITCHING  /* unconditionally #define'd earlier in this header */
+    /* This condition variable helps the GIL-releasing thread wait for
+       a GIL-awaiting thread to be scheduled and take the GIL. */
+    PyCOND_T switch_cond;
+    PyMUTEX_T switch_mutex;
+#endif
+#endif /* WITH_THREAD */
+};
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !_Py_GIL_H */
--- a/Include/internal/_mem.h
+++ b/Include/internal/_mem.h
@ -0,0 +1,197 @@
+#ifndef _Py_MEM_H
+#define _Py_MEM_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "objimpl.h"
+#include "pymem.h"
+
+#ifdef WITH_PYMALLOC
+#include "_pymalloc.h"
+#endif
+
+/* Low-level memory runtime state */
+
+struct _pymem_runtime_state {
+    struct _allocator_runtime_state {
+        PyMemAllocatorEx mem;  /* NOTE(review): presumably one allocator per PyMem domain (mem/obj/raw) -- confirm */
+        PyMemAllocatorEx obj;
+        PyMemAllocatorEx raw;
+    } allocators;
+#ifdef WITH_PYMALLOC  /* the remaining fields exist only when WITH_PYMALLOC is defined */
+    /* Array of objects used to track chunks of memory (arenas). */
+    struct arena_object* arenas;
+    /* The head of the singly-linked, NULL-terminated list of available
+       arena_objects. */
+    struct arena_object* unused_arena_objects;
+    /* The head of the doubly-linked, NULL-terminated at each end,
+       list of arena_objects associated with arenas that have pools
+       available. */
+    struct arena_object* usable_arenas;
+    /* Number of slots currently allocated in the `arenas` vector. */
+    unsigned int maxarenas;
+    /* Number of arenas allocated that haven't been free()'d. */
+    size_t narenas_currently_allocated;
+    /* High water mark (max value ever seen) for
+     * narenas_currently_allocated. */
+    size_t narenas_highwater;
+    /* Total number of times malloc() called to allocate an arena. */
+    size_t ntimes_arena_allocated;
+    poolp usedpools[MAX_POOLS];  /* pool table; see the "Pool table" comment in _pymalloc.h */
+    Py_ssize_t num_allocated_blocks;  /* NOTE(review): presumably blocks currently handed out by pymalloc -- confirm */
+    size_t serialno;     /* incremented on each debug {m,re}alloc */
+#endif /* WITH_PYMALLOC */
+};
+
+PyAPI_FUNC(void) _PyMem_Initialize(struct _pymem_runtime_state *);
+
+
+/* High-level memory runtime state */
+
+struct _pyobj_runtime_state {
+    PyObjectArenaAllocator allocator_arenas;  /* NOTE(review): presumably the hooks used to obtain/release arenas -- confirm */
+};
+
+PyAPI_FUNC(void) _PyObject_Initialize(struct _pyobj_runtime_state *);
+
+
+/* GC runtime state */
+
+/* If we change this, we need to change the default value in the
+   signature of gc.collect. */
+#define NUM_GENERATIONS 3
+
+/*
+   NOTE: about the counting of long-lived objects.
+
+   To limit the cost of garbage collection, there are two strategies:
+     - make each collection faster, e.g. by scanning fewer objects
+     - do fewer collections
+   This heuristic is about the latter strategy.
+
+   In addition to the various configurable thresholds, we only trigger a
+   full collection if the ratio
+    long_lived_pending / long_lived_total
+   is above a given value (hardwired to 25%).
+
+   The reason is that, while "non-full" collections (i.e., collections of
+   the young and middle generations) will always examine roughly the same
+   number of objects -- determined by the aforementioned thresholds --,
+   the cost of a full collection is proportional to the total number of
+   long-lived objects, which is virtually unbounded.
+
+   Indeed, it has been remarked that doing a full collection every
+   <constant number> of object creations entails a dramatic performance
+   degradation in workloads which consist in creating and storing lots of
+   long-lived objects (e.g. building a large list of GC-tracked objects would
+   show quadratic performance, instead of linear as expected: see issue #4074).
+
+   Using the above ratio, instead, yields amortized linear performance in
+   the total number of objects (the effect of which can be summarized
+   thusly: "each full garbage collection is more and more costly as the
+   number of objects grows, but we do fewer and fewer of them").
+
+   This heuristic was suggested by Martin von Löwis on python-dev in
+   June 2008. His original analysis and proposal can be found at:
+    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
+*/
+
+/*
+   NOTE: about untracking of mutable objects.
+
+   Certain types of container cannot participate in a reference cycle, and
+   so do not need to be tracked by the garbage collector. Untracking these
+   objects reduces the cost of garbage collections. However, determining
+   which objects may be untracked is not free, and the costs must be
+   weighed against the benefits for garbage collection.
+
+   There are two possible strategies for when to untrack a container:
+
+   i) When the container is created.
+   ii) When the container is examined by the garbage collector.
+
+   Tuples containing only immutable objects (integers, strings etc, and
+   recursively, tuples of immutable objects) do not need to be tracked.
+   The interpreter creates a large number of tuples, many of which will
+   not survive until garbage collection. It is therefore not worthwhile
+   to untrack eligible tuples at creation time.
+
+   Instead, all tuples except the empty tuple are tracked when created.
+   During garbage collection it is determined whether any surviving tuples
+   can be untracked. A tuple can be untracked if all of its contents are
+   already not tracked. Tuples are examined for untracking in all garbage
+   collection cycles. It may take more than one cycle to untrack a tuple.
+
+   Dictionaries containing only immutable objects also do not need to be
+   tracked. Dictionaries are untracked when created. If a tracked item is
+   inserted into a dictionary (either as a key or value), the dictionary
+   becomes tracked. During a full garbage collection (all generations),
+   the collector will untrack any dictionaries whose contents are not
+   tracked.
+
+   The gc module provides the Python function is_tracked(obj), which returns
+   the CURRENT tracking status of the object. Subsequent garbage
+   collections may change the tracking status of the object.
+
+   Untracking of certain containers was introduced in issue #4688, and
+   the algorithm was refined in response to issue #14775.
+*/
+
+struct gc_generation {
+    PyGC_Head head;  /* NOTE(review): presumably the list head for this generation -- confirm */
+    int threshold; /* collection threshold */
+    int count; /* count of allocations or collections of younger
+                  generations */
+};
+
+/* Running stats per generation; _gc_runtime_state.generation_stats holds
+   one entry for each of the NUM_GENERATIONS generations. */
+struct gc_generation_stats {
+    /* total number of collections */
+    Py_ssize_t collections;
+    /* total number of collected objects */
+    Py_ssize_t collected;
+    /* total number of uncollectable objects (put into gc.garbage) */
+    Py_ssize_t uncollectable;
+};
+
+struct _gc_runtime_state {  /* reached as _PyRuntime.gc (see _PyGC_generation0 in this header) */
+    /* List of objects that still need to be cleaned up, singly linked
+     * via their gc headers' gc_prev pointers.  */
+    PyObject *trash_delete_later;
+    /* Current call-stack depth of tp_dealloc calls. */
+    int trash_delete_nesting;
+
+    int enabled;  /* NOTE(review): presumably nonzero while automatic collection is enabled -- confirm */
+    int debug;  /* NOTE(review): presumably the debug flags set through the gc module -- confirm */
+    /* linked lists of container objects */
+    struct gc_generation generations[NUM_GENERATIONS];
+    PyGC_Head *generation0;  /* exposed as _PyGC_generation0 by the macro in this header */
+    struct gc_generation_stats generation_stats[NUM_GENERATIONS];
+    /* true if we are currently running the collector */
+    int collecting;
+    /* list of uncollectable objects */
+    PyObject *garbage;
+    /* a list of callbacks to be invoked when collection is performed */
+    PyObject *callbacks;
+    /* This is the number of objects that survived the last full
+       collection. It approximates the number of long lived objects
+       tracked by the GC.
+
+       (by "full collection", we mean a collection of the oldest
+       generation). */
+    Py_ssize_t long_lived_total;
+    /* This is the number of objects that survived all "non-full"
+       collections, and are awaiting to undergo a full collection for
+       the first time. */
+    Py_ssize_t long_lived_pending;
+};
+
+PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *);
+
+#define _PyGC_generation0 _PyRuntime.gc.generation0
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !_Py_MEM_H */
--- a/Include/internal/_pymalloc.h
+++ b/Include/internal/_pymalloc.h
@ -0,0 +1,443 @@
+
+/* An object allocator for Python.
+
+   Here is an introduction to the layers of the Python memory architecture,
+   showing where the object allocator is actually used (layer +2). It is
+   called for every object allocation and deallocation (PyObject_New/Del),
+   unless the object-specific allocators implement a proprietary allocation
+   scheme (ex.: ints use a simple free list). This is also the place where
+   the cyclic garbage collector operates selectively on container objects.
+
+
+    Object-specific allocators
+    _____   ______   ______       ________
+   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
++3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
+    _______________________________       |                           |
+   [   Python's object allocator   ]      |                           |
++2 | ####### Object memory ####### | <------ Internal buffers ------> |
+    ______________________________________________________________    |
+   [          Python's raw memory allocator (PyMem_ API)          ]   |
++1 | <----- Python memory (under PyMem manager's control) ------> |   |
+    __________________________________________________________________
+   [    Underlying general-purpose allocator (ex: C library malloc)   ]
+ 0 | <------ Virtual memory allocated for the python process -------> |
+
+   =========================================================================
+    _______________________________________________________________________
+   [                OS-specific Virtual Memory Manager (VMM)               ]
+-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
+    __________________________________   __________________________________
+   [                                  ] [                                  ]
+-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
+
+*/
+/*==========================================================================*/
+
+/* A fast, special-purpose memory allocator for small blocks, to be used
+   on top of a general-purpose malloc -- heavily based on previous art. */
+
+/* Vladimir Marangozov -- August 2000 */
+
+/*
+ * "Memory management is where the rubber meets the road -- if we do the wrong
+ * thing at any level, the results will not be good. And if we don't make the
+ * levels work well together, we are in serious trouble." (1)
+ *
+ * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
+ *    "Dynamic Storage Allocation: A Survey and Critical Review",
+ *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
+ */
+
+#ifndef _Py_PYMALLOC_H
+#define _Py_PYMALLOC_H
+
+/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
+
+/*==========================================================================*/
+
+/*
+ * Allocation strategy abstract:
+ *
+ * For small requests, the allocator sub-allocates <Big> blocks of memory.
+ * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
+ * system's allocator.
+ *
+ * Small requests are grouped in size classes spaced 8 bytes apart, due
+ * to the required valid alignment of the returned address. Requests of
+ * a particular size are serviced from memory pools of 4K (one VMM page).
+ * Pools are fragmented on demand and contain free lists of blocks of one
+ * particular size class. In other words, there is a fixed-size allocator
+ * for each size class. Free pools are shared by the different allocators
+ * thus minimizing the space reserved for a particular size class.
+ *
+ * This allocation strategy is a variant of what is known as "simple
+ * segregated storage based on array of free lists". The main drawback of
+ * simple segregated storage is that we might end up with lot of reserved
+ * memory for the different free lists, which degenerate in time. To avoid
+ * this, we partition each free list in pools and we share dynamically the
+ * reserved space between all free lists. This technique is quite efficient
+ * for memory intensive programs which allocate mainly small-sized blocks.
+ *
+ * For small requests we have the following table:
+ *
+ * Request in bytes     Size of allocated block      Size class idx
+ * ----------------------------------------------------------------
+ *        1-8                     8                       0
+ *        9-16                   16                       1
+ *       17-24                   24                       2
+ *       25-32                   32                       3
+ *       33-40                   40                       4
+ *       41-48                   48                       5
+ *       49-56                   56                       6
+ *       57-64                   64                       7
+ *       65-72                   72                       8
+ *        ...                   ...                     ...
+ *      497-504                 504                      62
+ *      505-512                 512                      63
+ *
+ *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
+ *      allocator.
+ */
+
+/*==========================================================================*/
+
+/*
+ * -- Main tunable settings section --
+ */
+
+/*
+ * Alignment of addresses returned to the user. 8-bytes alignment works
+ * on most current architectures (with 32-bit or 64-bit address busses).
+ * The alignment value is also used for grouping small requests in size
+ * classes spaced ALIGNMENT bytes apart.
+ *
+ * You shouldn't change this unless you know what you are doing.
+ */
+#define ALIGNMENT               8               /* must be 2^N */
+#define ALIGNMENT_SHIFT         3               /* log2(ALIGNMENT); keep the two in sync */
+
+/* Return the number of bytes in size class I, as a uint: (I + 1) * ALIGNMENT,
+   e.g. class 0 -> 8 bytes and class 63 -> 512 bytes (see the table above). */
+#define INDEX2SIZE(I) (((unsigned int)(I) + 1) << ALIGNMENT_SHIFT)
+
+/*
+ * Max size threshold below which malloc requests are considered to be
+ * small enough in order to use preallocated memory pools. You can tune
+ * this value according to your application behaviour and memory needs.
+ *
+ * Note: a size threshold of 512 guarantees that newly created dictionaries
+ * will be allocated from preallocated memory pools on 64-bit.
+ *
+ * The following invariants must hold:
+ *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
+ *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
+ *
+ * Although not required, for better performance and space efficiency,
+ * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
+ */
+#define SMALL_REQUEST_THRESHOLD 512
+#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
+
+#if NB_SMALL_SIZE_CLASSES > 64
+#error "NB_SMALL_SIZE_CLASSES should be less than 64"
+#endif /* NB_SMALL_SIZE_CLASSES > 64 */
+
+/*
+ * The system's VMM page size can be obtained on most unices with a
+ * getpagesize() call or deduced from various header files. To make
+ * things simpler, we assume that it is 4K, which is OK for most systems.
+ * It is probably better if this is the native page size, but it doesn't
+ * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
+ * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
+ * violation fault.  4K is apparently OK for all the platforms that python
+ * currently targets.
+ */
+#define SYSTEM_PAGE_SIZE        (4 * 1024)
+#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
+
+/*
+ * Maximum amount of memory managed by the allocator for small requests.
+ */
+#ifdef WITH_MEMORY_LIMITS
+#ifndef SMALL_MEMORY_LIMIT
+#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
+#endif
+#endif
+
+/*
+ * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
+ * on a page boundary. This is a reserved virtual address space for the
+ * current process (obtained through a malloc()/mmap() call). This in no way
+ * means that the memory arenas will be used entirely. A malloc(<Big>) is
+ * usually an address range reservation for <Big> bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory"; it only wastes addressable
+ * range.
+ *
+ * Arenas are allocated with mmap() on systems supporting anonymous memory
+ * mappings to reduce heap fragmentation.
+ */
+#define ARENA_SIZE              (256 << 10)     /* 256KB */
+
+#ifdef WITH_MEMORY_LIMITS
+#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
+#endif
+
+/*
+ * Size of the pools used for small blocks. Should be a power of 2,
+ * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
+ */
+#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
+#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
+
+/*
+ * -- End of tunable settings section --
+ */
+
+/*==========================================================================*/
+
+/*
+ * Locking
+ *
+ * To reduce lock contention, it would probably be better to refine the
+ * crude function locking with per size class locking. I'm not positive
+ * however, whether it's worth switching to such locking policy because
+ * of the performance penalty it might introduce.
+ *
+ * The following macros describe the simplest (should also be the fastest)
+ * lock object on a particular platform and the init/fini/lock/unlock
+ * operations on it. The locks defined here are not expected to be recursive
+ * because it is assumed that they will always be called in the order:
+ * INIT, [LOCK, UNLOCK]*, FINI.
+ */
+
+/*
+ * Python's threads are serialized, so object malloc locking is disabled.
+ */
+#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
+#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
+#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
+#define SIMPLELOCK_LOCK(lock)   /* acquire released lock */
+#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
+
+/* When you say memory, my mind reasons in terms of (pointers to) blocks */
+typedef uint8_t pyblock;
+
+/* Pool for small blocks.  POOL_ADDR() turns any address inside a pool into
+   a pointer to this header by rounding down to a POOL_SIZE boundary. */
+struct pool_header {
+    union { pyblock *_padding;          /* makes `ref` at least pointer-sized */
+            unsigned int count; } ref;  /* number of allocated blocks    */
+    pyblock *freeblock;                 /* pool's free list head         */
+    struct pool_header *nextpool;       /* next pool of this size class  */
+    struct pool_header *prevpool;       /* previous pool       ""        */
+    unsigned int arenaindex;            /* index into arenas of base adr */
+    unsigned int szidx;                 /* block size class index        */
+    unsigned int nextoffset;            /* bytes to virgin block         */
+    unsigned int maxnextoffset;         /* largest valid nextoffset      */
+};
+
+typedef struct pool_header *poolp;
+
+/* Record keeping for arenas.  The `arenas` array in
+   struct _pymem_runtime_state (_mem.h) is made of these objects. */
+struct arena_object {
+    /* The address of the arena, as returned by malloc.  Note that 0
+     * will never be returned by a successful malloc, and is used
+     * here to mark an arena_object that doesn't correspond to an
+     * allocated arena.
+     */
+    uintptr_t address;
+
+    /* Pool-aligned pointer to the next pool to be carved off. */
+    pyblock* pool_address;
+
+    /* The number of available pools in the arena:  free pools + never-
+     * allocated pools.
+     */
+    unsigned int nfreepools;
+
+    /* The total number of pools in the arena, whether or not available. */
+    unsigned int ntotalpools;
+
+    /* Singly-linked list of available pools. */
+    struct pool_header* freepools;
+
+    /* Whenever this arena_object is not associated with an allocated
+     * arena, the nextarena member is used to link all unassociated
+     * arena_objects in the singly-linked `unused_arena_objects` list.
+     * The prevarena member is unused in this case.
+     *
+     * When this arena_object is associated with an allocated arena
+     * with at least one available pool, both members are used in the
+     * doubly-linked `usable_arenas` list, which is maintained in
+     * increasing order of `nfreepools` values.
+     *
+     * Else this arena_object is associated with an allocated arena
+     * all of whose pools are in use.  `nextarena` and `prevarena`
+     * are both meaningless in this case.
+     */
+    struct arena_object* nextarena;
+    struct arena_object* prevarena;
+};
+
+#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)  /* bytes taken out of each pool before blocks (see NUMBLOCKS) */
+
+#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
+
+/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
+#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
+
+/* Return total number of blocks in pool of size index I, as a uint:
+   the pool's payload (POOL_SIZE - POOL_OVERHEAD) divided by the block size
+   of class I, rounded down. */
+#define NUMBLOCKS(I) \
+    ((unsigned int)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
+
+/*==========================================================================*/
+
+/*
+ * The malloc lock.  The SIMPLELOCK_* macros are defined empty earlier in
+ * this header (object malloc locking is disabled), so SIMPLELOCK_DECL()
+ * declares nothing and LOCK()/UNLOCK()/LOCK_INIT()/LOCK_FINI() expand to
+ * nothing.
+ */
+SIMPLELOCK_DECL(_malloc_lock)
+#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
+#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
+#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
+#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
+
+/*
+ * Pool table -- headed, circular, doubly-linked lists of partially used pools.
+
+This is involved.  For an index i, usedpools[i+i] is the header for a list of
+all partially used pools holding small blocks with "size class idx" i. So
+usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
+16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
+
+Pools are carved off an arena's highwater mark (an arena_object's pool_address
+member) as needed.  Once carved off, a pool is in one of three states forever
+after:
+
+used == partially used, neither empty nor full
+    At least one block in the pool is currently allocated, and at least one
+    block in the pool is not currently allocated (note this implies a pool
+    has room for at least two blocks).
+    This is a pool's initial state, as a pool is created only when malloc
+    needs space.
+    The pool holds blocks of a fixed size, and is in the circular list headed
+    at usedpools[i] (see above).  It's linked to the other used pools of the
+    same size class via the pool_header's nextpool and prevpool members.
+    If all but one block is currently allocated, a malloc can cause a
+    transition to the full state.  If all but one block is not currently
+    allocated, a free can cause a transition to the empty state.
+
+full == all the pool's blocks are currently allocated
+    On transition to full, a pool is unlinked from its usedpools[] list.
+    It's not linked to from anything then anymore, and its nextpool and
+    prevpool members are meaningless until it transitions back to used.
+    A free of a block in a full pool puts the pool back in the used state.
+    Then it's linked in at the front of the appropriate usedpools[] list, so
+    that the next allocation for its size class will reuse the freed block.
+
+empty == all the pool's blocks are currently available for allocation
+    On transition to empty, a pool is unlinked from its usedpools[] list,
+    and linked to the front of its arena_object's singly-linked freepools list,
+    via its nextpool member.  The prevpool member has no meaning in this case.
+    Empty pools have no inherent size class:  the next time a malloc finds
+    an empty list in usedpools[], it takes the first pool off of freepools.
+    If the size class needed happens to be the same as the size class the pool
+    last had, some pool initialization can be skipped.
+
+
+Block Management
+
+Blocks within pools are again carved out as needed.  pool->freeblock points to
+the start of a singly-linked list of free blocks within the pool.  When a
+block is freed, it's inserted at the front of its pool's freeblock list.  Note
+that the available blocks in a pool are *not* linked all together when a pool
+is initialized.  Instead only "the first two" (lowest addresses) blocks are
+set up, returning the first such block, and setting pool->freeblock to a
+one-block list holding the second such block.  This is consistent with that
+pymalloc strives at all levels (arena, pool, and block) never to touch a piece
+of memory until it's actually needed.
+
+So long as a pool is in the used state, we're certain there *is* a block
+available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
+points to the end of the free list before we've carved the entire pool into
+blocks, that means we simply haven't yet gotten to one of the higher-address
+blocks.  The offset from the pool_header to the start of "the next" virgin
+block is stored in the pool_header nextoffset member, and the largest value
+of nextoffset that makes sense is stored in the maxnextoffset member when a
+pool is initialized.  All the blocks in a pool have been passed out at least
+once when and only when nextoffset > maxnextoffset.
+
+
+Major obscurity:  While the usedpools vector is declared to have poolp
+entries, it doesn't really.  It really contains two pointers per (conceptual)
+poolp entry, the nextpool and prevpool members of a pool_header.  The
+excruciating initialization code below fools C so that
+
+    usedpools[i+i]
+
+"acts like" a genuine poolp, but only so long as you only reference its
+nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
+compensating for that a pool_header's nextpool and prevpool members
+immediately follow a pool_header's first two members:
+
+    union { block *_padding;
+            uint count; } ref;
+    block *freeblock;
+
+each of which consume sizeof(block *) bytes.  So what usedpools[i+i] really
+contains is a fudged-up pointer p such that *if* C believes it's a poolp
+pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
+circular list is empty).
+
+It's unclear why the usedpools setup is so convoluted.  It could be to
+minimize the amount of cache required to hold this heavily-referenced table
+(which only *needs* the two interpool pointer members of a pool_header). OTOH,
+referencing code has to remember to "double the index" and doing so isn't
+free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
+on that C doesn't insert any padding anywhere in a pool_header at or before
+the prevpool member.
+**************************************************************************** */
+
+#define MAX_POOLS  (2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8)
+
+/*==========================================================================
+Arena management.
+
+`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
+which may not be currently used (== they're arena_objects that aren't
+currently associated with an allocated arena).  Note that arenas proper are
+separately malloc'ed.
+
+Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
+we do try to free() arenas, and use some mild heuristic strategies to increase
+the likelihood that arenas eventually can be freed.
+
+unused_arena_objects
+
+    This is a singly-linked list of the arena_objects that are currently not
+    being used (no arena is associated with them).  Objects are taken off the
+    head of the list in new_arena(), and are pushed on the head of the list in
+    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
+    is on this list if and only if its .address member is 0.
+
+usable_arenas
+
+    This is a doubly-linked list of the arena_objects associated with arenas
+    that have pools available.  These pools are either waiting to be reused,
+    or have not been used before.  The list is sorted to have the most-
+    allocated arenas first (ascending order based on the nfreepools member).
+    This means that the next allocation will come from a heavily used arena,
+    which gives the nearly empty arenas a chance to be returned to the system.
+    In my unscientific tests this dramatically improved the number of arenas
+    that could be freed.
+
+Note that an arena_object associated with an arena all of whose pools are
+currently in use isn't on either list.
+*/
+
+/* How many arena_objects do we initially allocate?
+ * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
+ * `arenas` vector.
+ */
+#define INITIAL_ARENA_OBJECTS 16
+
+#endif /* _Py_PYMALLOC_H */
--- a/Include/internal/_pystate.h
+++ b/Include/internal/_pystate.h
@ -0,0 +1,93 @@
+#ifndef _Py_PYSTATE_H
+#define _Py_PYSTATE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "pystate.h"
+#include "pyatomic.h"
+
+#ifdef WITH_THREAD
+#include "pythread.h"
+#endif
+
+#include "_mem.h"
+#include "_ceval.h"
+#include "_warnings.h"
+
+
+/* GIL state */
+
+struct _gilstate_runtime_state {
+    int check_enabled;  /* flag for PyGILState_Check(); see _PyGILState_check_enabled */
+    /* Assuming the current thread holds the GIL, this is the
+       PyThreadState for the current thread. */
+    _Py_atomic_address tstate_current;
+    PyThreadFrameGetter getframe;  /* hook for PyEval_GetFrame(); see _PyThreadState_GetFrame */
+#ifdef WITH_THREAD
+    /* The single PyInterpreterState used by this process'
+       GILState implementation
+    */
+    /* TODO: Given interp_main, it may be possible to kill this ref */
+    PyInterpreterState *autoInterpreterState;
+    int autoTLSkey;  /* presumably the TLS key used by the GILState API -- TODO confirm */
+#endif /* WITH_THREAD */
+};
+
+/* hook for PyEval_GetFrame(), requested for Psyco */
+#define _PyThreadState_GetFrame _PyRuntime.gilstate.getframe
+
+/* Issue #26558: Flag to enable the PyGILState_Check() check.
+   If set to zero, PyGILState_Check() always returns 1. */
+#define _PyGILState_check_enabled _PyRuntime.gilstate.check_enabled
+
+
+/* Full Python runtime state */
+
+typedef struct pyruntimestate {
+    int initialized;
+    int core_initialized;
+    PyThreadState *finalizing;  /* tested by _Py_IS_FINALIZING() / _Py_CURRENTLY_FINALIZING() */
+
+    struct pyinterpreters {
+#ifdef WITH_THREAD
+        PyThread_type_lock mutex;
+#endif
+        PyInterpreterState *head;
+        PyInterpreterState *main;
+        /* next_id is an auto-numbered sequence of small integers.
+           It gets initialized during runtime initialization
+           (NOTE(review): presumably in _PyInterpreterState_Enable() --
+           confirm) and used in PyInterpreterState_New().  A negative
+           interpreter ID indicates an error occurred.  The main
+           interpreter will always have an ID of 0.  Overflow results
+           in a RuntimeError.  If that becomes a problem later then we
+           can adjust, e.g. by using a Python int. */
+        int64_t next_id;
+    } interpreters;
+
+#define NEXITFUNCS 32
+    void (*exitfuncs[NEXITFUNCS])(void);  /* room for NEXITFUNCS function pointers */
+    int nexitfuncs;  /* presumably the number of exitfuncs[] slots in use -- TODO confirm */
+    void (*pyexitfunc)(void);
+
+    /* Topical sub-states, one struct per subsystem. */
+    struct _pyobj_runtime_state obj;
+    struct _gc_runtime_state gc;
+    struct _pymem_runtime_state mem;
+    struct _warnings_runtime_state warnings;
+    struct _ceval_runtime_state ceval;
+    struct _gilstate_runtime_state gilstate;
+
+    // XXX Consolidate globals found via the check-c-globals script.
+} _PyRuntimeState;
+
+PyAPI_DATA(_PyRuntimeState) _PyRuntime;
+PyAPI_FUNC(void) _PyRuntimeState_Init(_PyRuntimeState *);
+PyAPI_FUNC(void) _PyRuntimeState_Fini(_PyRuntimeState *);
+
+PyAPI_FUNC(void) _PyInterpreterState_Enable(_PyRuntimeState *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !_Py_PYSTATE_H */
--- a/Include/internal/_warnings.h
+++ b/Include/internal/_warnings.h
@ -0,0 +1,21 @@
+#ifndef _Py_WARNINGS_H
+#define _Py_WARNINGS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "object.h"
+
+struct _warnings_runtime_state {
+    /* Both 'filters' and 'onceregistry' can be set in warnings.py;
+       get_warnings_attr() will reset these variables accordingly. */
+    PyObject *filters;  /* List */
+    PyObject *once_registry;  /* Dict */
+    PyObject *default_action; /* String */
+    long filters_version;  /* NOTE(review): presumably changes when 'filters' is modified -- confirm */
+};
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !_Py_WARNINGS_H */
--- a/Include/object.h
+++ b/Include/object.h
@ -1038,8 +1038,6 @@ with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL.
   Kept for binary compatibility of extensions using the stable ABI. */
 PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
 PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
-PyAPI_DATA(int) _PyTrash_delete_nesting;
-PyAPI_DATA(PyObject *) _PyTrash_delete_later;
 #endif /* !Py_LIMITED_API */

 /* The new thread-safe private API, invoked by the macros below. */
--- a/Include/pylifecycle.h
+++ b/Include/pylifecycle.h
@ -119,7 +119,10 @@ PyAPI_FUNC(void) _PyType_Fini(void);
 PyAPI_FUNC(void) _Py_HashRandomization_Fini(void);
 PyAPI_FUNC(void) PyAsyncGen_Fini(void);

-PyAPI_DATA(PyThreadState *) _Py_Finalizing;
+/* True when _PyRuntime.finalizing holds a (non-NULL) thread state. */
+#define _Py_IS_FINALIZING() \
+    (_PyRuntime.finalizing != NULL)
+/* True when `tstate` is the thread state recorded in _PyRuntime.finalizing.
+   The argument is parenthesized so any pointer expression can be passed. */
+#define _Py_CURRENTLY_FINALIZING(tstate) \
+    (_PyRuntime.finalizing == (tstate))
 #endif

 /* Signals */
--- a/Include/pystate.h
+++ b/Include/pystate.h
@ -29,9 +29,10 @@ typedef struct {
    int use_hash_seed;
    unsigned long hash_seed;
    int _disable_importlib; /* Needed by freeze_importlib */
+    char *allocator;
 } _PyCoreConfig;

-#define _PyCoreConfig_INIT {0, -1, 0, 0}
+#define _PyCoreConfig_INIT {0, -1, 0, 0, NULL}

 /* Placeholders while working on the new configuration API
 *
@ -57,6 +58,19 @@ typedef struct _is {
    PyObject *builtins;
    PyObject *importlib;

+    /* Used in Python/sysmodule.c. */
+    int check_interval;
+    PyObject *warnoptions;
+    PyObject *xoptions;
+
+    /* Used in Modules/_threadmodule.c. */
+    long num_threads;
+    /* Support for runtime thread stack size tuning.
+       A value of 0 means using the platform's default stack size
+       or the size specified by the THREAD_STACK_SIZE macro. */
+    /* Used in Python/thread.c. */
+    size_t pythread_stacksize;
+
    PyObject *codec_search_path;
    PyObject *codec_search_cache;
    PyObject *codec_error_registry;
@ -185,9 +199,6 @@ typedef struct _ts {
 #endif


-#ifndef Py_LIMITED_API
-PyAPI_FUNC(void) _PyInterpreterState_Init(void);
-#endif /* !Py_LIMITED_API */
 PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
 PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
 PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *);
@ -246,7 +257,7 @@ PyAPI_FUNC(int) PyThreadState_SetAsyncExc(unsigned long, PyObject *);
 /* Assuming the current thread holds the GIL, this is the
   PyThreadState for the current thread. */
 #ifdef Py_BUILD_CORE
-PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current;
+#  define _PyThreadState_Current _PyRuntime.gilstate.tstate_current
 #  define PyThreadState_GET() \
             ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
 #else
@ -301,10 +312,6 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE);
 PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void);

 #ifndef Py_LIMITED_API
-/* Issue #26558: Flag to disable PyGILState_Check().
-   If set to non-zero, PyGILState_Check() always return 1. */
-PyAPI_DATA(int) _PyGILState_check_enabled;
-
 /* Helper/diagnostic function - return 1 if the current thread
   currently holds the GIL, 0 otherwise.

@ -340,11 +347,6 @@ PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *);
 typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_);
 #endif

-/* hook for PyEval_GetFrame(), requested for Psyco */
-#ifndef Py_LIMITED_API
-PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame;
-#endif
-
 #ifdef __cplusplus
 }
 #endif
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@ -987,6 +987,13 @@ PYTHON_HEADERS= \
 		pyconfig.h \
 		$(PARSER_HEADERS) \
 		$(srcdir)/Include/Python-ast.h \
+		$(srcdir)/Include/internal/_Python.h \
+		$(srcdir)/Include/internal/_ceval.h \
+		$(srcdir)/Include/internal/_gil.h \
+		$(srcdir)/Include/internal/_mem.h \
+		$(srcdir)/Include/internal/_pymalloc.h \
+		$(srcdir)/Include/internal/_pystate.h \
+		$(srcdir)/Include/internal/_warnings.h \
 		$(DTRACE_HEADERS)

 $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS)
--- a/Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
+++ b/Builtins/2017-09-05-13-47-49.bpo-30860.MROpZw.rst
@ -0,0 +1,2 @@
+Consolidate CPython's global runtime state under a single struct.  This
+improves discoverability of the runtime state.
--- a/Modules/_io/bufferedio.c
+++ b/Modules/_io/bufferedio.c
@ -279,7 +279,7 @@ _enter_buffered_busy(buffered *self)
                     "reentrant call inside %R", self);
        return 0;
    }
-    relax_locking = (_Py_Finalizing != NULL);
+    relax_locking = _Py_IS_FINALIZING();
    Py_BEGIN_ALLOW_THREADS
    if (!relax_locking)
        st = PyThread_acquire_lock(self->lock, 1);
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@ -14,7 +14,6 @@
 #include "pythread.h"

 static PyObject *ThreadError;
-static long nb_threads = 0;
 static PyObject *str_dict;

 _Py_IDENTIFIER(stderr);
@ -993,7 +992,7 @@ t_bootstrap(void *boot_raw)
    tstate->thread_id = PyThread_get_thread_ident();
    _PyThreadState_Init(tstate);
    PyEval_AcquireThread(tstate);
-    nb_threads++;
+    tstate->interp->num_threads++;
    res = PyObject_Call(boot->func, boot->args, boot->keyw);
    if (res == NULL) {
        if (PyErr_ExceptionMatches(PyExc_SystemExit))
@ -1020,7 +1019,7 @@ t_bootstrap(void *boot_raw)
    Py_DECREF(boot->args);
    Py_XDECREF(boot->keyw);
    PyMem_DEL(boot_raw);
-    nb_threads--;
+    tstate->interp->num_threads--;
    PyThreadState_Clear(tstate);
    PyThreadState_DeleteCurrent();
    PyThread_exit_thread();
@ -1159,7 +1158,8 @@ A thread's identity may be reused for another thread after it exits.");
 static PyObject *
 thread__count(PyObject *self)
 {
-    return PyLong_FromLong(nb_threads);
+    PyThreadState *tstate = PyThreadState_Get();
+    return PyLong_FromLong(tstate->interp->num_threads);
 }

 PyDoc_STRVAR(_count_doc,
@ -1352,6 +1352,7 @@ PyInit__thread(void)
    PyObject *m, *d, *v;
    double time_max;
    double timeout_max;
+    PyThreadState *tstate = PyThreadState_Get();

    /* Initialize types: */
    if (PyType_Ready(&localdummytype) < 0)
@ -1396,7 +1397,7 @@ PyInit__thread(void)
    if (PyModule_AddObject(m, "_local", (PyObject *)&localtype) < 0)
        return NULL;

-    nb_threads = 0;
+    tstate->interp->num_threads = 0;

    str_dict = PyUnicode_InternFromString("__dict__");
    if (str_dict == NULL)
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@ -114,7 +114,7 @@ overlapped_dealloc(OverlappedObject *self)
        {
            /* The operation is no longer pending -- nothing to do. */
        }
-        else if (_Py_Finalizing == NULL)
+        else if (!_Py_IS_FINALIZING())
        {
            /* The operation is still pending -- give a warning.  This
               will probably only happen on Windows XP. */
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@ -39,133 +39,9 @@ module gc
 /* Get the object given the GC head */
 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))

-/*** Global GC state ***/
-
-struct gc_generation {
-    PyGC_Head head;
-    int threshold; /* collection threshold */
-    int count; /* count of allocations or collections of younger
-                  generations */
-};
-
-/* If we change this, we need to change the default value in the signature of
-   gc.collect. */
-#define NUM_GENERATIONS 3
-#define GEN_HEAD(n) (&generations[n].head)
-
-/* linked lists of container objects */
-static struct gc_generation generations[NUM_GENERATIONS] = {
-    /* PyGC_Head,                               threshold,      count */
-    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
-    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
-    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
-};
-
-PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
-
-static int enabled = 1; /* automatic collection enabled? */
-
-/* true if we are currently running the collector */
-static int collecting = 0;
-
-/* list of uncollectable objects */
-static PyObject *garbage = NULL;
-
 /* Python string to use if unhandled exception occurs */
 static PyObject *gc_str = NULL;

-/* a list of callbacks to be invoked when collection is performed */
-static PyObject *callbacks = NULL;
-
-/* This is the number of objects that survived the last full collection. It
-   approximates the number of long lived objects tracked by the GC.
-
-   (by "full collection", we mean a collection of the oldest generation).
-*/
-static Py_ssize_t long_lived_total = 0;
-
-/* This is the number of objects that survived all "non-full" collections,
-   and are awaiting to undergo a full collection for the first time.
-
-*/
-static Py_ssize_t long_lived_pending = 0;
-
-/*
-   NOTE: about the counting of long-lived objects.
-
-   To limit the cost of garbage collection, there are two strategies;
-     - make each collection faster, e.g. by scanning fewer objects
-     - do less collections
-   This heuristic is about the latter strategy.
-
-   In addition to the various configurable thresholds, we only trigger a
-   full collection if the ratio
-    long_lived_pending / long_lived_total
-   is above a given value (hardwired to 25%).
-
-   The reason is that, while "non-full" collections (i.e., collections of
-   the young and middle generations) will always examine roughly the same
-   number of objects -- determined by the aforementioned thresholds --,
-   the cost of a full collection is proportional to the total number of
-   long-lived objects, which is virtually unbounded.
-
-   Indeed, it has been remarked that doing a full collection every
-   <constant number> of object creations entails a dramatic performance
-   degradation in workloads which consist in creating and storing lots of
-   long-lived objects (e.g. building a large list of GC-tracked objects would
-   show quadratic performance, instead of linear as expected: see issue #4074).
-
-   Using the above ratio, instead, yields amortized linear performance in
-   the total number of objects (the effect of which can be summarized
-   thusly: "each full garbage collection is more and more costly as the
-   number of objects grows, but we do fewer and fewer of them").
-
-   This heuristic was suggested by Martin von Löwis on python-dev in
-   June 2008. His original analysis and proposal can be found at:
-    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-*/
-
-/*
-   NOTE: about untracking of mutable objects.
-
-   Certain types of container cannot participate in a reference cycle, and
-   so do not need to be tracked by the garbage collector. Untracking these
-   objects reduces the cost of garbage collections. However, determining
-   which objects may be untracked is not free, and the costs must be
-   weighed against the benefits for garbage collection.
-
-   There are two possible strategies for when to untrack a container:
-
-   i) When the container is created.
-   ii) When the container is examined by the garbage collector.
-
-   Tuples containing only immutable objects (integers, strings etc, and
-   recursively, tuples of immutable objects) do not need to be tracked.
-   The interpreter creates a large number of tuples, many of which will
-   not survive until garbage collection. It is therefore not worthwhile
-   to untrack eligible tuples at creation time.
-
-   Instead, all tuples except the empty tuple are tracked when created.
-   During garbage collection it is determined whether any surviving tuples
-   can be untracked. A tuple can be untracked if all of its contents are
-   already not tracked. Tuples are examined for untracking in all garbage
-   collection cycles. It may take more than one cycle to untrack a tuple.
-
-   Dictionaries containing only immutable objects also do not need to be
-   tracked. Dictionaries are untracked when created. If a tracked item is
-   inserted into a dictionary (either as a key or value), the dictionary
-   becomes tracked. During a full garbage collection (all generations),
-   the collector will untrack any dictionaries whose contents are not
-   tracked.
-
-   The module provides the python function is_tracked(obj), which returns
-   the CURRENT tracking status of the object. Subsequent garbage
-   collections may change the tracking status of the object.
-
-   Untracking of certain containers was introduced in issue #4688, and
-   the algorithm was refined in response to issue #14775.
-*/
-
 /* set for debugging information */
 #define DEBUG_STATS             (1<<0) /* print collection statistics */
 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
@ -174,19 +50,26 @@ static Py_ssize_t long_lived_pending = 0;
 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
                DEBUG_UNCOLLECTABLE | \
                DEBUG_SAVEALL
-static int debug;

-/* Running stats per generation */
-struct gc_generation_stats {
-    /* total number of collections */
-    Py_ssize_t collections;
-    /* total number of collected objects */
-    Py_ssize_t collected;
-    /* total number of uncollectable objects (put into gc.garbage) */
-    Py_ssize_t uncollectable;
-};
+#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head)

-static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
+/* Initialize the GC runtime state in *state: turn automatic collection
+   on and give every generation its default threshold, a zero count and
+   a list head whose next/prev pointers refer to the head itself. */
+void
+_PyGC_Initialize(struct _gc_runtime_state *state)
+{
+    state->enabled = 1; /* automatic collection enabled? */
+
+#define _GEN_HEAD(n) (&state->generations[n].head)
+    struct gc_generation generations[NUM_GENERATIONS] = {
+        /* PyGC_Head,                                 threshold,      count */
+        {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}},           700,            0},
+        {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}},           10,             0},
+        {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}},           10,             0},
+    };
+    for (int i = 0; i < NUM_GENERATIONS; i++) {
+        state->generations[i] = generations[i];
+    }
+    /* Use the head inside *state; GEN_HEAD() always refers to
+       _PyRuntime.gc, which need not be the state being initialized. */
+    state->generation0 = _GEN_HEAD(0);
+}

 /*--------------------------------------------------------------------------
 gc_refs values.
@ -766,16 +649,16 @@ handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
 {
    PyGC_Head *gc = finalizers->gc.gc_next;

-    if (garbage == NULL) {
-        garbage = PyList_New(0);
-        if (garbage == NULL)
+    if (_PyRuntime.gc.garbage == NULL) {
+        _PyRuntime.gc.garbage = PyList_New(0);
+        if (_PyRuntime.gc.garbage == NULL)
            Py_FatalError("gc couldn't create gc.garbage list");
    }
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);

-        if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
-            if (PyList_Append(garbage, op) < 0)
+        if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
+            if (PyList_Append(_PyRuntime.gc.garbage, op) < 0)
                return -1;
        }
    }
@ -865,8 +748,8 @@ delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
        PyGC_Head *gc = collectable->gc.gc_next;
        PyObject *op = FROM_GC(gc);

-        if (debug & DEBUG_SAVEALL) {
-            PyList_Append(garbage, op);
+        if (_PyRuntime.gc.debug & DEBUG_SAVEALL) {
+            PyList_Append(_PyRuntime.gc.garbage, op);
        }
        else {
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
@ -919,9 +802,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
    PyGC_Head *gc;
    _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */

-    struct gc_generation_stats *stats = &generation_stats[generation];
+    struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];

-    if (debug & DEBUG_STATS) {
+    if (_PyRuntime.gc.debug & DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n",
                          generation);
        PySys_WriteStderr("gc: objects in each generation:");
@ -938,9 +821,9 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
-        generations[generation+1].count += 1;
+        _PyRuntime.gc.generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
-        generations[i].count = 0;
+        _PyRuntime.gc.generations[i].count = 0;

    /* merge younger generations with one we are currently collecting */
    for (i = 0; i < generation; i++) {
@ -974,7 +857,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
    /* Move reachable objects to next generation. */
    if (young != old) {
        if (generation == NUM_GENERATIONS - 2) {
-            long_lived_pending += gc_list_size(young);
+            _PyRuntime.gc.long_lived_pending += gc_list_size(young);
        }
        gc_list_merge(young, old);
    }
@ -982,8 +865,8 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
        /* We only untrack dicts in full collections, to avoid quadratic
           dict build-up. See issue #14775. */
        untrack_dicts(young);
-        long_lived_pending = 0;
-        long_lived_total = gc_list_size(young);
+        _PyRuntime.gc.long_lived_pending = 0;
+        _PyRuntime.gc.long_lived_total = gc_list_size(young);
    }

    /* All objects in unreachable are trash, but objects reachable from
@ -1003,7 +886,7 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
    for (gc = unreachable.gc.gc_next; gc != &unreachable;
                    gc = gc->gc.gc_next) {
        m++;
-        if (debug & DEBUG_COLLECTABLE) {
+        if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }
@ -1032,10 +915,10 @@ collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
         gc != &finalizers;
         gc = gc->gc.gc_next) {
        n++;
-        if (debug & DEBUG_UNCOLLECTABLE)
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
-    if (debug & DEBUG_STATS) {
+    if (_PyRuntime.gc.debug & DEBUG_STATS) {
        _PyTime_t t2 = _PyTime_GetMonotonicClock();

        if (m == 0 && n == 0)
@ -1098,11 +981,11 @@ invoke_gc_callback(const char *phase, int generation,
    PyObject *info = NULL;

    /* we may get called very early */
-    if (callbacks == NULL)
+    if (_PyRuntime.gc.callbacks == NULL)
        return;
    /* The local variable cannot be rebound, check it for sanity */
-    assert(callbacks != NULL && PyList_CheckExact(callbacks));
-    if (PyList_GET_SIZE(callbacks) != 0) {
+    assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks));
+    if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) {
        info = Py_BuildValue("{sisnsn}",
            "generation", generation,
            "collected", collected,
@ -1112,8 +995,8 @@ invoke_gc_callback(const char *phase, int generation,
            return;
        }
    }
-    for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
+    for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i);
        Py_INCREF(cb); /* make sure cb doesn't go away */
        r = PyObject_CallFunction(cb, "sO", phase, info);
        Py_XDECREF(r);
@ -1147,13 +1030,13 @@ collect_generations(void)
     * exceeds the threshold.  Objects in the that generation and
     * generations younger than it will be collected. */
    for (i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (generations[i].count > generations[i].threshold) {
+        if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) {
            /* Avoid quadratic performance degradation in number
               of tracked objects. See comments at the beginning
               of this file, and issue #4074.
            */
            if (i == NUM_GENERATIONS - 1
-                && long_lived_pending < long_lived_total / 4)
+                && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4)
                continue;
            n = collect_with_callback(i);
            break;
@ -1174,7 +1057,7 @@ static PyObject *
 gc_enable_impl(PyObject *module)
 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
 {
-    enabled = 1;
+    _PyRuntime.gc.enabled = 1;
    Py_RETURN_NONE;
 }

@ -1188,7 +1071,7 @@ static PyObject *
 gc_disable_impl(PyObject *module)
 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
 {
-    enabled = 0;
+    _PyRuntime.gc.enabled = 0;
    Py_RETURN_NONE;
 }

@ -1202,7 +1085,7 @@ static int
 gc_isenabled_impl(PyObject *module)
 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
 {
-    return enabled;
+    return _PyRuntime.gc.enabled;
 }

 /*[clinic input]
@ -1230,12 +1113,12 @@ gc_collect_impl(PyObject *module, int generation)
        return -1;
    }

-    if (collecting)
+    if (_PyRuntime.gc.collecting)
        n = 0; /* already collecting, don't do anything */
    else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
        n = collect_with_callback(generation);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
    }

    return n;
@ -1263,7 +1146,7 @@ static PyObject *
 gc_set_debug_impl(PyObject *module, int flags)
 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
 {
-    debug = flags;
+    _PyRuntime.gc.debug = flags;

    Py_RETURN_NONE;
 }
@ -1278,7 +1161,7 @@ static int
 gc_get_debug_impl(PyObject *module)
 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
 {
-    return debug;
+    return _PyRuntime.gc.debug;
 }

 PyDoc_STRVAR(gc_set_thresh__doc__,
@ -1292,13 +1175,13 @@ gc_set_thresh(PyObject *self, PyObject *args)
 {
    int i;
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
-                          &generations[0].threshold,
-                          &generations[1].threshold,
-                          &generations[2].threshold))
+                          &_PyRuntime.gc.generations[0].threshold,
+                          &_PyRuntime.gc.generations[1].threshold,
+                          &_PyRuntime.gc.generations[2].threshold))
        return NULL;
    for (i = 2; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
-        generations[i].threshold = generations[2].threshold;
+        _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold;
    }

    Py_RETURN_NONE;
@ -1315,9 +1198,9 @@ gc_get_threshold_impl(PyObject *module)
 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
 {
    return Py_BuildValue("(iii)",
-                         generations[0].threshold,
-                         generations[1].threshold,
-                         generations[2].threshold);
+                         _PyRuntime.gc.generations[0].threshold,
+                         _PyRuntime.gc.generations[1].threshold,
+                         _PyRuntime.gc.generations[2].threshold);
 }

 /*[clinic input]
@ -1331,9 +1214,9 @@ gc_get_count_impl(PyObject *module)
 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
 {
    return Py_BuildValue("(iii)",
-                         generations[0].count,
-                         generations[1].count,
-                         generations[2].count);
+                         _PyRuntime.gc.generations[0].count,
+                         _PyRuntime.gc.generations[1].count,
+                         _PyRuntime.gc.generations[2].count);
 }

 static int
@ -1464,7 +1347,7 @@ gc_get_stats_impl(PyObject *module)
    /* To get consistent values despite allocations while constructing
       the result list, we use a snapshot of the running stats. */
    for (i = 0; i < NUM_GENERATIONS; i++) {
-        stats[i] = generation_stats[i];
+        stats[i] = _PyRuntime.gc.generation_stats[i];
    }

    result = PyList_New(0);
@ -1581,22 +1464,22 @@ PyInit_gc(void)
    if (m == NULL)
        return NULL;

-    if (garbage == NULL) {
-        garbage = PyList_New(0);
-        if (garbage == NULL)
+    if (_PyRuntime.gc.garbage == NULL) {
+        _PyRuntime.gc.garbage = PyList_New(0);
+        if (_PyRuntime.gc.garbage == NULL)
            return NULL;
    }
-    Py_INCREF(garbage);
-    if (PyModule_AddObject(m, "garbage", garbage) < 0)
+    Py_INCREF(_PyRuntime.gc.garbage);
+    if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0)
        return NULL;

-    if (callbacks == NULL) {
-        callbacks = PyList_New(0);
-        if (callbacks == NULL)
+    if (_PyRuntime.gc.callbacks == NULL) {
+        _PyRuntime.gc.callbacks = PyList_New(0);
+        if (_PyRuntime.gc.callbacks == NULL)
            return NULL;
    }
-    Py_INCREF(callbacks);
-    if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
+    Py_INCREF(_PyRuntime.gc.callbacks);
+    if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0)
        return NULL;

 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
@ -1615,12 +1498,12 @@ PyGC_Collect(void)
 {
    Py_ssize_t n;

-    if (collecting)
+    if (_PyRuntime.gc.collecting)
        n = 0; /* already collecting, don't do anything */
    else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
        n = collect_with_callback(NUM_GENERATIONS - 1);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
    }

    return n;
@ -1629,7 +1512,7 @@ PyGC_Collect(void)
 Py_ssize_t
 _PyGC_CollectIfEnabled(void)
 {
-    if (!enabled)
+    if (!_PyRuntime.gc.enabled)
        return 0;

    return PyGC_Collect();
@ -1646,12 +1529,12 @@ _PyGC_CollectNoFail(void)
       during interpreter shutdown (and then never finish it).
       See http://bugs.python.org/issue8713#msg195178 for an example.
       */
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
        n = 0;
    else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
        n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
    }
    return n;
 }
@ -1659,10 +1542,10 @@ _PyGC_CollectNoFail(void)
 void
 _PyGC_DumpShutdownStats(void)
 {
-    if (!(debug & DEBUG_SAVEALL)
-        && garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
+    if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL)
+        && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) {
        char *message;
-        if (debug & DEBUG_UNCOLLECTABLE)
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
            message = "gc: %zd uncollectable objects at " \
                "shutdown";
        else
@ -1673,13 +1556,13 @@ _PyGC_DumpShutdownStats(void)
           already. */
        if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                     "gc", NULL, message,
-                                     PyList_GET_SIZE(garbage)))
+                                     PyList_GET_SIZE(_PyRuntime.gc.garbage)))
            PyErr_WriteUnraisable(NULL);
-        if (debug & DEBUG_UNCOLLECTABLE) {
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) {
            PyObject *repr = NULL, *bytes = NULL;
-            repr = PyObject_Repr(garbage);
+            repr = PyObject_Repr(_PyRuntime.gc.garbage);
            if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
-                PyErr_WriteUnraisable(garbage);
+                PyErr_WriteUnraisable(_PyRuntime.gc.garbage);
            else {
                PySys_WriteStderr(
                    "      %s\n",
@ -1695,7 +1578,7 @@ _PyGC_DumpShutdownStats(void)
 void
 _PyGC_Fini(void)
 {
-    Py_CLEAR(callbacks);
+    Py_CLEAR(_PyRuntime.gc.callbacks);
 }

 /* for debugging */
@ -1746,15 +1629,15 @@ _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
        return PyErr_NoMemory();
    g->gc.gc_refs = 0;
    _PyGCHead_SET_REFS(g, GC_UNTRACKED);
-    generations[0].count++; /* number of allocated GC objects */
-    if (generations[0].count > generations[0].threshold &&
-        enabled &&
-        generations[0].threshold &&
-        !collecting &&
+    _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */
+    if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold &&
+        _PyRuntime.gc.enabled &&
+        _PyRuntime.gc.generations[0].threshold &&
+        !_PyRuntime.gc.collecting &&
        !PyErr_Occurred()) {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
        collect_generations();
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
    }
    op = FROM_GC(g);
    return op;
@ -1819,8 +1702,8 @@ PyObject_GC_Del(void *op)
    PyGC_Head *g = AS_GC(op);
    if (IS_TRACKED(op))
        gc_list_remove(g);
-    if (generations[0].count > 0) {
-        generations[0].count--;
+    if (_PyRuntime.gc.generations[0].count > 0) {
+        _PyRuntime.gc.generations[0].count--;
    }
    PyObject_FREE(g);
 }
--- a/Modules/main.c
+++ b/Modules/main.c
@ -598,16 +598,10 @@ Py_Main(int argc, wchar_t **argv)
        }
    }

-    char *pymalloc = Py_GETENV("PYTHONMALLOC");
-    if (_PyMem_SetupAllocators(pymalloc) < 0) {
-        fprintf(stderr,
-            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n", pymalloc);
-        exit(1);
-    }
-
    /* Initialize the core language runtime */
    Py_IgnoreEnvironmentFlag = core_config.ignore_environment;
    core_config._disable_importlib = 0;
+    core_config.allocator = Py_GETENV("PYTHONMALLOC");
    _Py_InitializeCore(&core_config);

    /* Reprocess the command line with the language runtime available */
--- a/Objects/object.c
+++ b/Objects/object.c
@ -2028,14 +2028,6 @@ finally:

 /* Trashcan support. */

-/* Current call-stack depth of tp_dealloc calls. */
-int _PyTrash_delete_nesting = 0;
-
-/* List of objects that still need to be cleaned up, singly linked via their
- * gc headers' gc_prev pointers.
- */
-PyObject *_PyTrash_delete_later = NULL;
-
 /* Add op to the _PyTrash_delete_later list.  Called when the current
 * call-stack depth gets large.  op must be a currently untracked gc'ed
 * object, with refcount 0.  Py_DECREF must already have been called on it.
@ -2046,8 +2038,8 @@ _PyTrash_deposit_object(PyObject *op)
    assert(PyObject_IS_GC(op));
    assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
    assert(op->ob_refcnt == 0);
-    _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later;
-    _PyTrash_delete_later = op;
+    _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyRuntime.gc.trash_delete_later;
+    _PyRuntime.gc.trash_delete_later = op;
 }

 /* The equivalent API, using per-thread state recursion info */
@ -2068,11 +2060,11 @@ _PyTrash_thread_deposit_object(PyObject *op)
 void
 _PyTrash_destroy_chain(void)
 {
-    while (_PyTrash_delete_later) {
-        PyObject *op = _PyTrash_delete_later;
+    while (_PyRuntime.gc.trash_delete_later) {
+        PyObject *op = _PyRuntime.gc.trash_delete_later;
        destructor dealloc = Py_TYPE(op)->tp_dealloc;

-        _PyTrash_delete_later =
+        _PyRuntime.gc.trash_delete_later =
            (PyObject*) _Py_AS_GC(op)->gc.gc_prev;

        /* Call the deallocator directly.  This used to try to
@ -2082,9 +2074,9 @@ _PyTrash_destroy_chain(void)
         * up distorting allocation statistics.
         */
        assert(op->ob_refcnt == 0);
-        ++_PyTrash_delete_nesting;
+        ++_PyRuntime.gc.trash_delete_nesting;
        (*dealloc)(op);
-        --_PyTrash_delete_nesting;
+        --_PyRuntime.gc.trash_delete_nesting;
    }
 }

--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@ -1115,6 +1115,7 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    }
    /* The empty frozenset is a singleton */
    if (emptyfrozenset == NULL)
+        /* There is a possible (relatively harmless) race here. */
        emptyfrozenset = make_new_set(type, NULL);
    Py_XINCREF(emptyfrozenset);
    return emptyfrozenset;
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@ -1157,10 +1157,10 @@ subtype_dealloc(PyObject *self)
    /* UnTrack and re-Track around the trashcan macro, alas */
    /* See explanation at end of function for full disclosure */
    PyObject_GC_UnTrack(self);
-    ++_PyTrash_delete_nesting;
+    ++_PyRuntime.gc.trash_delete_nesting;
    ++ tstate->trash_delete_nesting;
    Py_TRASHCAN_SAFE_BEGIN(self);
-    --_PyTrash_delete_nesting;
+    --_PyRuntime.gc.trash_delete_nesting;
    -- tstate->trash_delete_nesting;

    /* Find the nearest base with a different tp_dealloc */
@ -1254,10 +1254,10 @@ subtype_dealloc(PyObject *self)
      Py_DECREF(type);

  endlabel:
-    ++_PyTrash_delete_nesting;
+    ++_PyRuntime.gc.trash_delete_nesting;
    ++ tstate->trash_delete_nesting;
    Py_TRASHCAN_SAFE_END(self);
-    --_PyTrash_delete_nesting;
+    --_PyRuntime.gc.trash_delete_nesting;
    -- tstate->trash_delete_nesting;

    /* Explanation of the weirdness around the trashcan macros:
@ -1297,7 +1297,7 @@ subtype_dealloc(PyObject *self)
          a subtle disaster.

       Q. Why the bizarre (net-zero) manipulation of
-          _PyTrash_delete_nesting around the trashcan macros?
+          _PyRuntime.gc.trash_delete_nesting around the trashcan macros?

       A. Some base classes (e.g. list) also use the trashcan mechanism.
          The following scenario used to be possible:
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@ -106,6 +106,14 @@
    <ClInclude Include="..\Include\graminit.h" />
    <ClInclude Include="..\Include\grammar.h" />
    <ClInclude Include="..\Include\import.h" />
+    <ClInclude Include="..\Include\internal\_Python.h" />
+    <ClInclude Include="..\Include\internal\_ceval.h" />
+    <ClInclude Include="..\Include\internal\_condvar.h" />
+    <ClInclude Include="..\Include\internal\_gil.h" />
+    <ClInclude Include="..\Include\internal\_mem.h" />
+    <ClInclude Include="..\Include\internal\_pymalloc.h" />
+    <ClInclude Include="..\Include\internal\_pystate.h" />
+    <ClInclude Include="..\Include\internal\_warnings.h" />
    <ClInclude Include="..\Include\intrcheck.h" />
    <ClInclude Include="..\Include\iterobject.h" />
    <ClInclude Include="..\Include\listobject.h" />
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@ -129,6 +129,30 @@
    <ClInclude Include="..\Include\import.h">
      <Filter>Include</Filter>
    </ClInclude>
+    <ClInclude Include="..\Include\internal\_Python.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_ceval.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_condvar.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_gil.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_mem.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_pymalloc.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_pystate.h">
+      <Filter>Include</Filter>
+    </ClInclude>
+    <ClInclude Include="..\Include\internal\_warnings.h">
+      <Filter>Include</Filter>
+    </ClInclude>
    <ClInclude Include="..\Include\intrcheck.h">
      <Filter>Include</Filter>
    </ClInclude>
--- a/Parser/pgenmain.c
+++ b/Parser/pgenmain.c
@ -21,10 +21,12 @@
 #include "node.h"
 #include "parsetok.h"
 #include "pgen.h"
+#include "internal/_mem.h"

 int Py_DebugFlag;
 int Py_VerboseFlag;
 int Py_IgnoreEnvironmentFlag;
+struct pyruntimestate _PyRuntime = {0};  /* zero-init; empty "{}" is not valid C before C23 */

 /* Forward */
 grammar *getgrammar(const char *filename);
@ -61,6 +63,8 @@ main(int argc, char **argv)
    filename = argv[1];
    graminit_h = argv[2];
    graminit_c = argv[3];
+    _PyObject_Initialize(&_PyRuntime.obj);
+    _PyMem_Initialize(&_PyRuntime.mem);
    g = getgrammar(filename);
    fp = fopen(graminit_c, "w");
    if (fp == NULL) {
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@ -8,13 +8,6 @@ PyDoc_STRVAR(warnings__doc__,
 MODULE_NAME " provides basic warning filtering support.\n"
 "It is a helper module to speed up interpreter start-up.");

-/* Both 'filters' and 'onceregistry' can be set in warnings.py;
-   get_warnings_attr() will reset these variables accordingly. */
-static PyObject *_filters;  /* List */
-static PyObject *_once_registry;  /* Dict */
-static PyObject *_default_action; /* String */
-static long _filters_version;
-
 _Py_IDENTIFIER(argv);
 _Py_IDENTIFIER(stderr);

@ -53,7 +46,7 @@ get_warnings_attr(const char *attr, int try_import)
    }

    /* don't try to import after the start of the Python finalization */
-    if (try_import && _Py_Finalizing == NULL) {
+    if (try_import && !_Py_IS_FINALIZING()) {
        warnings_module = PyImport_Import(warnings_str);
        if (warnings_module == NULL) {
            /* Fallback to the C implementation if we cannot get
@ -90,10 +83,10 @@ get_once_registry(void)
    if (registry == NULL) {
        if (PyErr_Occurred())
            return NULL;
-        return _once_registry;
+        return _PyRuntime.warnings.once_registry;
    }
-    Py_DECREF(_once_registry);
-    _once_registry = registry;
+    Py_DECREF(_PyRuntime.warnings.once_registry);
+    _PyRuntime.warnings.once_registry = registry;
    return registry;
 }

@ -108,11 +101,11 @@ get_default_action(void)
        if (PyErr_Occurred()) {
            return NULL;
        }
-        return _default_action;
+        return _PyRuntime.warnings.default_action;
    }

-    Py_DECREF(_default_action);
-    _default_action = default_action;
+    Py_DECREF(_PyRuntime.warnings.default_action);
+    _PyRuntime.warnings.default_action = default_action;
    return default_action;
 }

@ -132,23 +125,24 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno,
            return NULL;
    }
    else {
-        Py_DECREF(_filters);
-        _filters = warnings_filters;
+        Py_DECREF(_PyRuntime.warnings.filters);
+        _PyRuntime.warnings.filters = warnings_filters;
    }

-    if (_filters == NULL || !PyList_Check(_filters)) {
+    PyObject *filters = _PyRuntime.warnings.filters;
+    if (filters == NULL || !PyList_Check(filters)) {
        PyErr_SetString(PyExc_ValueError,
                        MODULE_NAME ".filters must be a list");
        return NULL;
    }

-    /* _filters could change while we are iterating over it. */
-    for (i = 0; i < PyList_GET_SIZE(_filters); i++) {
+    /* _PyRuntime.warnings.filters could change while we are iterating over it. */
+    for (i = 0; i < PyList_GET_SIZE(filters); i++) {
        PyObject *tmp_item, *action, *msg, *cat, *mod, *ln_obj;
        Py_ssize_t ln;
        int is_subclass, good_msg, good_mod;

-        tmp_item = PyList_GET_ITEM(_filters, i);
+        tmp_item = PyList_GET_ITEM(filters, i);
        if (!PyTuple_Check(tmp_item) || PyTuple_GET_SIZE(tmp_item) != 5) {
            PyErr_Format(PyExc_ValueError,
                         MODULE_NAME ".filters item %zd isn't a 5-tuple", i);
@ -220,9 +214,9 @@ already_warned(PyObject *registry, PyObject *key, int should_set)
    version_obj = _PyDict_GetItemId(registry, &PyId_version);
    if (version_obj == NULL
        || !PyLong_CheckExact(version_obj)
-        || PyLong_AsLong(version_obj) != _filters_version) {
+        || PyLong_AsLong(version_obj) != _PyRuntime.warnings.filters_version) {
        PyDict_Clear(registry);
-        version_obj = PyLong_FromLong(_filters_version);
+        version_obj = PyLong_FromLong(_PyRuntime.warnings.filters_version);
        if (version_obj == NULL)
            return -1;
        if (_PyDict_SetItemId(registry, &PyId_version, version_obj) < 0) {
@ -520,7 +514,7 @@ warn_explicit(PyObject *category, PyObject *message,
                if (registry == NULL)
                    goto cleanup;
            }
-            /* _once_registry[(text, category)] = 1 */
+            /* _PyRuntime.warnings.once_registry[(text, category)] = 1 */
            rc = update_registry(registry, text, category, 0);
        }
        else if (_PyUnicode_EqualToASCIIString(action, "module")) {
@ -910,7 +904,7 @@ warnings_warn_explicit(PyObject *self, PyObject *args, PyObject *kwds)
 static PyObject *
 warnings_filters_mutated(PyObject *self, PyObject *args)
 {
-    _filters_version++;
+    _PyRuntime.warnings.filters_version++;
    Py_RETURN_NONE;
 }

@ -1160,7 +1154,8 @@ create_filter(PyObject *category, const char *action)
    }

    /* This assumes the line number is zero for now. */
-    return PyTuple_Pack(5, action_obj, Py_None, category, Py_None, _PyLong_Zero);
+    return PyTuple_Pack(5, action_obj, Py_None,
+                        category, Py_None, _PyLong_Zero);
 }

 static PyObject *
@ -1228,33 +1223,35 @@ _PyWarnings_Init(void)
    if (m == NULL)
        return NULL;

-    if (_filters == NULL) {
-        _filters = init_filters();
-        if (_filters == NULL)
+    if (_PyRuntime.warnings.filters == NULL) {
+        _PyRuntime.warnings.filters = init_filters();
+        if (_PyRuntime.warnings.filters == NULL)
            return NULL;
    }
-    Py_INCREF(_filters);
-    if (PyModule_AddObject(m, "filters", _filters) < 0)
+    Py_INCREF(_PyRuntime.warnings.filters);
+    if (PyModule_AddObject(m, "filters", _PyRuntime.warnings.filters) < 0)
        return NULL;

-    if (_once_registry == NULL) {
-        _once_registry = PyDict_New();
-        if (_once_registry == NULL)
+    if (_PyRuntime.warnings.once_registry == NULL) {
+        _PyRuntime.warnings.once_registry = PyDict_New();
+        if (_PyRuntime.warnings.once_registry == NULL)
            return NULL;
    }
-    Py_INCREF(_once_registry);
-    if (PyModule_AddObject(m, "_onceregistry", _once_registry) < 0)
+    Py_INCREF(_PyRuntime.warnings.once_registry);
+    if (PyModule_AddObject(m, "_onceregistry",
+                           _PyRuntime.warnings.once_registry) < 0)
        return NULL;

-    if (_default_action == NULL) {
-        _default_action = PyUnicode_FromString("default");
-        if (_default_action == NULL)
+    if (_PyRuntime.warnings.default_action == NULL) {
+        _PyRuntime.warnings.default_action = PyUnicode_FromString("default");
+        if (_PyRuntime.warnings.default_action == NULL)
            return NULL;
    }
-    Py_INCREF(_default_action);
-    if (PyModule_AddObject(m, "_defaultaction", _default_action) < 0)
+    Py_INCREF(_PyRuntime.warnings.default_action);
+    if (PyModule_AddObject(m, "_defaultaction",
+                           _PyRuntime.warnings.default_action) < 0)
        return NULL;

-    _filters_version = 0;
+    _PyRuntime.warnings.filters_version = 0;
    return m;
 }
--- a/Python/ceval.c
+++ b/Python/ceval.c
@ -36,7 +36,8 @@ extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
 typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);

 /* Forward declarations */
-Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, PyObject *);
+Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t,
+                                          PyObject *);
 static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);

 #ifdef LLTRACE
@ -52,13 +53,15 @@ static int call_trace_protected(Py_tracefunc, PyObject *,
 static void call_exc_trace(Py_tracefunc, PyObject *,
                           PyThreadState *, PyFrameObject *);
 static int maybe_call_line_trace(Py_tracefunc, PyObject *,
-                                 PyThreadState *, PyFrameObject *, int *, int *, int *);
+                                 PyThreadState *, PyFrameObject *,
+                                 int *, int *, int *);
 static void maybe_dtrace_line(PyFrameObject *, int *, int *, int *);
 static void dtrace_function_entry(PyFrameObject *);
 static void dtrace_function_return(PyFrameObject *);

 static PyObject * cmp_outcome(int, PyObject *, PyObject *);
-static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, PyObject *);
+static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *,
+                              PyObject *);
 static PyObject * import_from(PyObject *, PyObject *);
 static int import_all_from(PyObject *, PyObject *);
 static void format_exc_check_arg(PyObject *, const char *, PyObject *);
@ -88,7 +91,7 @@ static long dxp[256];
 #endif

 #ifdef WITH_THREAD
-#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request)
+#define GIL_REQUEST _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)
 #else
 #define GIL_REQUEST 0
 #endif
@ -98,22 +101,22 @@ static long dxp[256];
   the GIL eventually anyway. */
 #define COMPUTE_EVAL_BREAKER() \
    _Py_atomic_store_relaxed( \
-        &eval_breaker, \
+        &_PyRuntime.ceval.eval_breaker, \
        GIL_REQUEST | \
-        _Py_atomic_load_relaxed(&pendingcalls_to_do) | \
-        pending_async_exc)
+        _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \
+        _PyRuntime.ceval.pending.async_exc)

 #ifdef WITH_THREAD

 #define SET_GIL_DROP_REQUEST() \
    do { \
-        _Py_atomic_store_relaxed(&gil_drop_request, 1); \
-        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
    } while (0)

 #define RESET_GIL_DROP_REQUEST() \
    do { \
-        _Py_atomic_store_relaxed(&gil_drop_request, 0); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \
        COMPUTE_EVAL_BREAKER(); \
    } while (0)

@ -122,47 +125,35 @@ static long dxp[256];
 /* Pending calls are only modified under _PyRuntime.ceval.pending.lock */
 #define SIGNAL_PENDING_CALLS() \
    do { \
-        _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \
-        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
    } while (0)

 #define UNSIGNAL_PENDING_CALLS() \
    do { \
-        _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \
        COMPUTE_EVAL_BREAKER(); \
    } while (0)

 #define SIGNAL_ASYNC_EXC() \
    do { \
-        pending_async_exc = 1; \
-        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+        _PyRuntime.ceval.pending.async_exc = 1; \
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
    } while (0)

 #define UNSIGNAL_ASYNC_EXC() \
-    do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+    do { \
+        _PyRuntime.ceval.pending.async_exc = 0; \
+        COMPUTE_EVAL_BREAKER(); \
+    } while (0)


-/* This single variable consolidates all requests to break out of the fast path
-   in the eval loop. */
-static _Py_atomic_int eval_breaker = {0};
-/* Request for running pending calls. */
-static _Py_atomic_int pendingcalls_to_do = {0};
-/* Request for looking at the `async_exc` field of the current thread state.
-   Guarded by the GIL. */
-static int pending_async_exc = 0;
-
 #ifdef WITH_THREAD

 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 #include "pythread.h"
-
-static PyThread_type_lock pending_lock = 0; /* for pending calls */
-static unsigned long main_thread = 0;
-/* Request for dropping the GIL */
-static _Py_atomic_int gil_drop_request = {0};
-
 #include "ceval_gil.h"

 int
@ -178,9 +169,9 @@ PyEval_InitThreads(void)
        return;
    create_gil();
    take_gil(PyThreadState_GET());
-    main_thread = PyThread_get_thread_ident();
-    if (!pending_lock)
-        pending_lock = PyThread_allocate_lock();
+    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
+    if (!_PyRuntime.ceval.pending.lock)
+        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
 }

 void
@ -248,9 +239,9 @@ PyEval_ReInitThreads(void)
    if (!gil_created())
        return;
    recreate_gil();
-    pending_lock = PyThread_allocate_lock();
+    _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
    take_gil(current_tstate);
-    main_thread = PyThread_get_thread_ident();
+    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();

    /* Destroy all threads except the current one */
    _PyThreadState_DeleteExcept(current_tstate);
@ -294,7 +285,7 @@ PyEval_RestoreThread(PyThreadState *tstate)
        int err = errno;
        take_gil(tstate);
        /* _Py_Finalizing is protected by the GIL */
-        if (_Py_Finalizing && tstate != _Py_Finalizing) {
+        if (_Py_IS_FINALIZING() && !_Py_CURRENTLY_FINALIZING(tstate)) {
            drop_gil(tstate);
            PyThread_exit_thread();
            assert(0);  /* unreachable */
@ -346,19 +337,11 @@ _PyEval_SignalReceived(void)
   callback.
 */

-#define NPENDINGCALLS 32
-static struct {
-    int (*func)(void *);
-    void *arg;
-} pendingcalls[NPENDINGCALLS];
-static int pendingfirst = 0;
-static int pendinglast = 0;
-
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
    int i, j, result=0;
-    PyThread_type_lock lock = pending_lock;
+    PyThread_type_lock lock = _PyRuntime.ceval.pending.lock;

    /* try a few times for the lock.  Since this mechanism is used
     * for signal handling (on the main thread), there is a (slim)
@ -380,14 +363,14 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
            return -1;
    }

-    i = pendinglast;
+    i = _PyRuntime.ceval.pending.last;
    j = (i + 1) % NPENDINGCALLS;
-    if (j == pendingfirst) {
+    if (j == _PyRuntime.ceval.pending.first) {
        result = -1; /* Queue full */
    } else {
-        pendingcalls[i].func = func;
-        pendingcalls[i].arg = arg;
-        pendinglast = j;
+        _PyRuntime.ceval.pending.calls[i].func = func;
+        _PyRuntime.ceval.pending.calls[i].arg = arg;
+        _PyRuntime.ceval.pending.last = j;
    }
    /* signal main loop */
    SIGNAL_PENDING_CALLS();
@ -405,16 +388,19 @@ Py_MakePendingCalls(void)

    assert(PyGILState_Check());

-    if (!pending_lock) {
+    if (!_PyRuntime.ceval.pending.lock) {
        /* initial allocation of the lock */
-        pending_lock = PyThread_allocate_lock();
-        if (pending_lock == NULL)
+        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+        if (_PyRuntime.ceval.pending.lock == NULL)
            return -1;
    }

    /* only service pending calls on main thread */
-    if (main_thread && PyThread_get_thread_ident() != main_thread)
+    if (_PyRuntime.ceval.pending.main_thread &&
+        PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread)
+    {
        return 0;
+    }
    /* don't perform recursive pending calls */
    if (busy)
        return 0;
@ -436,16 +422,16 @@ Py_MakePendingCalls(void)
        void *arg = NULL;

        /* pop one item off the queue while holding the lock */
-        PyThread_acquire_lock(pending_lock, WAIT_LOCK);
-        j = pendingfirst;
-        if (j == pendinglast) {
+        PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK);
+        j = _PyRuntime.ceval.pending.first;
+        if (j == _PyRuntime.ceval.pending.last) {
            func = NULL; /* Queue empty */
        } else {
-            func = pendingcalls[j].func;
-            arg = pendingcalls[j].arg;
-            pendingfirst = (j + 1) % NPENDINGCALLS;
+            func = _PyRuntime.ceval.pending.calls[j].func;
+            arg = _PyRuntime.ceval.pending.calls[j].arg;
+            _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS;
        }
-        PyThread_release_lock(pending_lock);
+        PyThread_release_lock(_PyRuntime.ceval.pending.lock);
        /* having released the lock, perform the callback */
        if (func == NULL)
            break;
@ -489,14 +475,6 @@ error:
   The two threads could theoretically wiggle around the "busy" variable.
 */

-#define NPENDINGCALLS 32
-static struct {
-    int (*func)(void *);
-    void *arg;
-} pendingcalls[NPENDINGCALLS];
-static volatile int pendingfirst = 0;
-static volatile int pendinglast = 0;
-
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
@ -506,15 +484,15 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
    if (busy)
        return -1;
    busy = 1;
-    i = pendinglast;
+    i = _PyRuntime.ceval.pending.last;
    j = (i + 1) % NPENDINGCALLS;
-    if (j == pendingfirst) {
+    if (j == _PyRuntime.ceval.pending.first) {
        busy = 0;
        return -1; /* Queue full */
    }
-    pendingcalls[i].func = func;
-    pendingcalls[i].arg = arg;
-    pendinglast = j;
+    _PyRuntime.ceval.pending.calls[i].func = func;
+    _PyRuntime.ceval.pending.calls[i].arg = arg;
+    _PyRuntime.ceval.pending.last = j;

    SIGNAL_PENDING_CALLS();
    busy = 0;
@ -543,12 +521,12 @@ Py_MakePendingCalls(void)
        int i;
        int (*func)(void *);
        void *arg;
-        i = pendingfirst;
-        if (i == pendinglast)
+        i = _PyRuntime.ceval.pending.first;
+        if (i == _PyRuntime.ceval.pending.last)
            break; /* Queue empty */
-        func = pendingcalls[i].func;
-        arg = pendingcalls[i].arg;
-        pendingfirst = (i + 1) % NPENDINGCALLS;
+        func = _PyRuntime.ceval.pending.calls[i].func;
+        arg = _PyRuntime.ceval.pending.calls[i].arg;
+        _PyRuntime.ceval.pending.first = (i + 1) % NPENDINGCALLS;
        if (func(arg) < 0) {
            goto error;
        }
@ -570,20 +548,32 @@ error:
 #ifndef Py_DEFAULT_RECURSION_LIMIT
 #define Py_DEFAULT_RECURSION_LIMIT 1000
 #endif
-static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
-int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT;
+
+void
+_PyEval_Initialize(struct _ceval_runtime_state *state)
+{
+    state->recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
+    state->check_recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
+    _gil_initialize(&state->gil);
+}
+
+int
+_PyEval_CheckRecursionLimit(void)
+{
+    return _PyRuntime.ceval.check_recursion_limit;
+}

 int
 Py_GetRecursionLimit(void)
 {
-    return recursion_limit;
+    return _PyRuntime.ceval.recursion_limit;
 }

 void
 Py_SetRecursionLimit(int new_limit)
 {
-    recursion_limit = new_limit;
-    _Py_CheckRecursionLimit = recursion_limit;
+    _PyRuntime.ceval.recursion_limit = new_limit;
+    _PyRuntime.ceval.check_recursion_limit = _PyRuntime.ceval.recursion_limit;
 }

 /* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall()
@ -595,6 +585,7 @@ int
 _Py_CheckRecursiveCall(const char *where)
 {
    PyThreadState *tstate = PyThreadState_GET();
+    int recursion_limit = _PyRuntime.ceval.recursion_limit;

 #ifdef USE_STACKCHECK
    if (PyOS_CheckStack()) {
@ -603,7 +594,7 @@ _Py_CheckRecursiveCall(const char *where)
        return -1;
    }
 #endif
-    _Py_CheckRecursionLimit = recursion_limit;
+    _PyRuntime.ceval.check_recursion_limit = recursion_limit;
    if (tstate->recursion_critical)
        /* Somebody asked that we don't check for recursion. */
        return 0;
@ -642,13 +633,7 @@ static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *);
 static int do_raise(PyObject *, PyObject *);
 static int unpack_iterable(PyObject *, int, int, PyObject **);

-/* Records whether tracing is on for any thread.  Counts the number of
-   threads for which tstate->c_tracefunc is non-NULL, so if the value
-   is 0, we know we don't have to check this thread's c_tracefunc.
-   This speeds up the if statement in PyEval_EvalFrameEx() after
-   fast_next_opcode*/
-static int _Py_TracingPossible = 0;
-
+#define _Py_TracingPossible _PyRuntime.ceval.tracing_possible


 PyObject *
@ -779,7 +764,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)

 #define DISPATCH() \
    { \
-        if (!_Py_atomic_load_relaxed(&eval_breaker)) {      \
+        if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \
                    FAST_DISPATCH(); \
        } \
        continue; \
@ -827,7 +812,8 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
 /* Code access macros */

 /* The integer overflow is checked by an assertion below. */
-#define INSTR_OFFSET()  (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
+#define INSTR_OFFSET()  \
+    (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
 #define NEXTOPARG()  do { \
        _Py_CODEUNIT word = *next_instr; \
        opcode = _Py_OPCODE(word); \
@ -1080,7 +1066,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
           async I/O handler); see Py_AddPendingCall() and
           Py_MakePendingCalls() above. */

-        if (_Py_atomic_load_relaxed(&eval_breaker)) {
+        if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) {
            if (_Py_OPCODE(*next_instr) == SETUP_FINALLY ||
                _Py_OPCODE(*next_instr) == YIELD_FROM) {
                /* Two cases where we skip running signal handlers and other
@ -1097,12 +1083,16 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
                */
                goto fast_next_opcode;
            }
-            if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) {
+            if (_Py_atomic_load_relaxed(
+                        &_PyRuntime.ceval.pending.calls_to_do))
+            {
                if (Py_MakePendingCalls() < 0)
                    goto error;
            }
 #ifdef WITH_THREAD
-            if (_Py_atomic_load_relaxed(&gil_drop_request)) {
+            if (_Py_atomic_load_relaxed(
+                        &_PyRuntime.ceval.gil_drop_request))
+            {
                /* Give another thread a chance */
                if (PyThreadState_Swap(NULL) != tstate)
                    Py_FatalError("ceval: tstate mix-up");
@ -1113,7 +1103,9 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
                take_gil(tstate);

                /* Check if we should make a quick exit. */
-                if (_Py_Finalizing && _Py_Finalizing != tstate) {
+                if (_Py_IS_FINALIZING() &&
+                    !_Py_CURRENTLY_FINALIZING(tstate))
+                {
                    drop_gil(tstate);
                    PyThread_exit_thread();
                }
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@ -8,20 +8,13 @@

 /* First some general settings */

-/* microseconds (the Python API uses seconds, though) */
-#define DEFAULT_INTERVAL 5000
-static unsigned long gil_interval = DEFAULT_INTERVAL;
-#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)
-
-/* Enable if you want to force the switching of threads at least every `gil_interval` */
-#undef FORCE_SWITCHING
-#define FORCE_SWITCHING
+/* Effective GIL switch interval in microseconds, never less than 1. */
+#define INTERVAL \
+    (_PyRuntime.ceval.gil.interval >= 1 ? _PyRuntime.ceval.gil.interval : 1)


 /*
   Notes about the implementation:

-   - The GIL is just a boolean variable (gil_locked) whose access is protected
+   - The GIL is just a boolean variable (locked) whose access is protected
     by a mutex (gil_mutex), and whose changes are signalled by a condition
     variable (gil_cond). gil_mutex is taken for short periods of time,
     and therefore mostly uncontended.
@ -48,7 +41,7 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
   - When a thread releases the GIL and gil_drop_request is set, that thread
     ensures that another GIL-awaiting thread gets scheduled.
     It does so by waiting on a condition variable (switch_cond) until
-     the value of gil_last_holder is changed to something else than its
+     the value of last_holder is changed to something else than its
     own thread state pointer, indicating that another thread was able to
     take the GIL.

@ -60,11 +53,7 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
 */

 #include "condvar.h"
-#ifndef Py_HAVE_CONDVAR
-#error You need either a POSIX-compatible or a Windows system!
-#endif

-#define MUTEX_T PyMUTEX_T
 #define MUTEX_INIT(mut) \
    if (PyMUTEX_INIT(&(mut))) { \
        Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); };
@ -78,7 +67,6 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
    if (PyMUTEX_UNLOCK(&(mut))) { \
        Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); };

-#define COND_T PyCOND_T
 #define COND_INIT(cond) \
    if (PyCOND_INIT(&(cond))) { \
        Py_FatalError("PyCOND_INIT(" #cond ") failed"); };
@ -103,48 +91,36 @@ static unsigned long gil_interval = DEFAULT_INTERVAL;
    } \


+#define DEFAULT_INTERVAL 5000

-/* Whether the GIL is already taken (-1 if uninitialized). This is atomic
-   because it can be read without any lock taken in ceval.c. */
-static _Py_atomic_int gil_locked = {-1};
-/* Number of GIL switches since the beginning. */
-static unsigned long gil_switch_number = 0;
-/* Last PyThreadState holding / having held the GIL. This helps us know
-   whether anyone else was scheduled after we dropped the GIL. */
-static _Py_atomic_address gil_last_holder = {0};
-
-/* This condition variable allows one or several threads to wait until
-   the GIL is released. In addition, the mutex also protects the above
-   variables. */
-static COND_T gil_cond;
-static MUTEX_T gil_mutex;
-
-#ifdef FORCE_SWITCHING
-/* This condition variable helps the GIL-releasing thread wait for
-   a GIL-awaiting thread to be scheduled and take the GIL. */
-static COND_T switch_cond;
-static MUTEX_T switch_mutex;
-#endif
-
+/* Put the GIL state into its "not created yet" configuration: the switch
+   interval gets its default value and `locked` is set to -1, which
+   gil_created() reports as "no GIL" (it only accepts values >= 0). */
+static void _gil_initialize(struct _gil_runtime_state *state)
+{
+    _Py_atomic_int not_created = {-1};
+
+    state->interval = DEFAULT_INTERVAL;
+    state->locked = not_created;
+}

 static int gil_created(void)
 {
-    return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0;
+    return (_Py_atomic_load_explicit(&_PyRuntime.ceval.gil.locked,
+                                     _Py_memory_order_acquire)
+            ) >= 0;
 }

 static void create_gil(void)
 {
-    MUTEX_INIT(gil_mutex);
+    MUTEX_INIT(_PyRuntime.ceval.gil.mutex);
 #ifdef FORCE_SWITCHING
-    MUTEX_INIT(switch_mutex);
+    MUTEX_INIT(_PyRuntime.ceval.gil.switch_mutex);
 #endif
-    COND_INIT(gil_cond);
+    COND_INIT(_PyRuntime.ceval.gil.cond);
 #ifdef FORCE_SWITCHING
-    COND_INIT(switch_cond);
+    COND_INIT(_PyRuntime.ceval.gil.switch_cond);
 #endif
-    _Py_atomic_store_relaxed(&gil_last_holder, 0);
-    _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
-    _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
+    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, 0);
+    _Py_ANNOTATE_RWLOCK_CREATE(&_PyRuntime.ceval.gil.locked);
+    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, 0,
+                              _Py_memory_order_release);
 }

 static void destroy_gil(void)
@ -152,54 +128,62 @@ static void destroy_gil(void)
    /* some pthread-like implementations tie the mutex to the cond
     * and must have the cond destroyed first.
     */
-    COND_FINI(gil_cond);
-    MUTEX_FINI(gil_mutex);
+    COND_FINI(_PyRuntime.ceval.gil.cond);
+    MUTEX_FINI(_PyRuntime.ceval.gil.mutex);
 #ifdef FORCE_SWITCHING
-    COND_FINI(switch_cond);
-    MUTEX_FINI(switch_mutex);
+    COND_FINI(_PyRuntime.ceval.gil.switch_cond);
+    MUTEX_FINI(_PyRuntime.ceval.gil.switch_mutex);
 #endif
-    _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release);
-    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
+    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, -1,
+                              _Py_memory_order_release);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
 }

 static void recreate_gil(void)
 {
-    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
    /* XXX should we destroy the old OS resources here? */
    create_gil();
 }

 static void drop_gil(PyThreadState *tstate)
 {
-    if (!_Py_atomic_load_relaxed(&gil_locked))
+    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
        Py_FatalError("drop_gil: GIL is not locked");
    /* tstate is allowed to be NULL (early interpreter init) */
    if (tstate != NULL) {
        /* Sub-interpreter support: threads might have been switched
           under our feet using PyThreadState_Swap(). Fix the GIL last
           holder variable so that our heuristics work. */
-        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
+                                 (uintptr_t)tstate);
    }

-    MUTEX_LOCK(gil_mutex);
-    _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);
-    _Py_atomic_store_relaxed(&gil_locked, 0);
-    COND_SIGNAL(gil_cond);
-    MUTEX_UNLOCK(gil_mutex);
+    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
+    _Py_ANNOTATE_RWLOCK_RELEASED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 0);
+    COND_SIGNAL(_PyRuntime.ceval.gil.cond);
+    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);

 #ifdef FORCE_SWITCHING
-    if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) {
-        MUTEX_LOCK(switch_mutex);
+    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) &&
+        tstate != NULL)
+    {
+        MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
        /* Not switched yet => wait */
-        if ((PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder) == tstate) {
+        if (((PyThreadState*)_Py_atomic_load_relaxed(
+                    &_PyRuntime.ceval.gil.last_holder)
+            ) == tstate)
+        {
        RESET_GIL_DROP_REQUEST();
            /* NOTE: if COND_WAIT does not atomically start waiting when
               releasing the mutex, another thread can run through, take
               the GIL and drop it again, and reset the condition
               before we even had a chance to wait for it. */
-            COND_WAIT(switch_cond, switch_mutex);
+            COND_WAIT(_PyRuntime.ceval.gil.switch_cond,
+                      _PyRuntime.ceval.gil.switch_mutex);
    }
-        MUTEX_UNLOCK(switch_mutex);
+        MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
    }
 #endif
 }
@ -211,60 +195,65 @@ static void take_gil(PyThreadState *tstate)
        Py_FatalError("take_gil: NULL tstate");

    err = errno;
-    MUTEX_LOCK(gil_mutex);
+    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);

-    if (!_Py_atomic_load_relaxed(&gil_locked))
+    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
        goto _ready;

-    while (_Py_atomic_load_relaxed(&gil_locked)) {
+    while (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) {
        int timed_out = 0;
        unsigned long saved_switchnum;

-        saved_switchnum = gil_switch_number;
-        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
+        saved_switchnum = _PyRuntime.ceval.gil.switch_number;
+        COND_TIMED_WAIT(_PyRuntime.ceval.gil.cond, _PyRuntime.ceval.gil.mutex,
+                        INTERVAL, timed_out);
        /* If we timed out and no switch occurred in the meantime, it is time
           to ask the GIL-holding thread to drop it. */
        if (timed_out &&
-            _Py_atomic_load_relaxed(&gil_locked) &&
-            gil_switch_number == saved_switchnum) {
+            _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) &&
+            _PyRuntime.ceval.gil.switch_number == saved_switchnum) {
            SET_GIL_DROP_REQUEST();
        }
    }
 _ready:
 #ifdef FORCE_SWITCHING
-    /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */
-    MUTEX_LOCK(switch_mutex);
+    /* This mutex must be taken before modifying
+       _PyRuntime.ceval.gil.last_holder (see drop_gil()). */
+    MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
 #endif
    /* We now hold the GIL */
-    _Py_atomic_store_relaxed(&gil_locked, 1);
-    _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 1);
+    _Py_ANNOTATE_RWLOCK_ACQUIRED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);

-    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) {
-        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
-        ++gil_switch_number;
+    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(
+                    &_PyRuntime.ceval.gil.last_holder))
+    {
+        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
+                                 (uintptr_t)tstate);
+        ++_PyRuntime.ceval.gil.switch_number;
    }

 #ifdef FORCE_SWITCHING
-    COND_SIGNAL(switch_cond);
-    MUTEX_UNLOCK(switch_mutex);
+    COND_SIGNAL(_PyRuntime.ceval.gil.switch_cond);
+    MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
 #endif
-    if (_Py_atomic_load_relaxed(&gil_drop_request)) {
+    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) {
        RESET_GIL_DROP_REQUEST();
    }
    if (tstate->async_exc != NULL) {
        _PyEval_SignalAsyncExc();
    }

-    MUTEX_UNLOCK(gil_mutex);
+    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
    errno = err;
 }

 void _PyEval_SetSwitchInterval(unsigned long microseconds)
 {
-    gil_interval = microseconds;
+    _PyRuntime.ceval.gil.interval = microseconds;
 }

 unsigned long _PyEval_GetSwitchInterval()
 {
-    return gil_interval;
+    return _PyRuntime.ceval.gil.interval;
 }
--- a/Python/condvar.h
+++ b/Python/condvar.h
@ -37,27 +37,16 @@
 *    Condition Variable.
 */

-#ifndef _CONDVAR_H_
-#define _CONDVAR_H_
+#ifndef _CONDVAR_IMPL_H_
+#define _CONDVAR_IMPL_H_

 #include "Python.h"
-
-#ifndef _POSIX_THREADS
-/* This means pthreads are not implemented in libc headers, hence the macro
-   not present in unistd.h. But they still can be implemented as an external
-   library (e.g. gnu pth in pthread emulation) */
-# ifdef HAVE_PTHREAD_H
-#  include <pthread.h> /* _POSIX_THREADS */
-# endif
-#endif
+#include "internal/_condvar.h"

 #ifdef _POSIX_THREADS
 /*
 * POSIX support
 */
-#define Py_HAVE_CONDVAR
-
-#include <pthread.h>

 #define PyCOND_ADD_MICROSECONDS(tv, interval) \
 do { /* TODO: add overflow and truncation checks */ \
@ -74,13 +63,11 @@ do { /* TODO: add overflow and truncation checks */ \
 #endif

 /* The following functions return 0 on success, nonzero on error */
-#define PyMUTEX_T pthread_mutex_t
 #define PyMUTEX_INIT(mut)       pthread_mutex_init((mut), NULL)
 #define PyMUTEX_FINI(mut)       pthread_mutex_destroy(mut)
 #define PyMUTEX_LOCK(mut)       pthread_mutex_lock(mut)
 #define PyMUTEX_UNLOCK(mut)     pthread_mutex_unlock(mut)

-#define PyCOND_T pthread_cond_t
 #define PyCOND_INIT(cond)       pthread_cond_init((cond), NULL)
 #define PyCOND_FINI(cond)       pthread_cond_destroy(cond)
 #define PyCOND_SIGNAL(cond)     pthread_cond_signal(cond)
@ -116,45 +103,11 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
 * Emulated condition variables ones that work with XP and later, plus
 * example native support on VISTA and onwards.
 */
-#define Py_HAVE_CONDVAR
-
-
-/* include windows if it hasn't been done before */
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* options */
-/* non-emulated condition variables are provided for those that want
- * to target Windows Vista.  Modify this macro to enable them.
- */
-#ifndef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
-#endif
-
-/* fall back to emulation if not targeting Vista */
-#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
-#undef _PY_EMULATED_WIN_CV
-#define _PY_EMULATED_WIN_CV 1
-#endif
-

 #if _PY_EMULATED_WIN_CV

 /* The mutex is a CriticalSection object and
   The condition variables is emulated with the help of a semaphore.
-   Semaphores are available on Windows XP (2003 server) and later.
-   We use a Semaphore rather than an auto-reset event, because although
-   an auto-resent event might appear to solve the lost-wakeup bug (race
-   condition between releasing the outer lock and waiting) because it
-   maintains state even though a wait hasn't happened, there is still
-   a lost wakeup problem if more than one thread are interrupted in the
-   critical place.  A semaphore solves that, because its state is counted,
-   not Boolean.
-   Because it is ok to signal a condition variable with no one
-   waiting, we need to keep track of the number of
-   waiting threads.  Otherwise, the semaphore's state could rise
-   without bound.  This also helps reduce the number of "spurious wakeups"
-   that would otherwise happen.

   This implementation still has the problem that the threads woken
   with a "signal" aren't necessarily those that are already
@ -168,8 +121,6 @@ PyCOND_TIMEDWAIT(PyCOND_T *cond, PyMUTEX_T *mut, long long us)
   http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
 */

-typedef CRITICAL_SECTION PyMUTEX_T;
-
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
 {
@ -198,15 +149,6 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs)
    return 0;
 }

-/* The ConditionVariable object.  From XP onwards it is easily emulated with
- * a Semaphore
- */
-
-typedef struct _PyCOND_T
-{
-    HANDLE sem;
-    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
-} PyCOND_T;

 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
@ -304,12 +246,7 @@ PyCOND_BROADCAST(PyCOND_T *cv)
    return 0;
 }

-#else
-
-/* Use native Win7 primitives if build target is Win7 or higher */
-
-/* SRWLOCK is faster and better than CriticalSection */
-typedef SRWLOCK PyMUTEX_T;
+#else /* !_PY_EMULATED_WIN_CV */

 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
@ -339,8 +276,6 @@ PyMUTEX_UNLOCK(PyMUTEX_T *cs)
 }


-typedef CONDITION_VARIABLE  PyCOND_T;
-
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
 {
@ -387,4 +322,4 @@ PyCOND_BROADCAST(PyCOND_T *cv)

 #endif /* _POSIX_THREADS, NT_THREADS */

-#endif /* _CONDVAR_H_ */
+#endif /* _CONDVAR_IMPL_H_ */
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@ -77,6 +77,30 @@ extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
 extern void _PyGILState_Fini(void);
 #endif /* WITH_THREAD */

+/* The single process-wide runtime state.  It is zero-initialized here and
+   populated by _PyRuntime_Initialize().  `{0}` is used instead of `{}`
+   because an empty initializer list is not valid ISO C before C23. */
+_PyRuntimeState _PyRuntime = {0};
+
+/* Initialize the process-wide _PyRuntime state on the first call only;
+   every later call returns without doing anything.
+
+   XXX This mirrors the static initialization of the former globals that
+   now live in _PyRuntime.  _PyRuntime *should* be initialized again on
+   every Py_Initialize() call, but doing so breaks the runtime because the
+   runtime state is not properly finalized currently. */
+void
+_PyRuntime_Initialize(void)
+{
+    static int initialized = 0;
+
+    if (!initialized) {
+        initialized = 1;
+        _PyRuntimeState_Init(&_PyRuntime);
+    }
+}
+
+/* Finalize the process-wide _PyRuntime state by handing it to
+   _PyRuntimeState_Fini().  Py_FinalizeEx() calls this as its last step,
+   after the low-level exit functions have run. */
+void
+_PyRuntime_Finalize(void)
+{
+    _PyRuntimeState_Fini(&_PyRuntime);
+}
+
 /* Global configuration variable declarations are in pydebug.h */
 /* XXX (ncoghlan): move those declarations to pylifecycle.h? */
 int Py_DebugFlag; /* Needed by parser.c */
@ -100,8 +124,6 @@ int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */
 int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */
 #endif

-PyThreadState *_Py_Finalizing = NULL;
-
 /* Hack to force loading of object files */
 int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \
    PyOS_mystrnicmp; /* Python/pystrcmp.o */
@ -119,19 +141,17 @@ PyModule_GetWarningsModule(void)
 *
 * Can be called prior to Py_Initialize.
 */
-int _Py_CoreInitialized = 0;
-int _Py_Initialized = 0;

 int
 _Py_IsCoreInitialized(void)
 {
-    return _Py_CoreInitialized;
+    return _PyRuntime.core_initialized;
 }

 int
 Py_IsInitialized(void)
 {
-    return _Py_Initialized;
+    return _PyRuntime.initialized;
 }

 /* Helper to allow an embedding application to override the normal
@ -544,14 +564,16 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
    _PyCoreConfig core_config = _PyCoreConfig_INIT;
    _PyMainInterpreterConfig preinit_config = _PyMainInterpreterConfig_INIT;

+    _PyRuntime_Initialize();
+
    if (config != NULL) {
        core_config = *config;
    }

-    if (_Py_Initialized) {
+    if (_PyRuntime.initialized) {
        Py_FatalError("Py_InitializeCore: main interpreter already initialized");
    }
-    if (_Py_CoreInitialized) {
+    if (_PyRuntime.core_initialized) {
        Py_FatalError("Py_InitializeCore: runtime core already initialized");
    }

@ -564,7 +586,14 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
     * threads still hanging around from a previous Py_Initialize/Finalize
     * pair :(
     */
-    _Py_Finalizing = NULL;
+    _PyRuntime.finalizing = NULL;
+
+    if (_PyMem_SetupAllocators(core_config.allocator) < 0) {
+        fprintf(stderr,
+            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n",
+            core_config.allocator);
+        exit(1);
+    }

 #ifdef __ANDROID__
    /* Passing "" to setlocale() on Android requests the C locale rather
@ -606,7 +635,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
        Py_HashRandomizationFlag = 1;
    }

-    _PyInterpreterState_Init();
+    _PyInterpreterState_Enable(&_PyRuntime);
    interp = PyInterpreterState_New();
    if (interp == NULL)
        Py_FatalError("Py_InitializeCore: can't make main interpreter");
@ -698,7 +727,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
    }

    /* Only when we get here is the runtime core fully initialized */
-    _Py_CoreInitialized = 1;
+    _PyRuntime.core_initialized = 1;
 }

 /* Read configuration settings from standard locations
@ -739,10 +768,10 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
    PyInterpreterState *interp;
    PyThreadState *tstate;

-    if (!_Py_CoreInitialized) {
+    if (!_PyRuntime.core_initialized) {
        Py_FatalError("Py_InitializeMainInterpreter: runtime core not initialized");
    }
-    if (_Py_Initialized) {
+    if (_PyRuntime.initialized) {
        Py_FatalError("Py_InitializeMainInterpreter: main interpreter already initialized");
    }

@ -763,7 +792,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
         * This means anything which needs support from extension modules
         * or pure Python code in the standard library won't work.
         */
-        _Py_Initialized = 1;
+        _PyRuntime.initialized = 1;
        return 0;
    }
    /* TODO: Report exceptions rather than fatal errors below here */
@ -808,7 +837,7 @@ int _Py_InitializeMainInterpreter(const _PyMainInterpreterConfig *config)
        Py_XDECREF(warnings_module);
    }

-    _Py_Initialized = 1;
+    _PyRuntime.initialized = 1;

    if (!Py_NoSiteFlag)
        initsite(); /* Module site */
@ -924,7 +953,7 @@ Py_FinalizeEx(void)
    PyThreadState *tstate;
    int status = 0;

-    if (!_Py_Initialized)
+    if (!_PyRuntime.initialized)
        return status;

    wait_for_thread_shutdown();
@ -946,9 +975,9 @@ Py_FinalizeEx(void)

    /* Remaining threads (e.g. daemon threads) will automatically exit
       after taking the GIL (in PyEval_RestoreThread()). */
-    _Py_Finalizing = tstate;
-    _Py_Initialized = 0;
-    _Py_CoreInitialized = 0;
+    _PyRuntime.finalizing = tstate;
+    _PyRuntime.initialized = 0;
+    _PyRuntime.core_initialized = 0;

    /* Flush sys.stdout and sys.stderr */
    if (flush_std_files() < 0) {
@ -1110,6 +1139,7 @@ Py_FinalizeEx(void)
 #endif

    call_ll_exitfuncs();
+    _PyRuntime_Finalize();
    return status;
 }

@ -1139,7 +1169,7 @@ Py_NewInterpreter(void)
    PyThreadState *tstate, *save_tstate;
    PyObject *bimod, *sysmod;

-    if (!_Py_Initialized)
+    if (!_PyRuntime.initialized)
        Py_FatalError("Py_NewInterpreter: call Py_Initialize first");

 #ifdef WITH_THREAD
@ -1854,20 +1884,19 @@ exit:
 #  include "pythread.h"
 #endif

-static void (*pyexitfunc)(void) = NULL;
 /* For the atexit module. */
 void _Py_PyAtExit(void (*func)(void))
 {
-    pyexitfunc = func;
+    _PyRuntime.pyexitfunc = func;
 }

 static void
 call_py_exitfuncs(void)
 {
-    if (pyexitfunc == NULL)
+    if (_PyRuntime.pyexitfunc == NULL)
        return;

-    (*pyexitfunc)();
+    (*_PyRuntime.pyexitfunc)();
    PyErr_Clear();
 }

@ -1900,22 +1929,19 @@ wait_for_thread_shutdown(void)
 }

 #define NEXITFUNCS 32
-static void (*exitfuncs[NEXITFUNCS])(void);
-static int nexitfuncs = 0;
-
 int Py_AtExit(void (*func)(void))
 {
-    if (nexitfuncs >= NEXITFUNCS)
+    if (_PyRuntime.nexitfuncs >= NEXITFUNCS)
        return -1;
-    exitfuncs[nexitfuncs++] = func;
+    _PyRuntime.exitfuncs[_PyRuntime.nexitfuncs++] = func;
    return 0;
 }

 static void
 call_ll_exitfuncs(void)
 {
-    while (nexitfuncs > 0)
-        (*exitfuncs[--nexitfuncs])();
+    while (_PyRuntime.nexitfuncs > 0)
+        (*_PyRuntime.exitfuncs[--_PyRuntime.nexitfuncs])();

    fflush(stdout);
    fflush(stderr);
--- a/Python/pystate.c
+++ b/Python/pystate.c
@ -34,55 +34,66 @@ to avoid the expense of doing their own locking).
 extern "C" {
 #endif

-int _PyGILState_check_enabled = 1;
+/* Bring *runtime to its initial state: zero the whole struct, run the
+   object, memory, GC and ceval initializers on their sub-states, set the
+   GIL-state defaults, and (in WITH_THREAD builds) allocate the lock that
+   guards the interpreter list.  Failure to allocate that lock is fatal.
+   next_id is left at -1 so that creating an interpreter before
+   _PyInterpreterState_Enable() has run is reported as an error. */
+void
+_PyRuntimeState_Init(_PyRuntimeState *runtime)
+{
+    /* `{0}` rather than `{}`: an empty initializer list is not valid
+       ISO C before C23. */
+    _PyRuntimeState initial = {0};
+    *runtime = initial;
+
+    _PyObject_Initialize(&runtime->obj);
+    _PyMem_Initialize(&runtime->mem);
+    _PyGC_Initialize(&runtime->gc);
+    _PyEval_Initialize(&runtime->ceval);
+
+    runtime->gilstate.check_enabled = 1;
+    runtime->gilstate.autoTLSkey = -1;

 #ifdef WITH_THREAD
-#include "pythread.h"
-static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */
-#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock()))
-#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK)
-#define HEAD_UNLOCK() PyThread_release_lock(head_mutex)
+    runtime->interpreters.mutex = PyThread_allocate_lock();
+    if (runtime->interpreters.mutex == NULL)
+        Py_FatalError("Can't initialize threads for interpreter");
+#endif
+    runtime->interpreters.next_id = -1;
+}

-/* The single PyInterpreterState used by this process'
-   GILState implementation
-*/
-/* TODO: Given interp_main, it may be possible to kill this ref */
-static PyInterpreterState *autoInterpreterState = NULL;
-static int autoTLSkey = -1;
+/* Release what _PyRuntimeState_Init() acquired.  In WITH_THREAD builds
+   that is the interpreter-list lock; the field is reset to NULL after
+   the lock is freed.  Nothing is done if no lock is currently held in
+   the field. */
+void
+_PyRuntimeState_Fini(_PyRuntimeState *runtime)
+{
+#ifdef WITH_THREAD
+    if (runtime->interpreters.mutex == NULL) {
+        return;
+    }
+    PyThread_free_lock(runtime->interpreters.mutex);
+    runtime->interpreters.mutex = NULL;
+#endif
+}
+
+#ifdef WITH_THREAD
+#define HEAD_LOCK() PyThread_acquire_lock(_PyRuntime.interpreters.mutex, \
+                                          WAIT_LOCK)
+#define HEAD_UNLOCK() PyThread_release_lock(_PyRuntime.interpreters.mutex)
 #else
-#define HEAD_INIT() /* Nothing */
 #define HEAD_LOCK() /* Nothing */
 #define HEAD_UNLOCK() /* Nothing */
 #endif

-static PyInterpreterState *interp_head = NULL;
-static PyInterpreterState *interp_main = NULL;
-
-/* Assuming the current thread holds the GIL, this is the
-   PyThreadState for the current thread. */
-_Py_atomic_address _PyThreadState_Current = {0};
-PyThreadFrameGetter _PyThreadState_GetFrame = NULL;
-
 #ifdef WITH_THREAD
 static void _PyGILState_NoteThreadState(PyThreadState* tstate);
 #endif

-/* _next_interp_id is an auto-numbered sequence of small integers.
-   It gets initialized in _PyInterpreterState_Init(), which is called
-   in Py_Initialize(), and used in PyInterpreterState_New().  A negative
-   interpreter ID indicates an error occurred.  The main interpreter
-   will always have an ID of 0.  Overflow results in a RuntimeError.
-   If that becomes a problem later then we can adjust, e.g. by using
-   a Python int.
-
-   We initialize this to -1 so that the pre-Py_Initialize() value
-   results in an error. */
-static int64_t _next_interp_id = -1;
-
 void
-_PyInterpreterState_Init(void)
+_PyInterpreterState_Enable(_PyRuntimeState *runtime)
 {
-    _next_interp_id = 0;
+    /* Interpreter IDs start at 0; a negative next_id means "not enabled". */
+    runtime->interpreters.next_id = 0;
+#ifdef WITH_THREAD
+    /* _PyRuntimeState_Init() runs only once per process (see
+       _PyRuntime_Initialize()), so the interpreter-list lock may be
+       missing here; allocate it if so. */
+    if (runtime->interpreters.mutex != NULL) {
+        return;
+    }
+    runtime->interpreters.mutex = PyThread_allocate_lock();
+    if (runtime->interpreters.mutex == NULL) {
+        Py_FatalError("Can't initialize threads for interpreter");
+    }
+#endif
 }

 PyInterpreterState *
@ -92,16 +103,16 @@ PyInterpreterState_New(void)
                                 PyMem_RawMalloc(sizeof(PyInterpreterState));

    if (interp != NULL) {
-        HEAD_INIT();
-#ifdef WITH_THREAD
-        if (head_mutex == NULL)
-            Py_FatalError("Can't initialize threads for interpreter");
-#endif
        interp->modules_by_index = NULL;
        interp->sysdict = NULL;
        interp->builtins = NULL;
        interp->builtins_copy = NULL;
        interp->tstate_head = NULL;
+        interp->check_interval = 100;
+        interp->warnoptions = NULL;
+        interp->xoptions = NULL;
+        interp->num_threads = 0;
+        interp->pythread_stacksize = 0;
        interp->codec_search_path = NULL;
        interp->codec_search_cache = NULL;
        interp->codec_error_registry = NULL;
@ -125,19 +136,19 @@ PyInterpreterState_New(void)
 #endif

        HEAD_LOCK();
-        interp->next = interp_head;
-        if (interp_main == NULL) {
-            interp_main = interp;
+        interp->next = _PyRuntime.interpreters.head;
+        if (_PyRuntime.interpreters.main == NULL) {
+            _PyRuntime.interpreters.main = interp;
        }
-        interp_head = interp;
-        if (_next_interp_id < 0) {
+        _PyRuntime.interpreters.head = interp;
+        if (_PyRuntime.interpreters.next_id < 0) {
            /* overflow or Py_Initialize() not called! */
            PyErr_SetString(PyExc_RuntimeError,
                            "failed to get an interpreter ID");
            interp = NULL;
        } else {
-            interp->id = _next_interp_id;
-            _next_interp_id += 1;
+            interp->id = _PyRuntime.interpreters.next_id;
+            _PyRuntime.interpreters.next_id += 1;
        }
        HEAD_UNLOCK();
    }
@ -189,7 +200,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
    PyInterpreterState **p;
    zapthreads(interp);
    HEAD_LOCK();
-    for (p = &interp_head; ; p = &(*p)->next) {
+    for (p = &_PyRuntime.interpreters.head; ; p = &(*p)->next) {
        if (*p == NULL)
            Py_FatalError(
                "PyInterpreterState_Delete: invalid interp");
@ -199,19 +210,13 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
    if (interp->tstate_head != NULL)
        Py_FatalError("PyInterpreterState_Delete: remaining threads");
    *p = interp->next;
-    if (interp_main == interp) {
-        interp_main = NULL;
-        if (interp_head != NULL)
+    if (_PyRuntime.interpreters.main == interp) {
+        _PyRuntime.interpreters.main = NULL;
+        if (_PyRuntime.interpreters.head != NULL)
            Py_FatalError("PyInterpreterState_Delete: remaining subinterpreters");
    }
    HEAD_UNLOCK();
    PyMem_RawFree(interp);
-#ifdef WITH_THREAD
-    if (interp_head == NULL && head_mutex != NULL) {
-        PyThread_free_lock(head_mutex);
-        head_mutex = NULL;
-    }
-#endif
 }


@ -499,8 +504,11 @@ PyThreadState_Delete(PyThreadState *tstate)
    if (tstate == GET_TSTATE())
        Py_FatalError("PyThreadState_Delete: tstate is still current");
 #ifdef WITH_THREAD
-    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
-        PyThread_delete_key_value(autoTLSkey);
+    if (_PyRuntime.gilstate.autoInterpreterState &&
+        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
+    {
+        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
+    }
 #endif /* WITH_THREAD */
    tstate_delete_common(tstate);
 }
@ -515,8 +523,11 @@ PyThreadState_DeleteCurrent()
        Py_FatalError(
            "PyThreadState_DeleteCurrent: no current tstate");
    tstate_delete_common(tstate);
-    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
-        PyThread_delete_key_value(autoTLSkey);
+    if (_PyRuntime.gilstate.autoInterpreterState &&
+        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
+    {
+        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
+    }
    SET_TSTATE(NULL);
    PyEval_ReleaseLock();
 }
@ -676,13 +687,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
 PyInterpreterState *
 PyInterpreterState_Head(void)
 {
-    return interp_head;
+    return _PyRuntime.interpreters.head;
 }

 PyInterpreterState *
 PyInterpreterState_Main(void)
 {
-    return interp_main;
+    return _PyRuntime.interpreters.main;
 }

 PyInterpreterState *
@ -722,7 +733,7 @@ _PyThread_CurrentFrames(void)
     * need to grab head_mutex for the duration.
     */
    HEAD_LOCK();
-    for (i = interp_head; i != NULL; i = i->next) {
+    for (i = _PyRuntime.interpreters.head; i != NULL; i = i->next) {
        PyThreadState *t;
        for (t = i->tstate_head; t != NULL; t = t->next) {
            PyObject *id;
@ -774,11 +785,11 @@ void
 _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 {
    assert(i && t); /* must init with valid states */
-    autoTLSkey = PyThread_create_key();
-    if (autoTLSkey == -1)
+    _PyRuntime.gilstate.autoTLSkey = PyThread_create_key();
+    if (_PyRuntime.gilstate.autoTLSkey == -1)
        Py_FatalError("Could not allocate TLS entry");
-    autoInterpreterState = i;
-    assert(PyThread_get_key_value(autoTLSkey) == NULL);
+    _PyRuntime.gilstate.autoInterpreterState = i;
+    assert(PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL);
    assert(t->gilstate_counter == 0);

    _PyGILState_NoteThreadState(t);
@ -787,15 +798,15 @@ _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 PyInterpreterState *
 _PyGILState_GetInterpreterStateUnsafe(void)
 {
-    return autoInterpreterState;
+    return _PyRuntime.gilstate.autoInterpreterState;
 }

 void
 _PyGILState_Fini(void)
 {
-    PyThread_delete_key(autoTLSkey);
-    autoTLSkey = -1;
-    autoInterpreterState = NULL;
+    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
+    _PyRuntime.gilstate.autoTLSkey = -1;
+    _PyRuntime.gilstate.autoInterpreterState = NULL;
 }

 /* Reset the TLS key - called by PyOS_AfterFork_Child().
@ -806,17 +817,19 @@ void
 _PyGILState_Reinit(void)
 {
 #ifdef WITH_THREAD
-    head_mutex = NULL;
-    HEAD_INIT();
+    _PyRuntime.interpreters.mutex = PyThread_allocate_lock();
+    if (_PyRuntime.interpreters.mutex == NULL)
+        Py_FatalError("Can't initialize threads for interpreter");
 #endif
    PyThreadState *tstate = PyGILState_GetThisThreadState();
-    PyThread_delete_key(autoTLSkey);
-    if ((autoTLSkey = PyThread_create_key()) == -1)
+    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
+    if ((_PyRuntime.gilstate.autoTLSkey = PyThread_create_key()) == -1)
        Py_FatalError("Could not allocate TLS entry");

    /* If the thread had an associated auto thread state, reassociate it with
     * the new key. */
-    if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
+    if (tstate && PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
+                                         (void *)tstate) < 0)
        Py_FatalError("Couldn't create autoTLSkey mapping");
 }

@ -831,7 +844,7 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
    /* If autoTLSkey isn't initialized, this must be the very first
       threadstate created in Py_Initialize().  Don't do anything for now
       (we'll be back here when _PyGILState_Init is called). */
-    if (!autoInterpreterState)
+    if (!_PyRuntime.gilstate.autoInterpreterState)
        return;

    /* Stick the thread state for this thread in thread local storage.
@ -846,10 +859,14 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
       The first thread state created for that given OS level thread will
       "win", which seems reasonable behaviour.
    */
-    if (PyThread_get_key_value(autoTLSkey) == NULL) {
-        if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
+    if (PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL) {
+        if ((PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
+                                    (void *)tstate)
+             ) < 0)
+        {
            Py_FatalError("Couldn't create autoTLSkey mapping");
        }
+    }

    /* PyGILState_Release must not try to delete this thread state. */
    tstate->gilstate_counter = 1;
@ -859,9 +876,10 @@ _PyGILState_NoteThreadState(PyThreadState* tstate)
 PyThreadState *
 PyGILState_GetThisThreadState(void)
 {
-    if (autoInterpreterState == NULL)
+    if (_PyRuntime.gilstate.autoInterpreterState == NULL)
        return NULL;
-    return (PyThreadState *)PyThread_get_key_value(autoTLSkey);
+    return (PyThreadState *)PyThread_get_key_value(
+                _PyRuntime.gilstate.autoTLSkey);
 }

 int
@ -872,7 +890,7 @@ PyGILState_Check(void)
    if (!_PyGILState_check_enabled)
        return 1;

-    if (autoTLSkey == -1)
+    if (_PyRuntime.gilstate.autoTLSkey == -1)
        return 1;

    tstate = GET_TSTATE();
@ -892,8 +910,10 @@ PyGILState_Ensure(void)
       spells out other issues.  Embedders are expected to have
       called Py_Initialize() and usually PyEval_InitThreads().
    */
-    assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */
-    tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey);
+    /* If this assert fails, Py_Initialize() hasn't been called! */
+    assert(_PyRuntime.gilstate.autoInterpreterState);
+    tcur = (PyThreadState *)PyThread_get_key_value(
+                _PyRuntime.gilstate.autoTLSkey);
    if (tcur == NULL) {
        /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
           called from a new thread for the first time, we need to create the
@ -901,7 +921,7 @@ PyGILState_Ensure(void)
        PyEval_InitThreads();

        /* Create a new thread state for this thread */
-        tcur = PyThreadState_New(autoInterpreterState);
+        tcur = PyThreadState_New(_PyRuntime.gilstate.autoInterpreterState);
        if (tcur == NULL)
            Py_FatalError("Couldn't create thread-state for new thread");
        /* This is our thread state!  We'll need to delete it in the
@ -926,7 +946,7 @@ void
 PyGILState_Release(PyGILState_STATE oldstate)
 {
    PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value(
-                                                            autoTLSkey);
+                                _PyRuntime.gilstate.autoTLSkey);
    if (tcur == NULL)
        Py_FatalError("auto-releasing thread-state, "
                      "but no thread-state for this thread");
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@ -519,8 +519,6 @@ Return the profiling function set with sys.setprofile.\n\
 See the profiler chapter in the library manual."
 );

-static int _check_interval = 100;
-
 static PyObject *
 sys_setcheckinterval(PyObject *self, PyObject *args)
 {
@ -529,7 +527,8 @@ sys_setcheckinterval(PyObject *self, PyObject *args)
                     "are deprecated.  Use sys.setswitchinterval() "
                     "instead.", 1) < 0)
        return NULL;
-    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval))
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &interp->check_interval))
        return NULL;
    Py_RETURN_NONE;
 }
@ -549,7 +548,8 @@ sys_getcheckinterval(PyObject *self, PyObject *args)
                     "are deprecated.  Use sys.getswitchinterval() "
                     "instead.", 1) < 0)
        return NULL;
-    return PyLong_FromLong(_check_interval);
+    PyInterpreterState *interp = PyThreadState_GET()->interp;
+    return PyLong_FromLong(interp->check_interval);
 }

 PyDoc_STRVAR(getcheckinterval_doc,
@ -1339,7 +1339,7 @@ Clear the internal type lookup cache.");
 static PyObject *
 sys_is_finalizing(PyObject* self, PyObject* args)
 {
-    return PyBool_FromLong(_Py_Finalizing != NULL);
+    return PyBool_FromLong(_Py_IS_FINALIZING());
 }

 PyDoc_STRVAR(is_finalizing_doc,
@ -1479,11 +1479,24 @@ list_builtin_module_names(void)
    return list;
 }

-static PyObject *warnoptions = NULL;
+static PyObject *
+get_warnoptions(void)
+{
+    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
+    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
+        Py_XDECREF(warnoptions);
+        warnoptions = PyList_New(0);
+        if (warnoptions == NULL)
+            return NULL;
+        PyThreadState_GET()->interp->warnoptions = warnoptions;
+    }
+    return warnoptions;
+}

 void
 PySys_ResetWarnOptions(void)
 {
+    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
    if (warnoptions == NULL || !PyList_Check(warnoptions))
        return;
    PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL);
@ -1492,12 +1505,9 @@ PySys_ResetWarnOptions(void)
 void
 PySys_AddWarnOptionUnicode(PyObject *unicode)
 {
-    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
-        Py_XDECREF(warnoptions);
-        warnoptions = PyList_New(0);
+    PyObject *warnoptions = get_warnoptions();
    if (warnoptions == NULL)
        return;
-    }
    PyList_Append(warnoptions, unicode);
 }

@ -1515,17 +1525,20 @@ PySys_AddWarnOption(const wchar_t *s)
 int
 PySys_HasWarnOptions(void)
 {
+    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
    return (warnoptions != NULL && (PyList_Size(warnoptions) > 0)) ? 1 : 0;
 }

-static PyObject *xoptions = NULL;
-
 static PyObject *
 get_xoptions(void)
 {
+    PyObject *xoptions = PyThreadState_GET()->interp->xoptions;
    if (xoptions == NULL || !PyDict_Check(xoptions)) {
        Py_XDECREF(xoptions);
        xoptions = PyDict_New();
+        if (xoptions == NULL)
+            return NULL;
+        PyThreadState_GET()->interp->xoptions = xoptions;
    }
    return xoptions;
 }
@ -2130,17 +2143,15 @@ _PySys_EndInit(PyObject *sysdict)
    SET_SYS_FROM_STRING_INT_RESULT("base_exec_prefix",
                        PyUnicode_FromWideChar(Py_GetExecPrefix(), -1));

-    if (warnoptions == NULL) {
-        warnoptions = PyList_New(0);
+    PyObject *warnoptions = get_warnoptions();
    if (warnoptions == NULL)
        return -1;
-    }
+    SET_SYS_FROM_STRING_BORROW_INT_RESULT("warnoptions", warnoptions);

-    SET_SYS_FROM_STRING_INT_RESULT("warnoptions",
-                                   PyList_GetSlice(warnoptions,
-                                                   0, Py_SIZE(warnoptions)));
-
-    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", get_xoptions());
+    PyObject *xoptions = get_xoptions();
+    if (xoptions == NULL)
+        return -1;
+    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", xoptions);

    if (PyErr_Occurred())
        return -1;
--- a/Python/thread.c
+++ b/Python/thread.c
@ -76,11 +76,6 @@ PyThread_init_thread(void)
    PyThread__init_thread();
 }

-/* Support for runtime thread stack size tuning.
-   A value of 0 means using the platform's default stack size
-   or the size specified by the THREAD_STACK_SIZE macro. */
-static size_t _pythread_stacksize = 0;
-
 #if defined(_POSIX_THREADS)
 #   define PYTHREAD_NAME "pthread"
 #   include "thread_pthread.h"
@ -96,7 +91,7 @@ static size_t _pythread_stacksize = 0;
 size_t
 PyThread_get_stacksize(void)
 {
-    return _pythread_stacksize;
+    return PyThreadState_GET()->interp->pythread_stacksize;
 }

 /* Only platforms defining a THREAD_SET_STACKSIZE() macro
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@ -189,9 +189,10 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
        return PYTHREAD_INVALID_THREAD_ID;
    obj->func = func;
    obj->arg = arg;
+    PyThreadState *tstate = PyThreadState_GET();
+    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
    hThread = (HANDLE)_beginthreadex(0,
-                      Py_SAFE_DOWNCAST(_pythread_stacksize,
-                                       Py_ssize_t, unsigned int),
+                      Py_SAFE_DOWNCAST(stacksize, Py_ssize_t, unsigned int),
                      bootstrap, obj,
                      0, &threadID);
    if (hThread == 0) {
@ -332,13 +333,13 @@ _pythread_nt_set_stacksize(size_t size)
 {
    /* set to default */
    if (size == 0) {
-        _pythread_stacksize = 0;
+        PyThreadState_GET()->interp->pythread_stacksize = 0;
        return 0;
    }

    /* valid range? */
    if (size >= THREAD_MIN_STACKSIZE && size < THREAD_MAX_STACKSIZE) {
-        _pythread_stacksize = size;
+        PyThreadState_GET()->interp->pythread_stacksize = size;
        return 0;
    }

--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@ -205,8 +205,9 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
        return PYTHREAD_INVALID_THREAD_ID;
 #endif
 #if defined(THREAD_STACK_SIZE)
-    tss = (_pythread_stacksize != 0) ? _pythread_stacksize
-                                     : THREAD_STACK_SIZE;
+    PyThreadState *tstate = PyThreadState_GET();
+    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
+    tss = (stacksize != 0) ? stacksize : THREAD_STACK_SIZE;
    if (tss != 0) {
        if (pthread_attr_setstacksize(&attrs, tss) != 0) {
            pthread_attr_destroy(&attrs);
@ -578,7 +579,7 @@ _pythread_pthread_set_stacksize(size_t size)

    /* set to default */
    if (size == 0) {
-        _pythread_stacksize = 0;
+        PyThreadState_GET()->interp->pythread_stacksize = 0;
        return 0;
    }

@ -595,7 +596,7 @@ _pythread_pthread_set_stacksize(size_t size)
            rc = pthread_attr_setstacksize(&attrs, size);
            pthread_attr_destroy(&attrs);
            if (rc == 0) {
-                _pythread_stacksize = size;
+                PyThreadState_GET()->interp->pythread_stacksize = size;
                return 0;
            }
        }
--- a/Tools/c-globals/README
+++ b/Tools/c-globals/README
@ -0,0 +1,41 @@
+#######################################
+# C Globals and CPython Runtime State.
+
+CPython's C code makes extensive use of global variables.  Each global
+falls into one of several categories:
+
+* (effectively) constants (incl. static types)
+* globals used exclusively in main or in the REPL
+* freelists, caches, and counters
+* process-global state
+* module state
+* Python runtime state
+
+The ignored-globals.txt file is organized similarly.  Of the different
+categories, the last two are problematic and generally should not exist
+in the codebase.
+
+Globals that hold module state (i.e. in Modules/*.c) cause problems
+when multiple interpreters are in use.  For more info, see PEP 3121,
+which addresses the situation for extension modules in general.
+
+Globals in the last category should be avoided as well.  The problem
+isn't with the Python runtime having state.  Rather, the problem is with
+that state being spread throughout the codebase in dozens of individual
+globals.  Unlike the other globals, the runtime state represents a set
+of values that are constantly shifting in a complex way.  When they are
+spread out it's harder to get a clear picture of what the runtime
+involves.  Furthermore, when they are spread out it complicates efforts
+that change the runtime.
+
+Consequently, the globals for Python's runtime state have been
+consolidated under a single top-level _PyRuntime global. No new globals
+should be added for runtime state.  Instead, they should be added to
+_PyRuntimeState or one of its sub-structs.  The check-c-globals script
+should be run to ensure that no new globals have been added:
+
+  python3 Tools/c-globals/check-c-globals.py
+
+If it reports any globals then they should be resolved.  If the globals
+are runtime state then they should be folded into _PyRuntimeState.
+Otherwise they should be added to ignored-globals.txt.
--- a/Tools/c-globals/check-c-globals.py
+++ b/Tools/c-globals/check-c-globals.py
@ -0,0 +1,446 @@
+
+from collections import namedtuple
+import glob
+import os.path
+import re
+import shutil
+import sys
+import subprocess
+
+
+VERBOSITY = 2
+
+C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
+TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
+ROOT_DIR = os.path.dirname(TOOLS_DIR)
+GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')
+
+SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']
+
+CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
+
+
+IGNORED_VARS = {
+        '_DYNAMIC',
+        '_GLOBAL_OFFSET_TABLE_',
+        '__JCR_LIST__',
+        '__JCR_END__',
+        '__TMC_END__',
+        '__bss_start',
+        '__data_start',
+        '__dso_handle',
+        '_edata',
+        '_end',
+        }
+
+
+def find_capi_vars(root):
+    capi_vars = {}
+    for dirname in SOURCE_DIRS:
+        for filename in glob.glob(os.path.join(ROOT_DIR, dirname, '**/*.[hc]'),
+                                  recursive=True):
+            with open(filename) as file:
+                for name in _find_capi_vars(file):
+                    if name in capi_vars:
+                        assert not filename.endswith('.c')
+                        assert capi_vars[name].endswith('.c')
+                    capi_vars[name] = filename
+    return capi_vars
+
+
+def _find_capi_vars(lines):
+    for line in lines:
+        if not line.startswith('PyAPI_DATA'):
+            continue
+        assert '{' not in line
+        match = CAPI_REGEX.match(line)
+        assert match
+        names, = match.groups()
+        for name in names.split(', '):
+            yield name
+
+
+def _read_global_names(filename):
+    # These variables are shared between all interpreters in the process.
+    with open(filename) as file:
+        return {line.partition('#')[0].strip()
+                for line in file
+                if line.strip() and not line.startswith('#')}
+
+
+def _is_global_var(name, globalnames):
+    if _is_autogen_var(name):
+        return True
+    if _is_type_var(name):
+        return True
+    if _is_module(name):
+        return True
+    if _is_exception(name):
+        return True
+    if _is_compiler(name):
+        return True
+    return name in globalnames
+
+
+def _is_autogen_var(name):
+    return (
+        name.startswith('PyId_') or
+        '.' in name or
+        # Objects/typeobject.c
+        name.startswith('op_id.') or
+        name.startswith('rop_id.') or
+        # Python/graminit.c
+        name.startswith('arcs_') or
+        name.startswith('states_')
+        )
+
+
+def _is_type_var(name):
+    if name.endswith(('Type', '_Type', '_type')):  # XXX Always a static type?
+        return True
+    if name.endswith('_desc'):  # for structseq types
+        return True
+    return (
+        name.startswith('doc_') or
+        name.endswith(('_doc', '__doc__', '_docstring')) or
+        name.endswith('_methods') or
+        name.endswith('_fields') or
+        name.endswith(('_memberlist', '_members')) or
+        name.endswith('_slots') or
+        name.endswith(('_getset', '_getsets', '_getsetlist')) or
+        name.endswith('_as_mapping') or
+        name.endswith('_as_number') or
+        name.endswith('_as_sequence') or
+        name.endswith('_as_buffer') or
+        name.endswith('_as_async')
+        )
+
+
+def _is_module(name):
+    if name.endswith(('_functions', 'Methods', '_Methods')):
+        return True
+    if name == 'module_def':
+        return True
+    if name == 'initialized':
+        return True
+    return name.endswith(('module', '_Module'))
+
+
+def _is_exception(name):
+    # Other vars are enumerated in ignored-globals.txt.
+    if not name.startswith(('PyExc_', '_PyExc_')):
+        return False
+    return name.endswith(('Error', 'Warning'))
+
+
+def _is_compiler(name):
+    return (
+        # Python/Python-ast.c
+        name.endswith('_type') or
+        name.endswith('_singleton') or
+        name.endswith('_attributes')
+        )
+
+
+class Var(namedtuple('Var', 'name kind scope capi filename')):
+
+    @classmethod
+    def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
+        _, _, line = line.partition(' ')  # strip off the address
+        line = line.strip()
+        kind, _, line = line.partition(' ')
+        if kind in ignored or ():
+            return None
+        elif kind not in expected or ():
+            raise RuntimeError('unsupported NM type {!r}'.format(kind))
+
+        name, _, filename = line.partition('\t')
+        name = name.strip()
+        if _is_autogen_var(name):
+            return None
+        if _is_global_var(name, globalnames):
+            scope = 'global'
+        else:
+            scope = None
+        capi = (name in capi_vars or ())
+        if filename:
+            filename = os.path.relpath(filename.partition(':')[0])
+        return cls(name, kind, scope, capi, filename or '~???~')
+
+    @property
+    def external(self):
+        return self.kind.isupper()
+
+
+def find_vars(root, globals_filename=GLOBALS_FILE):
+    python = os.path.join(root, 'python')
+    if not os.path.exists(python):
+        raise RuntimeError('python binary missing (need to build it first?)')
+    capi_vars = find_capi_vars(root)
+    globalnames = _read_global_names(globals_filename)
+
+    nm = shutil.which('nm')
+    if nm is None:
+        # XXX Use dumpbin.exe /SYMBOLS on Windows.
+        raise NotImplementedError
+    else:
+        yield from (var
+                    for var in _find_var_symbols(python, nm, capi_vars,
+                                                 globalnames)
+                    if var.name not in IGNORED_VARS)
+
+
+NM_FUNCS = set('Tt')
+NM_PUBLIC_VARS = set('BD')
+NM_PRIVATE_VARS = set('bd')
+NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
+NM_DATA = set('Rr')
+NM_OTHER = set('ACGgiINpSsuUVvWw-?')
+NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER
+
+
+def _find_var_symbols(python, nm, capi_vars, globalnames):
+    args = [nm,
+            '--line-numbers',
+            python]
+    out = subprocess.check_output(args)
+    for line in out.decode('utf-8').splitlines():
+        var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
+        if var is None:
+            continue
+        yield var
+
+
+#######################################
+
+class Filter(namedtuple('Filter', 'name op value action')):
+
+    @classmethod
+    def parse(cls, raw):
+        action = '+'
+        if raw.startswith(('+', '-')):
+            action = raw[0]
+            raw = raw[1:]
+        # XXX Support < and >?
+        name, op, value = raw.partition('=')
+        return cls(name, op, value, action)
+
+    def check(self, var):
+        value = getattr(var, self.name, None)
+        if not self.op:
+            matched = bool(value)
+        elif self.op == '=':
+            matched = (value == self.value)
+        else:
+            raise NotImplementedError
+
+        if self.action == '+':
+            return matched
+        elif self.action == '-':
+            return not matched
+        else:
+            raise NotImplementedError
+
+
+def filter_var(var, filters):
+    for filter in filters:
+        if not filter.check(var):
+            return False
+    return True
+
+
+def make_sort_key(spec):
+    columns = [(col.strip('_'), '_' if col.startswith('_') else '')
+               for col in spec]
+    def sort_key(var):
+        return tuple(getattr(var, col).lstrip(prefix)
+                     for col, prefix in columns)
+    return sort_key
+
+
+def make_groups(allvars, spec):
+    group = spec
+    groups = {}
+    for var in allvars:
+        value = getattr(var, group)
+        key = '{}: {}'.format(group, value)
+        try:
+            groupvars = groups[key]
+        except KeyError:
+            groupvars = groups[key] = []
+        groupvars.append(var)
+    return groups
+
+
+def format_groups(groups, columns, fmts, widths):
+    for group in sorted(groups):
+        groupvars = groups[group]
+        yield '', 0
+        yield '  # {}'.format(group), 0
+        yield from format_vars(groupvars, columns, fmts, widths)
+
+
+def format_vars(allvars, columns, fmts, widths):
+    fmt = ' '.join(fmts[col] for col in columns)
+    fmt = ' ' + fmt.replace(' ', '   ') + ' '  # for div margin
+    header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
+    yield header, 0
+    div = ' '.join('-'*(widths[col]+2) for col in columns)
+    yield div, 0
+    for var in allvars:
+        values = (getattr(var, col) for col in columns)
+        row = fmt.format(*('X' if val is True else val or ''
+                           for val in values))
+        yield row, 1
+    yield div, 0
+
+
+#######################################
+
+COLUMNS = 'name,external,capi,scope,filename'
+COLUMN_NAMES = COLUMNS.split(',')
+
+COLUMN_WIDTHS = {col: len(col)
+                 for col in COLUMN_NAMES}
+COLUMN_WIDTHS.update({
+        'name': 50,
+        'scope': 7,
+        'filename': 40,
+        })
+COLUMN_FORMATS = {col: '{:%s}' % width
+                  for col, width in COLUMN_WIDTHS.items()}
+for col in COLUMN_FORMATS:
+    if COLUMN_WIDTHS[col] == len(col):
+        COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')
+
+
+def _parse_filters_arg(raw, error):
+    filters = []
+    for value in raw.split(','):
+        value=value.strip()
+        if not value:
+            continue
+        try:
+            filter = Filter.parse(value)
+            if filter.name not in COLUMN_NAMES:
+                raise Exception('unsupported column {!r}'.format(filter.name))
+        except Exception as e:
+            error('bad filter {!r}: {}'.format(raw, e))
+        filters.append(filter)
+    return filters
+
+
+def _parse_columns_arg(raw, error):
+    columns = raw.split(',')
+    for column in columns:
+        if column not in COLUMN_NAMES:
+            error('unsupported column {!r}'.format(column))
+    return columns
+
+
+def _parse_sort_arg(raw, error):
+    sort = raw.split(',')
+    for column in sort:
+        if column.lstrip('_') not in COLUMN_NAMES:
+            error('unsupported column {!r}'.format(column))
+    return sort
+
+
+def _parse_group_arg(raw, error):
+    if not raw:
+        return raw
+    group = raw
+    if group not in COLUMN_NAMES:
+        error('unsupported column {!r}'.format(group))
+    if group != 'filename':
+        error('unsupported group {!r}'.format(group))
+    return group
+
+
+def parse_args(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]
+
+    import argparse
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-v', '--verbose', action='count', default=0)
+    parser.add_argument('-q', '--quiet', action='count', default=0)
+
+    parser.add_argument('--filters', default='-scope',
+                        help='[[-]<COLUMN>[=<GLOB>]] ...')
+
+    parser.add_argument('--columns', default=COLUMNS,
+                        help='a comma-separated list of columns to show')
+    parser.add_argument('--sort', default='filename,_name',
+                        help='a comma-separated list of columns to sort')
+    parser.add_argument('--group',
+                        help='group by the given column name (- to not group)')
+
+    parser.add_argument('--rc-on-match', dest='rc', type=int)
+
+    parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)
+
+    args = parser.parse_args(argv)
+
+    verbose = vars(args).pop('verbose', 0)
+    quiet = vars(args).pop('quiet', 0)
+    args.verbosity = max(0, VERBOSITY + verbose - quiet)
+
+    if args.sort.startswith('filename') and not args.group:
+        args.group = 'filename'
+
+    if args.rc is None:
+        if '-scope=core' in args.filters or 'core' not in args.filters:
+            args.rc = 0
+        else:
+            args.rc = 1
+
+    args.filters = _parse_filters_arg(args.filters, parser.error)
+    args.columns = _parse_columns_arg(args.columns, parser.error)
+    args.sort = _parse_sort_arg(args.sort, parser.error)
+    args.group = _parse_group_arg(args.group, parser.error)
+
+    return args
+
+
+def main(root=ROOT_DIR, filename=GLOBALS_FILE,
+         filters=None, columns=COLUMN_NAMES, sort=None, group=None,
+         verbosity=VERBOSITY, rc=1):
+
+    log = lambda msg: ...
+    if verbosity >= 2:
+        log = lambda msg: print(msg)
+
+    allvars = (var
+               for var in find_vars(root, filename)
+               if filter_var(var, filters))
+    if sort:
+        allvars = sorted(allvars, key=make_sort_key(sort))
+
+    if group:
+        try:
+            columns.remove(group)
+        except ValueError:
+            pass
+        grouped = make_groups(allvars, group)
+        lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
+    else:
+        lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
+
+    total = 0
+    for line, count in lines:
+        total += count
+        log(line)
+    log('\ntotal: {}'.format(total))
+
+    if total and rc:
+        print('ERROR: found unsafe globals', file=sys.stderr)
+        return rc
+    return 0
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    sys.exit(
+            main(**vars(args)))
--- a/Tools/c-globals/ignored-globals.txt
+++ b/Tools/c-globals/ignored-globals.txt
@ -0,0 +1,494 @@
+# All variables declared here are shared between all interpreters
+# in a single process.  That means that they must not be changed
+# unless that change should apply to all interpreters.
+#
+# See check-c-globals.py.
+#
+# Many generic names are handled via the script:
+#
+# * most exceptions and all warnings are handled via _is_exception()
+# * for builtin modules, generic names are handled via _is_module()
+# * generic names for static types are handled via _is_type_var()
+# * AST vars are handled via _is_compiler()
+
+
+#######################################
+# main
+
+# Modules/getpath.c
+exec_prefix
+module_search_path
+prefix
+progpath
+
+# Modules/main.c
+orig_argc
+orig_argv
+
+# Python/getopt.c
+opt_ptr
+_PyOS_optarg
+_PyOS_opterr
+_PyOS_optind
+
+
+#######################################
+# REPL
+
+# Parser/myreadline.c
+PyOS_InputHook
+PyOS_ReadlineFunctionPointer
+_PyOS_ReadlineLock
+_PyOS_ReadlineTState
+
+
+#######################################
+# state
+
+# Python/dtoa.c
+p5s
+pmem_next  # very slight race
+private_mem  # very slight race
+
+# Python/import.c
+# For the moment the import lock stays global.  Ultimately there should
+# be a global lock for extension modules and a per-interpreter lock.
+import_lock
+import_lock_level
+import_lock_thread
+
+# Python/pylifecycle.c
+_PyRuntime
+
+
+#---------------------------------
+# module globals (PyObject)
+
+# Modules/_functoolsmodule.c
+kwd_mark
+
+# Modules/_localemodule.c
+Error
+
+# Modules/_threadmodule.c
+ThreadError
+
+# Modules/_tracemalloc.c
+unknown_filename
+
+# Modules/gcmodule.c
+gc_str
+
+# Modules/posixmodule.c
+billion
+posix_putenv_garbage
+
+# Modules/signalmodule.c
+DefaultHandler
+IgnoreHandler
+IntHandler
+ItimerError
+
+# Modules/zipimport.c
+ZipImportError
+zip_directory_cache
+
+
+#---------------------------------
+# module globals (other)
+
+# Modules/_tracemalloc.c
+allocators
+tables_lock
+tracemalloc_config
+tracemalloc_empty_traceback
+tracemalloc_filenames
+tracemalloc_peak_traced_memory
+tracemalloc_reentrant_key
+tracemalloc_traceback
+tracemalloc_tracebacks
+tracemalloc_traced_memory
+tracemalloc_traces
+
+# Modules/faulthandler.c
+fatal_error
+faulthandler_handlers
+old_stack
+stack
+thread
+user_signals
+
+# Modules/posixmodule.c
+posix_constants_confstr
+posix_constants_pathconf
+posix_constants_sysconf
+_stat_float_times  # deprecated, __main__-only
+structseq_new
+ticks_per_second
+
+# Modules/signalmodule.c
+Handlers  # main thread only
+is_tripped  # main thread only
+main_pid
+main_thread
+old_siginthandler
+wakeup_fd  # main thread only
+
+# Modules/zipimport.c
+zip_searchorder
+
+# Python/bltinmodule.c
+Py_FileSystemDefaultEncodeErrors
+Py_FileSystemDefaultEncoding
+Py_HasFileSystemDefaultEncoding
+
+# Python/sysmodule.c
+_PySys_ImplCacheTag
+_PySys_ImplName
+
+
+#---------------------------------
+# freelists
+
+# Modules/_collectionsmodule.c
+freeblocks
+numfreeblocks
+
+# Objects/classobject.c
+free_list
+numfree
+
+# Objects/dictobject.c
+free_list
+keys_free_list
+numfree
+numfreekeys
+
+# Objects/exceptions.c
+memerrors_freelist
+memerrors_numfree
+
+# Objects/floatobject.c
+free_list
+numfree
+
+# Objects/frameobject.c
+free_list
+numfree
+
+# Objects/genobject.c
+ag_asend_freelist
+ag_asend_freelist_free
+ag_value_freelist
+ag_value_freelist_free
+
+# Objects/listobject.c
+free_list
+numfree
+
+# Objects/methodobject.c
+free_list
+numfree
+
+# Objects/sliceobject.c
+slice_cache  # slight race
+
+# Objects/tupleobject.c
+free_list
+numfree
+
+# Python/dtoa.c
+freelist  # very slight race
+
+
+#---------------------------------
+# caches (PyObject)
+
+# Objects/typeobject.c
+method_cache  # only for static types
+next_version_tag  # only for static types
+
+# Python/dynload_shlib.c
+handles  # slight race during import
+nhandles  # slight race during import
+
+# Python/import.c
+extensions  # slight race on init during import
+
+
+#---------------------------------
+# caches (other)
+
+# Python/bootstrap_hash.c
+urandom_cache
+
+# Python/modsupport.c
+_Py_PackageContext  # Slight race during import!  Move to PyThreadState?
+
+
+#---------------------------------
+# counters
+
+# Objects/bytesobject.c
+null_strings
+one_strings
+
+# Objects/dictobject.c
+pydict_global_version
+
+# Objects/moduleobject.c
+max_module_number  # slight race during import
+
+
+#######################################
+# constants
+
+#---------------------------------
+# singletons
+
+# Objects/boolobject.c
+_Py_FalseStruct
+_Py_TrueStruct
+
+# Objects/object.c
+_Py_NoneStruct
+_Py_NotImplementedStruct
+
+# Objects/sliceobject.c
+_Py_EllipsisObject
+
+
+#---------------------------------
+# constants (other)
+
+# Modules/config.c
+_PyImport_Inittab
+
+# Objects/bytearrayobject.c
+_PyByteArray_empty_string
+
+# Objects/dictobject.c
+empty_keys_struct
+empty_values
+
+# Objects/floatobject.c
+detected_double_format
+detected_float_format
+double_format
+float_format
+
+# Objects/longobject.c
+_PyLong_DigitValue
+
+# Objects/object.c
+_Py_SwappedOp
+
+# Objects/obmalloc.c
+_PyMem_Debug
+
+# Objects/setobject.c
+_dummy_struct
+
+# Objects/structseq.c
+PyStructSequence_UnnamedField
+
+# Objects/typeobject.c
+name_op
+slotdefs  # almost
+slotdefs_initialized  # almost
+subtype_getsets_dict_only
+subtype_getsets_full
+subtype_getsets_weakref_only
+tp_new_methoddef
+
+# Objects/unicodeobject.c
+bloom_linebreak
+static_strings  # slight race
+
+# Parser/tokenizer.c
+_PyParser_TokenNames
+
+# Python/Python-ast.c
+alias_fields
+
+# Python/codecs.c
+Py_hexdigits
+ucnhash_CAPI  # slight performance-only race
+
+# Python/dynload_shlib.c
+_PyImport_DynLoadFiletab
+
+# Python/fileutils.c
+_Py_open_cloexec_works
+force_ascii
+
+# Python/frozen.c
+M___hello__
+PyImport_FrozenModules
+
+# Python/graminit.c
+_PyParser_Grammar
+dfas
+labels
+
+# Python/import.c
+PyImport_Inittab
+
+# Python/pylifecycle.c
+_TARGET_LOCALES
+
+
+#---------------------------------
+# initialized (PyObject)
+
+# Objects/bytesobject.c
+characters
+nullstring
+
+# Objects/exceptions.c
+PyExc_RecursionErrorInst
+errnomap
+
+# Objects/longobject.c
+_PyLong_One
+_PyLong_Zero
+small_ints
+
+# Objects/setobject.c
+emptyfrozenset
+
+# Objects/unicodeobject.c
+interned  # slight race on init in PyUnicode_InternInPlace()
+unicode_empty
+unicode_latin1
+
+
+#---------------------------------
+# initialized (other)
+
+# Python/getargs.c
+static_arg_parsers
+
+# Python/pyhash.c
+PyHash_Func
+_Py_HashSecret
+_Py_HashSecret_Initialized
+
+# Python/pylifecycle.c
+_Py_StandardStreamEncoding
+_Py_StandardStreamErrors
+default_home
+env_home
+progname
+Py_BytesWarningFlag
+Py_DebugFlag
+Py_DontWriteBytecodeFlag
+Py_FrozenFlag
+Py_HashRandomizationFlag
+Py_IgnoreEnvironmentFlag
+Py_InspectFlag
+Py_InteractiveFlag
+Py_IsolatedFlag
+Py_NoSiteFlag
+Py_NoUserSiteDirectory
+Py_OptimizeFlag
+Py_QuietFlag
+Py_UnbufferedStdioFlag
+Py_UseClassExceptionsFlag
+Py_VerboseFlag
+
+
+#---------------------------------
+# types
+
+# Modules/_threadmodule.c
+Locktype
+RLocktype
+localdummytype
+localtype
+
+# Objects/exceptions.c
+PyExc_BaseException
+PyExc_Exception
+PyExc_GeneratorExit
+PyExc_KeyboardInterrupt
+PyExc_StopAsyncIteration
+PyExc_StopIteration
+PyExc_SystemExit
+_PyExc_BaseException
+_PyExc_Exception
+_PyExc_GeneratorExit
+_PyExc_KeyboardInterrupt
+_PyExc_StopAsyncIteration
+_PyExc_StopIteration
+_PyExc_SystemExit
+
+# Objects/structseq.c
+_struct_sequence_template
+
+
+#---------------------------------
+# interned strings/bytes
+
+# Modules/_io/_iomodule.c
+_PyIO_empty_bytes
+_PyIO_empty_str
+_PyIO_str_close
+_PyIO_str_closed
+_PyIO_str_decode
+_PyIO_str_encode
+_PyIO_str_fileno
+_PyIO_str_flush
+_PyIO_str_getstate
+_PyIO_str_isatty
+_PyIO_str_newlines
+_PyIO_str_nl
+_PyIO_str_read
+_PyIO_str_read1
+_PyIO_str_readable
+_PyIO_str_readall
+_PyIO_str_readinto
+_PyIO_str_readline
+_PyIO_str_reset
+_PyIO_str_seek
+_PyIO_str_seekable
+_PyIO_str_setstate
+_PyIO_str_tell
+_PyIO_str_truncate
+_PyIO_str_writable
+_PyIO_str_write
+
+# Modules/_threadmodule.c
+str_dict
+
+# Objects/boolobject.c
+false_str
+true_str
+
+# Objects/listobject.c
+indexerr
+
+# Python/symtable.c
+__class__
+dictcomp
+genexpr
+lambda
+listcomp
+setcomp
+top
+
+# Python/sysmodule.c
+whatstrings
+
+
+#######################################
+# hacks
+
+# Objects/object.c
+_Py_abstract_hack
+
+# Objects/setobject.c
+_PySet_Dummy
+
+# Python/pylifecycle.c
+_PyOS_mystrnicmp_hack