gh-113190: Reenable non-debug interned string cleanup (GH-113601)

2024-11-23 18:04:37 +08:00 · 2024-08-15 07:55:09 -04:00 · 2024-08-15 07:55:09 -04:00 · 3203a74129
commit 3203a74129
parent b6cb435ac0
5 changed files with 42 additions and 42 deletions
--- a/Doc/c-api/init.rst
+++ b/Doc/c-api/init.rst
@ -394,8 +394,7 @@ Initializing and finalizing the interpreter
   Undo all initializations made by :c:func:`Py_Initialize` and subsequent use of
   Python/C API functions, and destroy all sub-interpreters (see
   :c:func:`Py_NewInterpreter` below) that were created and not yet destroyed since
-   the last call to :c:func:`Py_Initialize`.  Ideally, this frees all memory
-   allocated by the Python interpreter.  This is a no-op when called for a second
+   the last call to :c:func:`Py_Initialize`.  This is a no-op when called for a second
   time (without calling :c:func:`Py_Initialize` again first).

   Since this is the reverse of :c:func:`Py_Initialize`, it should be called
@ -407,6 +406,12 @@ Initializing and finalizing the interpreter
   If there were errors during finalization (flushing buffered data),
   ``-1`` is returned.

+   Note that Python will do a best effort at freeing all memory allocated by the Python
+   interpreter.  Therefore, any C-Extension should make sure to correctly clean up all
+   of the preveiously allocated PyObjects before using them in subsequent calls to
+   :c:func:`Py_Initialize`.  Otherwise it could introduce vulnerabilities and incorrect
+   behavior.
+
   This function is provided for a number of reasons.  An embedding application
   might want to restart Python without having to restart the application itself.
   An application that has loaded the Python interpreter from a dynamically
@ -421,10 +426,11 @@ Initializing and finalizing the interpreter
   loaded extension modules loaded by Python are not unloaded.  Small amounts of
   memory allocated by the Python interpreter may not be freed (if you find a leak,
   please report it).  Memory tied up in circular references between objects is not
-   freed.  Some memory allocated by extension modules may not be freed.  Some
-   extensions may not work properly if their initialization routine is called more
-   than once; this can happen if an application calls :c:func:`Py_Initialize` and
-   :c:func:`Py_FinalizeEx` more than once.
+   freed.  Interned strings will all be deallocated regarldess of their reference count.
+   Some memory allocated by extension modules may not be freed.  Some extensions may not
+   work properly if their initialization routine is called more than once; this can
+   happen if an application calls :c:func:`Py_Initialize` and :c:func:`Py_FinalizeEx`
+   more than once.

   .. audit-event:: cpython._PySys_ClearAuditHooks "" c.Py_FinalizeEx

--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@ -419,6 +419,16 @@ New Features
  which has an ambiguous return value.
  (Contributed by Irit Katriel and Erlend Aasland in :gh:`105201`.)

+* :c:func:`Py_Finalize` now deletes all interned strings. This
+  is backwards incompatible to any C-Extension that holds onto an interned
+  string after a call to :c:func:`Py_Finalize` and is then reused after a
+  call to :c:func:`Py_Initialize`.  Any issues arising from this behavior will
+  normally result in crashes during the exectuion of the subsequent call to
+  :c:func:`Py_Initialize` from accessing uninitialized memory. To fix, use
+  an address sanitizer to identify any use-after-free coming from
+  an interned string and deallocate it during module shutdown.
+  (Contribued by Eddie Elizondo in :gh:`113601`.)
+
 Porting to Python 3.14
 ----------------------

--- a/Lib/test/_test_embed_structseq.py
+++ b/Lib/test/_test_embed_structseq.py
@ -1,31 +1,27 @@
 import sys
 import types
+import unittest

-# Note: This test file can't import `unittest` since the runtime can't
-# currently guarantee that it will not leak memory. Doing so will mark
-# the test as passing but with reference leaks. This can safely import
-# the `unittest` library once there's a strict guarantee of no leaks
-# during runtime shutdown.

 # bpo-46417: Test that structseq types used by the sys module are still
 # valid when Py_Finalize()/Py_Initialize() are called multiple times.
-class TestStructSeq:
+class TestStructSeq(unittest.TestCase):
    # test PyTypeObject members
-    def _check_structseq(self, obj_type):
+    def check_structseq(self, obj_type):
        # ob_refcnt
-        assert sys.getrefcount(obj_type) > 1
+        self.assertGreaterEqual(sys.getrefcount(obj_type), 1)
        # tp_base
-        assert issubclass(obj_type, tuple)
+        self.assertTrue(issubclass(obj_type, tuple))
        # tp_bases
-        assert obj_type.__bases__ == (tuple,)
+        self.assertEqual(obj_type.__bases__, (tuple,))
        # tp_dict
-        assert isinstance(obj_type.__dict__, types.MappingProxyType)
+        self.assertIsInstance(obj_type.__dict__, types.MappingProxyType)
        # tp_mro
-        assert obj_type.__mro__ == (obj_type, tuple, object)
+        self.assertEqual(obj_type.__mro__, (obj_type, tuple, object))
        # tp_name
-        assert isinstance(type.__name__, str)
+        self.assertIsInstance(type.__name__, str)
        # tp_subclasses
-        assert obj_type.__subclasses__() == []
+        self.assertEqual(obj_type.__subclasses__(), [])

    def test_sys_attrs(self):
        for attr_name in (
@ -36,23 +32,23 @@ class TestStructSeq:
            'thread_info',    # ThreadInfoType
            'version_info',   # VersionInfoType
        ):
-            attr = getattr(sys, attr_name)
-            self._check_structseq(type(attr))
+            with self.subTest(attr=attr_name):
+                attr = getattr(sys, attr_name)
+                self.check_structseq(type(attr))

    def test_sys_funcs(self):
        func_names = ['get_asyncgen_hooks']  # AsyncGenHooksType
        if hasattr(sys, 'getwindowsversion'):
            func_names.append('getwindowsversion')  # WindowsVersionType
        for func_name in func_names:
-            func = getattr(sys, func_name)
-            obj = func()
-            self._check_structseq(type(obj))
+            with self.subTest(func=func_name):
+                func = getattr(sys, func_name)
+                obj = func()
+                self.check_structseq(type(obj))


 try:
-    tests = TestStructSeq()
-    tests.test_sys_attrs()
-    tests.test_sys_funcs()
+    unittest.main()
 except SystemExit as exc:
    if exc.args[0] != 0:
        raise
--- a/Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst
+++ b/Builtins/2024-01-15-18-11-48.gh-issue-113190.OwQX64.rst
@ -0,0 +1 @@
+:c:func:`Py_Finalize` now deletes all interned strings.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -15623,19 +15623,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
        int shared = 0;
        switch (PyUnicode_CHECK_INTERNED(s)) {
        case SSTATE_INTERNED_IMMORTAL:
-            /* Make immortal interned strings mortal again.
-             *
-             * Currently, the runtime is not able to guarantee that it can exit
-             * without allocations that carry over to a future initialization
-             * of Python within the same process. i.e:
-             *   ./python -X showrefcount -c 'import itertools'
-             *   [237 refs, 237 blocks]
-             *
-             * This should remain disabled (`Py_DEBUG` only) until there is a
-             * strict guarantee that no memory will be left after
-             * `Py_Finalize`.
-             */
-#ifdef Py_DEBUG
+            /* Make immortal interned strings mortal again. */
            // Skip the Immortal Instance check and restore
            // the two references (key and value) ignored
            // by PyUnicode_InternInPlace().
@ -15648,7 +15636,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
 #ifdef INTERNED_STATS
            total_length += PyUnicode_GET_LENGTH(s);
 #endif
-#endif // Py_DEBUG
            break;
        case SSTATE_INTERNED_IMMORTAL_STATIC:
            /* It is shared between interpreters, so we should unmark it
				`@ -0,0 +1 @@`
				:c:func:`Py_Finalize` now deletes all interned strings.