gh-124470: Fix crash when reading from object instance dictionary while replacing it (#122489)

Delay freeing the old dictionary when replacing it
Dino Viehland 2024-11-21 08:41:19 -08:00 committed by GitHub
parent 3926842117
commit bf542f8bb9
7 changed files with 294 additions and 90 deletions
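
The race this commit fixes can be sketched with a short reproducer, distilled from the regression test added later in the commit (class and attribute names here are illustrative, not part of the change). On a free-threaded build without this fix, a reader can touch a dictionary that a writer is concurrently replacing and freeing, which could crash; on a default GIL build the same program runs without issue:

from threading import Thread

class C:
    pass

obj = C()
obj.__dict__ = {"foo": 42}

def writer():
    for _ in range(1000):
        obj.__dict__ = {"foo": 100}   # repeatedly replace the instance dict

def reader():
    for _ in range(1000):
        obj.foo                       # concurrent attribute read on another thread

threads = [Thread(target=writer) for _ in range(10)]
threads += [Thread(target=reader) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()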

View File

@@ -50,7 +50,6 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
# define _PyGC_BITS_UNREACHABLE (4)
# define _PyGC_BITS_FROZEN (8)
# define _PyGC_BITS_SHARED (16)
# define _PyGC_BITS_SHARED_INLINE (32)
# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting
#endif
@@ -119,23 +118,6 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) {
}
#define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op))
/* True if the memory of the object is shared between multiple
* threads and needs special purpose when freeing due to
* the possibility of in-flight lock-free reads occurring.
* Objects with this bit that are GC objects will automatically
* delay-freed by PyObject_GC_Del. */
static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) {
return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
}
#define _PyObject_GC_IS_SHARED_INLINE(op) \
_PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op))
static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
_PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE);
}
#define _PyObject_GC_SET_SHARED_INLINE(op) \
_PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op))
#endif
/* Bit flags for _gc_prev */

View File

@@ -120,11 +120,25 @@ extern int _PyMem_DebugEnabled(void);
extern void _PyMem_FreeDelayed(void *ptr);
// Enqueue an object to be freed possibly after some delay
extern void _PyObject_FreeDelayed(void *ptr);
#ifdef Py_GIL_DISABLED
extern void _PyObject_XDecRefDelayed(PyObject *obj);
#else
static inline void _PyObject_XDecRefDelayed(PyObject *obj)
{
Py_XDECREF(obj);
}
#endif
// Periodically process delayed free requests.
extern void _PyMem_ProcessDelayed(PyThreadState *tstate);
// Periodically process delayed free requests when the world is stopped.
// Notify of any objects which should be freed.
typedef void (*delayed_dealloc_cb)(PyObject *, void *);
extern void _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate,
delayed_dealloc_cb cb, void *state);
// Abandon all thread-local delayed free requests and push them to the
// interpreter's queue.
extern void _PyMem_AbandonDelayed(PyThreadState *tstate);

View File

@@ -142,6 +142,70 @@ class TestDict(TestCase):
        for ref in thread_list:
            self.assertIsNone(ref())

    def test_racing_set_object_dict(self):
        """Races assigning to __dict__ should be thread safe"""
        class C: pass
        class MyDict(dict): pass
        for cyclic in (False, True):
            f = C()
            f.__dict__ = {"foo": 42}
            THREAD_COUNT = 10

            def writer_func(l):
                for i in range(1000):
                    if cyclic:
                        other_d = {}
                    d = MyDict({"foo": 100})
                    if cyclic:
                        d["x"] = other_d
                        other_d["bar"] = d
                    l.append(weakref.ref(d))
                    f.__dict__ = d

            def reader_func():
                for i in range(1000):
                    f.foo

            lists = []
            readers = []
            writers = []
            for x in range(THREAD_COUNT):
                thread_list = []
                lists.append(thread_list)
                writer = Thread(target=partial(writer_func, thread_list))
                writers.append(writer)

            for x in range(THREAD_COUNT):
                reader = Thread(target=partial(reader_func))
                readers.append(reader)

            for writer in writers:
                writer.start()

            for reader in readers:
                reader.start()

            for writer in writers:
                writer.join()

            for reader in readers:
                reader.join()

            f.__dict__ = {}
            gc.collect()
            gc.collect()

            count = 0
            ids = set()
            for thread_list in lists:
                for i, ref in enumerate(thread_list):
                    if ref() is None:
                        continue
                    count += 1
                    ids.add(id(ref()))
                    count += 1

            self.assertEqual(count, 0)


if __name__ == "__main__":
    unittest.main()
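
Two notes on the test above: the cyclic=True pass builds dictionaries that form a reference cycle with another dict, so they can only be reclaimed by the free-threaded garbage collector (exercising the gc_free_threading.c changes later in this commit), while the cyclic=False pass lets replaced dictionaries die through reference counting alone and so exercises the plain delayed-decref path. Assuming the test module is importable as test.test_free_threading.test_dict (the file path is not shown in this extract), it can be run on a free-threaded build with ./python -m unittest -v test.test_free_threading.test_dict; the race it targets is only meaningful when the GIL is disabled.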

View File

@@ -0,0 +1 @@
Fix a crash in free-threaded builds when replacing an object's dictionary while an attribute is being read on another thread.

View File

@@ -7087,51 +7087,146 @@ set_dict_inline_values(PyObject *obj, PyDictObject *new_dict)
}
}
int
_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
#ifdef Py_GIL_DISABLED
// Tries to set the dictionary for an object in the easy case, when the current
// dictionary is either completely unmaterialized or is a dictionary which
// does not point at the inline values.
static bool
try_set_dict_inline_only_or_other_dict(PyObject *obj, PyObject *new_dict, PyDictObject **cur_dict)
{
bool replaced = false;
Py_BEGIN_CRITICAL_SECTION(obj);
PyDictObject *dict = *cur_dict = _PyObject_GetManagedDict(obj);
if (dict == NULL) {
// We only have inline values, we can just completely replace them.
set_dict_inline_values(obj, (PyDictObject *)new_dict);
replaced = true;
goto exit_lock;
}
if (FT_ATOMIC_LOAD_PTR_RELAXED(dict->ma_values) != _PyObject_InlineValues(obj)) {
// We have a materialized dict which doesn't point at the inline values,
// so we can simply swap dictionaries and free the old one.
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
replaced = true;
goto exit_lock;
}
else {
// We have inline values, so we need to lock the dict and the object
// at the same time to safely dematerialize them. To do that while releasing
// the object lock, we need a strong reference to the current dictionary.
Py_INCREF(dict);
}
exit_lock:
Py_END_CRITICAL_SECTION();
return replaced;
}
// Replaces a dictionary that is probably the dictionary which has been
// materialized and points at the inline values. We could have raced
// and replaced it with another dictionary though.
static int
replace_dict_probably_inline_materialized(PyObject *obj, PyDictObject *inline_dict,
PyDictObject *cur_dict, PyObject *new_dict)
{
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj);
if (cur_dict == inline_dict) {
assert(FT_ATOMIC_LOAD_PTR_RELAXED(inline_dict->ma_values) == _PyObject_InlineValues(obj));
int err = _PyDict_DetachFromObject(inline_dict, obj);
if (err != 0) {
assert(new_dict == NULL);
return err;
}
}
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
return 0;
}
#endif
static void
decref_maybe_delay(PyObject *obj, bool delay)
{
if (delay) {
_PyObject_XDecRefDelayed(obj);
}
else {
Py_XDECREF(obj);
}
}
static int
set_or_clear_managed_dict(PyObject *obj, PyObject *new_dict, bool clear)
{
assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
#ifndef NDEBUG
Py_BEGIN_CRITICAL_SECTION(obj);
assert(_PyObject_InlineValuesConsistencyCheck(obj));
Py_END_CRITICAL_SECTION();
#endif
int err = 0;
PyTypeObject *tp = Py_TYPE(obj);
if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) {
#ifdef Py_GIL_DISABLED
PyDictObject *prev_dict;
if (!try_set_dict_inline_only_or_other_dict(obj, new_dict, &prev_dict)) {
// We had a materialized dictionary which pointed at the inline
// values. We need to lock both the object and the dict at the
// same time to safely replace it. We can't merely lock the dictionary
// while the object is locked because it could suspend the object lock.
PyDictObject *cur_dict;
assert(prev_dict != NULL);
Py_BEGIN_CRITICAL_SECTION2(obj, prev_dict);
// Another thread could have raced with us between the point in
// try_set_dict_inline_only_or_other_dict where we unlocked the object
// and the point here where we re-locked it together with the dictionary.
cur_dict = _PyObject_GetManagedDict(obj);
err = replace_dict_probably_inline_materialized(obj, prev_dict,
cur_dict, new_dict);
Py_END_CRITICAL_SECTION2();
// Decref for the dictionary we incref'd in try_set_dict_inline_only_or_other_dict
// while the object was locked
decref_maybe_delay((PyObject *)prev_dict,
!clear && prev_dict != cur_dict);
if (err != 0) {
return err;
}
prev_dict = cur_dict;
}
if (prev_dict != NULL) {
// decref for the dictionary that we replaced
decref_maybe_delay((PyObject *)prev_dict, !clear);
}
return 0;
#else
PyDictObject *dict = _PyObject_GetManagedDict(obj);
if (dict == NULL) {
#ifdef Py_GIL_DISABLED
Py_BEGIN_CRITICAL_SECTION(obj);
dict = _PyObject_ManagedDictPointer(obj)->dict;
if (dict == NULL) {
set_dict_inline_values(obj, (PyDictObject *)new_dict);
}
Py_END_CRITICAL_SECTION();
if (dict == NULL) {
return 0;
}
#else
set_dict_inline_values(obj, (PyDictObject *)new_dict);
return 0;
}
if (_PyDict_DetachFromObject(dict, obj) == 0) {
_PyObject_ManagedDictPointer(obj)->dict = (PyDictObject *)Py_XNewRef(new_dict);
Py_DECREF(dict);
return 0;
}
assert(new_dict == NULL);
return -1;
#endif
}
Py_BEGIN_CRITICAL_SECTION2(dict, obj);
// We've locked dict, but the actual dict could have changed
// since we locked it.
dict = _PyObject_ManagedDictPointer(obj)->dict;
err = _PyDict_DetachFromObject(dict, obj);
assert(err == 0 || new_dict == NULL);
if (err == 0) {
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
}
Py_END_CRITICAL_SECTION2();
if (err == 0) {
Py_XDECREF(dict);
}
}
else {
PyDictObject *dict;
@@ -7144,17 +7239,22 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
(PyDictObject *)Py_XNewRef(new_dict));
Py_END_CRITICAL_SECTION();
Py_XDECREF(dict);
decref_maybe_delay((PyObject *)dict, !clear);
}
assert(_PyObject_InlineValuesConsistencyCheck(obj));
return err;
}
int
_PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
{
return set_or_clear_managed_dict(obj, new_dict, false);
}
void
PyObject_ClearManagedDict(PyObject *obj)
{
if (_PyObject_SetManagedDict(obj, NULL) < 0) {
if (set_or_clear_managed_dict(obj, NULL, true) < 0) {
/* Must be out of memory */
assert(PyErr_Occurred() == PyExc_MemoryError);
PyErr_WriteUnraisable(NULL);
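
The code above repeatedly distinguishes between an object whose attributes still live only in its inline values and one whose __dict__ has been materialized and is backed by those values. A rough Python-level illustration of the states involved (behavioral only; the storage details are internal and the names are just an example):

class C:
    pass

o = C()
o.foo = 1                  # stored in the object's inline values; no dict yet
d = o.__dict__             # materializes a dict backed by the inline values
o.__dict__ = {"foo": 2}    # the replacement path rewritten above: the old dict
                           # must first be detached from the inline values, and
                           # on free-threaded builds it is decref'd with a
                           # possible delay because other threads may still be
                           # reading it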

View File

@@ -1093,10 +1093,24 @@ struct _mem_work_chunk {
};
static void
free_work_item(uintptr_t ptr)
free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
{
if (ptr & 0x01) {
PyObject_Free((char *)(ptr - 1));
PyObject *obj = (PyObject *)(ptr - 1);
#ifdef Py_GIL_DISABLED
if (cb == NULL) {
assert(!_PyInterpreterState_GET()->stoptheworld.world_stopped);
Py_DECREF(obj);
return;
}
Py_ssize_t refcount = _Py_ExplicitMergeRefcount(obj, -1);
if (refcount == 0) {
cb(obj, state);
}
#else
Py_DECREF(obj);
#endif
}
else {
PyMem_Free((void *)ptr);
@@ -1107,7 +1121,7 @@ static void
free_delayed(uintptr_t ptr)
{
#ifndef Py_GIL_DISABLED
free_work_item(ptr);
free_work_item(ptr, NULL, NULL);
#else
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_PyInterpreterState_GetFinalizing(interp) != NULL ||
@@ -1115,7 +1129,8 @@ free_delayed(uintptr_t ptr)
{
// Free immediately during interpreter shutdown or if the world is
// stopped.
free_work_item(ptr);
assert(!interp->stoptheworld.world_stopped || !(ptr & 0x01));
free_work_item(ptr, NULL, NULL);
return;
}
@@ -1142,7 +1157,8 @@ free_delayed(uintptr_t ptr)
if (buf == NULL) {
// failed to allocate a buffer, free immediately
_PyEval_StopTheWorld(tstate->base.interp);
free_work_item(ptr);
// TODO: Fix me
free_work_item(ptr, NULL, NULL);
_PyEval_StartTheWorld(tstate->base.interp);
return;
}
@@ -1166,12 +1182,16 @@ _PyMem_FreeDelayed(void *ptr)
free_delayed((uintptr_t)ptr);
}
#ifdef Py_GIL_DISABLED
void
_PyObject_FreeDelayed(void *ptr)
_PyObject_XDecRefDelayed(PyObject *ptr)
{
assert(!((uintptr_t)ptr & 0x01));
free_delayed(((uintptr_t)ptr)|0x01);
if (ptr != NULL) {
free_delayed(((uintptr_t)ptr)|0x01);
}
}
#endif
static struct _mem_work_chunk *
work_queue_first(struct llist_node *head)
@@ -1181,7 +1201,7 @@ work_queue_first(struct llist_node *head)
static void
process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
bool keep_empty)
bool keep_empty, delayed_dealloc_cb cb, void *state)
{
while (!llist_empty(head)) {
struct _mem_work_chunk *buf = work_queue_first(head);
@@ -1192,7 +1212,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
return;
}
free_work_item(item->ptr);
free_work_item(item->ptr, cb, state);
buf->rd_idx++;
}
@@ -1210,7 +1230,8 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
static void
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
struct _qsbr_thread_state *qsbr)
struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb,
void *state)
{
if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
return;
@@ -1218,7 +1239,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue,
// Try to acquire the lock, but don't block if it's already held.
if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
process_queue(&queue->head, qsbr, false);
process_queue(&queue->head, qsbr, false, cb, state);
int more_work = !llist_empty(&queue->head);
_Py_atomic_store_int_relaxed(&queue->has_work, more_work);
@@ -1234,10 +1255,23 @@ _PyMem_ProcessDelayed(PyThreadState *tstate)
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
// Process thread-local work
process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL);
// Process shared interpreter work
process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL);
}
void
_PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, void *state)
{
PyInterpreterState *interp = tstate->interp;
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
// Process thread-local work
process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state);
// Process shared interpreter work
process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state);
}
void
@@ -1279,7 +1313,7 @@ _PyMem_FiniDelayed(PyInterpreterState *interp)
// Free the remaining items immediately. There should be no other
// threads accessing the memory at this point during shutdown.
struct _mem_work_item *item = &buf->array[buf->rd_idx];
free_work_item(item->ptr);
free_work_item(item->ptr, NULL, NULL);
buf->rd_idx++;
}

View File

@@ -393,6 +393,23 @@ gc_visit_thread_stacks(PyInterpreterState *interp)
HEAD_UNLOCK(&_PyRuntime);
}
static void
queue_untracked_obj_decref(PyObject *op, struct collection_state *state)
{
if (!_PyObject_GC_IS_TRACKED(op)) {
// GC objects with zero refcount are handled subsequently by the
// GC as if they were cyclic trash, but we have to handle dead
// non-GC objects here. Add one to the refcount so that we can
// decref and deallocate the object once we start the world again.
op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
#ifdef Py_REF_DEBUG
_Py_IncRefTotal(_PyThreadState_GET());
#endif
worklist_push(&state->objs_to_decref, op);
}
}
static void
merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
{
@@ -404,22 +421,20 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
// Subtract one when merging because the queue had a reference.
Py_ssize_t refcount = merge_refcount(op, -1);
if (!_PyObject_GC_IS_TRACKED(op) && refcount == 0) {
// GC objects with zero refcount are handled subsequently by the
// GC as if they were cyclic trash, but we have to handle dead
// non-GC objects here. Add one to the refcount so that we can
// decref and deallocate the object once we start the world again.
op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
#ifdef Py_REF_DEBUG
_Py_IncRefTotal(_PyThreadState_GET());
#endif
worklist_push(&state->objs_to_decref, op);
if (refcount == 0) {
queue_untracked_obj_decref(op, state);
}
}
}
static void
process_delayed_frees(PyInterpreterState *interp)
queue_freed_object(PyObject *obj, void *arg)
{
queue_untracked_obj_decref(obj, arg);
}
static void
process_delayed_frees(PyInterpreterState *interp, struct collection_state *state)
{
// While we are in a "stop the world" pause, we can observe the latest
// write sequence by advancing the write sequence immediately.
@@ -438,7 +453,7 @@ process_delayed_frees(PyInterpreterState *interp)
}
HEAD_UNLOCK(&_PyRuntime);
_PyMem_ProcessDelayed((PyThreadState *)current_tstate);
_PyMem_ProcessDelayedNoDealloc((PyThreadState *)current_tstate, queue_freed_object, state);
}
// Subtract an incoming reference from the computed "gc_refs" refcount.
@@ -1231,7 +1246,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
}
HEAD_UNLOCK(&_PyRuntime);
process_delayed_frees(interp);
process_delayed_frees(interp, state);
// Find unreachable objects
int err = deduce_unreachable_heap(interp, state);
@@ -1910,13 +1925,7 @@ PyObject_GC_Del(void *op)
}
record_deallocation(_PyThreadState_GET());
PyObject *self = (PyObject *)op;
if (_PyObject_GC_IS_SHARED_INLINE(self)) {
_PyObject_FreeDelayed(((char *)op)-presize);
}
else {
PyObject_Free(((char *)op)-presize);
}
PyObject_Free(((char *)op)-presize);
}
int