From 4f04172c9287c507f1426e02ddfc432f1f3ade54 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 14 Nov 2023 13:51:00 +0100 Subject: [PATCH] gh-111262: Add PyDict_Pop() function (#112028) _PyDict_Pop_KnownHash(): remove the default value and the return type becomes an int. Co-authored-by: Stefan Behnel Co-authored-by: Antoine Pitrou --- Doc/c-api/dict.rst | 27 ++++ Doc/whatsnew/3.13.rst | 6 + Include/cpython/dictobject.h | 2 + Include/internal/pycore_dict.h | 6 +- Lib/test/test_capi/test_dict.py | 87 ++++++++++++ ...-11-10-10-21-38.gh-issue-111262.2utB5m.rst | 4 + Modules/_functoolsmodule.c | 28 ++-- Modules/_testcapi/dict.c | 87 +++++++++++- Modules/_threadmodule.c | 7 +- Modules/socketmodule.c | 10 +- Objects/dictobject.c | 125 +++++++++++++----- Objects/odictobject.c | 5 +- Objects/structseq.c | 12 +- Python/import.c | 4 +- Python/sysmodule.c | 4 +- 15 files changed, 338 insertions(+), 76 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index 8ee35191800..8471c98d044 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -173,6 +173,33 @@ Dictionary Objects .. versionadded:: 3.4 + +.. c:function:: int PyDict_Pop(PyObject *p, PyObject *key, PyObject **result) + + Remove *key* from dictionary *p* and optionally return the removed value. + Do not raise :exc:`KeyError` if the key missing. + + - If the key is present, set *\*result* to a new reference to the removed + value if *result* is not ``NULL``, and return ``1``. + - If the key is missing, set *\*result* to ``NULL`` if *result* is not + ``NULL``, and return ``0``. + - On error, raise an exception and return ``-1``. + + This is similar to :meth:`dict.pop`, but without the default value and + not raising :exc:`KeyError` if the key missing. + + .. versionadded:: 3.13 + + +.. c:function:: int PyDict_PopString(PyObject *p, const char *key, PyObject **result) + + Similar to :c:func:`PyDict_Pop`, but *key* is specified as a + :c:expr:`const char*` UTF-8 encoded bytes string, rather than a + :c:expr:`PyObject*`. + + .. versionadded:: 3.13 + + .. c:function:: PyObject* PyDict_Items(PyObject *p) Return a :c:type:`PyListObject` containing all the items from the dictionary. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 81e133bb545..136fe901ce3 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1175,6 +1175,12 @@ New Features Python ``list.extend()`` and ``list.clear()`` methods. (Contributed by Victor Stinner in :gh:`111138`.) +* Add :c:func:`PyDict_Pop` and :c:func:`PyDict_PopString` functions: remove a + key from a dictionary and optionally return the removed value. This is + similar to :meth:`dict.pop`, but without the default value and not raising + :exc:`KeyError` if the key missing. + (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index 64a40425ba7..64753849d32 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -46,6 +46,8 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key); +PyAPI_FUNC(int) PyDict_Pop(PyObject *dict, PyObject *key, PyObject **result); +PyAPI_FUNC(int) PyDict_PopString(PyObject *dict, const char *key, PyObject **result); PyAPI_FUNC(PyObject *) _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value); /* Dictionary watchers */ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index d01ef55de51..89f30a452c0 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -116,7 +116,11 @@ extern PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); -extern PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObject *); +extern int _PyDict_Pop_KnownHash( + PyDictObject *dict, + PyObject *key, + Py_hash_t hash, + PyObject **result); #define DKIX_EMPTY (-1) #define DKIX_DUMMY (-2) /* Used internally */ diff --git a/Lib/test/test_capi/test_dict.py b/Lib/test/test_capi/test_dict.py index 67f12a56313..57a7238588e 100644 --- a/Lib/test/test_capi/test_dict.py +++ b/Lib/test/test_capi/test_dict.py @@ -432,6 +432,93 @@ class CAPITest(unittest.TestCase): # CRASHES mergefromseq2({}, NULL, 0) # CRASHES mergefromseq2(NULL, {}, 0) + def test_dict_pop(self): + # Test PyDict_Pop() + dict_pop = _testcapi.dict_pop + dict_pop_null = _testcapi.dict_pop_null + + # key present, get removed value + mydict = {"key": "value", "key2": "value2"} + self.assertEqual(dict_pop(mydict, "key"), (1, "value")) + self.assertEqual(mydict, {"key2": "value2"}) + self.assertEqual(dict_pop(mydict, "key2"), (1, "value2")) + self.assertEqual(mydict, {}) + + # key present, ignore removed value + mydict = {"key": "value", "key2": "value2"} + self.assertEqual(dict_pop_null(mydict, "key"), 1) + self.assertEqual(mydict, {"key2": "value2"}) + self.assertEqual(dict_pop_null(mydict, "key2"), 1) + self.assertEqual(mydict, {}) + + # key missing, expect removed value; empty dict has a fast path + self.assertEqual(dict_pop({}, "key"), (0, NULL)) + self.assertEqual(dict_pop({"a": 1}, "key"), (0, NULL)) + + # key missing, ignored removed value; empty dict has a fast path + self.assertEqual(dict_pop_null({}, "key"), 0) + self.assertEqual(dict_pop_null({"a": 1}, "key"), 0) + + # dict error + not_dict = UserDict({1: 2}) + self.assertRaises(SystemError, dict_pop, not_dict, "key") + self.assertRaises(SystemError, dict_pop_null, not_dict, "key") + + # key error; don't hash key if dict is empty + not_hashable_key = ["list"] + self.assertEqual(dict_pop({}, not_hashable_key), (0, NULL)) + with self.assertRaises(TypeError): + dict_pop({'key': 1}, not_hashable_key) + dict_pop({}, NULL) # key is not checked if dict is empty + + # CRASHES dict_pop(NULL, "key") + # CRASHES dict_pop({"a": 1}, NULL) + + def test_dict_popstring(self): + # Test PyDict_PopString() + dict_popstring = _testcapi.dict_popstring + dict_popstring_null = _testcapi.dict_popstring_null + + # key present, get removed value + mydict = {"key": "value", "key2": "value2"} + self.assertEqual(dict_popstring(mydict, "key"), (1, "value")) + self.assertEqual(mydict, {"key2": "value2"}) + self.assertEqual(dict_popstring(mydict, "key2"), (1, "value2")) + self.assertEqual(mydict, {}) + + # key present, ignore removed value + mydict = {"key": "value", "key2": "value2"} + self.assertEqual(dict_popstring_null(mydict, "key"), 1) + self.assertEqual(mydict, {"key2": "value2"}) + self.assertEqual(dict_popstring_null(mydict, "key2"), 1) + self.assertEqual(mydict, {}) + + # key missing; empty dict has a fast path + self.assertEqual(dict_popstring({}, "key"), (0, NULL)) + self.assertEqual(dict_popstring_null({}, "key"), 0) + self.assertEqual(dict_popstring({"a": 1}, "key"), (0, NULL)) + self.assertEqual(dict_popstring_null({"a": 1}, "key"), 0) + + # non-ASCII key + non_ascii = '\U0001f40d' + dct = {'\U0001f40d': 123} + self.assertEqual(dict_popstring(dct, '\U0001f40d'.encode()), (1, 123)) + dct = {'\U0001f40d': 123} + self.assertEqual(dict_popstring_null(dct, '\U0001f40d'.encode()), 1) + + # dict error + not_dict = UserDict({1: 2}) + self.assertRaises(SystemError, dict_popstring, not_dict, "key") + self.assertRaises(SystemError, dict_popstring_null, not_dict, "key") + + # key error + self.assertRaises(UnicodeDecodeError, dict_popstring, {1: 2}, INVALID_UTF8) + self.assertRaises(UnicodeDecodeError, dict_popstring_null, {1: 2}, INVALID_UTF8) + + # CRASHES dict_popstring(NULL, "key") + # CRASHES dict_popstring({}, NULL) + # CRASHES dict_popstring({"a": 1}, NULL) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst b/Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst new file mode 100644 index 00000000000..d432b7ebd3a --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst @@ -0,0 +1,4 @@ +Add :c:func:`PyDict_Pop` and :c:func:`PyDict_PopString` functions: remove a key +from a dictionary and optionally return the removed value. This is similar to +:meth:`dict.pop`, but without the default value and not raising :exc:`KeyError` +if the key missing. Patch by Stefan Behnel and Victor Stinner. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 8ea493ad9ab..ca440e4c70c 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -1087,19 +1087,9 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds The cache dict holds one reference to the link. We created one other reference when the link was created. The linked list only has borrowed references. */ - popresult = _PyDict_Pop_KnownHash(self->cache, link->key, - link->hash, Py_None); - if (popresult == Py_None) { - /* Getting here means that the user function call or another - thread has already removed the old key from the dictionary. - This link is now an orphan. Since we don't want to leave the - cache in an inconsistent state, we don't restore the link. */ - Py_DECREF(popresult); - Py_DECREF(link); - Py_DECREF(key); - return result; - } - if (popresult == NULL) { + int res = _PyDict_Pop_KnownHash((PyDictObject*)self->cache, link->key, + link->hash, &popresult); + if (res < 0) { /* An error arose while trying to remove the oldest key (the one being evicted) from the cache. We restore the link to its original position as the oldest link. Then we allow the @@ -1110,10 +1100,22 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds Py_DECREF(result); return NULL; } + if (res == 0) { + /* Getting here means that the user function call or another + thread has already removed the old key from the dictionary. + This link is now an orphan. Since we don't want to leave the + cache in an inconsistent state, we don't restore the link. */ + assert(popresult == NULL); + Py_DECREF(link); + Py_DECREF(key); + return result; + } + /* Keep a reference to the old key and old result to prevent their ref counts from going to zero during the update. That will prevent potentially arbitrary object clean-up code (i.e. __del__) from running while we're still adjusting the links. */ + assert(popresult != NULL); oldkey = link->key; oldresult = link->result; diff --git a/Modules/_testcapi/dict.c b/Modules/_testcapi/dict.c index 5f6a1a037dc..42e056b7d07 100644 --- a/Modules/_testcapi/dict.c +++ b/Modules/_testcapi/dict.c @@ -331,6 +331,88 @@ dict_mergefromseq2(PyObject *self, PyObject *args) } +static PyObject * +dict_pop(PyObject *self, PyObject *args) +{ + // Test PyDict_Pop(dict, key, &value) + PyObject *dict, *key; + if (!PyArg_ParseTuple(args, "OO", &dict, &key)) { + return NULL; + } + NULLABLE(dict); + NULLABLE(key); + PyObject *result = UNINITIALIZED_PTR; + int res = PyDict_Pop(dict, key, &result); + if (res < 0) { + assert(result == NULL); + return NULL; + } + if (res == 0) { + assert(result == NULL); + result = Py_NewRef(Py_None); + } + else { + assert(result != NULL); + } + return Py_BuildValue("iN", res, result); +} + + +static PyObject * +dict_pop_null(PyObject *self, PyObject *args) +{ + // Test PyDict_Pop(dict, key, NULL) + PyObject *dict, *key; + if (!PyArg_ParseTuple(args, "OO", &dict, &key)) { + return NULL; + } + NULLABLE(dict); + NULLABLE(key); + RETURN_INT(PyDict_Pop(dict, key, NULL)); +} + + +static PyObject * +dict_popstring(PyObject *self, PyObject *args) +{ + PyObject *dict; + const char *key; + Py_ssize_t key_size; + if (!PyArg_ParseTuple(args, "Oz#", &dict, &key, &key_size)) { + return NULL; + } + NULLABLE(dict); + PyObject *result = UNINITIALIZED_PTR; + int res = PyDict_PopString(dict, key, &result); + if (res < 0) { + assert(result == NULL); + return NULL; + } + if (res == 0) { + assert(result == NULL); + result = Py_NewRef(Py_None); + } + else { + assert(result != NULL); + } + return Py_BuildValue("iN", res, result); +} + + +static PyObject * +dict_popstring_null(PyObject *self, PyObject *args) +{ + PyObject *dict; + const char *key; + Py_ssize_t key_size; + if (!PyArg_ParseTuple(args, "Oz#", &dict, &key, &key_size)) { + return NULL; + } + NULLABLE(dict); + RETURN_INT(PyDict_PopString(dict, key, NULL)); +} + + static PyMethodDef test_methods[] = { {"dict_check", dict_check, METH_O}, {"dict_checkexact", dict_checkexact, METH_O}, @@ -358,7 +440,10 @@ static PyMethodDef test_methods[] = { {"dict_merge", dict_merge, METH_VARARGS}, {"dict_update", dict_update, METH_VARARGS}, {"dict_mergefromseq2", dict_mergefromseq2, METH_VARARGS}, - + {"dict_pop", dict_pop, METH_VARARGS}, + {"dict_pop_null", dict_pop_null, METH_VARARGS}, + {"dict_popstring", dict_popstring, METH_VARARGS}, + {"dict_popstring_null", dict_popstring_null, METH_VARARGS}, {NULL}, }; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 88ca9032b5e..c608789b5fb 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -967,11 +967,8 @@ local_clear(localobject *self) HEAD_UNLOCK(runtime); while (tstate) { if (tstate->dict) { - PyObject *v = _PyDict_Pop(tstate->dict, self->key, Py_None); - if (v != NULL) { - Py_DECREF(v); - } - else { + if (PyDict_Pop(tstate->dict, self->key, NULL) < 0) { + // Silently ignore error PyErr_Clear(); } } diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 97f4d174666..2e1a0974ceb 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -392,16 +392,10 @@ remove_unusable_flags(PyObject *m) break; } else { - PyObject *flag_name = PyUnicode_FromString(win_runtime_flags[i].flag_name); - if (flag_name == NULL) { + if (PyDict_PopString(dict, win_runtime_flags[i].flag_name, + NULL) < 0) { return -1; } - PyObject *v = _PyDict_Pop(dict, flag_name, Py_None); - Py_DECREF(flag_name); - if (v == NULL) { - return -1; - } - Py_DECREF(v); } } return 0; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 719d438897c..d3d16c59f0e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2226,64 +2226,119 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) return _PyDict_Next(op, ppos, pkey, pvalue, NULL); } -/* Internal version of dict.pop(). */ -PyObject * -_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt) -{ - Py_ssize_t ix; - PyObject *old_value; - PyDictObject *mp; - PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(PyDict_Check(dict)); - mp = (PyDictObject *)dict; +/* Internal version of dict.pop(). */ +int +_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, + PyObject **result) +{ + assert(PyDict_Check(mp)); if (mp->ma_used == 0) { - if (deflt) { - return Py_NewRef(deflt); + if (result) { + *result = NULL; } - _PyErr_SetKeyError(key); - return NULL; + return 0; } - ix = _Py_dict_lookup(mp, key, hash, &old_value); - if (ix == DKIX_ERROR) - return NULL; + + PyObject *old_value; + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); + if (ix == DKIX_ERROR) { + if (result) { + *result = NULL; + } + return -1; + } + if (ix == DKIX_EMPTY || old_value == NULL) { - if (deflt) { - return Py_NewRef(deflt); + if (result) { + *result = NULL; } - _PyErr_SetKeyError(key); - return NULL; + return 0; } + assert(old_value != NULL); + PyInterpreterState *interp = _PyInterpreterState_GET(); uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_DELETED, mp, key, NULL); delitem_common(mp, hash, ix, Py_NewRef(old_value), new_version); ASSERT_CONSISTENT(mp); - return old_value; + if (result) { + *result = old_value; + } + else { + Py_DECREF(old_value); + } + return 1; } -PyObject * -_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) -{ - Py_hash_t hash; - if (((PyDictObject *)dict)->ma_used == 0) { - if (deflt) { - return Py_NewRef(deflt); +int +PyDict_Pop(PyObject *op, PyObject *key, PyObject **result) +{ + if (!PyDict_Check(op)) { + if (result) { + *result = NULL; + } + PyErr_BadInternalCall(); + return -1; + } + PyDictObject *dict = (PyDictObject *)op; + + if (dict->ma_used == 0) { + if (result) { + *result = NULL; + } + return 0; + } + + Py_hash_t hash; + if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) { + if (result) { + *result = NULL; + } + return -1; + } + } + return _PyDict_Pop_KnownHash(dict, key, hash, result); +} + + +int +PyDict_PopString(PyObject *op, const char *key, PyObject **result) +{ + PyObject *key_obj = PyUnicode_FromString(key); + if (key_obj == NULL) { + if (result != NULL) { + *result = NULL; + } + return -1; + } + + int res = PyDict_Pop(op, key_obj, result); + Py_DECREF(key_obj); + return res; +} + + +PyObject * +_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value) +{ + PyObject *result; + if (PyDict_Pop(dict, key, &result) == 0) { + if (default_value != NULL) { + return Py_NewRef(default_value); } _PyErr_SetKeyError(key); return NULL; } - if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return NULL; - } - return _PyDict_Pop_KnownHash(dict, key, hash, deflt); + return result; } + /* Internal version of dict.from_keys(). It is subclass-friendly. */ PyObject * _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) diff --git a/Objects/odictobject.c b/Objects/odictobject.c index b99896319e0..b5280c39e1b 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1049,7 +1049,10 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, return NULL; } /* Now delete the value from the dict. */ - value = _PyDict_Pop_KnownHash(od, key, hash, failobj); + if (_PyDict_Pop_KnownHash((PyDictObject *)od, key, hash, + &value) == 0) { + value = Py_NewRef(failobj); + } } else if (value == NULL && !PyErr_Occurred()) { /* Apply the fallback value, if necessary. */ diff --git a/Objects/structseq.c b/Objects/structseq.c index db4aebebdd8..581d6ad2408 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -8,7 +8,6 @@ */ #include "Python.h" -#include "pycore_dict.h" // _PyDict_Pop() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_modsupport.h" // _PyArg_NoPositional() #include "pycore_object.h" // _PyObject_GC_TRACK() @@ -417,14 +416,13 @@ structseq_replace(PyStructSequence *self, PyObject *args, PyObject *kwargs) // We do not support types with unnamed fields, so we can iterate over // i >= n_visible_fields case without slicing with (i - n_unnamed_fields). for (i = 0; i < n_fields; ++i) { - PyObject *key = PyUnicode_FromString(Py_TYPE(self)->tp_members[i].name); - if (!key) { + PyObject *ob; + if (PyDict_PopString(kwargs, Py_TYPE(self)->tp_members[i].name, + &ob) < 0) { goto error; } - PyObject *ob = _PyDict_Pop(kwargs, key, self->ob_item[i]); - Py_DECREF(key); - if (!ob) { - goto error; + if (ob == NULL) { + ob = Py_NewRef(self->ob_item[i]); } result->ob_item[i] = ob; } diff --git a/Python/import.c b/Python/import.c index 12f586abc2e..f37393bbdc4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -395,8 +395,8 @@ remove_module(PyThreadState *tstate, PyObject *name) PyObject *modules = MODULES(tstate->interp); if (PyDict_CheckExact(modules)) { - PyObject *mod = _PyDict_Pop(modules, name, Py_None); - Py_XDECREF(mod); + // Error is reported to the caller + (void)PyDict_Pop(modules, name, NULL); } else if (PyMapping_DelItem(modules, name) < 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index e28523284f1..c17de44731b 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -125,11 +125,9 @@ sys_set_object(PyInterpreterState *interp, PyObject *key, PyObject *v) } PyObject *sd = interp->sysdict; if (v == NULL) { - v = _PyDict_Pop(sd, key, Py_None); - if (v == NULL) { + if (PyDict_Pop(sd, key, NULL) < 0) { return -1; } - Py_DECREF(v); return 0; } else {