gh-124153: Implement PyType_GetBaseByToken() and Py_tp_token slot (GH-124163)

This commit is contained in:
neonene 2024-09-18 16:18:19 +09:00 committed by GitHub
parent 79a7410236
commit 646f16bdee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 443 additions and 13 deletions

View File

@ -264,6 +264,24 @@ Type Objects
.. versionadded:: 3.11
.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result)
Find the first superclass in *type*'s :term:`method resolution order` whose
:c:macro:`Py_tp_token` token is equal to the given one.
* If found, set *\*result* to a new :term:`strong reference`
to it and return ``1``.
* If not found, set *\*result* to ``NULL`` and return ``0``.
* On error, set *\*result* to ``NULL`` and return ``-1`` with an
exception set.
The *result* argument may be ``NULL``, in which case *\*result* is not set.
Use this if you need only the return value.
The *token* argument may not be ``NULL``.
.. versionadded:: 3.14
.. c:function:: int PyUnstable_Type_AssignVersionTag(PyTypeObject *type)
Attempt to assign a version tag to the given type.
@ -488,6 +506,11 @@ The following functions and structs are used to create
* ``Py_nb_add`` to set :c:member:`PyNumberMethods.nb_add`
* ``Py_sq_length`` to set :c:member:`PySequenceMethods.sq_length`
An additional slot is supported that does not correspond to a
:c:type:`!PyTypeObject` struct field:
* :c:data:`Py_tp_token`
The following “offset” fields cannot be set using :c:type:`PyType_Slot`:
* :c:member:`~PyTypeObject.tp_weaklistoffset`
@ -538,4 +561,47 @@ The following functions and structs are used to create
The desired value of the slot. In most cases, this is a pointer
to a function.
Slots other than ``Py_tp_doc`` may not be ``NULL``.
*pfunc* values may not be ``NULL``, except for the following slots:
* ``Py_tp_doc``
* :c:data:`Py_tp_token` (for clarity, prefer :c:data:`Py_TP_USE_SPEC`
rather than ``NULL``)
.. c:macro:: Py_tp_token
A :c:member:`~PyType_Slot.slot` that records a static memory layout ID
for a class.
If the :c:type:`PyType_Spec` of the class is statically
allocated, the token can be set to the spec using the special value
:c:data:`Py_TP_USE_SPEC`:
.. code-block:: c
static PyType_Slot foo_slots[] = {
{Py_tp_token, Py_TP_USE_SPEC},
It can also be set to an arbitrary pointer, but you must ensure that:
* The pointer outlives the class, so it's not reused for something else
while the class exists.
* It "belongs" to the extension module where the class lives, so it will not
clash with other extensions.
Use :c:func:`PyType_GetBaseByToken` to check if a class's superclass has
a given token -- that is, check whether the memory layout is compatible.
To get the token for a given class (without considering superclasses),
use :c:func:`PyType_GetSlot` with ``Py_tp_token``.
.. versionadded:: 3.14
.. c:namespace:: NULL
.. c:macro:: Py_TP_USE_SPEC
Used as a value with :c:data:`Py_tp_token` to set the token to the
class's :c:type:`PyType_Spec`.
Expands to ``NULL``.
.. versionadded:: 3.14

View File

@ -690,6 +690,7 @@ func,PyType_FromSpec,3.2,,
func,PyType_FromSpecWithBases,3.3,,
func,PyType_GenericAlloc,3.2,,
func,PyType_GenericNew,3.2,,
func,PyType_GetBaseByToken,3.14,,
func,PyType_GetFlags,3.2,,
func,PyType_GetFullyQualifiedName,3.13,,
func,PyType_GetModule,3.10,,

View File

@ -554,6 +554,11 @@ New Features
(Contributed by Victor Stinner in :gh:`107954`.)
* Add :c:func:`PyType_GetBaseByToken` and the :c:data:`Py_tp_token` slot for
easier superclass identification. Together they attempt to resolve the `type checking issue
<https://peps.python.org/pep-0630/#type-checking>`__ mentioned in :pep:`630`
(:gh:`124153`).
Porting to Python 3.14
----------------------

View File

@ -269,6 +269,7 @@ typedef struct _heaptypeobject {
struct _dictkeysobject *ht_cached_keys;
PyObject *ht_module;
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
void *ht_token; // Storage for the "Py_tp_token" slot
struct _specialization_cache _spec_cache; // For use by the specializer.
#ifdef Py_GIL_DISABLED
Py_ssize_t unique_id; // ID used for thread-local refcounting

View File

@ -391,6 +391,10 @@ PyAPI_FUNC(PyObject *) PyType_FromMetaclass(PyTypeObject*, PyObject*, PyType_Spe
PyAPI_FUNC(void *) PyObject_GetTypeData(PyObject *obj, PyTypeObject *cls);
PyAPI_FUNC(Py_ssize_t) PyType_GetTypeDataSize(PyTypeObject *cls);
#endif
/* New in 3.14 */
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000
/* Find the first superclass in the type's MRO whose Py_tp_token equals the
 * given token.  Returns 1 if found, 0 if not found, -1 on error; when the
 * last argument is non-NULL it receives a new strong reference or NULL. */
PyAPI_FUNC(int) PyType_GetBaseByToken(PyTypeObject *, void *, PyTypeObject **);
/* Value for the Py_tp_token slot that sets the token to the type's own
 * PyType_Spec. */
#define Py_TP_USE_SPEC NULL
#endif
/* Generic type check */
PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *);

View File

@ -90,3 +90,7 @@
/* New in 3.14 */
#define Py_tp_vectorcall 82
#endif
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000
/* New in 3.14 */
#define Py_tp_token 83
#endif

View File

@ -1144,6 +1144,77 @@ class CAPITest(unittest.TestCase):
MyType.__module__ = 123
self.assertEqual(get_type_fullyqualname(MyType), 'my_qualname')
def test_get_base_by_token(self):
# Exercise PyType_GetBaseByToken() and the Py_tp_token slot through the
# _testcapi helpers.
def get_base_by_token(src, key, comparable=True):
# Look up `key` starting from `src` twice: once with the type's MRO in
# place and once with tp_mro temporarily cleared by the C helper, which
# makes the lookup fall back to tp_bases.
def run(use_mro):
find_first = _testcapi.pytype_getbasebytoken
# The last argument selects whether a result pointer is passed to the
# C function (True) or NULL is passed instead (False).
ret1, result = find_first(src, key, use_mro, True)
ret2, no_result = find_first(src, key, use_mro, False)
self.assertIn(ret1, (0, 1))
self.assertEqual(ret1, result is not None)
# The return code must not depend on whether a result was requested.
self.assertEqual(ret1, ret2)
self.assertIsNone(no_result)
return result
found_in_mro = run(True)
found_in_bases = run(False)
if comparable:
self.assertIs(found_in_mro, found_in_bases)
return found_in_mro
# Callers that pass comparable=False receive both results.
return found_in_mro, found_in_bases
create_type = _testcapi.create_type_with_token
get_token = _testcapi.get_tp_token
# Py_TP_USE_SPEC is exported as an integer; the macro expands to NULL.
Py_TP_USE_SPEC = _testcapi.Py_TP_USE_SPEC
self.assertEqual(Py_TP_USE_SPEC, 0)
# With Py_TP_USE_SPEC the stored token is the spec's address, not NULL.
A1 = create_type('_testcapi.A1', Py_TP_USE_SPEC)
self.assertTrue(get_token(A1) != Py_TP_USE_SPEC)
# Any other token value is stored as given.
B1 = create_type('_testcapi.B1', id(self))
self.assertTrue(get_token(B1) == id(self))
tokenA1 = get_token(A1)
# find A1 from A1
found = get_base_by_token(A1, tokenA1)
self.assertIs(found, A1)
# no token in static types
STATIC = type(1)
self.assertEqual(get_token(STATIC), 0)
found = get_base_by_token(STATIC, tokenA1)
self.assertIs(found, None)
# no token in pure subtypes
class A2(A1): pass
self.assertEqual(get_token(A2), 0)
# find A1
class Z(STATIC, B1, A2): pass
found = get_base_by_token(Z, tokenA1)
self.assertIs(found, A1)
# searching for NULL token is an error
with self.assertRaises(SystemError):
get_base_by_token(Z, 0)
with self.assertRaises(SystemError):
get_base_by_token(STATIC, 0)
# share the token with A1
C1 = create_type('_testcapi.C1', tokenA1)
self.assertTrue(get_token(C1) == tokenA1)
# find C1 first by shared token
class Z(C1, A2): pass
found = get_base_by_token(Z, tokenA1)
self.assertIs(found, C1)
# B1 not found
found = get_base_by_token(Z, get_token(B1))
self.assertIs(found, None)
# a first argument that is not a type is rejected with TypeError
with self.assertRaises(TypeError):
_testcapi.pytype_getbasebytoken(
'not a type', id(self), True, False)
def test_gen_get_code(self):
def genf(): yield

View File

@ -719,6 +719,7 @@ SYMBOL_NAMES = (
"PyType_FromSpecWithBases",
"PyType_GenericAlloc",
"PyType_GenericNew",
"PyType_GetBaseByToken",
"PyType_GetFlags",
"PyType_GetFullyQualifiedName",
"PyType_GetModule",

View File

@ -1718,7 +1718,7 @@ class SizeofTest(unittest.TestCase):
'3P' # PyMappingMethods
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'7P'
'1PIP' # Specializer cache
+ typeid # heap type id (free-threaded only)
)

View File

@ -0,0 +1,2 @@
Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier
type checking, related to :pep:`489` and :pep:`630`.

View File

@ -2527,4 +2527,10 @@
[function.PyLong_AsUInt64]
added = '3.14'
[const.Py_tp_vectorcall]
added = '3.14'
added = '3.14'
[function.PyType_GetBaseByToken]
added = '3.14'
[const.Py_tp_token]
added = '3.14'
[const.Py_TP_USE_SPEC]
added = '3.14'

View File

@ -500,7 +500,7 @@ CType_Type_dealloc(PyObject *self)
{
StgInfo *info = _PyStgInfo_FromType_NoState(self);
if (!info) {
PyErr_WriteUnraisable(self);
PyErr_WriteUnraisable(NULL); // NULL avoids segfault here
}
if (info) {
PyMem_Free(info->ffi_type_pointer.elements);
@ -560,6 +560,7 @@ static PyMethodDef ctype_methods[] = {
};
static PyType_Slot ctype_type_slots[] = {
{Py_tp_token, Py_TP_USE_SPEC},
{Py_tp_traverse, CType_Type_traverse},
{Py_tp_clear, CType_Type_clear},
{Py_tp_dealloc, CType_Type_dealloc},
@ -569,7 +570,7 @@ static PyType_Slot ctype_type_slots[] = {
{0, NULL},
};
static PyType_Spec pyctype_type_spec = {
PyType_Spec pyctype_type_spec = {
.name = "_ctypes.CType_Type",
.basicsize = -(Py_ssize_t)sizeof(StgInfo),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE |

View File

@ -108,6 +108,7 @@ get_module_state_by_def(PyTypeObject *cls)
}
extern PyType_Spec pyctype_type_spec;
extern PyType_Spec carg_spec;
extern PyType_Spec cfield_spec;
extern PyType_Spec cthunk_spec;
@ -490,16 +491,23 @@ PyStgInfo_FromAny(ctypes_state *state, PyObject *obj, StgInfo **result)
/* A variant of PyStgInfo_FromType that doesn't need the state,
* so it can be called from finalization functions when the module
* state is torn down. Does no checks; cannot fail.
* This inlines the current implementation PyObject_GetTypeData,
* so it might break in the future.
* state is torn down.
*/
static inline StgInfo *
_PyStgInfo_FromType_NoState(PyObject *type)
{
size_t type_basicsize =_Py_SIZE_ROUND_UP(PyType_Type.tp_basicsize,
ALIGNOF_MAX_ALIGN_T);
return (StgInfo *)((char *)type + type_basicsize);
PyTypeObject *PyCType_Type;
if (PyType_GetBaseByToken(Py_TYPE(type), &pyctype_type_spec, &PyCType_Type) < 0) {
return NULL;
}
if (PyCType_Type == NULL) {
PyErr_Format(PyExc_TypeError, "expected a ctypes type, got '%N'", type);
return NULL;
}
StgInfo *info = PyObject_GetTypeData(type, PyCType_Type);
Py_DECREF(PyCType_Type);
return info;
}
// Initialize StgInfo on a newly created type

View File

@ -410,6 +410,118 @@ pyobject_getitemdata(PyObject *self, PyObject *o)
}
/* Test helper: create_type_with_token(name, token) -> new heap type.
 *
 * `token` is a Python int that is converted to a pointer and installed as
 * the Py_tp_token slot of a new type called `name`.
 *
 * When the token is Py_TP_USE_SPEC (NULL), a throwaway type is first built
 * from a static spec to verify that the slot resolves to the spec's address;
 * that address is then used as the explicit token of the returned type.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
create_type_with_token(PyObject *module, PyObject *args)
{
    const char *name;
    PyObject *py_token;
    if (!PyArg_ParseTuple(args, "sO", &name, &py_token)) {
        return NULL;
    }
    void *token = PyLong_AsVoidPtr(py_token);
    if (token == NULL && PyErr_Occurred()) {
        // NULL is also a legitimate value (Py_TP_USE_SPEC), so a failed
        // conversion must be told apart before NULL is interpreted below.
        return NULL;
    }
    if (token == Py_TP_USE_SPEC) {
        // Py_TP_USE_SPEC requires a spec that at least outlives the class
        static PyType_Slot slots[] = {
            {Py_tp_token, Py_TP_USE_SPEC},
            {0},
        };
        static PyType_Spec spec = {
            .name = "_testcapi.DefaultTokenTest",
            .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
            .slots = slots,
        };
        PyObject *type = PyType_FromMetaclass(NULL, NULL, &spec, NULL);
        if (!type) {
            return NULL;
        }
        token = PyType_GetSlot((PyTypeObject *)type, Py_tp_token);
        assert(!PyErr_Occurred());
        Py_DECREF(type);
        if (token != &spec) {
            PyErr_SetString(PyExc_AssertionError,
                            "failed to convert token from Py_TP_USE_SPEC");
            return NULL;
        }
        // From here on, `token` is the address of the static spec above.
    }
    // Test non-NULL token that must also outlive the class
    PyType_Slot slots[] = {
        {Py_tp_token, token},
        {0},
    };
    PyType_Spec spec = {
        .name = name,
        .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
        .slots = slots,
    };
    return PyType_FromMetaclass(NULL, module, &spec, NULL);
}
/* Test helper: get_tp_token(type) -> int.
 *
 * Returns the value of the type's own Py_tp_token slot (as obtained from
 * PyType_GetSlot) converted to a Python int; a NULL token becomes 0.
 * Raises TypeError if the argument is not a type object.
 */
static PyObject *
get_tp_token(PyObject *self, PyObject *type)
{
    if (!PyType_Check(type)) {
        // PyType_GetSlot() reads type-object fields; refuse anything else
        // instead of reading past the end of an unrelated object.
        PyErr_Format(PyExc_TypeError,
                     "expected a type, got a '%T' object", type);
        return NULL;
    }
    void *token = PyType_GetSlot((PyTypeObject *)type, Py_tp_token);
    if (PyErr_Occurred()) {
        // A NULL token is a valid answer, so errors are detected separately.
        return NULL;
    }
    return PyLong_FromVoidPtr(token);
}
/* Test helper:
 * pytype_getbasebytoken(type, token, use_mro, need_result) -> (ret, base)
 *
 *  - token:       Python int converted to the pointer being searched for.
 *  - use_mro:     if not True, tp_mro is cleared for the duration of the call
 *                 so that the tp_bases fallback is exercised.
 *  - need_result: if True, a result pointer is passed to
 *                 PyType_GetBaseByToken(); otherwise NULL is passed.
 *
 * Returns a 2-tuple of the C return code and the type found (or None), or
 * NULL with an exception set.
 */
static PyObject *
pytype_getbasebytoken(PyObject *self, PyObject *args)
{
    PyTypeObject *type;
    PyObject *py_token, *use_mro, *need_result;
    if (!PyArg_ParseTuple(args, "OOOO",
                          &type, &py_token, &use_mro, &need_result)) {
        return NULL;
    }

    // Convert the token before touching the type, so that a conversion
    // failure is reported as-is and leaves tp_mro untouched.
    void *token = PyLong_AsVoidPtr(py_token);
    if (token == NULL && PyErr_Occurred()) {
        return NULL;
    }

    PyObject *mro_save = NULL;
    if (use_mro != Py_True) {
        // Test internal detail: PyType_GetBaseByToken works even with
        // types that are only partially initialized (or torn down):
        // if tp_mro=NULL we fall back to tp_bases.
        if (!PyType_Check(type)) {
            // Never write to tp_mro of something that is not a type.
            PyErr_Format(PyExc_TypeError,
                         "expected a type, got a '%T' object",
                         (PyObject *)type);
            return NULL;
        }
        mro_save = type->tp_mro;
        type->tp_mro = NULL;
    }

    PyObject *result = NULL;
    int ret;
    if (need_result == Py_True) {
        ret = PyType_GetBaseByToken(type, token, (PyTypeObject **)&result);
    }
    else {
        ret = PyType_GetBaseByToken(type, token, NULL);
    }

    if (use_mro != Py_True) {
        // Restore the MRO regardless of the outcome of the lookup.
        type->tp_mro = mro_save;
    }
    if (ret < 0) {
        assert(result == NULL);
        return NULL;
    }

    PyObject *py_ret = PyLong_FromLong(ret);
    if (py_ret == NULL) {
        goto error;
    }
    PyObject *tuple = PyTuple_New(2);
    if (tuple == NULL) {
        goto error;
    }
    // The tuple takes over the references to py_ret and result.
    PyTuple_SET_ITEM(tuple, 0, py_ret);
    PyTuple_SET_ITEM(tuple, 1, result ? result : Py_NewRef(Py_None));
    return tuple;

error:
    Py_XDECREF(py_ret);
    Py_XDECREF(result);
    return NULL;
}
static PyMethodDef TestMethods[] = {
{"pytype_fromspec_meta", pytype_fromspec_meta, METH_O},
{"test_type_from_ephemeral_spec", test_type_from_ephemeral_spec, METH_NOARGS},
@ -423,6 +535,9 @@ static PyMethodDef TestMethods[] = {
{"make_immutable_type_with_base", make_immutable_type_with_base, METH_O},
{"make_type_with_base", make_type_with_base, METH_O},
{"pyobject_getitemdata", pyobject_getitemdata, METH_O},
{"create_type_with_token", create_type_with_token, METH_VARARGS},
{"get_tp_token", get_tp_token, METH_O},
{"pytype_getbasebytoken", pytype_getbasebytoken, METH_VARARGS},
{NULL},
};
@ -1287,6 +1402,8 @@ _PyTestCapi_Init_Heaptype(PyObject *m) {
&PyType_Type, m, &HeapCTypeMetaclassNullNew_spec, (PyObject *) &PyType_Type);
ADD("HeapCTypeMetaclassNullNew", HeapCTypeMetaclassNullNew);
ADD("Py_TP_USE_SPEC", PyLong_FromVoidPtr(Py_TP_USE_SPEC));
PyObject *HeapCCollection = PyType_FromMetaclass(
NULL, m, &HeapCCollection_spec, NULL);
if (HeapCCollection == NULL) {

View File

@ -3926,6 +3926,7 @@ type_new_alloc(type_new_ctx *ctx)
et->ht_name = Py_NewRef(ctx->name);
et->ht_module = NULL;
et->_ht_tpname = NULL;
et->ht_token = NULL;
#ifdef Py_GIL_DISABLED
_PyType_AssignId(et);
@ -4984,6 +4985,11 @@ PyType_FromMetaclass(
}
}
break;
case Py_tp_token:
{
res->ht_token = slot->pfunc == Py_TP_USE_SPEC ? spec : slot->pfunc;
}
break;
default:
{
/* Copy other slots directly */
@ -5144,8 +5150,15 @@ PyType_GetSlot(PyTypeObject *type, int slot)
PyErr_BadInternalCall();
return NULL;
}
int slot_offset = pyslot_offsets[slot].slot_offset;
parent_slot = *(void**)((char*)type + pyslot_offsets[slot].slot_offset);
if (slot_offset >= (int)sizeof(PyTypeObject)) {
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
return NULL;
}
}
parent_slot = *(void**)((char*)type + slot_offset);
if (parent_slot == NULL) {
return NULL;
}
@ -5274,6 +5287,129 @@ _PyType_GetModuleByDef2(PyTypeObject *left, PyTypeObject *right,
return module;
}
/* Depth-first search through tp_bases for a heap type whose ht_token equals
 * `token`.  Used when the type has no tp_mro.  The caller must already have
 * checked `type` itself.  Returns a borrowed reference, or NULL if no base
 * matches; bases that are not heap types are skipped without descending.
 */
static PyTypeObject *
get_base_by_token_recursive(PyTypeObject *type, void *token)
{
    assert(PyType_GetSlot(type, Py_tp_token) != token);
    PyObject *bases = lookup_tp_bases(type);
    assert(bases != NULL);

    const Py_ssize_t nbases = PyTuple_GET_SIZE(bases);
    for (Py_ssize_t idx = 0; idx < nbases; idx++) {
        PyTypeObject *candidate = _PyType_CAST(PyTuple_GET_ITEM(bases, idx));
        if (_PyType_HasFeature(candidate, Py_TPFLAGS_HEAPTYPE)) {
            if (((PyHeapTypeObject *)candidate)->ht_token == token) {
                return candidate;
            }
            // Not a direct match: look further up this branch.
            PyTypeObject *found = get_base_by_token_recursive(candidate, token);
            if (found != NULL) {
                return found;
            }
        }
    }
    return NULL;
}
/* Scan type->tp_mro (excluding the type itself) for the first heap type
 * whose ht_token equals `token`.  Returns a borrowed reference or NULL.
 * The caller must guarantee that tp_mro is set and that `type` itself has
 * already been checked.
 */
static inline PyTypeObject *
get_base_by_token_from_mro(PyTypeObject *type, void *token)
{
    // Read tp_mro directly instead of going through lookup_tp_mro(),
    // in the same way PyType_IsSubtype() does.
    PyObject *mro = type->tp_mro;
    assert(mro != NULL);
    assert(PyTuple_Check(mro));
    // mro_invoke() ensures that the type MRO cannot be empty.
    assert(PyTuple_GET_SIZE(mro) >= 1);
    // Entry 0 is the type itself; the caller has dealt with it, so the
    // scan below starts at index 1.
    assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type);
    assert(PyType_GetSlot(type, Py_tp_token) != token);

    const Py_ssize_t mro_len = PyTuple_GET_SIZE(mro);
    for (Py_ssize_t pos = 1; pos < mro_len; pos++) {
        PyTypeObject *candidate = _PyType_CAST(PyTuple_GET_ITEM(mro, pos));
        if (_PyType_HasFeature(candidate, Py_TPFLAGS_HEAPTYPE)
            && ((PyHeapTypeObject *)candidate)->ht_token == token)
        {
            return candidate;
        }
    }
    return NULL;
}
/* Variant of the token lookup that reports only whether a match exists.
 * Returns 1 if `type` or one of its superclasses carries `token`, 0 if not,
 * and -1 with an exception set when `token` is NULL (SystemError) or `type`
 * is not a type object (TypeError).
 */
static int
check_base_by_token(PyTypeObject *type, void *token) {
    // Argument validation first; each case below returns on its own.
    if (token == NULL) {
        PyErr_Format(PyExc_SystemError,
                     "PyType_GetBaseByToken called with token=NULL");
        return -1;
    }
    if (!PyType_Check(type)) {
        PyErr_Format(PyExc_TypeError,
                     "expected a type, got a '%T' object", type);
        return -1;
    }
    if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        // Only heap types are searched for tokens.
        return 0;
    }
    if (((PyHeapTypeObject*)type)->ht_token == token) {
        return 1;
    }

    // Prefer the MRO when it is available, otherwise walk tp_bases.
    PyTypeObject *match;
    if (type->tp_mro != NULL) {
        match = get_base_by_token_from_mro(type, token);
    }
    else {
        match = get_base_by_token_recursive(type, token);
    }
    return match != NULL;
}
/* Find the first superclass of `type` (including `type` itself) whose
 * Py_tp_token equals `token`.
 *
 * Returns 1 and stores a new strong reference in *result when found,
 * returns 0 and stores NULL when not found, and returns -1 with an exception
 * set (storing NULL) when `token` is NULL or `type` is not a type object.
 * `result` may be NULL, in which case only the return value is produced.
 */
int
PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result)
{
    if (result == NULL) {
        // No output requested: the check-only helper does all the work.
        return check_base_by_token(type, token);
    }
    if (token == NULL || !PyType_Check(type)) {
        // Invalid arguments: let the helper raise the exception.
        *result = NULL;
        return check_base_by_token(type, token);
    }

    PyTypeObject *found = NULL;
    // No static type has a heaptype superclass, which is ensured by
    // type_ready_mro(), so only heap types are searched.
    if (_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        if (((PyHeapTypeObject*)type)->ht_token == token) {
            found = type;
        }
        else if (type->tp_mro != NULL) {
            found = get_base_by_token_from_mro(type, token);
        }
        else {
            // tp_mro is not set: fall back to walking tp_bases.
            found = get_base_by_token_recursive(type, token);
        }
    }

    if (found == NULL) {
        *result = NULL;
        return 0;
    }
    *result = (PyTypeObject *)Py_NewRef(found);
    return 1;
}
void *
PyObject_GetTypeData(PyObject *obj, PyTypeObject *cls)
{
@ -5966,6 +6102,7 @@ type_dealloc(PyObject *self)
#ifdef Py_GIL_DISABLED
_PyType_ReleaseId(et);
#endif
et->ht_token = NULL;
Py_TYPE(type)->tp_free((PyObject *)type);
}

1
Objects/typeslots.inc generated
View File

@ -81,3 +81,4 @@
{-1, offsetof(PyTypeObject, tp_finalize)},
{offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)},
{-1, offsetof(PyTypeObject, tp_vectorcall)},
{-1, offsetof(PyHeapTypeObject, ht_token)},

View File

@ -13,7 +13,11 @@ def generate_typeslots(out=sys.stdout):
continue
member = m.group(1)
if member.startswith("tp_"):
if member == "tp_token":
# The heap type structure (ht_*) is an implementation detail;
# the public slot for it has a familiar `tp_` prefix
member = '{-1, offsetof(PyHeapTypeObject, ht_token)}'
elif member.startswith("tp_"):
member = f'{{-1, offsetof(PyTypeObject, {member})}}'
elif member.startswith("am_"):
member = (f'{{offsetof(PyAsyncMethods, {member}),'+

1
PC/python3dll.c generated
View File

@ -651,6 +651,7 @@ EXPORT_FUNC(PyType_FromSpec)
EXPORT_FUNC(PyType_FromSpecWithBases)
EXPORT_FUNC(PyType_GenericAlloc)
EXPORT_FUNC(PyType_GenericNew)
EXPORT_FUNC(PyType_GetBaseByToken)
EXPORT_FUNC(PyType_GetFlags)
EXPORT_FUNC(PyType_GetFullyQualifiedName)
EXPORT_FUNC(PyType_GetModule)