mirror of
https://github.com/python/cpython.git
synced 2025-01-21 07:55:16 +08:00
Issue #24000: Improved Argument Clinic's mapping of converters to legacy
"format units". Updated the documentation to match.
This commit is contained in:
parent
95283fb589
commit
38337d1e15
@ -758,6 +758,14 @@ All Argument Clinic converters accept the following arguments:
|
||||
In addition, some converters accept additional arguments. Here is a list
|
||||
of these arguments, along with their meanings:
|
||||
|
||||
``accept``
|
||||
A set of Python types (and possibly pseudo-types);
|
||||
this restricts the allowable Python argument to values of these types.
|
||||
(This is not a general-purpose facility; as a rule it only supports
|
||||
specific lists of types as shown in the legacy converter table.)
|
||||
|
||||
To accept ``None``, add ``NoneType`` to this set.
|
||||
|
||||
``bitwise``
|
||||
Only supported for unsigned integers. The native integer value of this
|
||||
Python argument will be written to the parameter without any range checking,
|
||||
@ -772,39 +780,27 @@ of these arguments, along with their meanings:
|
||||
Only supported for strings. Specifies the encoding to use when converting
|
||||
this string from a Python str (Unicode) value into a C ``char *`` value.
|
||||
|
||||
``length``
|
||||
Only supported for strings. If true, requests that the length of the
|
||||
string be passed in to the impl function, just after the string parameter,
|
||||
in a parameter named ``<parameter_name>_length``.
|
||||
|
||||
``nullable``
|
||||
Only supported for strings. If true, this parameter may also be set to
|
||||
``None``, in which case the C parameter will be set to ``NULL``.
|
||||
|
||||
``subclass_of``
|
||||
Only supported for the ``object`` converter. Requires that the Python
|
||||
value be a subclass of a Python type, as expressed in C.
|
||||
|
||||
``types``
|
||||
Only supported for the ``object`` (and ``self``) converter. Specifies
|
||||
``type``
|
||||
Only supported for the ``object`` and ``self`` converters. Specifies
|
||||
the C type that will be used to declare the variable. Default value is
|
||||
``"PyObject *"``.
|
||||
|
||||
``types``
|
||||
A string containing a list of Python types (and possibly pseudo-types);
|
||||
this restricts the allowable Python argument to values of these types.
|
||||
(This is not a general-purpose facility; as a rule it only supports
|
||||
specific lists of types as shown in the legacy converter table.)
|
||||
|
||||
``zeroes``
|
||||
Only supported for strings. If true, embedded NUL bytes (``'\\0'``) are
|
||||
permitted inside the value.
|
||||
permitted inside the value. The length of the string will be passed in
|
||||
to the impl function, just after the string parameter, as a parameter named
|
||||
``<parameter_name>_length``.
|
||||
|
||||
Please note, not every possible combination of arguments will work.
|
||||
Often these arguments are implemented internally by specific ``PyArg_ParseTuple``
|
||||
Usually these arguments are implemented by specific ``PyArg_ParseTuple``
|
||||
*format units*, with specific behavior. For example, currently you cannot
|
||||
call ``str`` and pass in ``zeroes=True`` without also specifying an ``encoding``;
|
||||
although it's perfectly reasonable to think this would work, these semantics don't
|
||||
call ``unsigned_short`` without also specifying ``bitwise=True``.
|
||||
Although it's perfectly reasonable to think this would work, these semantics don't
|
||||
map to any existing format unit. So Argument Clinic doesn't support it. (Or, at
|
||||
least, not yet.)
|
||||
|
||||
@ -816,13 +812,13 @@ on the right is the text you'd replace it with.
|
||||
``'B'`` ``unsigned_char(bitwise=True)``
|
||||
``'b'`` ``unsigned_char``
|
||||
``'c'`` ``char``
|
||||
``'C'`` ``int(types='str')``
|
||||
``'C'`` ``int(accept={str})``
|
||||
``'d'`` ``double``
|
||||
``'D'`` ``Py_complex``
|
||||
``'es#'`` ``str(encoding='name_of_encoding', length=True, zeroes=True)``
|
||||
``'es'`` ``str(encoding='name_of_encoding')``
|
||||
``'et#'`` ``str(encoding='name_of_encoding', types='bytes bytearray str', length=True)``
|
||||
``'et'`` ``str(encoding='name_of_encoding', types='bytes bytearray str')``
|
||||
``'es#'`` ``str(encoding='name_of_encoding', zeroes=True)``
|
||||
``'et'`` ``str(encoding='name_of_encoding', accept={bytes, bytearray, str})``
|
||||
``'et#'`` ``str(encoding='name_of_encoding', accept={bytes, bytearray, str}, zeroes=True)``
|
||||
``'f'`` ``float``
|
||||
``'h'`` ``short``
|
||||
``'H'`` ``unsigned_short(bitwise=True)``
|
||||
@ -832,27 +828,27 @@ on the right is the text you'd replace it with.
|
||||
``'K'`` ``unsigned_PY_LONG_LONG(bitwise=True)``
|
||||
``'L'`` ``PY_LONG_LONG``
|
||||
``'n'`` ``Py_ssize_t``
|
||||
``'O'`` ``object``
|
||||
``'O!'`` ``object(subclass_of='&PySomething_Type')``
|
||||
``'O&'`` ``object(converter='name_of_c_function')``
|
||||
``'O'`` ``object``
|
||||
``'p'`` ``bool``
|
||||
``'s#'`` ``str(length=True)``
|
||||
``'S'`` ``PyBytesObject``
|
||||
``'s'`` ``str``
|
||||
``'s*'`` ``Py_buffer(types='str bytes bytearray buffer')``
|
||||
``'u#'`` ``Py_UNICODE(length=True)``
|
||||
``'u'`` ``Py_UNICODE``
|
||||
``'s#'`` ``str(zeroes=True)``
|
||||
``'s*'`` ``Py_buffer(accept={buffer, str})``
|
||||
``'U'`` ``unicode``
|
||||
``'w*'`` ``Py_buffer(types='bytearray rwbuffer')``
|
||||
``'y#'`` ``str(types='bytes', length=True)``
|
||||
``'u'`` ``Py_UNICODE``
|
||||
``'u#'`` ``Py_UNICODE(zeroes=True)``
|
||||
``'w*'`` ``Py_buffer(accept={rwbuffer})``
|
||||
``'Y'`` ``PyByteArrayObject``
|
||||
``'y'`` ``str(types='bytes')``
|
||||
``'y'`` ``str(accept={bytes})``
|
||||
``'y#'`` ``str(accept={robuffer}, zeroes=True)``
|
||||
``'y*'`` ``Py_buffer``
|
||||
``'Z#'`` ``Py_UNICODE(nullable=True, length=True)``
|
||||
``'z#'`` ``str(nullable=True, length=True)``
|
||||
``'Z'`` ``Py_UNICODE(nullable=True)``
|
||||
``'z'`` ``str(nullable=True)``
|
||||
``'z*'`` ``Py_buffer(types='str bytes bytearray buffer', nullable=True)``
|
||||
``'Z'`` ``Py_UNICODE(accept={str, NoneType})``
|
||||
``'Z#'`` ``Py_UNICODE(accept={str, NoneType}, zeroes=True)``
|
||||
``'z'`` ``str(accept={str, NoneType})``
|
||||
``'z#'`` ``str(accept={str, NoneType}, zeroes=True)``
|
||||
``'z*'`` ``Py_buffer(accept={buffer, str, NoneType})``
|
||||
========= =================================================================================
|
||||
|
||||
As an example, here's our sample ``pickle.Pickler.dump`` using the proper
|
||||
|
@ -82,6 +82,9 @@ Documentation
|
||||
Tools/Demos
|
||||
-----------
|
||||
|
||||
- Issue #24000: Improved Argument Clinic's mapping of converters to legacy
|
||||
"format units". Updated the documentation to match.
|
||||
|
||||
- Issue #24001: Argument Clinic converters now use accept={type}
|
||||
instead of types={'type'} to specify the types the converter accepts.
|
||||
|
||||
|
@ -274,7 +274,7 @@ static PySequenceMethods dbm_as_sequence = {
|
||||
/*[clinic input]
|
||||
_dbm.dbm.get
|
||||
|
||||
key: str(accept={str, robuffer}, length=True)
|
||||
key: str(accept={str, robuffer}, zeroes=True)
|
||||
default: object(c_default="NULL") = b''
|
||||
/
|
||||
|
||||
@ -284,7 +284,8 @@ Return the value for key if present, otherwise default.
|
||||
static PyObject *
|
||||
_dbm_dbm_get_impl(dbmobject *self, const char *key,
|
||||
Py_ssize_clean_t key_length, PyObject *default_value)
|
||||
/*[clinic end generated code: output=b44f95eba8203d93 input=3c7c1afd9c508457]*/
|
||||
/*[clinic end generated code: output=b44f95eba8203d93 input=a3a279957f85eb6d]*/
|
||||
/*[clinic end generated code: output=4f5c0e523eaf1251 input=9402c0af8582dc69]*/
|
||||
{
|
||||
datum dbm_key, val;
|
||||
|
||||
@ -301,7 +302,7 @@ _dbm_dbm_get_impl(dbmobject *self, const char *key,
|
||||
|
||||
/*[clinic input]
|
||||
_dbm.dbm.setdefault
|
||||
key: str(accept={str, robuffer}, length=True)
|
||||
key: str(accept={str, robuffer}, zeroes=True)
|
||||
default: object(c_default="NULL") = b''
|
||||
/
|
||||
|
||||
@ -314,7 +315,7 @@ static PyObject *
|
||||
_dbm_dbm_setdefault_impl(dbmobject *self, const char *key,
|
||||
Py_ssize_clean_t key_length,
|
||||
PyObject *default_value)
|
||||
/*[clinic end generated code: output=52545886cf272161 input=a66fcb7f18ee2f50]*/
|
||||
/*[clinic end generated code: output=52545886cf272161 input=bf40c48edaca01d6]*/
|
||||
{
|
||||
datum dbm_key, val;
|
||||
Py_ssize_t tmp_size;
|
||||
|
@ -383,7 +383,7 @@ _gdbm_gdbm_firstkey_impl(dbmobject *self)
|
||||
/*[clinic input]
|
||||
_gdbm.gdbm.nextkey
|
||||
|
||||
key: str(accept={str, robuffer}, length=True)
|
||||
key: str(accept={str, robuffer}, zeroes=True)
|
||||
/
|
||||
|
||||
Returns the key that follows key in the traversal.
|
||||
@ -400,7 +400,7 @@ to create a list in memory that contains them all:
|
||||
static PyObject *
|
||||
_gdbm_gdbm_nextkey_impl(dbmobject *self, const char *key,
|
||||
Py_ssize_clean_t key_length)
|
||||
/*[clinic end generated code: output=192ab892de6eb2f6 input=1eb2ff9b4b0e6ffd]*/
|
||||
/*[clinic end generated code: output=192ab892de6eb2f6 input=1f1606943614e36f]*/
|
||||
{
|
||||
PyObject *v;
|
||||
datum dbm_key, nextkey;
|
||||
|
@ -1673,7 +1673,7 @@ array_array_tostring_impl(arrayobject *self)
|
||||
/*[clinic input]
|
||||
array.array.fromunicode
|
||||
|
||||
ustr: Py_UNICODE(length=True)
|
||||
ustr: Py_UNICODE(zeroes=True)
|
||||
/
|
||||
|
||||
Extends this array with data from the unicode string ustr.
|
||||
@ -1686,7 +1686,7 @@ some other type.
|
||||
static PyObject *
|
||||
array_array_fromunicode_impl(arrayobject *self, Py_UNICODE *ustr,
|
||||
Py_ssize_clean_t ustr_length)
|
||||
/*[clinic end generated code: output=ebb72fc16975e06d input=56bcedb5ef70139f]*/
|
||||
/*[clinic end generated code: output=ebb72fc16975e06d input=150f00566ffbca6e]*/
|
||||
{
|
||||
char typecode;
|
||||
|
||||
|
@ -1215,7 +1215,7 @@ unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
|
||||
unicodedata.UCD.lookup
|
||||
|
||||
self: self
|
||||
name: str(accept={str, robuffer}, length=True)
|
||||
name: str(accept={str, robuffer}, zeroes=True)
|
||||
/
|
||||
|
||||
Look up character by name.
|
||||
@ -1227,7 +1227,7 @@ corresponding character. If not found, KeyError is raised.
|
||||
static PyObject *
|
||||
unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
|
||||
Py_ssize_clean_t name_length)
|
||||
/*[clinic end generated code: output=765cb8186788e6be input=2dfe682c2491447a]*/
|
||||
/*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/
|
||||
{
|
||||
Py_UCS4 code;
|
||||
unsigned int index;
|
||||
|
@ -2644,64 +2644,85 @@ class buffer: pass
|
||||
class rwbuffer: pass
|
||||
class robuffer: pass
|
||||
|
||||
@add_legacy_c_converter('s#', accept={str, robuffer}, length=True)
|
||||
@add_legacy_c_converter('y', accept={robuffer})
|
||||
@add_legacy_c_converter('y#', accept={robuffer}, length=True)
|
||||
@add_legacy_c_converter('z', accept={str, NoneType})
|
||||
@add_legacy_c_converter('z#', accept={str, NoneType}, length=True)
|
||||
# add_legacy_c_converter not supported for es, es#, et, et#
|
||||
# because of their extra encoding argument
|
||||
def str_converter_key(types, encoding, zeroes):
|
||||
return (frozenset(types), bool(encoding), bool(zeroes))
|
||||
|
||||
str_converter_argument_map = {}
|
||||
|
||||
class str_converter(CConverter):
|
||||
type = 'const char *'
|
||||
default_type = (str, Null, NoneType)
|
||||
format_unit = 's'
|
||||
|
||||
def converter_init(self, *, encoding=None, accept={str}, length=False, zeroes=False):
|
||||
|
||||
self.length = bool(length)
|
||||
|
||||
is_b_or_ba = accept == {bytes, bytearray}
|
||||
is_b_or_ba_or_none = accept == {bytes, bytearray, NoneType}
|
||||
is_str = accept == {str}
|
||||
is_str_or_none = accept == {str, NoneType}
|
||||
is_robuffer = accept == {robuffer}
|
||||
is_str_or_robuffer = accept == {str, robuffer}
|
||||
is_str_or_robuffer_or_none = accept == {str, robuffer, NoneType}
|
||||
|
||||
format_unit = None
|
||||
|
||||
if encoding:
|
||||
self.encoding = encoding
|
||||
|
||||
if is_str and not length and not zeroes:
|
||||
format_unit = 'es'
|
||||
elif is_str_or_none and length and zeroes:
|
||||
format_unit = 'es#'
|
||||
elif is_b_or_ba and not length and not zeroes:
|
||||
format_unit = 'et'
|
||||
elif is_b_or_ba_or_none and length and zeroes:
|
||||
format_unit = 'et#'
|
||||
|
||||
else:
|
||||
if zeroes:
|
||||
fail("str_converter: illegal combination of arguments (zeroes is only legal with an encoding)")
|
||||
|
||||
if is_str and not length:
|
||||
format_unit = 's'
|
||||
elif is_str_or_none and not length:
|
||||
format_unit = 'z'
|
||||
elif is_robuffer and not length:
|
||||
format_unit = 'y'
|
||||
elif is_robuffer and length:
|
||||
format_unit = 'y#'
|
||||
elif is_str_or_robuffer and length:
|
||||
format_unit = 's#'
|
||||
elif is_str_or_robuffer_or_none and length:
|
||||
format_unit = 'z#'
|
||||
def converter_init(self, *, accept={str}, encoding=None, zeroes=False):
|
||||
|
||||
key = str_converter_key(accept, encoding, zeroes)
|
||||
format_unit = str_converter_argument_map.get(key)
|
||||
if not format_unit:
|
||||
fail("str_converter: illegal combination of arguments")
|
||||
fail("str_converter: illegal combination of arguments", key)
|
||||
|
||||
self.format_unit = format_unit
|
||||
self.length = bool(zeroes)
|
||||
if encoding:
|
||||
if self.default not in (Null, None, unspecified):
|
||||
fail("str_converter: Argument Clinic doesn't support default values for encoded strings")
|
||||
self.encoding = encoding
|
||||
self.type = 'char *'
|
||||
# sorry, clinic can't support preallocated buffers
|
||||
# for es# and et#
|
||||
self.c_default = "NULL"
|
||||
|
||||
def cleanup(self):
|
||||
if self.encoding:
|
||||
name = ensure_legal_c_identifier(self.name)
|
||||
return "".join(["if (", name, ")\n PyMem_FREE(", name, ");\n"])
|
||||
|
||||
#
|
||||
# This is the fourth or fifth rewrite of registering all the
|
||||
# crazy string converter format units. Previous approaches hid
|
||||
# bugs--generally mismatches between the semantics of the format
|
||||
# unit and the arguments necessary to represent those semantics
|
||||
# properly. Hopefully with this approach we'll get it 100% right.
|
||||
#
|
||||
# The r() function (short for "register") both registers the
|
||||
# mapping from arguments to format unit *and* registers the
|
||||
# legacy C converter for that format unit.
|
||||
#
|
||||
def r(format_unit, *, accept, encoding=False, zeroes=False):
|
||||
if not encoding and format_unit != 's':
|
||||
# add the legacy c converters here too.
|
||||
#
|
||||
# note: add_legacy_c_converter can't work for
|
||||
# es, es#, et, or et#
|
||||
# because of their extra encoding argument
|
||||
#
|
||||
# also don't add the converter for 's' because
|
||||
# the metaclass for CConverter adds it for us.
|
||||
kwargs = {}
|
||||
if accept != {str}:
|
||||
kwargs['accept'] = accept
|
||||
if zeroes:
|
||||
kwargs['zeroes'] = True
|
||||
added_f = functools.partial(str_converter, **kwargs)
|
||||
legacy_converters[format_unit] = added_f
|
||||
|
||||
d = str_converter_argument_map
|
||||
key = str_converter_key(accept, encoding, zeroes)
|
||||
if key in d:
|
||||
sys.exit("Duplicate keys specified for str_converter_argument_map!")
|
||||
d[key] = format_unit
|
||||
|
||||
r('es', encoding=True, accept={str})
|
||||
r('es#', encoding=True, zeroes=True, accept={str})
|
||||
r('et', encoding=True, accept={bytes, bytearray, str})
|
||||
r('et#', encoding=True, zeroes=True, accept={bytes, bytearray, str})
|
||||
r('s', accept={str})
|
||||
r('s#', zeroes=True, accept={robuffer, str})
|
||||
r('y', accept={robuffer})
|
||||
r('y#', zeroes=True, accept={robuffer})
|
||||
r('z', accept={str, NoneType})
|
||||
r('z#', zeroes=True, accept={robuffer, str, NoneType})
|
||||
del r
|
||||
|
||||
|
||||
class PyBytesObject_converter(CConverter):
|
||||
@ -2719,17 +2740,17 @@ class unicode_converter(CConverter):
|
||||
default_type = (str, Null, NoneType)
|
||||
format_unit = 'U'
|
||||
|
||||
@add_legacy_c_converter('u#', length=True)
|
||||
@add_legacy_c_converter('u#', zeroes=True)
|
||||
@add_legacy_c_converter('Z', accept={str, NoneType})
|
||||
@add_legacy_c_converter('Z#', accept={str, NoneType}, length=True)
|
||||
@add_legacy_c_converter('Z#', accept={str, NoneType}, zeroes=True)
|
||||
class Py_UNICODE_converter(CConverter):
|
||||
type = 'Py_UNICODE *'
|
||||
default_type = (str, Null, NoneType)
|
||||
format_unit = 'u'
|
||||
|
||||
def converter_init(self, *, accept={str}, length=False):
|
||||
def converter_init(self, *, accept={str}, zeroes=False):
|
||||
format_unit = 'Z' if accept=={str, NoneType} else 'u'
|
||||
if length:
|
||||
if zeroes:
|
||||
format_unit += '#'
|
||||
self.length = True
|
||||
self.format_unit = format_unit
|
||||
|
Loading…
Reference in New Issue
Block a user