gh-110815: Support non-ASCII keyword names in PyArg_ParseTupleAndKeywords() (GH-110816)

It already mostly worked, except when an invalid keyword argument with
a non-ASCII name was passed to a function with non-ASCII parameter
names; in that case it crashed in debug mode.
This commit is contained in:
Serhiy Storchaka 2023-10-14 08:50:03 +03:00 committed by GitHub
parent ce298a1c15
commit 7284e0ef84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 3 deletions

View File

@ -416,8 +416,10 @@ API Functions
.. c:function:: int PyArg_ParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], ...)
Parse the parameters of a function that takes both positional and keyword
parameters into local variables. The *keywords* argument is a
``NULL``-terminated array of keyword parameter names. Empty names denote
parameters into local variables.
The *keywords* argument is a ``NULL``-terminated array of keyword parameter
names specified as null-terminated ASCII or UTF-8 encoded C strings.
Empty names denote
:ref:`positional-only parameters <positional-only_parameter>`.
Returns true on success; on failure, it returns false and raises the
appropriate exception.
@ -426,6 +428,9 @@ API Functions
Added support for :ref:`positional-only parameters
<positional-only_parameter>`.
.. versionchanged:: 3.13
Added support for non-ASCII keyword parameter names.
.. c:function:: int PyArg_VaParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], va_list vargs)

View File

@ -1045,6 +1045,10 @@ New Features
but pass event arguments as a Python :class:`tuple` object.
(Contributed by Victor Stinner in :gh:`85283`.)
* :c:func:`PyArg_ParseTupleAndKeywords` now supports non-ASCII keyword
parameter names.
(Contributed by Serhiy Storchaka in :gh:`110815`.)
Porting to Python 3.13
----------------------

View File

@ -1235,6 +1235,57 @@ class ParseTupleAndKeywords_Test(unittest.TestCase):
with self.assertRaisesRegex(SystemError, 'Empty keyword'):
parse((1,), {}, 'O|OO', ['', 'a', ''])
def test_nonascii_keywords(self):
    """Check parse_tuple_and_keywords() with non-ASCII keyword names.

    For each sample name the test verifies successful parsing, the text
    of the TypeError messages that mention the name, and the behaviour
    when the keyword list entry is not valid UTF-8.
    """
    parse = _testcapi.parse_tuple_and_keywords

    # One sample per UTF-8 encoded length: 1 byte ('a'), 2 bytes ('ä' and
    # 'ŷ'), 3 bytes ('㷷') and 4 bytes ('𐀀').  An empty string must not be
    # used as a sample: empty names denote positional-only parameters.
    # NOTE(review): the 3-byte samples ('㷷' here, 'Ɐ' in the name2 loop)
    # replace entries that had degraded to '' -- confirm against upstream.
    for name in ('a', 'ä', 'ŷ', '㷷', '𐀀'):
        with self.subTest(name=name):
            # The parameter is accepted by keyword; when optional and
            # omitted, the slot is left as NULL.
            self.assertEqual(parse((), {name: 1}, 'O', [name]), (1,))
            self.assertEqual(parse((), {}, '|O', [name]), (NULL,))

            # Error messages must quote the (possibly non-ASCII) name.
            with self.assertRaisesRegex(TypeError,
                    f"function missing required argument '{name}'"):
                parse((), {}, 'O', [name])
            with self.assertRaisesRegex(TypeError,
                    fr"argument for function given by name \('{name}'\) "
                    fr"and position \(1\)"):
                parse((1,), {name: 2}, 'O|O', [name, 'b'])
            with self.assertRaisesRegex(TypeError,
                    f"'{name}' is an invalid keyword argument"):
                parse((), {name: 1}, '|O', ['b'])
            with self.assertRaisesRegex(TypeError,
                    "'b' is an invalid keyword argument"):
                parse((), {'b': 1}, '|O', [name])

            # A keyword list entry that is not valid UTF-8: the encoded
            # name followed by a truncated copy of itself, or by a lone
            # 0x80 byte when the name encodes to a single byte.
            invalid = name.encode() + (name.encode()[:-1] or b'\x80')
            self.assertEqual(parse((), {}, '|O', [invalid]), (NULL,))
            self.assertEqual(parse((1,), {'b': 2}, 'O|O', [invalid, 'b']),
                             (1, 2))
            with self.assertRaisesRegex(TypeError,
                    f"function missing required argument '{name}\ufffd'"):
                parse((), {}, 'O', [invalid])
            with self.assertRaisesRegex(UnicodeDecodeError,
                    "'utf-8' codec can't decode bytes? "):
                parse((), {'b': 1}, '|OO', [invalid, 'b'])
            with self.assertRaisesRegex(UnicodeDecodeError,
                    "'utf-8' codec can't decode bytes? "):
                parse((), {'b': 1}, '|O', [invalid])

            # Unknown keywords of every encoded length are rejected and
            # reported by name.
            for name2 in ('b', 'ë', 'ĉ', 'Ɐ', '𐀁'):
                with self.subTest(name2=name2):
                    with self.assertRaisesRegex(TypeError,
                            f"'{name2}' is an invalid keyword argument"):
                        parse((), {name2: 1}, '|O', [name])

            # The UTF-8 bytes of the name reinterpreted as Latin-1 give a
            # different string for every non-ASCII name; it must not be
            # accepted in place of the real name.
            name2 = name.encode().decode('latin1')
            if name2 != name:
                with self.assertRaisesRegex(TypeError,
                        f"'{name2}' is an invalid keyword argument"):
                    parse((), {name2: 1}, '|O', [name])
                name3 = name + '3'
                with self.assertRaisesRegex(TypeError,
                        f"'{name2}' is an invalid keyword argument"):
                    parse((), {name2: 1, name3: 2}, '|OO', [name, name3])
class Test_testcapi(unittest.TestCase):
locals().update((name, getattr(_testcapi, name))

View File

@ -0,0 +1 @@
Support non-ASCII keyword names in :c:func:`PyArg_ParseTupleAndKeywords`.

View File

@ -1729,7 +1729,7 @@ vgetargskeywords(PyObject *args, PyObject *kwargs, const char *format,
return cleanreturn(0, &freelist);
}
for (i = pos; i < len; i++) {
if (_PyUnicode_EqualToASCIIString(key, kwlist[i])) {
if (PyUnicode_EqualToUTF8(key, kwlist[i])) {
match = 1;
break;
}