bpo-37540: vectorcall: keyword names must be strings (GH-14682)

The fact that keyword names are strings is now part of the vectorcall and `METH_FASTCALL` protocols. The biggest concrete change is that `_PyStack_UnpackDict` now checks that and raises `TypeError` if not.

CC @markshannon @vstinner 


https://bugs.python.org/issue37540
This commit is contained in:
Jeroen Demeyer 2019-08-16 12:41:27 +02:00 committed by Miss Islington (bot)
parent f3cb68f2e4
commit 0567786d26
10 changed files with 43 additions and 46 deletions

View File

@ -400,8 +400,8 @@ Object Protocol
:c:func:`PyVectorcall_NARGS(nargsf) <PyVectorcall_NARGS>`.
*kwnames* can be either NULL (no keyword arguments) or a tuple of keyword
names. In the latter case, the values of the keyword arguments are stored
in *args* after the positional arguments.
names, which must be strings. In the latter case, the values of the keyword
arguments are stored in *args* after the positional arguments.
The number of keyword arguments does not influence *nargsf*.
*kwnames* must contain only objects of type ``str`` (not a subclass),

View File

@ -204,6 +204,7 @@ also keyword arguments. So there are a total of 6 calling conventions:
Keyword arguments are passed the same way as in the vectorcall protocol:
there is an additional fourth :c:type:`PyObject\*` parameter
which is a tuple representing the names of the keyword arguments
(which are guaranteed to be strings)
or possibly *NULL* if there are no keywords. The values of the keyword
arguments are stored in the *args* array, after the positional arguments.

View File

@ -1142,8 +1142,10 @@ All of the following opcodes use their arguments.
Calls a callable object with positional (if any) and keyword arguments.
*argc* indicates the total number of positional and keyword arguments.
The top element on the stack contains a tuple of keyword argument names.
Below that are keyword arguments in the order corresponding to the tuple.
The top element on the stack contains a tuple with the names of the
keyword arguments, which must be strings.
Below that are the values for the keyword arguments,
in the order corresponding to the tuple.
Below that are positional arguments, with the right-most parameter on
top. Below the arguments is a callable object to call.
``CALL_FUNCTION_KW`` pops all arguments and the callable object off the stack,

View File

@ -88,8 +88,7 @@ _PyVectorcall_Function(PyObject *callable)
of keyword arguments does not change nargsf). kwnames can also be NULL if
there are no keyword arguments.
keywords must only contains str strings (no subclass), and all keys must
be unique.
keywords must only contain strings and all keys must be unique.
Return the result on success. Raise an exception and return NULL on
error. */

View File

@ -237,7 +237,7 @@ What about willful misconduct?
>>> f(**{1:2})
Traceback (most recent call last):
...
TypeError: f() keywords must be strings
TypeError: keywords must be strings
>>> h(**{'e': 2})
Traceback (most recent call last):

View File

@ -256,7 +256,7 @@ Overridden parameters
>>> f(**{1: 3}, **{1: 5})
Traceback (most recent call last):
...
TypeError: f() keywords must be strings
TypeError: f() got multiple values for keyword argument '1'
Unpacking non-sequence

View File

@ -0,0 +1,2 @@
The vectorcall protocol now requires that the caller passes only strings as
keyword names.

View File

@ -322,8 +322,7 @@ _PyFunction_Vectorcall(PyObject *func, PyObject* const* stack,
assert(nargs >= 0);
assert(kwnames == NULL || PyTuple_CheckExact(kwnames));
assert((nargs == 0 && nkwargs == 0) || stack != NULL);
/* kwnames must only contains str strings, no subclass, and all keys must
be unique */
/* kwnames must only contain strings and all keys must be unique */
if (co->co_kwonlyargcount == 0 && nkwargs == 0 &&
(co->co_flags & ~PyCF_MASK) == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))
@ -943,12 +942,12 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
vector; return NULL with exception set on error. Return the keyword names
tuple in *p_kwnames.
This also checks that all keyword names are strings. If not, a TypeError is
raised.
The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames)
The type of keyword keys is not checked, these checks should be done
later (ex: _PyArg_ParseStackAndKeywords). */
When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */
static PyObject *const *
_PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
PyObject **p_kwnames)
@ -994,7 +993,9 @@ _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
called in the performance critical hot code. */
Py_ssize_t pos = 0, i = 0;
PyObject *key, *value;
unsigned long keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS;
while (PyDict_Next(kwargs, &pos, &key, &value)) {
keys_are_strings &= Py_TYPE(key)->tp_flags;
Py_INCREF(key);
Py_INCREF(value);
PyTuple_SET_ITEM(kwnames, i, key);
@ -1002,6 +1003,18 @@ _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs,
i++;
}
/* keys_are_strings has the value Py_TPFLAGS_UNICODE_SUBCLASS if that
* flag is set for all keys. Otherwise, keys_are_strings equals 0.
* We do this check once at the end instead of inside the loop above
* because it simplifies the deallocation in the failing case.
* It happens to also make the loop above slightly more efficient. */
if (!keys_are_strings) {
PyErr_SetString(PyExc_TypeError,
"keywords must be strings");
_PyStack_UnpackDict_Free(stack, nargs, kwnames);
return NULL;
}
*p_kwnames = kwnames;
return stack;
}

View File

@ -3504,7 +3504,9 @@ main_loop:
PyObject **sp, *res, *names;
names = POP();
assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg);
assert(PyTuple_Check(names));
assert(PyTuple_GET_SIZE(names) <= oparg);
/* We assume without checking that names contains only strings */
sp = stack_pointer;
res = call_function(tstate, &sp, oparg, names);
stack_pointer = sp;
@ -5372,20 +5374,12 @@ format_kwargs_error(PyThreadState *tstate, PyObject *func, PyObject *kwargs)
_PyErr_Fetch(tstate, &exc, &val, &tb);
if (val && PyTuple_Check(val) && PyTuple_GET_SIZE(val) == 1) {
PyObject *key = PyTuple_GET_ITEM(val, 0);
if (!PyUnicode_Check(key)) {
_PyErr_Format(tstate, PyExc_TypeError,
"%.200s%.200s keywords must be strings",
PyEval_GetFuncName(func),
PyEval_GetFuncDesc(func));
}
else {
_PyErr_Format(tstate, PyExc_TypeError,
"%.200s%.200s got multiple "
"values for keyword argument '%U'",
PyEval_GetFuncName(func),
PyEval_GetFuncDesc(func),
key);
}
_PyErr_Format(tstate, PyExc_TypeError,
"%.200s%.200s got multiple "
"values for keyword argument '%S'",
PyEval_GetFuncName(func),
PyEval_GetFuncDesc(func),
key);
Py_XDECREF(exc);
Py_XDECREF(val);
Py_XDECREF(tb);

View File

@ -2043,11 +2043,7 @@ find_keyword(PyObject *kwnames, PyObject *const *kwstack, PyObject *key)
if (kwname == key) {
return kwstack[i];
}
if (!PyUnicode_Check(kwname)) {
/* ignore non-string keyword keys:
an error will be raised below */
continue;
}
assert(PyUnicode_Check(kwname));
if (_PyUnicode_EQ(kwname, key)) {
return kwstack[i];
}
@ -2275,16 +2271,11 @@ vgetargskeywordsfast_impl(PyObject *const *args, Py_ssize_t nargs,
j++;
}
if (!PyUnicode_Check(keyword)) {
PyErr_SetString(PyExc_TypeError,
"keywords must be strings");
return cleanreturn(0, &freelist);
}
match = PySequence_Contains(kwtuple, keyword);
if (match <= 0) {
if (!match) {
PyErr_Format(PyExc_TypeError,
"'%U' is an invalid keyword "
"'%S' is an invalid keyword "
"argument for %.200s%s",
keyword,
(parser->fname == NULL) ? "this function" : parser->fname,
@ -2505,16 +2496,11 @@ _PyArg_UnpackKeywords(PyObject *const *args, Py_ssize_t nargs,
j++;
}
if (!PyUnicode_Check(keyword)) {
PyErr_SetString(PyExc_TypeError,
"keywords must be strings");
return NULL;
}
match = PySequence_Contains(kwtuple, keyword);
if (match <= 0) {
if (!match) {
PyErr_Format(PyExc_TypeError,
"'%U' is an invalid keyword "
"'%S' is an invalid keyword "
"argument for %.200s%s",
keyword,
(parser->fname == NULL) ? "this function" : parser->fname,