From 67266266469fe0e817736227f39537182534c1a5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 24 Aug 2023 15:59:12 +0200 Subject: [PATCH] gh-108314: Add PyDict_ContainsString() function (#108323) --- Doc/c-api/dict.rst | 20 ++++++++--- Doc/whatsnew/3.13.rst | 5 +++ Include/cpython/dictobject.h | 1 + Lib/test/test_capi/test_dict.py | 24 ++++++++++--- ...-08-22-18-45-20.gh-issue-108314.nOlmwq.rst | 4 +++ Modules/_ctypes/_ctypes.c | 36 +++++++++++-------- Modules/_testcapi/dict.c | 14 ++++++++ Objects/dictobject.c | 12 +++++++ Python/pylifecycle.c | 9 ++--- Python/pythonrun.c | 23 +++++++----- 10 files changed, 112 insertions(+), 36 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index e4c1d71a413..7810d05b0eb 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -55,6 +55,15 @@ Dictionary Objects This is equivalent to the Python expression ``key in p``. +.. c:function:: int PyDict_ContainsString(PyObject *p, const char *key) + + This is the same as :c:func:`PyDict_Contains`, but *key* is specified as a + :c:expr:`const char*` UTF-8 encoded bytes string, rather than a + :c:expr:`PyObject*`. + + .. versionadded:: 3.13 + + .. c:function:: PyObject* PyDict_Copy(PyObject *p) Return a new dictionary that contains the same key-value pairs as *p*. @@ -73,7 +82,7 @@ Dictionary Objects .. index:: single: PyUnicode_FromString() Insert *val* into the dictionary *p* using *key* as a key. *key* should - be a :c:expr:`const char*`. The key object is created using + be a :c:expr:`const char*` UTF-8 encoded bytes string. The key object is created using ``PyUnicode_FromString(key)``. Return ``0`` on success or ``-1`` on failure. This function *does not* steal a reference to *val*. @@ -88,7 +97,8 @@ Dictionary Objects .. c:function:: int PyDict_DelItemString(PyObject *p, const char *key) - Remove the entry in dictionary *p* which has a key specified by the string *key*. + Remove the entry in dictionary *p* which has a key specified by the UTF-8 + encoded bytes string *key*. If *key* is not in the dictionary, :exc:`KeyError` is raised. Return ``0`` on success or ``-1`` on failure. @@ -136,7 +146,8 @@ Dictionary Objects .. c:function:: PyObject* PyDict_GetItemString(PyObject *p, const char *key) This is the same as :c:func:`PyDict_GetItem`, but *key* is specified as a - :c:expr:`const char*`, rather than a :c:expr:`PyObject*`. + :c:expr:`const char*` UTF-8 encoded bytes string, rather than a + :c:expr:`PyObject*`. .. note:: @@ -150,7 +161,8 @@ Dictionary Objects .. c:function:: int PyDict_GetItemStringRef(PyObject *p, const char *key, PyObject **result) Similar than :c:func:`PyDict_GetItemRef`, but *key* is specified as a - :c:expr:`const char*`, rather than a :c:expr:`PyObject*`. + :c:expr:`const char*` UTF-8 encoded bytes string, rather than a + :c:expr:`PyObject*`. .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 8509e18a7d7..25eb5e981c5 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -862,6 +862,11 @@ New Features not needed. (Contributed by Victor Stinner in :gh:`106004`.) +* Added :c:func:`PyDict_ContainsString` function: same as + :c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*` + UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`. + (Contributed by Victor Stinner in :gh:`108314`.) + * Add :c:func:`Py_IsFinalizing` function: check if the main Python interpreter is :term:`shutting down `. (Contributed by Victor Stinner in :gh:`108014`.) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index 470f5943674..f44880991f4 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -55,6 +55,7 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { } #define PyDict_GET_SIZE(op) PyDict_GET_SIZE(_PyObject_CAST(op)) +PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key); PyAPI_FUNC(int) _PyDict_ContainsId(PyObject *, _Py_Identifier *); PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); diff --git a/Lib/test/test_capi/test_dict.py b/Lib/test/test_capi/test_dict.py index 9da6efd695e..b22fa20e14d 100644 --- a/Lib/test/test_capi/test_dict.py +++ b/Lib/test/test_capi/test_dict.py @@ -8,6 +8,7 @@ import _testcapi NULL = None +INVALID_UTF8 = b'\xff' class DictSubclass(dict): def __getitem__(self, key): @@ -137,7 +138,7 @@ class CAPITest(unittest.TestCase): self.assertEqual(getitemstring(dct2, b'a'), 1) self.assertIs(getitemstring(dct2, b'b'), KeyError) - self.assertIs(getitemstring({}, b'\xff'), KeyError) + self.assertIs(getitemstring({}, INVALID_UTF8), KeyError) self.assertIs(getitemstring(42, b'a'), KeyError) self.assertIs(getitemstring([], b'a'), KeyError) # CRASHES getitemstring({}, NULL) @@ -173,7 +174,7 @@ class CAPITest(unittest.TestCase): self.assertIs(getitemstring(dct2, b'b'), KeyError) self.assertRaises(SystemError, getitemstring, 42, b'a') - self.assertRaises(UnicodeDecodeError, getitemstring, {}, b'\xff') + self.assertRaises(UnicodeDecodeError, getitemstring, {}, INVALID_UTF8) self.assertRaises(SystemError, getitemstring, [], b'a') # CRASHES getitemstring({}, NULL) # CRASHES getitemstring(NULL, b'a') @@ -213,6 +214,21 @@ class CAPITest(unittest.TestCase): # CRASHES contains(42, 'a') # CRASHES contains(NULL, 'a') + def test_dict_contains_string(self): + contains_string = _testcapi.dict_containsstring + dct = {'a': 1, '\U0001f40d': 2} + self.assertTrue(contains_string(dct, b'a')) + self.assertFalse(contains_string(dct, b'b')) + self.assertTrue(contains_string(dct, '\U0001f40d'.encode())) + self.assertRaises(UnicodeDecodeError, contains_string, dct, INVALID_UTF8) + + dct2 = DictSubclass(dct) + self.assertTrue(contains_string(dct2, b'a')) + self.assertFalse(contains_string(dct2, b'b')) + + # CRASHES contains({}, NULL) + # CRASHES contains(NULL, b'a') + def test_dict_setitem(self): setitem = _testcapi.dict_setitem dct = {} @@ -245,7 +261,7 @@ class CAPITest(unittest.TestCase): setitemstring(dct2, b'a', 5) self.assertEqual(dct2, {'a': 5}) - self.assertRaises(UnicodeDecodeError, setitemstring, {}, b'\xff', 5) + self.assertRaises(UnicodeDecodeError, setitemstring, {}, INVALID_UTF8, 5) self.assertRaises(SystemError, setitemstring, UserDict(), b'a', 5) self.assertRaises(SystemError, setitemstring, 42, b'a', 5) # CRASHES setitemstring({}, NULL, 5) @@ -287,7 +303,7 @@ class CAPITest(unittest.TestCase): self.assertEqual(dct2, {'c': 2}) self.assertRaises(KeyError, delitemstring, dct2, b'b') - self.assertRaises(UnicodeDecodeError, delitemstring, {}, b'\xff') + self.assertRaises(UnicodeDecodeError, delitemstring, {}, INVALID_UTF8) self.assertRaises(SystemError, delitemstring, UserDict({'a': 1}), b'a') self.assertRaises(SystemError, delitemstring, 42, b'a') # CRASHES delitemstring({}, NULL) diff --git a/Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst b/Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst new file mode 100644 index 00000000000..90ae50a291b --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-08-22-18-45-20.gh-issue-108314.nOlmwq.rst @@ -0,0 +1,4 @@ +Add :c:func:`PyDict_ContainsString` function: same as +:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*` +UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`. +Patch by Victor Stinner. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index dc80291d3b8..ed9efcad9ab 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -5139,8 +5139,6 @@ static PyObject * Pointer_get_contents(CDataObject *self, void *closure) { StgDictObject *stgdict; - PyObject *keep, *ptr_probe; - CDataObject *ptr2ptr; if (*(void **)self->b_ptr == NULL) { PyErr_SetString(PyExc_ValueError, @@ -5151,29 +5149,37 @@ Pointer_get_contents(CDataObject *self, void *closure) stgdict = PyObject_stgdict((PyObject *)self); assert(stgdict); /* Cannot be NULL for pointer instances */ - keep = GetKeepedObjects(self); + PyObject *keep = GetKeepedObjects(self); if (keep != NULL) { // check if it's a pointer to a pointer: // pointers will have '0' key in the _objects - ptr_probe = PyDict_GetItemString(keep, "0"); - - if (ptr_probe != NULL) { - ptr2ptr = (CDataObject*) PyDict_GetItemString(keep, "1"); - if (ptr2ptr == NULL) { - PyErr_SetString(PyExc_ValueError, - "Unexpected NULL pointer in _objects"); + int ptr_probe = PyDict_ContainsString(keep, "0"); + if (ptr_probe < 0) { + return NULL; + } + if (ptr_probe) { + PyObject *item; + if (PyDict_GetItemStringRef(keep, "1", &item) < 0) { return NULL; } - // don't construct a new object, - // return existing one instead to preserve refcount + if (item == NULL) { + PyErr_SetString(PyExc_ValueError, + "Unexpected NULL pointer in _objects"); + return NULL; + } +#ifndef NDEBUG + CDataObject *ptr2ptr = (CDataObject *)item; + // Don't construct a new object, + // return existing one instead to preserve refcount. + // Double-check that we are returning the same thing. assert( *(void**) self->b_ptr == ptr2ptr->b_ptr || *(void**) self->b_value.c == ptr2ptr->b_ptr || *(void**) self->b_ptr == ptr2ptr->b_value.c || *(void**) self->b_value.c == ptr2ptr->b_value.c - ); // double-check that we are returning the same thing - Py_INCREF(ptr2ptr); - return (PyObject *) ptr2ptr; + ); +#endif + return item; } } diff --git a/Modules/_testcapi/dict.c b/Modules/_testcapi/dict.c index b1dfcf4c707..6c3f9cd22fa 100644 --- a/Modules/_testcapi/dict.c +++ b/Modules/_testcapi/dict.c @@ -74,6 +74,19 @@ dict_contains(PyObject *self, PyObject *args) RETURN_INT(PyDict_Contains(obj, key)); } +static PyObject * +dict_containsstring(PyObject *self, PyObject *args) +{ + PyObject *obj; + const char *key; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "Oz#", &obj, &key, &size)) { + return NULL; + } + NULLABLE(obj); + RETURN_INT(PyDict_ContainsString(obj, key)); +} + static PyObject * dict_size(PyObject *self, PyObject *obj) { @@ -349,6 +362,7 @@ static PyMethodDef test_methods[] = { {"dict_getitemref", dict_getitemref, METH_VARARGS}, {"dict_getitemstringref", dict_getitemstringref, METH_VARARGS}, {"dict_contains", dict_contains, METH_VARARGS}, + {"dict_containsstring", dict_containsstring, METH_VARARGS}, {"dict_setitem", dict_setitem, METH_VARARGS}, {"dict_setitemstring", dict_setitemstring, METH_VARARGS}, {"dict_delitem", dict_delitem, METH_VARARGS}, diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f9701f6b4b0..10205e379a5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3741,6 +3741,18 @@ PyDict_Contains(PyObject *op, PyObject *key) return (ix != DKIX_EMPTY && value != NULL); } +int +PyDict_ContainsString(PyObject *op, const char *key) +{ + PyObject *key_obj = PyUnicode_FromString(key); + if (key_obj == NULL) { + return -1; + } + int res = PyDict_Contains(op, key_obj); + Py_DECREF(key_obj); + return res; +} + /* Internal version of PyDict_Contains used when the hash value is already known */ int _PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 18614264989..7d362af32cb 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2220,10 +2220,11 @@ add_main_module(PyInterpreterState *interp) } Py_DECREF(ann_dict); - if (_PyDict_GetItemStringWithError(d, "__builtins__") == NULL) { - if (PyErr_Occurred()) { - return _PyStatus_ERR("Failed to test __main__.__builtins__"); - } + int has_builtins = PyDict_ContainsString(d, "__builtins__"); + if (has_builtins < 0) { + return _PyStatus_ERR("Failed to test __main__.__builtins__"); + } + if (!has_builtins) { PyObject *bimod = PyImport_ImportModule("builtins"); if (bimod == NULL) { return _PyStatus_ERR("Failed to retrieve builtins module"); diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 05b7dfa61d1..0e118b03d6a 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -413,10 +413,11 @@ _PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit, PyObject *dict = PyModule_GetDict(main_module); // borrowed ref int set_file_name = 0; - if (_PyDict_GetItemStringWithError(dict, "__file__") == NULL) { - if (PyErr_Occurred()) { - goto done; - } + int has_file = PyDict_ContainsString(dict, "__file__"); + if (has_file < 0) { + goto done; + } + if (!has_file) { if (PyDict_SetItemString(dict, "__file__", filename) < 0) { goto done; } @@ -1713,13 +1714,17 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py _PyRuntime.signals.unhandled_keyboard_interrupt = 0; /* Set globals['__builtins__'] if it doesn't exist */ - if (globals != NULL && _PyDict_GetItemStringWithError(globals, "__builtins__") == NULL) { - if (PyErr_Occurred() || - PyDict_SetItemString(globals, "__builtins__", - tstate->interp->builtins) < 0) - { + if (globals != NULL) { + int has_builtins = PyDict_ContainsString(globals, "__builtins__"); + if (has_builtins < 0) { return NULL; } + if (!has_builtins) { + if (PyDict_SetItemString(globals, "__builtins__", + tstate->interp->builtins) < 0) { + return NULL; + } + } } v = PyEval_EvalCode((PyObject*)co, globals, locals);