gh-108314: Add PyDict_ContainsString() function (#108323)

This commit is contained in:
Victor Stinner 2023-08-24 15:59:12 +02:00 committed by GitHub
parent c163d7f0b6
commit 6726626646
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 112 additions and 36 deletions

View File

@ -55,6 +55,15 @@ Dictionary Objects
This is equivalent to the Python expression ``key in p``.
.. c:function:: int PyDict_ContainsString(PyObject *p, const char *key)
This is the same as :c:func:`PyDict_Contains`, but *key* is specified as a
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. versionadded:: 3.13
.. c:function:: PyObject* PyDict_Copy(PyObject *p)
Return a new dictionary that contains the same key-value pairs as *p*.
@ -73,7 +82,7 @@ Dictionary Objects
.. index:: single: PyUnicode_FromString()
Insert *val* into the dictionary *p* using *key* as a key. *key* should
be a :c:expr:`const char*`. The key object is created using
be a :c:expr:`const char*` UTF-8 encoded bytes string. The key object is created using
``PyUnicode_FromString(key)``. Return ``0`` on success or ``-1`` on
failure. This function *does not* steal a reference to *val*.
@ -88,7 +97,8 @@ Dictionary Objects
.. c:function:: int PyDict_DelItemString(PyObject *p, const char *key)
Remove the entry in dictionary *p* which has a key specified by the string *key*.
Remove the entry in dictionary *p* which has a key specified by the UTF-8
encoded bytes string *key*.
If *key* is not in the dictionary, :exc:`KeyError` is raised.
Return ``0`` on success or ``-1`` on failure.
@ -136,7 +146,8 @@ Dictionary Objects
.. c:function:: PyObject* PyDict_GetItemString(PyObject *p, const char *key)
This is the same as :c:func:`PyDict_GetItem`, but *key* is specified as a
:c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. note::
@ -150,7 +161,8 @@ Dictionary Objects
.. c:function:: int PyDict_GetItemStringRef(PyObject *p, const char *key, PyObject **result)
Similar than :c:func:`PyDict_GetItemRef`, but *key* is specified as a
:c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. versionadded:: 3.13

View File

@ -862,6 +862,11 @@ New Features
not needed.
(Contributed by Victor Stinner in :gh:`106004`.)
* Added :c:func:`PyDict_ContainsString` function: same as
:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
(Contributed by Victor Stinner in :gh:`108314`.)
* Add :c:func:`Py_IsFinalizing` function: check if the main Python interpreter is
:term:`shutting down <interpreter shutdown>`.
(Contributed by Victor Stinner in :gh:`108014`.)

View File

@ -55,6 +55,7 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) {
}
#define PyDict_GET_SIZE(op) PyDict_GET_SIZE(_PyObject_CAST(op))
PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key);
PyAPI_FUNC(int) _PyDict_ContainsId(PyObject *, _Py_Identifier *);
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);

View File

@ -8,6 +8,7 @@ import _testcapi
NULL = None
INVALID_UTF8 = b'\xff'
class DictSubclass(dict):
def __getitem__(self, key):
@ -137,7 +138,7 @@ class CAPITest(unittest.TestCase):
self.assertEqual(getitemstring(dct2, b'a'), 1)
self.assertIs(getitemstring(dct2, b'b'), KeyError)
self.assertIs(getitemstring({}, b'\xff'), KeyError)
self.assertIs(getitemstring({}, INVALID_UTF8), KeyError)
self.assertIs(getitemstring(42, b'a'), KeyError)
self.assertIs(getitemstring([], b'a'), KeyError)
# CRASHES getitemstring({}, NULL)
@ -173,7 +174,7 @@ class CAPITest(unittest.TestCase):
self.assertIs(getitemstring(dct2, b'b'), KeyError)
self.assertRaises(SystemError, getitemstring, 42, b'a')
self.assertRaises(UnicodeDecodeError, getitemstring, {}, b'\xff')
self.assertRaises(UnicodeDecodeError, getitemstring, {}, INVALID_UTF8)
self.assertRaises(SystemError, getitemstring, [], b'a')
# CRASHES getitemstring({}, NULL)
# CRASHES getitemstring(NULL, b'a')
@ -213,6 +214,21 @@ class CAPITest(unittest.TestCase):
# CRASHES contains(42, 'a')
# CRASHES contains(NULL, 'a')
def test_dict_contains_string(self):
contains_string = _testcapi.dict_containsstring
dct = {'a': 1, '\U0001f40d': 2}
self.assertTrue(contains_string(dct, b'a'))
self.assertFalse(contains_string(dct, b'b'))
self.assertTrue(contains_string(dct, '\U0001f40d'.encode()))
self.assertRaises(UnicodeDecodeError, contains_string, dct, INVALID_UTF8)
dct2 = DictSubclass(dct)
self.assertTrue(contains_string(dct2, b'a'))
self.assertFalse(contains_string(dct2, b'b'))
# CRASHES contains({}, NULL)
# CRASHES contains(NULL, b'a')
def test_dict_setitem(self):
setitem = _testcapi.dict_setitem
dct = {}
@ -245,7 +261,7 @@ class CAPITest(unittest.TestCase):
setitemstring(dct2, b'a', 5)
self.assertEqual(dct2, {'a': 5})
self.assertRaises(UnicodeDecodeError, setitemstring, {}, b'\xff', 5)
self.assertRaises(UnicodeDecodeError, setitemstring, {}, INVALID_UTF8, 5)
self.assertRaises(SystemError, setitemstring, UserDict(), b'a', 5)
self.assertRaises(SystemError, setitemstring, 42, b'a', 5)
# CRASHES setitemstring({}, NULL, 5)
@ -287,7 +303,7 @@ class CAPITest(unittest.TestCase):
self.assertEqual(dct2, {'c': 2})
self.assertRaises(KeyError, delitemstring, dct2, b'b')
self.assertRaises(UnicodeDecodeError, delitemstring, {}, b'\xff')
self.assertRaises(UnicodeDecodeError, delitemstring, {}, INVALID_UTF8)
self.assertRaises(SystemError, delitemstring, UserDict({'a': 1}), b'a')
self.assertRaises(SystemError, delitemstring, 42, b'a')
# CRASHES delitemstring({}, NULL)

View File

@ -0,0 +1,4 @@
Add :c:func:`PyDict_ContainsString` function: same as
:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
Patch by Victor Stinner.

View File

@ -5139,8 +5139,6 @@ static PyObject *
Pointer_get_contents(CDataObject *self, void *closure)
{
StgDictObject *stgdict;
PyObject *keep, *ptr_probe;
CDataObject *ptr2ptr;
if (*(void **)self->b_ptr == NULL) {
PyErr_SetString(PyExc_ValueError,
@ -5151,29 +5149,37 @@ Pointer_get_contents(CDataObject *self, void *closure)
stgdict = PyObject_stgdict((PyObject *)self);
assert(stgdict); /* Cannot be NULL for pointer instances */
keep = GetKeepedObjects(self);
PyObject *keep = GetKeepedObjects(self);
if (keep != NULL) {
// check if it's a pointer to a pointer:
// pointers will have '0' key in the _objects
ptr_probe = PyDict_GetItemString(keep, "0");
if (ptr_probe != NULL) {
ptr2ptr = (CDataObject*) PyDict_GetItemString(keep, "1");
if (ptr2ptr == NULL) {
PyErr_SetString(PyExc_ValueError,
"Unexpected NULL pointer in _objects");
int ptr_probe = PyDict_ContainsString(keep, "0");
if (ptr_probe < 0) {
return NULL;
}
if (ptr_probe) {
PyObject *item;
if (PyDict_GetItemStringRef(keep, "1", &item) < 0) {
return NULL;
}
// don't construct a new object,
// return existing one instead to preserve refcount
if (item == NULL) {
PyErr_SetString(PyExc_ValueError,
"Unexpected NULL pointer in _objects");
return NULL;
}
#ifndef NDEBUG
CDataObject *ptr2ptr = (CDataObject *)item;
// Don't construct a new object,
// return existing one instead to preserve refcount.
// Double-check that we are returning the same thing.
assert(
*(void**) self->b_ptr == ptr2ptr->b_ptr ||
*(void**) self->b_value.c == ptr2ptr->b_ptr ||
*(void**) self->b_ptr == ptr2ptr->b_value.c ||
*(void**) self->b_value.c == ptr2ptr->b_value.c
); // double-check that we are returning the same thing
Py_INCREF(ptr2ptr);
return (PyObject *) ptr2ptr;
);
#endif
return item;
}
}

View File

@ -74,6 +74,19 @@ dict_contains(PyObject *self, PyObject *args)
RETURN_INT(PyDict_Contains(obj, key));
}
static PyObject *
dict_containsstring(PyObject *self, PyObject *args)
{
PyObject *obj;
const char *key;
Py_ssize_t size;
if (!PyArg_ParseTuple(args, "Oz#", &obj, &key, &size)) {
return NULL;
}
NULLABLE(obj);
RETURN_INT(PyDict_ContainsString(obj, key));
}
static PyObject *
dict_size(PyObject *self, PyObject *obj)
{
@ -349,6 +362,7 @@ static PyMethodDef test_methods[] = {
{"dict_getitemref", dict_getitemref, METH_VARARGS},
{"dict_getitemstringref", dict_getitemstringref, METH_VARARGS},
{"dict_contains", dict_contains, METH_VARARGS},
{"dict_containsstring", dict_containsstring, METH_VARARGS},
{"dict_setitem", dict_setitem, METH_VARARGS},
{"dict_setitemstring", dict_setitemstring, METH_VARARGS},
{"dict_delitem", dict_delitem, METH_VARARGS},

View File

@ -3741,6 +3741,18 @@ PyDict_Contains(PyObject *op, PyObject *key)
return (ix != DKIX_EMPTY && value != NULL);
}
int
PyDict_ContainsString(PyObject *op, const char *key)
{
PyObject *key_obj = PyUnicode_FromString(key);
if (key_obj == NULL) {
return -1;
}
int res = PyDict_Contains(op, key_obj);
Py_DECREF(key_obj);
return res;
}
/* Internal version of PyDict_Contains used when the hash value is already known */
int
_PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)

View File

@ -2220,10 +2220,11 @@ add_main_module(PyInterpreterState *interp)
}
Py_DECREF(ann_dict);
if (_PyDict_GetItemStringWithError(d, "__builtins__") == NULL) {
if (PyErr_Occurred()) {
return _PyStatus_ERR("Failed to test __main__.__builtins__");
}
int has_builtins = PyDict_ContainsString(d, "__builtins__");
if (has_builtins < 0) {
return _PyStatus_ERR("Failed to test __main__.__builtins__");
}
if (!has_builtins) {
PyObject *bimod = PyImport_ImportModule("builtins");
if (bimod == NULL) {
return _PyStatus_ERR("Failed to retrieve builtins module");

View File

@ -413,10 +413,11 @@ _PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit,
PyObject *dict = PyModule_GetDict(main_module); // borrowed ref
int set_file_name = 0;
if (_PyDict_GetItemStringWithError(dict, "__file__") == NULL) {
if (PyErr_Occurred()) {
goto done;
}
int has_file = PyDict_ContainsString(dict, "__file__");
if (has_file < 0) {
goto done;
}
if (!has_file) {
if (PyDict_SetItemString(dict, "__file__", filename) < 0) {
goto done;
}
@ -1713,13 +1714,17 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py
_PyRuntime.signals.unhandled_keyboard_interrupt = 0;
/* Set globals['__builtins__'] if it doesn't exist */
if (globals != NULL && _PyDict_GetItemStringWithError(globals, "__builtins__") == NULL) {
if (PyErr_Occurred() ||
PyDict_SetItemString(globals, "__builtins__",
tstate->interp->builtins) < 0)
{
if (globals != NULL) {
int has_builtins = PyDict_ContainsString(globals, "__builtins__");
if (has_builtins < 0) {
return NULL;
}
if (!has_builtins) {
if (PyDict_SetItemString(globals, "__builtins__",
tstate->interp->builtins) < 0) {
return NULL;
}
}
}
v = PyEval_EvalCode((PyObject*)co, globals, locals);