From d332e7b8164c3c9c885b9e631f33d9517b628b75 Mon Sep 17 00:00:00 2001 From: Hai Shi Date: Tue, 29 Sep 2020 05:41:11 +0800 Subject: [PATCH] bpo-41842: Add codecs.unregister() function (GH-22360) Add codecs.unregister() and PyCodec_Unregister() functions to unregister a codec search function. --- Doc/c-api/codec.rst | 8 ++++++ Doc/library/codecs.rst | 11 +++++--- Doc/whatsnew/3.10.rst | 10 ++++++++ Include/codecs.h | 8 ++++++ Lib/test/test_codecs.py | 12 +++++++++ Misc/ACKS | 1 + .../2020-09-27-20-43-16.bpo-41842.bCakAj.rst | 2 ++ .../2020-09-23-22-52-24.bpo-41842.lIuhC9.rst | 1 + Modules/_codecsmodule.c | 22 ++++++++++++++++ Modules/clinic/_codecsmodule.c.h | 13 +++++++++- Python/codecs.c | 25 +++++++++++++++++++ 11 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst create mode 100644 Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index 172dcb326a4..235c77c945c 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -10,6 +10,14 @@ Codec registry and support functions As side effect, this tries to load the :mod:`encodings` package, if not yet done, to make sure that it is always first in the list of search functions. +.. c:function:: int PyCodec_Unregister(PyObject *search_function) + + Unregister a codec search function and clear the registry's cache. + If the search function is not registered, do nothing. + Return 0 on success. Raise an exception and return -1 on error. + + .. versionadded:: 3.10 + .. c:function:: int PyCodec_KnownEncoding(const char *encoding) Return ``1`` or ``0`` depending on whether there is a registered codec for diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index f071057293e..a0265135205 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -163,11 +163,14 @@ function: :class:`CodecInfo` object. In case a search function cannot find a given encoding, it should return ``None``. - .. note:: - Search function registration is not currently reversible, - which may cause problems in some cases, such as unit testing or - module reloading. +.. function:: unregister(search_function) + + Unregister a codec search function and clear the registry's cache. + If the search function is not registered, do nothing. + + .. versionadded:: 3.10 + While the builtin :func:`open` and the associated :mod:`io` module are the recommended approach for working with encoded text files, this module diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 1228f269549..f74dd1aa247 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -109,6 +109,12 @@ base64 Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the Base32 Encoding with Extended Hex Alphabet. +codecs +------ + +Add a :func:`codecs.unregister` function to unregister a codec search function. +(Contributed by Hai Shi in :issue:`41842`.) + curses ------ @@ -237,6 +243,10 @@ New Features :class:`datetime.time` objects. (Contributed by Zackery Spytz in :issue:`30155`.) +* Add a :c:func:`PyCodec_Unregister` function to unregister a codec + search function. + (Contributed by Hai Shi in :issue:`41842`.) + Porting to Python 3.10 ---------------------- diff --git a/Include/codecs.h b/Include/codecs.h index 3ad0f2b5aae..37ecfb4ab75 100644 --- a/Include/codecs.h +++ b/Include/codecs.h @@ -27,6 +27,14 @@ PyAPI_FUNC(int) PyCodec_Register( PyObject *search_function ); +/* Unregister a codec search function and clear the registry's cache. + If the search function is not registered, do nothing. + Return 0 on success. Raise an exception and return -1 on error. */ + +PyAPI_FUNC(int) PyCodec_Unregister( + PyObject *search_function + ); + /* Codec registry lookup API. Looks up the given encoding and returns a CodecInfo object with diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 3dd56820cd1..ed508f36ad4 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1641,6 +1641,18 @@ class CodecsModuleTest(unittest.TestCase): self.assertRaises(TypeError, codecs.register) self.assertRaises(TypeError, codecs.register, 42) + def test_unregister(self): + name = "nonexistent_codec_name" + search_function = mock.Mock() + codecs.register(search_function) + self.assertRaises(TypeError, codecs.lookup, name) + search_function.assert_called_with(name) + search_function.reset_mock() + + codecs.unregister(search_function) + self.assertRaises(LookupError, codecs.lookup, name) + search_function.assert_not_called() + def test_lookup(self): self.assertRaises(TypeError, codecs.lookup) self.assertRaises(LookupError, codecs.lookup, "__spam__") diff --git a/Misc/ACKS b/Misc/ACKS index 7b743464c1c..85001daf67d 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1575,6 +1575,7 @@ Akash Shende Charlie Shepherd Bruce Sherwood Gregory Shevchenko +Hai Shi Alexander Shigin Pete Shinners Michael Shiplett diff --git a/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst b/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst new file mode 100644 index 00000000000..116d08f4908 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-09-27-20-43-16.bpo-41842.bCakAj.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyCodec_Unregister` function to unregister a codec search +function. diff --git a/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst b/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst new file mode 100644 index 00000000000..306b02d76ff --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-09-23-22-52-24.bpo-41842.lIuhC9.rst @@ -0,0 +1 @@ +Add :func:`codecs.unregister` function to unregister a codec search function. diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 952072102d5..08a3d4ab024 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -68,6 +68,27 @@ _codecs_register(PyObject *module, PyObject *search_function) Py_RETURN_NONE; } +/*[clinic input] +_codecs.unregister + search_function: object + / + +Unregister a codec search function and clear the registry's cache. + +If the search function is not registered, do nothing. +[clinic start generated code]*/ + +static PyObject * +_codecs_unregister(PyObject *module, PyObject *search_function) +/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/ +{ + if (PyCodec_Unregister(search_function) < 0) { + return NULL; + } + + Py_RETURN_NONE; +} + /*[clinic input] _codecs.lookup encoding: str @@ -992,6 +1013,7 @@ _codecs_lookup_error_impl(PyObject *module, const char *name) static PyMethodDef _codecs_functions[] = { _CODECS_REGISTER_METHODDEF + _CODECS_UNREGISTER_METHODDEF _CODECS_LOOKUP_METHODDEF _CODECS_ENCODE_METHODDEF _CODECS_DECODE_METHODDEF diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index 249065c9fd0..e2ebb686129 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -15,6 +15,17 @@ PyDoc_STRVAR(_codecs_register__doc__, #define _CODECS_REGISTER_METHODDEF \ {"register", (PyCFunction)_codecs_register, METH_O, _codecs_register__doc__}, +PyDoc_STRVAR(_codecs_unregister__doc__, +"unregister($module, search_function, /)\n" +"--\n" +"\n" +"Unregister a codec search function and clear the registry\'s cache.\n" +"\n" +"If the search function is not registered, do nothing."); + +#define _CODECS_UNREGISTER_METHODDEF \ + {"unregister", (PyCFunction)_codecs_unregister, METH_O, _codecs_unregister__doc__}, + PyDoc_STRVAR(_codecs_lookup__doc__, "lookup($module, encoding, /)\n" "--\n" @@ -2827,4 +2838,4 @@ exit: #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=eeead01414be6e42 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9a97e2ddf3e69072 input=a9049054013a1b77]*/ diff --git a/Python/codecs.c b/Python/codecs.c index 0f18c27e5fe..a8233a73c4e 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -50,6 +50,31 @@ int PyCodec_Register(PyObject *search_function) return -1; } +int +PyCodec_Unregister(PyObject *search_function) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + PyObject *codec_search_path = interp->codec_search_path; + /* Do nothing if codec_search_path is not created yet or was cleared. */ + if (codec_search_path == NULL) { + return 0; + } + + assert(PyList_CheckExact(codec_search_path)); + Py_ssize_t n = PyList_GET_SIZE(codec_search_path); + for (Py_ssize_t i = 0; i < n; i++) { + PyObject *item = PyList_GET_ITEM(codec_search_path, i); + if (item == search_function) { + if (interp->codec_search_cache != NULL) { + assert(PyDict_CheckExact(interp->codec_search_cache)); + PyDict_Clear(interp->codec_search_cache); + } + return PyList_SetSlice(codec_search_path, i, i+1, NULL); + } + } + return 0; +} + extern int _Py_normalize_encoding(const char *, char *, size_t); /* Convert a string to a normalized Python string(decoded from UTF-8): all characters are