From 217911ede5d52b02b2e3c9222439e1ea08545291 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Mon, 17 Apr 2023 02:41:25 +0200 Subject: [PATCH] gh-103583: Add codecs and maps to _codecs_* module state (#103540) --- Modules/cjkcodecs/_codecs_cn.c | 4 +- Modules/cjkcodecs/_codecs_hk.c | 5 +- Modules/cjkcodecs/_codecs_iso2022.c | 9 +- Modules/cjkcodecs/_codecs_jp.c | 13 +- Modules/cjkcodecs/_codecs_kr.c | 4 +- Modules/cjkcodecs/_codecs_tw.c | 4 +- Modules/cjkcodecs/cjkcodecs.h | 177 +++++++++++++------- Modules/cjkcodecs/multibytecodec.c | 41 ++--- Tools/c-analyzer/cpython/globals-to-fix.tsv | 2 - 9 files changed, 162 insertions(+), 97 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 8a62f7e257c..e2c7908c9bb 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -453,14 +453,14 @@ DECODER(hz) } -BEGIN_MAPPINGS_LIST +BEGIN_MAPPINGS_LIST(4) MAPPING_DECONLY(gb2312) MAPPING_DECONLY(gbkext) MAPPING_ENCONLY(gbcommon) MAPPING_ENCDEC(gb18030ext) END_MAPPINGS_LIST -BEGIN_CODECS_LIST +BEGIN_CODECS_LIST(4) CODEC_STATELESS(gb2312) CODEC_STATELESS(gbk) CODEC_STATELESS(gb18030) diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 4f21569a0ce..43593b87373 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -177,14 +177,13 @@ DECODER(big5hkscs) return 0; } - -BEGIN_MAPPINGS_LIST +BEGIN_MAPPINGS_LIST(3) MAPPING_DECONLY(big5hkscs) MAPPING_ENCONLY(big5hkscs_bmp) MAPPING_ENCONLY(big5hkscs_nonbmp) END_MAPPINGS_LIST -BEGIN_CODECS_LIST +BEGIN_CODECS_LIST(1) CODEC_STATELESS_WINIT(big5hkscs) END_CODECS_LIST diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 7394cf67e0e..cf34752e16a 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -1119,18 +1119,19 @@ static const struct iso2022_designation iso2022_jp_ext_designations[] = { CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT) -BEGIN_MAPPINGS_LIST +BEGIN_MAPPINGS_LIST(0) /* no mapping table here */ END_MAPPINGS_LIST -#define ISO2022_CODEC(variation) { \ +#define ISO2022_CODEC(variation) \ +NEXT_CODEC = (MultibyteCodec){ \ "iso2022_" #variation, \ &iso2022_##variation##_config, \ iso2022_codec_init, \ _STATEFUL_METHODS(iso2022) \ -}, +}; -BEGIN_CODECS_LIST +BEGIN_CODECS_LIST(7) ISO2022_CODEC(kr) ISO2022_CODEC(jp) ISO2022_CODEC(jp_1) diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 3a332953b95..7a8b78a2359 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -733,7 +733,7 @@ DECODER(shift_jis_2004) } -BEGIN_MAPPINGS_LIST +BEGIN_MAPPINGS_LIST(11) MAPPING_DECONLY(jisx0208) MAPPING_DECONLY(jisx0212) MAPPING_ENCONLY(jisxcommon) @@ -747,14 +747,19 @@ BEGIN_MAPPINGS_LIST MAPPING_ENCDEC(cp932ext) END_MAPPINGS_LIST -BEGIN_CODECS_LIST +#define CODEC_CUSTOM(NAME, N, METH) \ + NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)}; + +BEGIN_CODECS_LIST(7) CODEC_STATELESS(shift_jis) CODEC_STATELESS(cp932) CODEC_STATELESS(euc_jp) CODEC_STATELESS(shift_jis_2004) CODEC_STATELESS(euc_jis_2004) - { "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) }, - { "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) }, + CODEC_CUSTOM("euc_jisx0213", 2000, euc_jis_2004) + CODEC_CUSTOM("shift_jisx0213", 2000, shift_jis_2004) END_CODECS_LIST +#undef CODEC_CUSTOM + I_AM_A_MODULE_FOR(jp) diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 72641e495af..fd9a9fd92db 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -453,13 +453,13 @@ DECODER(johab) #undef FILL -BEGIN_MAPPINGS_LIST +BEGIN_MAPPINGS_LIST(3) MAPPING_DECONLY(ksx1001) MAPPING_ENCONLY(cp949) MAPPING_DECONLY(cp949ext) END_MAPPINGS_LIST -BEGIN_CODECS_LIST +BEGIN_CODECS_LIST(3) CODEC_STATELESS(euc_kr) CODEC_STATELESS(cp949) CODEC_STATELESS(johab) diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index 722b26b128a..3e440991414 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -130,12 +130,12 @@ DECODER(cp950) -BEGIN_MAPPINGS_LIST +BEGIN_MAPPINGS_LIST(2) MAPPING_ENCDEC(big5) MAPPING_ENCDEC(cp950ext) END_MAPPINGS_LIST -BEGIN_CODECS_LIST +BEGIN_CODECS_LIST(2) CODEC_STATELESS(big5) CODEC_STATELESS(cp950) END_CODECS_LIST diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index d9aeec2ff40..646a9fd255c 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -60,8 +60,20 @@ struct pair_encodemap { DBCHAR code; }; -static const MultibyteCodec *codec_list; -static const struct dbcs_map *mapping_list; +typedef struct { + int num_mappings; + int num_codecs; + struct dbcs_map *mapping_list; + MultibyteCodec *codec_list; +} cjkcodecs_module_state; + +static inline cjkcodecs_module_state * +get_module_state(PyObject *mod) +{ + void *state = PyModule_GetState(mod); + assert(state != NULL); + return (cjkcodecs_module_state *)state; +} #define CODEC_INIT(encoding) \ static int encoding##_codec_init(const void *config) @@ -202,16 +214,42 @@ static const struct dbcs_map *mapping_list; #define TRYMAP_DEC(charset, assi, c1, c2) \ _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) -#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = { -#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL}, -#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, -#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap}, -#define END_MAPPINGS_LIST \ - {"", NULL, NULL} }; \ - static const struct dbcs_map *mapping_list = \ - (const struct dbcs_map *)_mapping_list; +#define BEGIN_MAPPINGS_LIST(NUM) \ +static int \ +add_mappings(cjkcodecs_module_state *st) \ +{ \ + int idx = 0; \ + (void)idx; \ + st->num_mappings = NUM; \ + st->mapping_list = PyMem_Calloc(NUM, sizeof(struct dbcs_map)); \ + if (st->mapping_list == NULL) { \ + return -1; \ + } + +#define MAPPING_ENCONLY(enc) \ + st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, NULL}; +#define MAPPING_DECONLY(enc) \ + st->mapping_list[idx++] = (struct dbcs_map){#enc, NULL, (void*)enc##_decmap}; +#define MAPPING_ENCDEC(enc) \ + st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, (void*)enc##_decmap}; + +#define END_MAPPINGS_LIST \ + assert(st->num_mappings == idx); \ + return 0; \ +} + +#define BEGIN_CODECS_LIST(NUM) \ +static int \ +add_codecs(cjkcodecs_module_state *st) \ +{ \ + int idx = 0; \ + (void)idx; \ + st->num_codecs = NUM; \ + st->codec_list = PyMem_Calloc(NUM, sizeof(MultibyteCodec)); \ + if (st->codec_list == NULL) { \ + return -1; \ + } -#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { #define _STATEFUL_METHODS(enc) \ enc##_encode, \ enc##_encode_init, \ @@ -222,23 +260,21 @@ static const struct dbcs_map *mapping_list; #define _STATELESS_METHODS(enc) \ enc##_encode, NULL, NULL, \ enc##_decode, NULL, NULL, -#define CODEC_STATEFUL(enc) { \ - #enc, NULL, NULL, \ - _STATEFUL_METHODS(enc) \ -}, -#define CODEC_STATELESS(enc) { \ - #enc, NULL, NULL, \ - _STATELESS_METHODS(enc) \ -}, -#define CODEC_STATELESS_WINIT(enc) { \ - #enc, NULL, \ - enc##_codec_init, \ - _STATELESS_METHODS(enc) \ -}, -#define END_CODECS_LIST \ - {"", NULL,} }; \ - static const MultibyteCodec *codec_list = \ - (const MultibyteCodec *)_codec_list; + +#define NEXT_CODEC \ + st->codec_list[idx++] + +#define CODEC_STATEFUL(enc) \ + NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)}; +#define CODEC_STATELESS(enc) \ + NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)}; +#define CODEC_STATELESS_WINIT(enc) \ + NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)}; + +#define END_CODECS_LIST \ + assert(st->num_codecs == idx); \ + return 0; \ +} @@ -248,54 +284,71 @@ getmultibytecodec(void) return _PyImport_GetModuleAttrString("_multibytecodec", "__create_codec"); } +static PyObject * +_getcodec(const MultibyteCodec *codec) +{ + PyObject *cofunc = getmultibytecodec(); + if (cofunc == NULL) { + return NULL; + } + + PyObject *codecobj = PyCapsule_New((void *)codec, + PyMultibyteCodec_CAPSULE_NAME, + NULL); + if (codecobj == NULL) { + Py_DECREF(cofunc); + return NULL; + } + + PyObject *res = PyObject_CallOneArg(cofunc, codecobj); + Py_DECREF(codecobj); + Py_DECREF(cofunc); + return res; +} + static PyObject * getcodec(PyObject *self, PyObject *encoding) { - PyObject *codecobj, *r, *cofunc; - const MultibyteCodec *codec; - const char *enc; - if (!PyUnicode_Check(encoding)) { PyErr_SetString(PyExc_TypeError, "encoding name must be a string."); return NULL; } - enc = PyUnicode_AsUTF8(encoding); - if (enc == NULL) - return NULL; - - cofunc = getmultibytecodec(); - if (cofunc == NULL) - return NULL; - - for (codec = codec_list; codec->encoding[0]; codec++) - if (strcmp(codec->encoding, enc) == 0) - break; - - if (codec->encoding[0] == '\0') { - PyErr_SetString(PyExc_LookupError, - "no such codec is supported."); + const char *enc = PyUnicode_AsUTF8(encoding); + if (enc == NULL) { return NULL; } - codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); - if (codecobj == NULL) - return NULL; + cjkcodecs_module_state *st = get_module_state(self); + for (int i = 0; i < st->num_codecs; i++) { + const MultibyteCodec *codec = &st->codec_list[i]; + if (strcmp(codec->encoding, enc) == 0) { + return _getcodec(codec); + } + } - r = PyObject_CallOneArg(cofunc, codecobj); - Py_DECREF(codecobj); - Py_DECREF(cofunc); - - return r; + PyErr_SetString(PyExc_LookupError, + "no such codec is supported."); + return NULL; } +static int add_mappings(cjkcodecs_module_state *); +static int add_codecs(cjkcodecs_module_state *); static int register_maps(PyObject *module) { - const struct dbcs_map *h; + // Init module state. + cjkcodecs_module_state *st = get_module_state(module); + if (add_mappings(st) < 0) { + return -1; + } + if (add_codecs(st) < 0) { + return -1; + } - for (h = mapping_list; h->charset[0] != '\0'; h++) { + for (int i = 0; i < st->num_mappings; i++) { + const struct dbcs_map *h = &st->mapping_list[i]; char mhname[256] = "__map_"; strcpy(mhname + sizeof("__map_") - 1, h->charset); @@ -394,6 +447,13 @@ _cjk_exec(PyObject *module) return register_maps(module); } +static void +_cjk_free(void *mod) +{ + cjkcodecs_module_state *st = get_module_state((PyObject *)mod); + PyMem_Free(st->mapping_list); + PyMem_Free(st->codec_list); +} static struct PyMethodDef _cjk_methods[] = { {"getcodec", (PyCFunction)getcodec, METH_O, ""}, @@ -409,9 +469,10 @@ static PyModuleDef_Slot _cjk_slots[] = { static struct PyModuleDef _cjk_module = { \ PyModuleDef_HEAD_INIT, \ .m_name = "_codecs_"#loc, \ - .m_size = 0, \ + .m_size = sizeof(cjkcodecs_module_state), \ .m_methods = _cjk_methods, \ .m_slots = _cjk_slots, \ + .m_free = _cjk_free, \ }; \ \ PyMODINIT_FUNC \ diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 8564494f626..55778cdb59e 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -19,26 +19,27 @@ typedef struct { PyTypeObject *writer_type; PyTypeObject *multibytecodec_type; PyObject *str_write; -} _multibytecodec_state; +} module_state; -static _multibytecodec_state * -_multibytecodec_get_state(PyObject *module) +static module_state * +get_module_state(PyObject *module) { - _multibytecodec_state *state = PyModule_GetState(module); + module_state *state = PyModule_GetState(module); assert(state != NULL); return state; } static struct PyModuleDef _multibytecodecmodule; -static _multibytecodec_state * -_multibyte_codec_find_state_by_type(PyTypeObject *type) + +static module_state * +find_state_by_def(PyTypeObject *type) { PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule); assert(module != NULL); - return _multibytecodec_get_state(module); + return get_module_state(module); } -#define clinic_get_state() _multibyte_codec_find_state_by_type(type) +#define clinic_get_state() find_state_by_def(type) /*[clinic input] module _multibytecodec class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type" @@ -1040,7 +1041,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (codec == NULL) goto errorexit; - _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); + module_state *state = find_state_by_def(type); if (!MultibyteCodec_Check(state, codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); goto errorexit; @@ -1315,7 +1316,7 @@ mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (codec == NULL) goto errorexit; - _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); + module_state *state = find_state_by_def(type); if (!MultibyteCodec_Check(state, codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); goto errorexit; @@ -1630,7 +1631,7 @@ mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (codec == NULL) goto errorexit; - _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); + module_state *state = find_state_by_def(type); if (!MultibyteCodec_Check(state, codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); goto errorexit; @@ -1735,7 +1736,7 @@ _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *se PyObject *strobj) /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/ { - _multibytecodec_state *state = PyType_GetModuleState(cls); + module_state *state = PyType_GetModuleState(cls); assert(state != NULL); if (mbstreamwriter_iwrite(self, strobj, state->str_write)) { return NULL; @@ -1766,7 +1767,7 @@ _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObjec return NULL; } - _multibytecodec_state *state = PyType_GetModuleState(cls); + module_state *state = PyType_GetModuleState(cls); assert(state != NULL); for (i = 0; i < PySequence_Length(lines); i++) { /* length can be changed even within this loop */ @@ -1817,7 +1818,7 @@ _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *se assert(PyBytes_Check(pwrt)); - _multibytecodec_state *state = PyType_GetModuleState(cls); + module_state *state = PyType_GetModuleState(cls); assert(state != NULL); if (PyBytes_Size(pwrt) > 0) { @@ -1853,7 +1854,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (codec == NULL) goto errorexit; - _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type); + module_state *state = find_state_by_def(type); if (!MultibyteCodec_Check(state, codec)) { PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); goto errorexit; @@ -1963,7 +1964,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg) if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) return NULL; - _multibytecodec_state *state = _multibytecodec_get_state(module); + module_state *state = get_module_state(module); self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type); if (self == NULL) return NULL; @@ -1976,7 +1977,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg) static int _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg) { - _multibytecodec_state *state = _multibytecodec_get_state(mod); + module_state *state = get_module_state(mod); Py_VISIT(state->multibytecodec_type); Py_VISIT(state->encoder_type); Py_VISIT(state->decoder_type); @@ -1988,7 +1989,7 @@ _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg) static int _multibytecodec_clear(PyObject *mod) { - _multibytecodec_state *state = _multibytecodec_get_state(mod); + module_state *state = get_module_state(mod); Py_CLEAR(state->multibytecodec_type); Py_CLEAR(state->encoder_type); Py_CLEAR(state->decoder_type); @@ -2022,7 +2023,7 @@ _multibytecodec_free(void *mod) static int _multibytecodec_exec(PyObject *mod) { - _multibytecodec_state *state = _multibytecodec_get_state(mod); + module_state *state = get_module_state(mod); state->str_write = PyUnicode_InternFromString("write"); if (state->str_write == NULL) { return -1; @@ -2056,7 +2057,7 @@ static PyModuleDef_Slot _multibytecodec_slots[] = { static struct PyModuleDef _multibytecodecmodule = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "_multibytecodec", - .m_size = sizeof(_multibytecodec_state), + .m_size = sizeof(module_state), .m_methods = _multibytecodec_methods, .m_slots = _multibytecodec_slots, .m_traverse = _multibytecodec_traverse, diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 5c173b1041e..849fd5d9a1e 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -506,8 +506,6 @@ Modules/cjkcodecs/_codecs_iso2022.c jisx0208_init initialized - Modules/cjkcodecs/_codecs_iso2022.c jisx0212_init initialized - Modules/cjkcodecs/_codecs_iso2022.c jisx0213_init initialized - Modules/cjkcodecs/_codecs_iso2022.c gb2312_init initialized - -Modules/cjkcodecs/cjkcodecs.h - codec_list - -Modules/cjkcodecs/cjkcodecs.h - mapping_list - Modules/readline.c - libedit_append_replace_history_offset - Modules/readline.c - using_libedit_emulation - Modules/readline.c - libedit_history_start -