gh-103583: Add codecs and maps to _codecs_* module state (#103540)

This commit is contained in:
Erlend E. Aasland 2023-04-17 02:41:25 +02:00 committed by GitHub
parent ff3303e49c
commit 217911ede5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 162 additions and 97 deletions

View File

@ -453,14 +453,14 @@ DECODER(hz)
}
BEGIN_MAPPINGS_LIST
BEGIN_MAPPINGS_LIST(4)
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
BEGIN_CODECS_LIST(4)
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)

View File

@ -177,14 +177,13 @@ DECODER(big5hkscs)
return 0;
}
BEGIN_MAPPINGS_LIST
BEGIN_MAPPINGS_LIST(3)
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
BEGIN_CODECS_LIST(1)
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST

View File

@ -1119,18 +1119,19 @@ static const struct iso2022_designation iso2022_jp_ext_designations[] = {
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
BEGIN_MAPPINGS_LIST
BEGIN_MAPPINGS_LIST(0)
/* no mapping table here */
END_MAPPINGS_LIST
#define ISO2022_CODEC(variation) { \
#define ISO2022_CODEC(variation) \
NEXT_CODEC = (MultibyteCodec){ \
"iso2022_" #variation, \
&iso2022_##variation##_config, \
iso2022_codec_init, \
_STATEFUL_METHODS(iso2022) \
},
};
BEGIN_CODECS_LIST
BEGIN_CODECS_LIST(7)
ISO2022_CODEC(kr)
ISO2022_CODEC(jp)
ISO2022_CODEC(jp_1)

View File

@ -733,7 +733,7 @@ DECODER(shift_jis_2004)
}
BEGIN_MAPPINGS_LIST
BEGIN_MAPPINGS_LIST(11)
MAPPING_DECONLY(jisx0208)
MAPPING_DECONLY(jisx0212)
MAPPING_ENCONLY(jisxcommon)
@ -747,14 +747,19 @@ BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
/* Append a stateless codec whose name differs from its method prefix:
   NAME is the encoding string, N is stored (cast to void *) in the slot that
   follows the name, and METH selects the METH##_encode / METH##_decode pair.
   Expands through NEXT_CODEC, so it is only usable between
   BEGIN_CODECS_LIST(...) and END_CODECS_LIST. */
#define CODEC_CUSTOM(NAME, N, METH) \
NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)};
BEGIN_CODECS_LIST(7)
CODEC_STATELESS(shift_jis)
CODEC_STATELESS(cp932)
CODEC_STATELESS(euc_jp)
CODEC_STATELESS(shift_jis_2004)
CODEC_STATELESS(euc_jis_2004)
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
CODEC_CUSTOM("euc_jisx0213", 2000, euc_jis_2004)
CODEC_CUSTOM("shift_jisx0213", 2000, shift_jis_2004)
END_CODECS_LIST
#undef CODEC_CUSTOM
I_AM_A_MODULE_FOR(jp)

View File

@ -453,13 +453,13 @@ DECODER(johab)
#undef FILL
BEGIN_MAPPINGS_LIST
BEGIN_MAPPINGS_LIST(3)
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
BEGIN_CODECS_LIST(3)
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)

View File

@ -130,12 +130,12 @@ DECODER(cp950)
BEGIN_MAPPINGS_LIST
BEGIN_MAPPINGS_LIST(2)
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
BEGIN_CODECS_LIST(2)
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST

View File

@ -60,8 +60,20 @@ struct pair_encodemap {
DBCHAR code;
};
static const MultibyteCodec *codec_list;
static const struct dbcs_map *mapping_list;
/* Per-module state of a _codecs_* extension module: the mapping and codec
   tables together with their lengths.  The tables are allocated by
   add_mappings() / add_codecs() and released in _cjk_free(). */
typedef struct {
int num_mappings;  /* number of entries in mapping_list */
int num_codecs;  /* number of entries in codec_list */
struct dbcs_map *mapping_list;  /* array of num_mappings charset maps */
MultibyteCodec *codec_list;  /* array of num_codecs codec descriptors */
} cjkcodecs_module_state;
/* Return the cjkcodecs state attached to module object `mod`.
   The state is asserted to be non-NULL (checked in debug builds only). */
static inline cjkcodecs_module_state *
get_module_state(PyObject *mod)
{
    cjkcodecs_module_state *st =
        (cjkcodecs_module_state *)PyModule_GetState(mod);
    assert(st != NULL);
    return st;
}
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)
@ -202,16 +214,42 @@ static const struct dbcs_map *mapping_list;
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST \
{"", NULL, NULL} }; \
static const struct dbcs_map *mapping_list = \
(const struct dbcs_map *)_mapping_list;
/* Open the definition of add_mappings(), which allocates st->mapping_list
 * with room for NUM entries.  Each MAPPING_* macro that follows fills one
 * slot, and END_MAPPINGS_LIST closes the function body.
 *
 * On allocation failure the generated function returns -1 with MemoryError
 * set: PyMem_Calloc() does not raise by itself, so the exception is set here
 * explicitly.  num_mappings is stored only after the table exists, so the
 * state never advertises entries that were not allocated.
 *
 * The (void)idx statement presumably silences an unused-variable warning
 * for lists that declare no entries.
 */
#define BEGIN_MAPPINGS_LIST(NUM)                                    \
static int                                                          \
add_mappings(cjkcodecs_module_state *st)                            \
{                                                                   \
    int idx = 0;                                                    \
    (void)idx;                                                      \
    st->mapping_list = PyMem_Calloc(NUM, sizeof(struct dbcs_map));  \
    if (st->mapping_list == NULL) {                                 \
        PyErr_NoMemory();                                           \
        return -1;                                                  \
    }                                                               \
    st->num_mappings = NUM;
/* Entry macros for the body opened by BEGIN_MAPPINGS_LIST(NUM).  Each one
   stores a struct dbcs_map named #enc in the next free slot of
   st->mapping_list and advances idx; they rely on `st` and `idx` from the
   enclosing add_mappings() body. */

/* Encode-only map: enc##_encmap is set, the decode map is NULL. */
#define MAPPING_ENCONLY(enc) \
st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, NULL};
/* Decode-only map: the encode map is NULL, enc##_decmap is set. */
#define MAPPING_DECONLY(enc) \
st->mapping_list[idx++] = (struct dbcs_map){#enc, NULL, (void*)enc##_decmap};
/* Map with both enc##_encmap and enc##_decmap. */
#define MAPPING_ENCDEC(enc) \
st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, (void*)enc##_decmap};
/* Close the add_mappings() body opened by BEGIN_MAPPINGS_LIST(NUM).
   The assert checks that the number of MAPPING_* entries written matches
   the NUM recorded in st->num_mappings; the function then returns 0. */
#define END_MAPPINGS_LIST \
assert(st->num_mappings == idx); \
return 0; \
}
/* Open the definition of add_codecs(), which allocates st->codec_list with
 * room for NUM entries.  Each CODEC_* macro that follows fills one slot
 * through NEXT_CODEC, and END_CODECS_LIST closes the function body.
 *
 * On allocation failure the generated function returns -1 with MemoryError
 * set: PyMem_Calloc() does not raise by itself, so the exception is set here
 * explicitly.  num_codecs is stored only after the table exists, so lookups
 * that loop up to num_codecs never index a NULL codec_list.
 *
 * The (void)idx statement presumably silences an unused-variable warning
 * for lists that declare no entries.
 */
#define BEGIN_CODECS_LIST(NUM)                                      \
static int                                                          \
add_codecs(cjkcodecs_module_state *st)                              \
{                                                                   \
    int idx = 0;                                                    \
    (void)idx;                                                      \
    st->codec_list = PyMem_Calloc(NUM, sizeof(MultibyteCodec));     \
    if (st->codec_list == NULL) {                                   \
        PyErr_NoMemory();                                           \
        return -1;                                                  \
    }                                                               \
    st->num_codecs = NUM;
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
@ -222,23 +260,21 @@ static const struct dbcs_map *mapping_list;
#define _STATELESS_METHODS(enc) \
enc##_encode, NULL, NULL, \
enc##_decode, NULL, NULL,
#define CODEC_STATEFUL(enc) { \
#enc, NULL, NULL, \
_STATEFUL_METHODS(enc) \
},
#define CODEC_STATELESS(enc) { \
#enc, NULL, NULL, \
_STATELESS_METHODS(enc) \
},
#define CODEC_STATELESS_WINIT(enc) { \
#enc, NULL, \
enc##_codec_init, \
_STATELESS_METHODS(enc) \
},
/* Lvalue for the next free slot of st->codec_list; advances idx as a side
   effect.  Relies on `st` and `idx` from the add_codecs() body opened by
   BEGIN_CODECS_LIST(NUM). */
#define NEXT_CODEC \
st->codec_list[idx++]
/* The macros below append one MultibyteCodec whose encoding name is #enc.
   The two slots after the name are the config pointer and the codec init
   hook, followed by the encode/decode method table. */

/* Stateful codec: no config, no init hook, _STATEFUL_METHODS table. */
#define CODEC_STATEFUL(enc) \
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)};
/* Stateless codec: no config, no init hook, _STATELESS_METHODS table. */
#define CODEC_STATELESS(enc) \
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)};
/* Stateless codec that also registers enc##_codec_init as its init hook. */
#define CODEC_STATELESS_WINIT(enc) \
NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)};
#define END_CODECS_LIST \
{"", NULL,} }; \
static const MultibyteCodec *codec_list = \
(const MultibyteCodec *)_codec_list;
assert(st->num_codecs == idx); \
return 0; \
}
@ -248,54 +284,71 @@ getmultibytecodec(void)
return _PyImport_GetModuleAttrString("_multibytecodec", "__create_codec");
}
/* Build a codec object for `codec`.
 *
 * Looks up _multibytecodec.__create_codec, wraps the codec descriptor in a
 * capsule named PyMultibyteCodec_CAPSULE_NAME (no destructor) and calls the
 * factory with it.  Returns the factory's result, or NULL if the lookup,
 * the capsule creation or the call fails.
 */
static PyObject *
_getcodec(const MultibyteCodec *codec)
{
    PyObject *create = getmultibytecodec();
    if (create == NULL) {
        return NULL;
    }

    PyObject *result = NULL;
    PyObject *capsule = PyCapsule_New((void *)codec,
                                      PyMultibyteCodec_CAPSULE_NAME,
                                      NULL);
    if (capsule != NULL) {
        result = PyObject_CallOneArg(create, capsule);
        Py_DECREF(capsule);
    }
    /* The factory reference is dropped on both the success and error path. */
    Py_DECREF(create);
    return result;
}
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
PyObject *codecobj, *r, *cofunc;
const MultibyteCodec *codec;
const char *enc;
if (!PyUnicode_Check(encoding)) {
PyErr_SetString(PyExc_TypeError,
"encoding name must be a string.");
return NULL;
}
enc = PyUnicode_AsUTF8(encoding);
if (enc == NULL)
const char *enc = PyUnicode_AsUTF8(encoding);
if (enc == NULL) {
return NULL;
}
cofunc = getmultibytecodec();
if (cofunc == NULL)
return NULL;
cjkcodecs_module_state *st = get_module_state(self);
for (int i = 0; i < st->num_codecs; i++) {
const MultibyteCodec *codec = &st->codec_list[i];
if (strcmp(codec->encoding, enc) == 0) {
return _getcodec(codec);
}
}
for (codec = codec_list; codec->encoding[0]; codec++)
if (strcmp(codec->encoding, enc) == 0)
break;
if (codec->encoding[0] == '\0') {
PyErr_SetString(PyExc_LookupError,
"no such codec is supported.");
return NULL;
}
codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
if (codecobj == NULL)
return NULL;
r = PyObject_CallOneArg(cofunc, codecobj);
Py_DECREF(codecobj);
Py_DECREF(cofunc);
return r;
}
static int add_mappings(cjkcodecs_module_state *);
static int add_codecs(cjkcodecs_module_state *);
static int
register_maps(PyObject *module)
{
const struct dbcs_map *h;
// Init module state.
cjkcodecs_module_state *st = get_module_state(module);
if (add_mappings(st) < 0) {
return -1;
}
if (add_codecs(st) < 0) {
return -1;
}
for (h = mapping_list; h->charset[0] != '\0'; h++) {
for (int i = 0; i < st->num_mappings; i++) {
const struct dbcs_map *h = &st->mapping_list[i];
char mhname[256] = "__map_";
strcpy(mhname + sizeof("__map_") - 1, h->charset);
@ -394,6 +447,13 @@ _cjk_exec(PyObject *module)
return register_maps(module);
}
/* m_free hook: release the codec and mapping tables held in the module
   state.  PyMem_Free() accepts NULL, so a table that was never allocated
   is harmless here. */
static void
_cjk_free(void *mod)
{
    cjkcodecs_module_state *state = get_module_state((PyObject *)mod);

    PyMem_Free(state->codec_list);
    PyMem_Free(state->mapping_list);
}
static struct PyMethodDef _cjk_methods[] = {
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
@ -409,9 +469,10 @@ static PyModuleDef_Slot _cjk_slots[] = {
static struct PyModuleDef _cjk_module = { \
PyModuleDef_HEAD_INIT, \
.m_name = "_codecs_"#loc, \
.m_size = 0, \
.m_size = sizeof(cjkcodecs_module_state), \
.m_methods = _cjk_methods, \
.m_slots = _cjk_slots, \
.m_free = _cjk_free, \
}; \
\
PyMODINIT_FUNC \

View File

@ -19,26 +19,27 @@ typedef struct {
PyTypeObject *writer_type;
PyTypeObject *multibytecodec_type;
PyObject *str_write;
} _multibytecodec_state;
} module_state;
static _multibytecodec_state *
_multibytecodec_get_state(PyObject *module)
static module_state *
get_module_state(PyObject *module)
{
_multibytecodec_state *state = PyModule_GetState(module);
module_state *state = PyModule_GetState(module);
assert(state != NULL);
return state;
}
static struct PyModuleDef _multibytecodecmodule;
static _multibytecodec_state *
_multibyte_codec_find_state_by_type(PyTypeObject *type)
static module_state *
find_state_by_def(PyTypeObject *type)
{
PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
assert(module != NULL);
return _multibytecodec_get_state(module);
return get_module_state(module);
}
#define clinic_get_state() _multibyte_codec_find_state_by_type(type)
#define clinic_get_state() find_state_by_def(type)
/*[clinic input]
module _multibytecodec
class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
@ -1040,7 +1041,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
_multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@ -1315,7 +1316,7 @@ mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
_multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@ -1630,7 +1631,7 @@ mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
_multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@ -1735,7 +1736,7 @@ _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *se
PyObject *strobj)
/*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
{
_multibytecodec_state *state = PyType_GetModuleState(cls);
module_state *state = PyType_GetModuleState(cls);
assert(state != NULL);
if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
return NULL;
@ -1766,7 +1767,7 @@ _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObjec
return NULL;
}
_multibytecodec_state *state = PyType_GetModuleState(cls);
module_state *state = PyType_GetModuleState(cls);
assert(state != NULL);
for (i = 0; i < PySequence_Length(lines); i++) {
/* length can be changed even within this loop */
@ -1817,7 +1818,7 @@ _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *se
assert(PyBytes_Check(pwrt));
_multibytecodec_state *state = PyType_GetModuleState(cls);
module_state *state = PyType_GetModuleState(cls);
assert(state != NULL);
if (PyBytes_Size(pwrt) > 0) {
@ -1853,7 +1854,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (codec == NULL)
goto errorexit;
_multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
module_state *state = find_state_by_def(type);
if (!MultibyteCodec_Check(state, codec)) {
PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
goto errorexit;
@ -1963,7 +1964,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg)
if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
return NULL;
_multibytecodec_state *state = _multibytecodec_get_state(module);
module_state *state = get_module_state(module);
self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
if (self == NULL)
return NULL;
@ -1976,7 +1977,7 @@ _multibytecodec___create_codec(PyObject *module, PyObject *arg)
static int
_multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
{
_multibytecodec_state *state = _multibytecodec_get_state(mod);
module_state *state = get_module_state(mod);
Py_VISIT(state->multibytecodec_type);
Py_VISIT(state->encoder_type);
Py_VISIT(state->decoder_type);
@ -1988,7 +1989,7 @@ _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
static int
_multibytecodec_clear(PyObject *mod)
{
_multibytecodec_state *state = _multibytecodec_get_state(mod);
module_state *state = get_module_state(mod);
Py_CLEAR(state->multibytecodec_type);
Py_CLEAR(state->encoder_type);
Py_CLEAR(state->decoder_type);
@ -2022,7 +2023,7 @@ _multibytecodec_free(void *mod)
static int
_multibytecodec_exec(PyObject *mod)
{
_multibytecodec_state *state = _multibytecodec_get_state(mod);
module_state *state = get_module_state(mod);
state->str_write = PyUnicode_InternFromString("write");
if (state->str_write == NULL) {
return -1;
@ -2056,7 +2057,7 @@ static PyModuleDef_Slot _multibytecodec_slots[] = {
static struct PyModuleDef _multibytecodecmodule = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_multibytecodec",
.m_size = sizeof(_multibytecodec_state),
.m_size = sizeof(module_state),
.m_methods = _multibytecodec_methods,
.m_slots = _multibytecodec_slots,
.m_traverse = _multibytecodec_traverse,

View File

@ -506,8 +506,6 @@ Modules/cjkcodecs/_codecs_iso2022.c jisx0208_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c jisx0212_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c jisx0213_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c gb2312_init initialized -
Modules/cjkcodecs/cjkcodecs.h - codec_list -
Modules/cjkcodecs/cjkcodecs.h - mapping_list -
Modules/readline.c - libedit_append_replace_history_offset -
Modules/readline.c - using_libedit_emulation -
Modules/readline.c - libedit_history_start -

Can't render this file because it has a wrong number of fields in line 4.