diff --git a/Modules/cjkcodecs/README b/Modules/cjkcodecs/README index 024562916f3..b2370bc298f 100644 --- a/Modules/cjkcodecs/README +++ b/Modules/cjkcodecs/README @@ -1,11 +1,3 @@ -Notes on cjkcodecs -------------------- -This directory contains source files for cjkcodecs extension modules. -They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs) -as of Aug 20 2004 currently. - - - To generate or modify mapping headers ------------------------------------- Mapping headers are imported from CJKCodecs as pre-generated form. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 9a885bbb3c3..fd048d94869 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -2,7 +2,6 @@ * _codecs_cn.c: Codecs collection for Mainland Chinese encodings * * Written by Hye-Shik Chang - * $CJKCodecs: _codecs_cn.c,v 1.8 2004/07/07 14:59:26 perky Exp $ */ #include "cjkcodecs.h" diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 87441673022..221eced3cc6 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -2,7 +2,6 @@ * _codecs_hk.c: Codecs collection for encodings from Hong Kong * * Written by Hye-Shik Chang - * $CJKCodecs: _codecs_hk.c,v 1.4 2004/07/18 04:44:27 perky Exp $ */ #define USING_IMPORTED_MAPS @@ -32,7 +31,7 @@ ENCODER(big5hkscs) while (inleft > 0) { ucs4_t c = **inbuf; DBCHAR code; - int insize; + Py_ssize_t insize; if (c < 0x80) { REQUIRE_OUTBUF(1) diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 5cee8ce2c4e..1e52c34cca1 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -2,7 +2,6 @@ * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings. * * Written by Hye-Shik Chang - * $CJKCodecs: _codecs_iso2022.c,v 1.22 2004/08/19 17:08:13 perky Exp $ */ #define USING_IMPORTED_MAPS @@ -104,7 +103,7 @@ typedef int (*iso2022_init_func)(void); typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data); -typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, int *length); +typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length); struct iso2022_designation { unsigned char mark; @@ -160,7 +159,7 @@ ENCODER(iso2022) const struct iso2022_designation *dsg; DBCHAR encoded; ucs4_t c = **inbuf; - int insize; + Py_ssize_t insize; if (c < 0x80) { if (STATE_G0 != CHARSET_ASCII) { @@ -183,7 +182,7 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { - int length = 1; + Py_ssize_t length = 1; encoded = dsg->encoder(&c, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair @@ -300,12 +299,12 @@ DECODER_RESET(iso2022) return 0; } -static int +static Py_ssize_t iso2022processesc(const void *config, MultibyteCodec_State *state, - const unsigned char **inbuf, size_t *inleft) + const unsigned char **inbuf, Py_ssize_t *inleft) { unsigned char charset, designation; - size_t i, esclen; + Py_ssize_t i, esclen; for (i = 1;i < MAX_ESCSEQLEN;i++) { if (i >= *inleft) @@ -388,10 +387,10 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, else if ((c) == 0xa2) (assi) = 0x2019; \ else if ((c) == 0xaf) (assi) = 0x2015; -static int +static Py_ssize_t iso2022processg2(const void *config, MultibyteCodec_State *state, - const unsigned char **inbuf, size_t *inleft, - Py_UNICODE **outbuf, size_t *outleft) + const unsigned char **inbuf, Py_ssize_t *inleft, + Py_UNICODE **outbuf, Py_ssize_t *outleft) { /* not written to use encoder, decoder functions because only few * encodings use G2 designations in CJKCodecs */ @@ -425,7 +424,7 @@ DECODER(iso2022) while (inleft > 0) { unsigned char c = IN1; - int err; + Py_ssize_t err; if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { /* ESC throughout mode: @@ -589,7 +588,7 @@ ksx1001_decoder(const unsigned char *data) } static DBCHAR -ksx1001_encoder(const ucs4_t *data, int *length) +ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -625,7 +624,7 @@ jisx0208_decoder(const unsigned char *data) } static DBCHAR -jisx0208_encoder(const ucs4_t *data, int *length) +jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -662,7 +661,7 @@ jisx0212_decoder(const unsigned char *data) } static DBCHAR -jisx0212_encoder(const ucs4_t *data, int *length) +jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -760,7 +759,7 @@ jisx0213_2004_2_decoder(const unsigned char *data) } static DBCHAR -jisx0213_encoder(const ucs4_t *data, int *length, void *config) +jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) { DBCHAR coded; @@ -814,7 +813,7 @@ jisx0213_encoder(const ucs4_t *data, int *length, void *config) } static DBCHAR -jisx0213_2000_1_encoder(const ucs4_t *data, int *length) +jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -826,10 +825,10 @@ jisx0213_2000_1_encoder(const ucs4_t *data, int *length) } static DBCHAR -jisx0213_2000_1_encoder_paironly(const ucs4_t *data, int *length) +jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; - int ilength = *length; + Py_ssize_t ilength = *length; coded = jisx0213_encoder(data, length, (void *)2000); switch (ilength) { @@ -849,7 +848,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, int *length) } static DBCHAR -jisx0213_2000_2_encoder(const ucs4_t *data, int *length) +jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -861,7 +860,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, int *length) } static DBCHAR -jisx0213_2004_1_encoder(const ucs4_t *data, int *length) +jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -873,10 +872,10 @@ jisx0213_2004_1_encoder(const ucs4_t *data, int *length) } static DBCHAR -jisx0213_2004_1_encoder_paironly(const ucs4_t *data, int *length) +jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; - int ilength = *length; + Py_ssize_t ilength = *length; coded = jisx0213_encoder(data, length, NULL); switch (ilength) { @@ -896,7 +895,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, int *length) } static DBCHAR -jisx0213_2004_2_encoder(const ucs4_t *data, int *length) +jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -917,7 +916,7 @@ jisx0201_r_decoder(const unsigned char *data) } static DBCHAR -jisx0201_r_encoder(const ucs4_t *data, int *length) +jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_R_ENCODE(*data, coded) @@ -935,7 +934,7 @@ jisx0201_k_decoder(const unsigned char *data) } static DBCHAR -jisx0201_k_encoder(const ucs4_t *data, int *length) +jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_K_ENCODE(*data, coded) @@ -967,7 +966,7 @@ gb2312_decoder(const unsigned char *data) } static DBCHAR -gb2312_encoder(const ucs4_t *data, int *length) +gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -986,7 +985,7 @@ dummy_decoder(const unsigned char *data) } static DBCHAR -dummy_encoder(const ucs4_t *data, int *length) +dummy_encoder(const ucs4_t *data, Py_ssize_t *length) { return MAP_UNMAPPABLE; } diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 860a979f87d..9b8d324f56b 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -2,7 +2,6 @@ * _codecs_jp.c: Codecs collection for Japanese encodings * * Written by Hye-Shik Chang - * $CJKCodecs: _codecs_jp.c,v 1.14 2004/07/07 17:54:47 perky Exp $ */ #define USING_BINARY_PAIR_SEARCH @@ -150,7 +149,7 @@ ENCODER(euc_jis_2004) while (inleft > 0) { ucs4_t c = IN1; DBCHAR code; - int insize; + Py_ssize_t insize; if (c < 0x80) { WRITE1(c) @@ -545,7 +544,7 @@ ENCODER(shift_jis_2004) ucs4_t c = IN1; DBCHAR code = NOCHAR; int c1, c2; - size_t insize; + Py_ssize_t insize; JISX0201_ENCODE(c, code) else DECODE_SURROGATE(c) diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 53352eca6cd..2a95bbe9ba8 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -2,7 +2,6 @@ * _codecs_kr.c: Codecs collection for Korean encodings * * Written by Hye-Shik Chang - * $CJKCodecs: _codecs_kr.c,v 1.8 2004/07/07 14:59:26 perky Exp $ */ #include "cjkcodecs.h" diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index bf237951b96..8ccbca1df0b 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -2,7 +2,6 @@ * _codecs_tw.c: Codecs collection for Taiwan's encodings * * Written by Hye-Shik Chang - * $CJKCodecs: _codecs_tw.c,v 1.10 2004/07/07 14:59:26 perky Exp $ */ #include "cjkcodecs.h" diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h index 87bb420418c..1fca06bce4d 100644 --- a/Modules/cjkcodecs/alg_jisx0201.h +++ b/Modules/cjkcodecs/alg_jisx0201.h @@ -1,5 +1,3 @@ -/* $CJKCodecs: alg_jisx0201.h,v 1.2 2004/06/29 05:42:08 perky Exp $ */ - #define JISX0201_R_ENCODE(c, assi) \ if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \ (assi) = (c); \ diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index e0229cc3674..9c321622aa7 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -2,12 +2,12 @@ * cjkcodecs.h: common header for cjkcodecs * * Written by Hye-Shik Chang - * $CJKCodecs: cjkcodecs.h,v 1.6 2004/07/18 15:22:31 perky Exp $ */ #ifndef _CJKCODECS_H_ #define _CJKCODECS_H_ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include "multibytecodec.h" @@ -70,25 +70,25 @@ static const struct dbcs_map *mapping_list; static int encoding##_encode_init( \ MultibyteCodec_State *state, const void *config) #define ENCODER(encoding) \ - static int encoding##_encode( \ + static Py_ssize_t encoding##_encode( \ MultibyteCodec_State *state, const void *config, \ - const Py_UNICODE **inbuf, size_t inleft, \ - unsigned char **outbuf, size_t outleft, int flags) + const Py_UNICODE **inbuf, Py_ssize_t inleft, \ + unsigned char **outbuf, Py_ssize_t outleft, int flags) #define ENCODER_RESET(encoding) \ - static int encoding##_encode_reset( \ + static Py_ssize_t encoding##_encode_reset( \ MultibyteCodec_State *state, const void *config, \ - unsigned char **outbuf, size_t outleft) + unsigned char **outbuf, Py_ssize_t outleft) #define DECODER_INIT(encoding) \ static int encoding##_decode_init( \ MultibyteCodec_State *state, const void *config) #define DECODER(encoding) \ - static int encoding##_decode( \ + static Py_ssize_t encoding##_decode( \ MultibyteCodec_State *state, const void *config, \ - const unsigned char **inbuf, size_t inleft, \ - Py_UNICODE **outbuf, size_t outleft) + const unsigned char **inbuf, Py_ssize_t inleft, \ + Py_UNICODE **outbuf, Py_ssize_t outleft) #define DECODER_RESET(encoding) \ - static int encoding##_decode_reset( \ + static Py_ssize_t encoding##_decode_reset( \ MultibyteCodec_State *state, const void *config) #if Py_UNICODE_SIZE == 4 diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h index f7b86bc9daa..250c6730474 100644 --- a/Modules/cjkcodecs/emu_jisx0213_2000.h +++ b/Modules/cjkcodecs/emu_jisx0213_2000.h @@ -1,5 +1,3 @@ -/* $CJKCodecs: emu_jisx0213_2000.h,v 1.3 2004/07/08 02:53:37 perky Exp $ */ - /* These routines may be quite inefficient, but it's used only to emulate old * standards. */ diff --git a/Modules/cjkcodecs/mappings_cn.h b/Modules/cjkcodecs/mappings_cn.h index 581cc0c5231..a6dcebfe4ae 100644 --- a/Modules/cjkcodecs/mappings_cn.h +++ b/Modules/cjkcodecs/mappings_cn.h @@ -1,7 +1,3 @@ -/* - * $CJKCodecs: mappings_cn.h,v 1.1 2004/07/07 14:59:27 perky Exp $ - */ - static const ucs2_t __gb2312_decmap[7482] = { 12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216, 8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303, diff --git a/Modules/cjkcodecs/mappings_hk.h b/Modules/cjkcodecs/mappings_hk.h index 1b71288d774..a526e90d561 100644 --- a/Modules/cjkcodecs/mappings_hk.h +++ b/Modules/cjkcodecs/mappings_hk.h @@ -1,7 +1,3 @@ -/* - * $CJKCodecs: mappings_hk.h,v 1.2 2004/07/07 15:07:23 perky Exp $ - */ - static const ucs2_t __big5hkscs_decmap[6095] = { 62211,62212,62213,62214,62215,268,62217,209,205,62220,62221,203,8168,62224, 202,62226,62227,62228,62229,270,62231,62232,256,193,461,192,274,201,282,200, diff --git a/Modules/cjkcodecs/mappings_jisx0213_pair.h b/Modules/cjkcodecs/mappings_jisx0213_pair.h index f3f2d30b543..eda8e9e8198 100644 --- a/Modules/cjkcodecs/mappings_jisx0213_pair.h +++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h @@ -1,7 +1,3 @@ -/* - * $CJKCodecs: mappings_jisx0213_pair.h,v 1.2 2004/07/07 15:28:02 perky Exp $ - */ - #define JISX0213_ENCPAIRS 46 #ifdef EXTERN_JISX0213_PAIR static const struct widedbcs_index *jisx0213_pair_decmap; diff --git a/Modules/cjkcodecs/mappings_jp.h b/Modules/cjkcodecs/mappings_jp.h index 088256f5ec7..c6dae3daa7d 100644 --- a/Modules/cjkcodecs/mappings_jp.h +++ b/Modules/cjkcodecs/mappings_jp.h @@ -1,7 +1,3 @@ -/* - * $CJKCodecs: mappings_jp.h,v 1.3 2004/07/07 17:40:27 perky Exp $ - */ - static const ucs2_t __jisx0208_decmap[6956] = { 12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180, 65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294, diff --git a/Modules/cjkcodecs/mappings_kr.h b/Modules/cjkcodecs/mappings_kr.h index 7fc93855c7e..7e6fdd2701b 100644 --- a/Modules/cjkcodecs/mappings_kr.h +++ b/Modules/cjkcodecs/mappings_kr.h @@ -1,7 +1,3 @@ -/* - * $CJKCodecs: mappings_kr.h,v 1.1 2004/07/07 14:59:27 perky Exp $ - */ - static const ucs2_t __ksx1001_decmap[8264] = { 12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217, 8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304, diff --git a/Modules/cjkcodecs/mappings_tw.h b/Modules/cjkcodecs/mappings_tw.h index e6b0fdb13cd..ec3f9f7468e 100644 --- a/Modules/cjkcodecs/mappings_tw.h +++ b/Modules/cjkcodecs/mappings_tw.h @@ -1,7 +1,3 @@ -/* - * $CJKCodecs: mappings_tw.h,v 1.2 2004/07/07 15:07:23 perky Exp $ - */ - static const ucs2_t __big5_decmap[16702] = { 12288,65292,12289,12290,65294,8226,65307,65306,65311,65281,65072,8230,8229, 65104,65380,65106,183,65108,65109,65110,65111,65372,8211,65073,8212,65075, diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index 00d86c99ace..f51b6f29ccc 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -2,13 +2,12 @@ * multibytecodec.c: Common Multibyte Codec Implementation * * Written by Hye-Shik Chang - * $CJKCodecs: multibytecodec.c,v 1.13 2004/08/19 16:57:19 perky Exp $ */ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include "multibytecodec.h" - typedef struct { const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end; unsigned char *outbuf, *outbuf_end; @@ -49,7 +48,7 @@ static char *codeckwarglist[] = {"input", "errors", NULL}; static char *streamkwarglist[] = {"stream", "errors", NULL}; static PyObject *multibytecodec_encode(MultibyteCodec *, - MultibyteCodec_State *, const Py_UNICODE **, size_t, + MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, PyObject *, int); static PyObject *mbstreamreader_create(MultibyteCodec *, PyObject *, const char *); @@ -59,21 +58,21 @@ static PyObject *mbstreamwriter_create(MultibyteCodec *, #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ static PyObject * -make_tuple(PyObject *unicode, int len) +make_tuple(PyObject *object, Py_ssize_t len) { PyObject *v, *w; - if (unicode == NULL) + if (object == NULL) return NULL; v = PyTuple_New(2); if (v == NULL) { - Py_DECREF(unicode); + Py_DECREF(object); return NULL; } - PyTuple_SET_ITEM(v, 0, unicode); + PyTuple_SET_ITEM(v, 0, object); - w = PyInt_FromLong(len); + w = PyInt_FromSsize_t(len); if (w == NULL) { Py_DECREF(v); return NULL; @@ -98,11 +97,12 @@ get_errorcallback(const char *errors) } static int -expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize) +expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) { - int orgpos, orgsize; + Py_ssize_t orgpos, orgsize; - orgpos = (int)((char*)buf->outbuf - PyString_AS_STRING(buf->outobj)); + orgpos = (Py_ssize_t)((char*)buf->outbuf - + PyString_AS_STRING(buf->outobj)); orgsize = PyString_GET_SIZE(buf->outobj); if (_PyString_Resize(&buf->outobj, orgsize + ( esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) @@ -121,11 +121,12 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize) } static int -expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize) +expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) { - int orgpos, orgsize; + Py_ssize_t orgpos, orgsize; - orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); + orgpos = (Py_ssize_t)(buf->outbuf - + PyUnicode_AS_UNICODE(buf->outobj)); orgsize = PyUnicode_GET_SIZE(buf->outobj); if (PyUnicode_Resize(&buf->outobj, orgsize + ( esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) @@ -147,13 +148,12 @@ static int multibytecodec_encerror(MultibyteCodec *codec, MultibyteCodec_State *state, MultibyteEncodeBuffer *buf, - PyObject *errors, int e) + PyObject *errors, Py_ssize_t e) { PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj; - int retstrsize, newpos; + Py_ssize_t retstrsize, newpos; const char *reason; - size_t esize; - int start, end; + Py_ssize_t esize, start, end; if (e > 0) { reason = "illegal multibyte sequence"; @@ -181,12 +181,12 @@ multibytecodec_encerror(MultibyteCodec *codec, if (errors == ERROR_REPLACE) { const Py_UNICODE replchar = '?', *inbuf = &replchar; - int r; + Py_ssize_t r; for (;;) { - size_t outleft; + Py_ssize_t outleft; - outleft = (size_t)(buf->outbuf_end - buf->outbuf); + outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); r = codec->encode(state, codec->config, &inbuf, 1, &buf->outbuf, outleft, 0); if (r == MBERR_TOOSMALL) { @@ -207,7 +207,7 @@ multibytecodec_encerror(MultibyteCodec *codec, return 0; } - start = (int)(buf->inbuf - buf->inbuf_top); + start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); end = start + esize; /* use cached exception object if available */ @@ -266,13 +266,13 @@ multibytecodec_encerror(MultibyteCodec *codec, memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize); buf->outbuf += retstrsize; - newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); + newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0) - newpos += (int)(buf->inbuf_end - buf->inbuf_top); + newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { PyErr_Format(PyExc_IndexError, "position %d from error handler out of bounds", - newpos); + (int)newpos); goto errorexit; } buf->inbuf = buf->inbuf_top + newpos; @@ -291,13 +291,12 @@ static int multibytecodec_decerror(MultibyteCodec *codec, MultibyteCodec_State *state, MultibyteDecodeBuffer *buf, - PyObject *errors, int e) + PyObject *errors, Py_ssize_t e) { PyObject *argsobj, *retobj = NULL, *retuni = NULL; - int retunisize, newpos; + Py_ssize_t retunisize, newpos; const char *reason; - size_t esize; - int start, end; + Py_ssize_t esize, start, end; if (e > 0) { reason = "illegal multibyte sequence"; @@ -332,14 +331,14 @@ multibytecodec_decerror(MultibyteCodec *codec, return 0; } - start = (int)(buf->inbuf - buf->inbuf_top); + start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); end = start + esize; /* use cached exception object if available */ if (buf->excobj == NULL) { buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, (const char *)buf->inbuf_top, - (int)(buf->inbuf_end - buf->inbuf_top), + (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), start, end, reason); if (buf->excobj == NULL) goto errorexit; @@ -383,13 +382,13 @@ multibytecodec_decerror(MultibyteCodec *codec, buf->outbuf += retunisize; } - newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); + newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0) - newpos += (int)(buf->inbuf_end - buf->inbuf_top); + newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { PyErr_Format(PyExc_IndexError, "position %d from error handler out of bounds", - newpos); + (int)newpos); goto errorexit; } buf->inbuf = buf->inbuf_top + newpos; @@ -404,11 +403,11 @@ errorexit: static PyObject * multibytecodec_encode(MultibyteCodec *codec, MultibyteCodec_State *state, - const Py_UNICODE **data, size_t datalen, + const Py_UNICODE **data, Py_ssize_t datalen, PyObject *errors, int flags) { MultibyteEncodeBuffer buf; - int finalsize, r = 0; + Py_ssize_t finalsize, r = 0; if (datalen == 0) return PyString_FromString(""); @@ -423,12 +422,12 @@ multibytecodec_encode(MultibyteCodec *codec, buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); while (buf.inbuf < buf.inbuf_end) { - size_t inleft, outleft; + Py_ssize_t inleft, outleft; /* we don't reuse inleft and outleft here. * error callbacks can relocate the cursor anywhere on buffer*/ - inleft = (size_t)(buf.inbuf_end - buf.inbuf); - outleft = (size_t)(buf.outbuf_end - buf.outbuf); + inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); + outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); r = codec->encode(state, codec->config, &buf.inbuf, inleft, &buf.outbuf, outleft, flags); *data = buf.inbuf; @@ -442,9 +441,9 @@ multibytecodec_encode(MultibyteCodec *codec, if (codec->encreset != NULL) for (;;) { - size_t outleft; + Py_ssize_t outleft; - outleft = (size_t)(buf.outbuf_end - buf.outbuf); + outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); r = codec->encreset(state, codec->config, &buf.outbuf, outleft); if (r == 0) @@ -454,7 +453,8 @@ multibytecodec_encode(MultibyteCodec *codec, goto errorexit; } - finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj)); + finalsize = (Py_ssize_t)((char*)buf.outbuf - + PyString_AS_STRING(buf.outobj)); if (finalsize != PyString_GET_SIZE(buf.outobj)) if (_PyString_Resize(&buf.outobj, finalsize) == -1) @@ -477,7 +477,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self, Py_UNICODE *data; PyObject *errorcb, *r, *arg, *ucvt; const char *errors = NULL; - int datalen; + Py_ssize_t datalen; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", codeckwarglist, &arg, &errors)) @@ -537,7 +537,7 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, MultibyteDecodeBuffer buf; PyObject *errorcb; const char *data, *errors = NULL; - int datalen, finalsize; + Py_ssize_t datalen, finalsize; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode", codeckwarglist, &data, &datalen, &errors)) @@ -568,11 +568,10 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, goto errorexit; while (buf.inbuf < buf.inbuf_end) { - size_t inleft, outleft; - int r; + Py_ssize_t inleft, outleft, r; - inleft = (size_t)(buf.inbuf_end - buf.inbuf); - outleft = (size_t)(buf.outbuf_end - buf.outbuf); + inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); + outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); r = self->codec->decode(&state, self->codec->config, &buf.inbuf, inleft, &buf.outbuf, outleft); @@ -583,7 +582,8 @@ MultibyteCodec_Decode(MultibyteCodecObject *self, goto errorexit; } - finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); + finalsize = (Py_ssize_t)(buf.outbuf - + PyUnicode_AS_UNICODE(buf.outobj)); if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) @@ -666,7 +666,7 @@ static PyTypeObject MultibyteCodec_Type = { /* methods */ (destructor)multibytecodec_dealloc, /* tp_dealloc */ 0, /* tp_print */ - 0, /* tp_getattr */ + 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ @@ -692,11 +692,11 @@ static PyTypeObject MultibyteCodec_Type = { static PyObject * mbstreamreader_iread(MultibyteStreamReaderObject *self, - const char *method, int sizehint) + const char *method, Py_ssize_t sizehint) { MultibyteDecodeBuffer buf; PyObject *cres; - int rsize, r, finalsize = 0; + Py_ssize_t rsize, r, finalsize = 0; if (sizehint == 0) return PyUnicode_FromUnicode(NULL, 0); @@ -755,10 +755,12 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, r = 0; if (rsize > 0) while (buf.inbuf < buf.inbuf_end) { - size_t inleft, outleft; + Py_ssize_t inleft, outleft; - inleft = (size_t)(buf.inbuf_end - buf.inbuf); - outleft = (size_t)(buf.outbuf_end -buf.outbuf); + inleft = (Py_ssize_t)(buf.inbuf_end - + buf.inbuf); + outleft = (Py_ssize_t)(buf.outbuf_end - + buf.outbuf); r = self->codec->decode(&self->state, self->codec->config, @@ -780,12 +782,12 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, } if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ - size_t npendings; + Py_ssize_t npendings; /* we can't assume that pendingsize is still 0 here. * because this function can be called recursively * from error callback */ - npendings = (size_t)(buf.inbuf_end - buf.inbuf); + npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); if (npendings + self->pendingsize > MAXDECPENDING) { PyErr_SetString(PyExc_RuntimeError, "pending buffer overflow"); @@ -796,7 +798,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self, self->pendingsize += npendings; } - finalsize = (int)(buf.outbuf - + finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); Py_DECREF(cres); cres = NULL; @@ -826,7 +828,7 @@ static PyObject * mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) { PyObject *sizeobj = NULL; - long size; + Py_ssize_t size; if (!PyArg_ParseTuple(args, "|O:read", &sizeobj)) return NULL; @@ -834,7 +836,7 @@ mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) if (sizeobj == Py_None || sizeobj == NULL) size = -1; else if (PyInt_Check(sizeobj)) - size = PyInt_AsLong(sizeobj); + size = PyInt_AsSsize_t(sizeobj); else { PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); return NULL; @@ -847,7 +849,7 @@ static PyObject * mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) { PyObject *sizeobj = NULL; - long size; + Py_ssize_t size; if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj)) return NULL; @@ -855,7 +857,7 @@ mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) if (sizeobj == Py_None || sizeobj == NULL) size = -1; else if (PyInt_Check(sizeobj)) - size = PyInt_AsLong(sizeobj); + size = PyInt_AsSsize_t(sizeobj); else { PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); return NULL; @@ -868,7 +870,7 @@ static PyObject * mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) { PyObject *sizehintobj = NULL, *r, *sr; - long sizehint; + Py_ssize_t sizehint; if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj)) return NULL; @@ -876,7 +878,7 @@ mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) if (sizehintobj == Py_None || sizehintobj == NULL) sizehint = -1; else if (PyInt_Check(sizehintobj)) - sizehint = PyInt_AsLong(sizehintobj); + sizehint = PyInt_AsSsize_t(sizehintobj); else { PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); return NULL; @@ -936,7 +938,7 @@ static PyTypeObject MultibyteStreamReader_Type = { /* methods */ (destructor)mbstreamreader_dealloc, /* tp_dealloc */ 0, /* tp_print */ - 0, /* tp_getattr */ + 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ @@ -966,7 +968,7 @@ mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, { PyObject *wr, *ucvt, *r = NULL; Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; - int datalen; + Py_ssize_t datalen; if (PyUnicode_Check(unistr)) ucvt = NULL; @@ -1012,7 +1014,7 @@ mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, goto errorexit; if (inbuf < inbuf_end) { - self->pendingsize = (int)(inbuf_end - inbuf); + self->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); if (self->pendingsize > MAXENCPENDING) { self->pendingsize = 0; PyErr_SetString(PyExc_RuntimeError, @@ -1152,7 +1154,7 @@ static PyTypeObject MultibyteStreamWriter_Type = { /* methods */ (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ 0, /* tp_print */ - 0, /* tp_getattr */ + 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ 0, /* tp_repr */ @@ -1265,9 +1267,12 @@ static struct PyMethodDef __methods[] = { void init_multibytecodec(void) { - MultibyteCodec_Type.ob_type = &PyType_Type; - MultibyteStreamReader_Type.ob_type = &PyType_Type; - MultibyteStreamWriter_Type.ob_type = &PyType_Type; + if (PyType_Ready(&MultibyteCodec_Type) < 0) + return; + if (PyType_Ready(&MultibyteStreamReader_Type) < 0) + return; + if (PyType_Ready(&MultibyteStreamWriter_Type) < 0) + return; Py_InitModule("_multibytecodec", __methods); diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index 9435e0c03f7..ec49c785beb 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -2,7 +2,6 @@ * multibytecodec.h: Common Multibyte Codec Implementation * * Written by Hye-Shik Chang - * $CJKCodecs: multibytecodec.h,v 1.7 2004/06/27 10:39:28 perky Exp $ */ #ifndef _PYTHON_MULTIBYTECODEC_H_ @@ -32,23 +31,24 @@ typedef union { } MultibyteCodec_State; typedef int (*mbcodec_init)(const void *config); -typedef int (*mbencode_func)(MultibyteCodec_State *state, const void *config, - const Py_UNICODE **inbuf, size_t inleft, - unsigned char **outbuf, size_t outleft, - int flags); +typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, + const void *config, + const Py_UNICODE **inbuf, Py_ssize_t inleft, + unsigned char **outbuf, Py_ssize_t outleft, + int flags); typedef int (*mbencodeinit_func)(MultibyteCodec_State *state, const void *config); -typedef int (*mbencodereset_func)(MultibyteCodec_State *state, - const void *config, - unsigned char **outbuf, size_t outleft); -typedef int (*mbdecode_func)(MultibyteCodec_State *state, - const void *config, - const unsigned char **inbuf, size_t inleft, - Py_UNICODE **outbuf, size_t outleft); +typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state, + const void *config, + unsigned char **outbuf, Py_ssize_t outleft); +typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, + const void *config, + const unsigned char **inbuf, Py_ssize_t inleft, + Py_UNICODE **outbuf, Py_ssize_t outleft); typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state, const void *config); -typedef int (*mbdecodereset_func)(MultibyteCodec_State *state, - const void *config); +typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state, + const void *config); typedef struct { const char *encoding; @@ -73,7 +73,7 @@ typedef struct { MultibyteCodec *codec; MultibyteCodec_State state; unsigned char pending[MAXDECPENDING]; - int pendingsize; + Py_ssize_t pendingsize; PyObject *stream, *errors; } MultibyteStreamReaderObject; @@ -83,7 +83,7 @@ typedef struct { MultibyteCodec *codec; MultibyteCodec_State state; Py_UNICODE pending[MAXENCPENDING]; - int pendingsize; + Py_ssize_t pendingsize; PyObject *stream, *errors; } MultibyteStreamWriterObject;