- Modernize code to use Py_ssize_t more extensively.
- Do some minor code clean-ups.
This commit is contained in:
parent
ad9afcf213
commit
4b96c1384e
|
@ -1,11 +1,3 @@
|
||||||
Notes on cjkcodecs
|
|
||||||
-------------------
|
|
||||||
This directory contains source files for cjkcodecs extension modules.
|
|
||||||
They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs)
|
|
||||||
as of Aug 20 2004 currently.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
To generate or modify mapping headers
|
To generate or modify mapping headers
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
Mapping headers are imported from CJKCodecs in pre-generated form.
|
Mapping headers are imported from CJKCodecs in pre-generated form.
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
|
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: _codecs_cn.c,v 1.8 2004/07/07 14:59:26 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cjkcodecs.h"
|
#include "cjkcodecs.h"
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
|
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: _codecs_hk.c,v 1.4 2004/07/18 04:44:27 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define USING_IMPORTED_MAPS
|
#define USING_IMPORTED_MAPS
|
||||||
|
@ -32,7 +31,7 @@ ENCODER(big5hkscs)
|
||||||
while (inleft > 0) {
|
while (inleft > 0) {
|
||||||
ucs4_t c = **inbuf;
|
ucs4_t c = **inbuf;
|
||||||
DBCHAR code;
|
DBCHAR code;
|
||||||
int insize;
|
Py_ssize_t insize;
|
||||||
|
|
||||||
if (c < 0x80) {
|
if (c < 0x80) {
|
||||||
REQUIRE_OUTBUF(1)
|
REQUIRE_OUTBUF(1)
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
|
* _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: _codecs_iso2022.c,v 1.22 2004/08/19 17:08:13 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define USING_IMPORTED_MAPS
|
#define USING_IMPORTED_MAPS
|
||||||
|
@ -104,7 +103,7 @@
|
||||||
|
|
||||||
typedef int (*iso2022_init_func)(void);
|
typedef int (*iso2022_init_func)(void);
|
||||||
typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
|
typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
|
||||||
typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, int *length);
|
typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
|
||||||
|
|
||||||
struct iso2022_designation {
|
struct iso2022_designation {
|
||||||
unsigned char mark;
|
unsigned char mark;
|
||||||
|
@ -160,7 +159,7 @@ ENCODER(iso2022)
|
||||||
const struct iso2022_designation *dsg;
|
const struct iso2022_designation *dsg;
|
||||||
DBCHAR encoded;
|
DBCHAR encoded;
|
||||||
ucs4_t c = **inbuf;
|
ucs4_t c = **inbuf;
|
||||||
int insize;
|
Py_ssize_t insize;
|
||||||
|
|
||||||
if (c < 0x80) {
|
if (c < 0x80) {
|
||||||
if (STATE_G0 != CHARSET_ASCII) {
|
if (STATE_G0 != CHARSET_ASCII) {
|
||||||
|
@ -183,7 +182,7 @@ ENCODER(iso2022)
|
||||||
|
|
||||||
encoded = MAP_UNMAPPABLE;
|
encoded = MAP_UNMAPPABLE;
|
||||||
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
|
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
|
||||||
int length = 1;
|
Py_ssize_t length = 1;
|
||||||
encoded = dsg->encoder(&c, &length);
|
encoded = dsg->encoder(&c, &length);
|
||||||
if (encoded == MAP_MULTIPLE_AVAIL) {
|
if (encoded == MAP_MULTIPLE_AVAIL) {
|
||||||
/* this implementation won't work for pair
|
/* this implementation won't work for pair
|
||||||
|
@ -300,12 +299,12 @@ DECODER_RESET(iso2022)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static Py_ssize_t
|
||||||
iso2022processesc(const void *config, MultibyteCodec_State *state,
|
iso2022processesc(const void *config, MultibyteCodec_State *state,
|
||||||
const unsigned char **inbuf, size_t *inleft)
|
const unsigned char **inbuf, Py_ssize_t *inleft)
|
||||||
{
|
{
|
||||||
unsigned char charset, designation;
|
unsigned char charset, designation;
|
||||||
size_t i, esclen;
|
Py_ssize_t i, esclen;
|
||||||
|
|
||||||
for (i = 1;i < MAX_ESCSEQLEN;i++) {
|
for (i = 1;i < MAX_ESCSEQLEN;i++) {
|
||||||
if (i >= *inleft)
|
if (i >= *inleft)
|
||||||
|
@ -388,10 +387,10 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
|
||||||
else if ((c) == 0xa2) (assi) = 0x2019; \
|
else if ((c) == 0xa2) (assi) = 0x2019; \
|
||||||
else if ((c) == 0xaf) (assi) = 0x2015;
|
else if ((c) == 0xaf) (assi) = 0x2015;
|
||||||
|
|
||||||
static int
|
static Py_ssize_t
|
||||||
iso2022processg2(const void *config, MultibyteCodec_State *state,
|
iso2022processg2(const void *config, MultibyteCodec_State *state,
|
||||||
const unsigned char **inbuf, size_t *inleft,
|
const unsigned char **inbuf, Py_ssize_t *inleft,
|
||||||
Py_UNICODE **outbuf, size_t *outleft)
|
Py_UNICODE **outbuf, Py_ssize_t *outleft)
|
||||||
{
|
{
|
||||||
/* not written to use encoder, decoder functions because only few
|
/* not written to use encoder, decoder functions because only few
|
||||||
* encodings use G2 designations in CJKCodecs */
|
* encodings use G2 designations in CJKCodecs */
|
||||||
|
@ -425,7 +424,7 @@ DECODER(iso2022)
|
||||||
|
|
||||||
while (inleft > 0) {
|
while (inleft > 0) {
|
||||||
unsigned char c = IN1;
|
unsigned char c = IN1;
|
||||||
int err;
|
Py_ssize_t err;
|
||||||
|
|
||||||
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
|
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
|
||||||
/* ESC throughout mode:
|
/* ESC throughout mode:
|
||||||
|
@ -589,7 +588,7 @@ ksx1001_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
ksx1001_encoder(const ucs4_t *data, int *length)
|
ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
assert(*length == 1);
|
assert(*length == 1);
|
||||||
|
@ -625,7 +624,7 @@ jisx0208_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0208_encoder(const ucs4_t *data, int *length)
|
jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
assert(*length == 1);
|
assert(*length == 1);
|
||||||
|
@ -662,7 +661,7 @@ jisx0212_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0212_encoder(const ucs4_t *data, int *length)
|
jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
assert(*length == 1);
|
assert(*length == 1);
|
||||||
|
@ -760,7 +759,7 @@ jisx0213_2004_2_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_encoder(const ucs4_t *data, int *length, void *config)
|
jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
|
|
||||||
|
@ -814,7 +813,7 @@ jisx0213_encoder(const ucs4_t *data, int *length, void *config)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_2000_1_encoder(const ucs4_t *data, int *length)
|
jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
|
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
|
||||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||||
|
@ -826,10 +825,10 @@ jisx0213_2000_1_encoder(const ucs4_t *data, int *length)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_2000_1_encoder_paironly(const ucs4_t *data, int *length)
|
jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
int ilength = *length;
|
Py_ssize_t ilength = *length;
|
||||||
|
|
||||||
coded = jisx0213_encoder(data, length, (void *)2000);
|
coded = jisx0213_encoder(data, length, (void *)2000);
|
||||||
switch (ilength) {
|
switch (ilength) {
|
||||||
|
@ -849,7 +848,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, int *length)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_2000_2_encoder(const ucs4_t *data, int *length)
|
jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
|
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
|
||||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||||
|
@ -861,7 +860,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, int *length)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_2004_1_encoder(const ucs4_t *data, int *length)
|
jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded = jisx0213_encoder(data, length, NULL);
|
DBCHAR coded = jisx0213_encoder(data, length, NULL);
|
||||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||||
|
@ -873,10 +872,10 @@ jisx0213_2004_1_encoder(const ucs4_t *data, int *length)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_2004_1_encoder_paironly(const ucs4_t *data, int *length)
|
jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
int ilength = *length;
|
Py_ssize_t ilength = *length;
|
||||||
|
|
||||||
coded = jisx0213_encoder(data, length, NULL);
|
coded = jisx0213_encoder(data, length, NULL);
|
||||||
switch (ilength) {
|
switch (ilength) {
|
||||||
|
@ -896,7 +895,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, int *length)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0213_2004_2_encoder(const ucs4_t *data, int *length)
|
jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded = jisx0213_encoder(data, length, NULL);
|
DBCHAR coded = jisx0213_encoder(data, length, NULL);
|
||||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||||
|
@ -917,7 +916,7 @@ jisx0201_r_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0201_r_encoder(const ucs4_t *data, int *length)
|
jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
JISX0201_R_ENCODE(*data, coded)
|
JISX0201_R_ENCODE(*data, coded)
|
||||||
|
@ -935,7 +934,7 @@ jisx0201_k_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
jisx0201_k_encoder(const ucs4_t *data, int *length)
|
jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
JISX0201_K_ENCODE(*data, coded)
|
JISX0201_K_ENCODE(*data, coded)
|
||||||
|
@ -967,7 +966,7 @@ gb2312_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
gb2312_encoder(const ucs4_t *data, int *length)
|
gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
DBCHAR coded;
|
DBCHAR coded;
|
||||||
assert(*length == 1);
|
assert(*length == 1);
|
||||||
|
@ -986,7 +985,7 @@ dummy_decoder(const unsigned char *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static DBCHAR
|
static DBCHAR
|
||||||
dummy_encoder(const ucs4_t *data, int *length)
|
dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||||
{
|
{
|
||||||
return MAP_UNMAPPABLE;
|
return MAP_UNMAPPABLE;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* _codecs_jp.c: Codecs collection for Japanese encodings
|
* _codecs_jp.c: Codecs collection for Japanese encodings
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: _codecs_jp.c,v 1.14 2004/07/07 17:54:47 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define USING_BINARY_PAIR_SEARCH
|
#define USING_BINARY_PAIR_SEARCH
|
||||||
|
@ -150,7 +149,7 @@ ENCODER(euc_jis_2004)
|
||||||
while (inleft > 0) {
|
while (inleft > 0) {
|
||||||
ucs4_t c = IN1;
|
ucs4_t c = IN1;
|
||||||
DBCHAR code;
|
DBCHAR code;
|
||||||
int insize;
|
Py_ssize_t insize;
|
||||||
|
|
||||||
if (c < 0x80) {
|
if (c < 0x80) {
|
||||||
WRITE1(c)
|
WRITE1(c)
|
||||||
|
@ -545,7 +544,7 @@ ENCODER(shift_jis_2004)
|
||||||
ucs4_t c = IN1;
|
ucs4_t c = IN1;
|
||||||
DBCHAR code = NOCHAR;
|
DBCHAR code = NOCHAR;
|
||||||
int c1, c2;
|
int c1, c2;
|
||||||
size_t insize;
|
Py_ssize_t insize;
|
||||||
|
|
||||||
JISX0201_ENCODE(c, code)
|
JISX0201_ENCODE(c, code)
|
||||||
else DECODE_SURROGATE(c)
|
else DECODE_SURROGATE(c)
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* _codecs_kr.c: Codecs collection for Korean encodings
|
* _codecs_kr.c: Codecs collection for Korean encodings
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: _codecs_kr.c,v 1.8 2004/07/07 14:59:26 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cjkcodecs.h"
|
#include "cjkcodecs.h"
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* _codecs_tw.c: Codecs collection for Taiwan's encodings
|
* _codecs_tw.c: Codecs collection for Taiwan's encodings
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: _codecs_tw.c,v 1.10 2004/07/07 14:59:26 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cjkcodecs.h"
|
#include "cjkcodecs.h"
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2004/06/29 05:42:08 perky Exp $ */
|
|
||||||
|
|
||||||
#define JISX0201_R_ENCODE(c, assi) \
|
#define JISX0201_R_ENCODE(c, assi) \
|
||||||
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
|
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
|
||||||
(assi) = (c); \
|
(assi) = (c); \
|
||||||
|
|
|
@ -2,12 +2,12 @@
|
||||||
* cjkcodecs.h: common header for cjkcodecs
|
* cjkcodecs.h: common header for cjkcodecs
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: cjkcodecs.h,v 1.6 2004/07/18 15:22:31 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _CJKCODECS_H_
|
#ifndef _CJKCODECS_H_
|
||||||
#define _CJKCODECS_H_
|
#define _CJKCODECS_H_
|
||||||
|
|
||||||
|
#define PY_SSIZE_T_CLEAN
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "multibytecodec.h"
|
#include "multibytecodec.h"
|
||||||
|
|
||||||
|
@ -70,25 +70,25 @@ static const struct dbcs_map *mapping_list;
|
||||||
static int encoding##_encode_init( \
|
static int encoding##_encode_init( \
|
||||||
MultibyteCodec_State *state, const void *config)
|
MultibyteCodec_State *state, const void *config)
|
||||||
#define ENCODER(encoding) \
|
#define ENCODER(encoding) \
|
||||||
static int encoding##_encode( \
|
static Py_ssize_t encoding##_encode( \
|
||||||
MultibyteCodec_State *state, const void *config, \
|
MultibyteCodec_State *state, const void *config, \
|
||||||
const Py_UNICODE **inbuf, size_t inleft, \
|
const Py_UNICODE **inbuf, Py_ssize_t inleft, \
|
||||||
unsigned char **outbuf, size_t outleft, int flags)
|
unsigned char **outbuf, Py_ssize_t outleft, int flags)
|
||||||
#define ENCODER_RESET(encoding) \
|
#define ENCODER_RESET(encoding) \
|
||||||
static int encoding##_encode_reset( \
|
static Py_ssize_t encoding##_encode_reset( \
|
||||||
MultibyteCodec_State *state, const void *config, \
|
MultibyteCodec_State *state, const void *config, \
|
||||||
unsigned char **outbuf, size_t outleft)
|
unsigned char **outbuf, Py_ssize_t outleft)
|
||||||
|
|
||||||
#define DECODER_INIT(encoding) \
|
#define DECODER_INIT(encoding) \
|
||||||
static int encoding##_decode_init( \
|
static int encoding##_decode_init( \
|
||||||
MultibyteCodec_State *state, const void *config)
|
MultibyteCodec_State *state, const void *config)
|
||||||
#define DECODER(encoding) \
|
#define DECODER(encoding) \
|
||||||
static int encoding##_decode( \
|
static Py_ssize_t encoding##_decode( \
|
||||||
MultibyteCodec_State *state, const void *config, \
|
MultibyteCodec_State *state, const void *config, \
|
||||||
const unsigned char **inbuf, size_t inleft, \
|
const unsigned char **inbuf, Py_ssize_t inleft, \
|
||||||
Py_UNICODE **outbuf, size_t outleft)
|
Py_UNICODE **outbuf, Py_ssize_t outleft)
|
||||||
#define DECODER_RESET(encoding) \
|
#define DECODER_RESET(encoding) \
|
||||||
static int encoding##_decode_reset( \
|
static Py_ssize_t encoding##_decode_reset( \
|
||||||
MultibyteCodec_State *state, const void *config)
|
MultibyteCodec_State *state, const void *config)
|
||||||
|
|
||||||
#if Py_UNICODE_SIZE == 4
|
#if Py_UNICODE_SIZE == 4
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
/* $CJKCodecs: emu_jisx0213_2000.h,v 1.3 2004/07/08 02:53:37 perky Exp $ */
|
|
||||||
|
|
||||||
/* These routines may be quite inefficient, but it's used only to emulate old
|
/* These routines may be quite inefficient, but it's used only to emulate old
|
||||||
* standards. */
|
* standards. */
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
/*
|
|
||||||
* $CJKCodecs: mappings_cn.h,v 1.1 2004/07/07 14:59:27 perky Exp $
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const ucs2_t __gb2312_decmap[7482] = {
|
static const ucs2_t __gb2312_decmap[7482] = {
|
||||||
12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216,
|
12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216,
|
||||||
8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,
|
8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
/*
|
|
||||||
* $CJKCodecs: mappings_hk.h,v 1.2 2004/07/07 15:07:23 perky Exp $
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const ucs2_t __big5hkscs_decmap[6095] = {
|
static const ucs2_t __big5hkscs_decmap[6095] = {
|
||||||
62211,62212,62213,62214,62215,268,62217,209,205,62220,62221,203,8168,62224,
|
62211,62212,62213,62214,62215,268,62217,209,205,62220,62221,203,8168,62224,
|
||||||
202,62226,62227,62228,62229,270,62231,62232,256,193,461,192,274,201,282,200,
|
202,62226,62227,62228,62229,270,62231,62232,256,193,461,192,274,201,282,200,
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
/*
|
|
||||||
* $CJKCodecs: mappings_jisx0213_pair.h,v 1.2 2004/07/07 15:28:02 perky Exp $
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define JISX0213_ENCPAIRS 46
|
#define JISX0213_ENCPAIRS 46
|
||||||
#ifdef EXTERN_JISX0213_PAIR
|
#ifdef EXTERN_JISX0213_PAIR
|
||||||
static const struct widedbcs_index *jisx0213_pair_decmap;
|
static const struct widedbcs_index *jisx0213_pair_decmap;
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
/*
|
|
||||||
* $CJKCodecs: mappings_jp.h,v 1.3 2004/07/07 17:40:27 perky Exp $
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const ucs2_t __jisx0208_decmap[6956] = {
|
static const ucs2_t __jisx0208_decmap[6956] = {
|
||||||
12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,
|
12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,
|
||||||
65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,
|
65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
/*
|
|
||||||
* $CJKCodecs: mappings_kr.h,v 1.1 2004/07/07 14:59:27 perky Exp $
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const ucs2_t __ksx1001_decmap[8264] = {
|
static const ucs2_t __ksx1001_decmap[8264] = {
|
||||||
12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217,
|
12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217,
|
||||||
8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304,
|
8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304,
|
||||||
|
|
|
@ -1,7 +1,3 @@
|
||||||
/*
|
|
||||||
* $CJKCodecs: mappings_tw.h,v 1.2 2004/07/07 15:07:23 perky Exp $
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const ucs2_t __big5_decmap[16702] = {
|
static const ucs2_t __big5_decmap[16702] = {
|
||||||
12288,65292,12289,12290,65294,8226,65307,65306,65311,65281,65072,8230,8229,
|
12288,65292,12289,12290,65294,8226,65307,65306,65311,65281,65072,8230,8229,
|
||||||
65104,65380,65106,183,65108,65109,65110,65111,65372,8211,65073,8212,65075,
|
65104,65380,65106,183,65108,65109,65110,65111,65372,8211,65073,8212,65075,
|
||||||
|
|
|
@ -2,13 +2,12 @@
|
||||||
* multibytecodec.c: Common Multibyte Codec Implementation
|
* multibytecodec.c: Common Multibyte Codec Implementation
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: multibytecodec.c,v 1.13 2004/08/19 16:57:19 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define PY_SSIZE_T_CLEAN
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "multibytecodec.h"
|
#include "multibytecodec.h"
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
|
const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
|
||||||
unsigned char *outbuf, *outbuf_end;
|
unsigned char *outbuf, *outbuf_end;
|
||||||
|
@ -49,7 +48,7 @@ static char *codeckwarglist[] = {"input", "errors", NULL};
|
||||||
static char *streamkwarglist[] = {"stream", "errors", NULL};
|
static char *streamkwarglist[] = {"stream", "errors", NULL};
|
||||||
|
|
||||||
static PyObject *multibytecodec_encode(MultibyteCodec *,
|
static PyObject *multibytecodec_encode(MultibyteCodec *,
|
||||||
MultibyteCodec_State *, const Py_UNICODE **, size_t,
|
MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
|
||||||
PyObject *, int);
|
PyObject *, int);
|
||||||
static PyObject *mbstreamreader_create(MultibyteCodec *,
|
static PyObject *mbstreamreader_create(MultibyteCodec *,
|
||||||
PyObject *, const char *);
|
PyObject *, const char *);
|
||||||
|
@ -59,21 +58,21 @@ static PyObject *mbstreamwriter_create(MultibyteCodec *,
|
||||||
#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
|
#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
make_tuple(PyObject *unicode, int len)
|
make_tuple(PyObject *object, Py_ssize_t len)
|
||||||
{
|
{
|
||||||
PyObject *v, *w;
|
PyObject *v, *w;
|
||||||
|
|
||||||
if (unicode == NULL)
|
if (object == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
v = PyTuple_New(2);
|
v = PyTuple_New(2);
|
||||||
if (v == NULL) {
|
if (v == NULL) {
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(object);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
PyTuple_SET_ITEM(v, 0, unicode);
|
PyTuple_SET_ITEM(v, 0, object);
|
||||||
|
|
||||||
w = PyInt_FromLong(len);
|
w = PyInt_FromSsize_t(len);
|
||||||
if (w == NULL) {
|
if (w == NULL) {
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -98,11 +97,12 @@ get_errorcallback(const char *errors)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize)
|
expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
|
||||||
{
|
{
|
||||||
int orgpos, orgsize;
|
Py_ssize_t orgpos, orgsize;
|
||||||
|
|
||||||
orgpos = (int)((char*)buf->outbuf - PyString_AS_STRING(buf->outobj));
|
orgpos = (Py_ssize_t)((char*)buf->outbuf -
|
||||||
|
PyString_AS_STRING(buf->outobj));
|
||||||
orgsize = PyString_GET_SIZE(buf->outobj);
|
orgsize = PyString_GET_SIZE(buf->outobj);
|
||||||
if (_PyString_Resize(&buf->outobj, orgsize + (
|
if (_PyString_Resize(&buf->outobj, orgsize + (
|
||||||
esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
|
esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
|
||||||
|
@ -121,11 +121,12 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize)
|
expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
|
||||||
{
|
{
|
||||||
int orgpos, orgsize;
|
Py_ssize_t orgpos, orgsize;
|
||||||
|
|
||||||
orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
|
orgpos = (Py_ssize_t)(buf->outbuf -
|
||||||
|
PyUnicode_AS_UNICODE(buf->outobj));
|
||||||
orgsize = PyUnicode_GET_SIZE(buf->outobj);
|
orgsize = PyUnicode_GET_SIZE(buf->outobj);
|
||||||
if (PyUnicode_Resize(&buf->outobj, orgsize + (
|
if (PyUnicode_Resize(&buf->outobj, orgsize + (
|
||||||
esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
|
esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
|
||||||
|
@ -147,13 +148,12 @@ static int
|
||||||
multibytecodec_encerror(MultibyteCodec *codec,
|
multibytecodec_encerror(MultibyteCodec *codec,
|
||||||
MultibyteCodec_State *state,
|
MultibyteCodec_State *state,
|
||||||
MultibyteEncodeBuffer *buf,
|
MultibyteEncodeBuffer *buf,
|
||||||
PyObject *errors, int e)
|
PyObject *errors, Py_ssize_t e)
|
||||||
{
|
{
|
||||||
PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
|
PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
|
||||||
int retstrsize, newpos;
|
Py_ssize_t retstrsize, newpos;
|
||||||
const char *reason;
|
const char *reason;
|
||||||
size_t esize;
|
Py_ssize_t esize, start, end;
|
||||||
int start, end;
|
|
||||||
|
|
||||||
if (e > 0) {
|
if (e > 0) {
|
||||||
reason = "illegal multibyte sequence";
|
reason = "illegal multibyte sequence";
|
||||||
|
@ -181,12 +181,12 @@ multibytecodec_encerror(MultibyteCodec *codec,
|
||||||
|
|
||||||
if (errors == ERROR_REPLACE) {
|
if (errors == ERROR_REPLACE) {
|
||||||
const Py_UNICODE replchar = '?', *inbuf = &replchar;
|
const Py_UNICODE replchar = '?', *inbuf = &replchar;
|
||||||
int r;
|
Py_ssize_t r;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
size_t outleft;
|
Py_ssize_t outleft;
|
||||||
|
|
||||||
outleft = (size_t)(buf->outbuf_end - buf->outbuf);
|
outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
|
||||||
r = codec->encode(state, codec->config, &inbuf, 1,
|
r = codec->encode(state, codec->config, &inbuf, 1,
|
||||||
&buf->outbuf, outleft, 0);
|
&buf->outbuf, outleft, 0);
|
||||||
if (r == MBERR_TOOSMALL) {
|
if (r == MBERR_TOOSMALL) {
|
||||||
|
@ -207,7 +207,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
start = (int)(buf->inbuf - buf->inbuf_top);
|
start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
|
||||||
end = start + esize;
|
end = start + esize;
|
||||||
|
|
||||||
/* use cached exception object if available */
|
/* use cached exception object if available */
|
||||||
|
@ -266,13 +266,13 @@ multibytecodec_encerror(MultibyteCodec *codec,
|
||||||
memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
|
memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
|
||||||
buf->outbuf += retstrsize;
|
buf->outbuf += retstrsize;
|
||||||
|
|
||||||
newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
|
newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
|
||||||
if (newpos < 0)
|
if (newpos < 0)
|
||||||
newpos += (int)(buf->inbuf_end - buf->inbuf_top);
|
newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
|
||||||
if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
|
if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
|
||||||
PyErr_Format(PyExc_IndexError,
|
PyErr_Format(PyExc_IndexError,
|
||||||
"position %d from error handler out of bounds",
|
"position %d from error handler out of bounds",
|
||||||
newpos);
|
(int)newpos);
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
}
|
}
|
||||||
buf->inbuf = buf->inbuf_top + newpos;
|
buf->inbuf = buf->inbuf_top + newpos;
|
||||||
|
@ -291,13 +291,12 @@ static int
|
||||||
multibytecodec_decerror(MultibyteCodec *codec,
|
multibytecodec_decerror(MultibyteCodec *codec,
|
||||||
MultibyteCodec_State *state,
|
MultibyteCodec_State *state,
|
||||||
MultibyteDecodeBuffer *buf,
|
MultibyteDecodeBuffer *buf,
|
||||||
PyObject *errors, int e)
|
PyObject *errors, Py_ssize_t e)
|
||||||
{
|
{
|
||||||
PyObject *argsobj, *retobj = NULL, *retuni = NULL;
|
PyObject *argsobj, *retobj = NULL, *retuni = NULL;
|
||||||
int retunisize, newpos;
|
Py_ssize_t retunisize, newpos;
|
||||||
const char *reason;
|
const char *reason;
|
||||||
size_t esize;
|
Py_ssize_t esize, start, end;
|
||||||
int start, end;
|
|
||||||
|
|
||||||
if (e > 0) {
|
if (e > 0) {
|
||||||
reason = "illegal multibyte sequence";
|
reason = "illegal multibyte sequence";
|
||||||
|
@ -332,14 +331,14 @@ multibytecodec_decerror(MultibyteCodec *codec,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
start = (int)(buf->inbuf - buf->inbuf_top);
|
start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
|
||||||
end = start + esize;
|
end = start + esize;
|
||||||
|
|
||||||
/* use cached exception object if available */
|
/* use cached exception object if available */
|
||||||
if (buf->excobj == NULL) {
|
if (buf->excobj == NULL) {
|
||||||
buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
|
buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
|
||||||
(const char *)buf->inbuf_top,
|
(const char *)buf->inbuf_top,
|
||||||
(int)(buf->inbuf_end - buf->inbuf_top),
|
(Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
|
||||||
start, end, reason);
|
start, end, reason);
|
||||||
if (buf->excobj == NULL)
|
if (buf->excobj == NULL)
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
|
@ -383,13 +382,13 @@ multibytecodec_decerror(MultibyteCodec *codec,
|
||||||
buf->outbuf += retunisize;
|
buf->outbuf += retunisize;
|
||||||
}
|
}
|
||||||
|
|
||||||
newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
|
newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
|
||||||
if (newpos < 0)
|
if (newpos < 0)
|
||||||
newpos += (int)(buf->inbuf_end - buf->inbuf_top);
|
newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
|
||||||
if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
|
if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
|
||||||
PyErr_Format(PyExc_IndexError,
|
PyErr_Format(PyExc_IndexError,
|
||||||
"position %d from error handler out of bounds",
|
"position %d from error handler out of bounds",
|
||||||
newpos);
|
(int)newpos);
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
}
|
}
|
||||||
buf->inbuf = buf->inbuf_top + newpos;
|
buf->inbuf = buf->inbuf_top + newpos;
|
||||||
|
@ -404,11 +403,11 @@ errorexit:
|
||||||
static PyObject *
|
static PyObject *
|
||||||
multibytecodec_encode(MultibyteCodec *codec,
|
multibytecodec_encode(MultibyteCodec *codec,
|
||||||
MultibyteCodec_State *state,
|
MultibyteCodec_State *state,
|
||||||
const Py_UNICODE **data, size_t datalen,
|
const Py_UNICODE **data, Py_ssize_t datalen,
|
||||||
PyObject *errors, int flags)
|
PyObject *errors, int flags)
|
||||||
{
|
{
|
||||||
MultibyteEncodeBuffer buf;
|
MultibyteEncodeBuffer buf;
|
||||||
int finalsize, r = 0;
|
Py_ssize_t finalsize, r = 0;
|
||||||
|
|
||||||
if (datalen == 0)
|
if (datalen == 0)
|
||||||
return PyString_FromString("");
|
return PyString_FromString("");
|
||||||
|
@ -423,12 +422,12 @@ multibytecodec_encode(MultibyteCodec *codec,
|
||||||
buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
|
buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
|
||||||
|
|
||||||
while (buf.inbuf < buf.inbuf_end) {
|
while (buf.inbuf < buf.inbuf_end) {
|
||||||
size_t inleft, outleft;
|
Py_ssize_t inleft, outleft;
|
||||||
|
|
||||||
/* we don't reuse inleft and outleft here.
|
/* we don't reuse inleft and outleft here.
|
||||||
* error callbacks can relocate the cursor anywhere on buffer*/
|
* error callbacks can relocate the cursor anywhere on buffer*/
|
||||||
inleft = (size_t)(buf.inbuf_end - buf.inbuf);
|
inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
|
||||||
outleft = (size_t)(buf.outbuf_end - buf.outbuf);
|
outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
|
||||||
r = codec->encode(state, codec->config, &buf.inbuf, inleft,
|
r = codec->encode(state, codec->config, &buf.inbuf, inleft,
|
||||||
&buf.outbuf, outleft, flags);
|
&buf.outbuf, outleft, flags);
|
||||||
*data = buf.inbuf;
|
*data = buf.inbuf;
|
||||||
|
@ -442,9 +441,9 @@ multibytecodec_encode(MultibyteCodec *codec,
|
||||||
|
|
||||||
if (codec->encreset != NULL)
|
if (codec->encreset != NULL)
|
||||||
for (;;) {
|
for (;;) {
|
||||||
size_t outleft;
|
Py_ssize_t outleft;
|
||||||
|
|
||||||
outleft = (size_t)(buf.outbuf_end - buf.outbuf);
|
outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
|
||||||
r = codec->encreset(state, codec->config, &buf.outbuf,
|
r = codec->encreset(state, codec->config, &buf.outbuf,
|
||||||
outleft);
|
outleft);
|
||||||
if (r == 0)
|
if (r == 0)
|
||||||
|
@ -454,7 +453,8 @@ multibytecodec_encode(MultibyteCodec *codec,
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
}
|
}
|
||||||
|
|
||||||
finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj));
|
finalsize = (Py_ssize_t)((char*)buf.outbuf -
|
||||||
|
PyString_AS_STRING(buf.outobj));
|
||||||
|
|
||||||
if (finalsize != PyString_GET_SIZE(buf.outobj))
|
if (finalsize != PyString_GET_SIZE(buf.outobj))
|
||||||
if (_PyString_Resize(&buf.outobj, finalsize) == -1)
|
if (_PyString_Resize(&buf.outobj, finalsize) == -1)
|
||||||
|
@ -477,7 +477,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
|
||||||
Py_UNICODE *data;
|
Py_UNICODE *data;
|
||||||
PyObject *errorcb, *r, *arg, *ucvt;
|
PyObject *errorcb, *r, *arg, *ucvt;
|
||||||
const char *errors = NULL;
|
const char *errors = NULL;
|
||||||
int datalen;
|
Py_ssize_t datalen;
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
|
||||||
codeckwarglist, &arg, &errors))
|
codeckwarglist, &arg, &errors))
|
||||||
|
@ -537,7 +537,7 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
|
||||||
MultibyteDecodeBuffer buf;
|
MultibyteDecodeBuffer buf;
|
||||||
PyObject *errorcb;
|
PyObject *errorcb;
|
||||||
const char *data, *errors = NULL;
|
const char *data, *errors = NULL;
|
||||||
int datalen, finalsize;
|
Py_ssize_t datalen, finalsize;
|
||||||
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode",
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode",
|
||||||
codeckwarglist, &data, &datalen, &errors))
|
codeckwarglist, &data, &datalen, &errors))
|
||||||
|
@ -568,11 +568,10 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
|
|
||||||
while (buf.inbuf < buf.inbuf_end) {
|
while (buf.inbuf < buf.inbuf_end) {
|
||||||
size_t inleft, outleft;
|
Py_ssize_t inleft, outleft, r;
|
||||||
int r;
|
|
||||||
|
|
||||||
inleft = (size_t)(buf.inbuf_end - buf.inbuf);
|
inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
|
||||||
outleft = (size_t)(buf.outbuf_end - buf.outbuf);
|
outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
|
||||||
|
|
||||||
r = self->codec->decode(&state, self->codec->config,
|
r = self->codec->decode(&state, self->codec->config,
|
||||||
&buf.inbuf, inleft, &buf.outbuf, outleft);
|
&buf.inbuf, inleft, &buf.outbuf, outleft);
|
||||||
|
@ -583,7 +582,8 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
}
|
}
|
||||||
|
|
||||||
finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
|
finalsize = (Py_ssize_t)(buf.outbuf -
|
||||||
|
PyUnicode_AS_UNICODE(buf.outobj));
|
||||||
|
|
||||||
if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
|
if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
|
||||||
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
|
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
|
||||||
|
@ -692,11 +692,11 @@ static PyTypeObject MultibyteCodec_Type = {
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
||||||
const char *method, int sizehint)
|
const char *method, Py_ssize_t sizehint)
|
||||||
{
|
{
|
||||||
MultibyteDecodeBuffer buf;
|
MultibyteDecodeBuffer buf;
|
||||||
PyObject *cres;
|
PyObject *cres;
|
||||||
int rsize, r, finalsize = 0;
|
Py_ssize_t rsize, r, finalsize = 0;
|
||||||
|
|
||||||
if (sizehint == 0)
|
if (sizehint == 0)
|
||||||
return PyUnicode_FromUnicode(NULL, 0);
|
return PyUnicode_FromUnicode(NULL, 0);
|
||||||
|
@ -755,10 +755,12 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
||||||
r = 0;
|
r = 0;
|
||||||
if (rsize > 0)
|
if (rsize > 0)
|
||||||
while (buf.inbuf < buf.inbuf_end) {
|
while (buf.inbuf < buf.inbuf_end) {
|
||||||
size_t inleft, outleft;
|
Py_ssize_t inleft, outleft;
|
||||||
|
|
||||||
inleft = (size_t)(buf.inbuf_end - buf.inbuf);
|
inleft = (Py_ssize_t)(buf.inbuf_end -
|
||||||
outleft = (size_t)(buf.outbuf_end -buf.outbuf);
|
buf.inbuf);
|
||||||
|
outleft = (Py_ssize_t)(buf.outbuf_end -
|
||||||
|
buf.outbuf);
|
||||||
|
|
||||||
r = self->codec->decode(&self->state,
|
r = self->codec->decode(&self->state,
|
||||||
self->codec->config,
|
self->codec->config,
|
||||||
|
@ -780,12 +782,12 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
|
if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
|
||||||
size_t npendings;
|
Py_ssize_t npendings;
|
||||||
|
|
||||||
/* we can't assume that pendingsize is still 0 here.
|
/* we can't assume that pendingsize is still 0 here.
|
||||||
* because this function can be called recursively
|
* because this function can be called recursively
|
||||||
* from error callback */
|
* from error callback */
|
||||||
npendings = (size_t)(buf.inbuf_end - buf.inbuf);
|
npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
|
||||||
if (npendings + self->pendingsize > MAXDECPENDING) {
|
if (npendings + self->pendingsize > MAXDECPENDING) {
|
||||||
PyErr_SetString(PyExc_RuntimeError,
|
PyErr_SetString(PyExc_RuntimeError,
|
||||||
"pending buffer overflow");
|
"pending buffer overflow");
|
||||||
|
@ -796,7 +798,7 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
||||||
self->pendingsize += npendings;
|
self->pendingsize += npendings;
|
||||||
}
|
}
|
||||||
|
|
||||||
finalsize = (int)(buf.outbuf -
|
finalsize = (Py_ssize_t)(buf.outbuf -
|
||||||
PyUnicode_AS_UNICODE(buf.outobj));
|
PyUnicode_AS_UNICODE(buf.outobj));
|
||||||
Py_DECREF(cres);
|
Py_DECREF(cres);
|
||||||
cres = NULL;
|
cres = NULL;
|
||||||
|
@ -826,7 +828,7 @@ static PyObject *
|
||||||
mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
|
mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *sizeobj = NULL;
|
PyObject *sizeobj = NULL;
|
||||||
long size;
|
Py_ssize_t size;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
|
if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -834,7 +836,7 @@ mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
|
||||||
if (sizeobj == Py_None || sizeobj == NULL)
|
if (sizeobj == Py_None || sizeobj == NULL)
|
||||||
size = -1;
|
size = -1;
|
||||||
else if (PyInt_Check(sizeobj))
|
else if (PyInt_Check(sizeobj))
|
||||||
size = PyInt_AsLong(sizeobj);
|
size = PyInt_AsSsize_t(sizeobj);
|
||||||
else {
|
else {
|
||||||
PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
|
PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -847,7 +849,7 @@ static PyObject *
|
||||||
mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
|
mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *sizeobj = NULL;
|
PyObject *sizeobj = NULL;
|
||||||
long size;
|
Py_ssize_t size;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
|
if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -855,7 +857,7 @@ mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
|
||||||
if (sizeobj == Py_None || sizeobj == NULL)
|
if (sizeobj == Py_None || sizeobj == NULL)
|
||||||
size = -1;
|
size = -1;
|
||||||
else if (PyInt_Check(sizeobj))
|
else if (PyInt_Check(sizeobj))
|
||||||
size = PyInt_AsLong(sizeobj);
|
size = PyInt_AsSsize_t(sizeobj);
|
||||||
else {
|
else {
|
||||||
PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
|
PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -868,7 +870,7 @@ static PyObject *
|
||||||
mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
|
mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *sizehintobj = NULL, *r, *sr;
|
PyObject *sizehintobj = NULL, *r, *sr;
|
||||||
long sizehint;
|
Py_ssize_t sizehint;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
|
if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -876,7 +878,7 @@ mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
|
||||||
if (sizehintobj == Py_None || sizehintobj == NULL)
|
if (sizehintobj == Py_None || sizehintobj == NULL)
|
||||||
sizehint = -1;
|
sizehint = -1;
|
||||||
else if (PyInt_Check(sizehintobj))
|
else if (PyInt_Check(sizehintobj))
|
||||||
sizehint = PyInt_AsLong(sizehintobj);
|
sizehint = PyInt_AsSsize_t(sizehintobj);
|
||||||
else {
|
else {
|
||||||
PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
|
PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -966,7 +968,7 @@ mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
|
||||||
{
|
{
|
||||||
PyObject *wr, *ucvt, *r = NULL;
|
PyObject *wr, *ucvt, *r = NULL;
|
||||||
Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
|
Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
|
||||||
int datalen;
|
Py_ssize_t datalen;
|
||||||
|
|
||||||
if (PyUnicode_Check(unistr))
|
if (PyUnicode_Check(unistr))
|
||||||
ucvt = NULL;
|
ucvt = NULL;
|
||||||
|
@ -1012,7 +1014,7 @@ mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
|
||||||
goto errorexit;
|
goto errorexit;
|
||||||
|
|
||||||
if (inbuf < inbuf_end) {
|
if (inbuf < inbuf_end) {
|
||||||
self->pendingsize = (int)(inbuf_end - inbuf);
|
self->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
|
||||||
if (self->pendingsize > MAXENCPENDING) {
|
if (self->pendingsize > MAXENCPENDING) {
|
||||||
self->pendingsize = 0;
|
self->pendingsize = 0;
|
||||||
PyErr_SetString(PyExc_RuntimeError,
|
PyErr_SetString(PyExc_RuntimeError,
|
||||||
|
@ -1265,9 +1267,12 @@ static struct PyMethodDef __methods[] = {
|
||||||
void
|
void
|
||||||
init_multibytecodec(void)
|
init_multibytecodec(void)
|
||||||
{
|
{
|
||||||
MultibyteCodec_Type.ob_type = &PyType_Type;
|
if (PyType_Ready(&MultibyteCodec_Type) < 0)
|
||||||
MultibyteStreamReader_Type.ob_type = &PyType_Type;
|
return;
|
||||||
MultibyteStreamWriter_Type.ob_type = &PyType_Type;
|
if (PyType_Ready(&MultibyteStreamReader_Type) < 0)
|
||||||
|
return;
|
||||||
|
if (PyType_Ready(&MultibyteStreamWriter_Type) < 0)
|
||||||
|
return;
|
||||||
|
|
||||||
Py_InitModule("_multibytecodec", __methods);
|
Py_InitModule("_multibytecodec", __methods);
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
* multibytecodec.h: Common Multibyte Codec Implementation
|
* multibytecodec.h: Common Multibyte Codec Implementation
|
||||||
*
|
*
|
||||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||||
* $CJKCodecs: multibytecodec.h,v 1.7 2004/06/27 10:39:28 perky Exp $
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _PYTHON_MULTIBYTECODEC_H_
|
#ifndef _PYTHON_MULTIBYTECODEC_H_
|
||||||
|
@ -32,22 +31,23 @@ typedef union {
|
||||||
} MultibyteCodec_State;
|
} MultibyteCodec_State;
|
||||||
|
|
||||||
typedef int (*mbcodec_init)(const void *config);
|
typedef int (*mbcodec_init)(const void *config);
|
||||||
typedef int (*mbencode_func)(MultibyteCodec_State *state, const void *config,
|
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
|
||||||
const Py_UNICODE **inbuf, size_t inleft,
|
const void *config,
|
||||||
unsigned char **outbuf, size_t outleft,
|
const Py_UNICODE **inbuf, Py_ssize_t inleft,
|
||||||
|
unsigned char **outbuf, Py_ssize_t outleft,
|
||||||
int flags);
|
int flags);
|
||||||
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
|
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
|
||||||
const void *config);
|
const void *config);
|
||||||
typedef int (*mbencodereset_func)(MultibyteCodec_State *state,
|
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
|
||||||
const void *config,
|
const void *config,
|
||||||
unsigned char **outbuf, size_t outleft);
|
unsigned char **outbuf, Py_ssize_t outleft);
|
||||||
typedef int (*mbdecode_func)(MultibyteCodec_State *state,
|
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
|
||||||
const void *config,
|
const void *config,
|
||||||
const unsigned char **inbuf, size_t inleft,
|
const unsigned char **inbuf, Py_ssize_t inleft,
|
||||||
Py_UNICODE **outbuf, size_t outleft);
|
Py_UNICODE **outbuf, Py_ssize_t outleft);
|
||||||
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
|
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
|
||||||
const void *config);
|
const void *config);
|
||||||
typedef int (*mbdecodereset_func)(MultibyteCodec_State *state,
|
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
|
||||||
const void *config);
|
const void *config);
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -73,7 +73,7 @@ typedef struct {
|
||||||
MultibyteCodec *codec;
|
MultibyteCodec *codec;
|
||||||
MultibyteCodec_State state;
|
MultibyteCodec_State state;
|
||||||
unsigned char pending[MAXDECPENDING];
|
unsigned char pending[MAXDECPENDING];
|
||||||
int pendingsize;
|
Py_ssize_t pendingsize;
|
||||||
PyObject *stream, *errors;
|
PyObject *stream, *errors;
|
||||||
} MultibyteStreamReaderObject;
|
} MultibyteStreamReaderObject;
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ typedef struct {
|
||||||
MultibyteCodec *codec;
|
MultibyteCodec *codec;
|
||||||
MultibyteCodec_State state;
|
MultibyteCodec_State state;
|
||||||
Py_UNICODE pending[MAXENCPENDING];
|
Py_UNICODE pending[MAXENCPENDING];
|
||||||
int pendingsize;
|
Py_ssize_t pendingsize;
|
||||||
PyObject *stream, *errors;
|
PyObject *stream, *errors;
|
||||||
} MultibyteStreamWriterObject;
|
} MultibyteStreamWriterObject;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue