Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead of
the legacy Py_UNICODE API. Also add a new _PyUnicodeWriter_WriteChar() function.
This commit is contained in:
parent
d8a5cc91e6
commit
a0dd0213cc
|
@ -933,6 +933,13 @@ PyAPI_FUNC(int)
|
|||
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
|
||||
Py_ssize_t length, Py_UCS4 maxchar);
|
||||
|
||||
/* Append a Unicode character.
|
||||
Return 0 on success, raise an exception and return -1 on error. */
|
||||
PyAPI_FUNC(int)
|
||||
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
|
||||
Py_UCS4 ch
|
||||
);
|
||||
|
||||
/* Append a Unicode string.
|
||||
Return 0 on success, raise an exception and return -1 on error. */
|
||||
PyAPI_FUNC(int)
|
||||
|
|
|
@ -23,12 +23,12 @@
|
|||
* A844 undefined U+2015 HORIZONTAL BAR
|
||||
*/
|
||||
|
||||
#define GBK_DECODE(dc1, dc2, assi) \
|
||||
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
|
||||
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
|
||||
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
|
||||
else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
|
||||
else TRYMAP_DEC(gbkext, assi, dc1, dc2);
|
||||
#define GBK_DECODE(dc1, dc2, writer) \
|
||||
if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \
|
||||
else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \
|
||||
else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \
|
||||
else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \
|
||||
else TRYMAP_DEC(gbkext, writer, dc1, dc2);
|
||||
|
||||
#define GBK_ENCODE(code, assi) \
|
||||
if ((code) == 0x2014) (assi) = 0xa1aa; \
|
||||
|
@ -43,7 +43,7 @@
|
|||
ENCODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -73,17 +73,15 @@ DECODER(gb2312)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
TRYMAP_DEC(gb2312, writer, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else return 1;
|
||||
}
|
||||
|
@ -99,7 +97,7 @@ DECODER(gb2312)
|
|||
ENCODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -130,20 +128,18 @@ DECODER(gbk)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
GBK_DECODE(c, IN2, **outbuf)
|
||||
GBK_DECODE(c, IN2, writer)
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -157,7 +153,7 @@ DECODER(gbk)
|
|||
ENCODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -174,7 +170,7 @@ ENCODER(gb18030)
|
|||
return 1;
|
||||
#endif
|
||||
else if (c >= 0x10000) {
|
||||
ucs4_t tc = c - 0x10000;
|
||||
Py_UCS4 tc = c - 0x10000;
|
||||
|
||||
REQUIRE_OUTBUF(4)
|
||||
|
||||
|
@ -208,7 +204,7 @@ ENCODER(gb18030)
|
|||
utrrange++)
|
||||
if (utrrange->first <= c &&
|
||||
c <= utrrange->last) {
|
||||
Py_UNICODE tc;
|
||||
Py_UCS4 tc;
|
||||
|
||||
tc = c - utrrange->first +
|
||||
utrrange->base;
|
||||
|
@ -247,11 +243,9 @@ DECODER(gb18030)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -261,7 +255,7 @@ DECODER(gb18030)
|
|||
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
|
||||
const struct _gb18030_to_unibmp_ranges *utr;
|
||||
unsigned char c3, c4;
|
||||
ucs4_t lseq;
|
||||
Py_UCS4 lseq;
|
||||
|
||||
REQUIRE_INBUF(4)
|
||||
c3 = IN3;
|
||||
|
@ -272,34 +266,34 @@ DECODER(gb18030)
|
|||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
if (c < 4) { /* U+0080 - U+FFFF */
|
||||
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
|
||||
(Py_UCS4)c3 * 10 + c4;
|
||||
if (lseq < 39420) {
|
||||
for (utr = gb18030_to_unibmp_ranges;
|
||||
lseq >= (utr + 1)->base;
|
||||
utr++) ;
|
||||
OUT1(utr->first - utr->base + lseq)
|
||||
NEXT(4, 1)
|
||||
OUTCHAR(utr->first - utr->base + lseq);
|
||||
NEXT_IN(4);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c >= 15) { /* U+10000 - U+10FFFF */
|
||||
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
|
||||
* 1260 + (ucs4_t)c3 * 10 + c4;
|
||||
lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
|
||||
* 1260 + (Py_UCS4)c3 * 10 + c4;
|
||||
if (lseq <= 0x10FFFF) {
|
||||
WRITEUCS4(lseq);
|
||||
NEXT_IN(4)
|
||||
OUTCHAR(lseq);
|
||||
NEXT_IN(4);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
GBK_DECODE(c, c2, **outbuf)
|
||||
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
|
||||
GBK_DECODE(c, c2, writer)
|
||||
else TRYMAP_DEC(gb18030ext, writer, c, c2);
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -329,7 +323,7 @@ ENCODER_RESET(hz)
|
|||
ENCODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -389,8 +383,8 @@ DECODER(hz)
|
|||
|
||||
REQUIRE_INBUF(2)
|
||||
if (c2 == '~') {
|
||||
WRITE1('~')
|
||||
NEXT(2, 1)
|
||||
OUTCHAR('~');
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else if (c2 == '{' && state->i == 0)
|
||||
|
@ -401,7 +395,7 @@ DECODER(hz)
|
|||
; /* line-continuation */
|
||||
else
|
||||
return 1;
|
||||
NEXT(2, 0);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -409,14 +403,13 @@ DECODER(hz)
|
|||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
}
|
||||
else { /* GB mode */
|
||||
REQUIRE_INBUF(2)
|
||||
REQUIRE_OUTBUF(1)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
TRYMAP_DEC(gb2312, writer, c, IN2) {
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
|
|
@ -39,7 +39,7 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
|
|||
ENCODER(big5hkscs)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = **inbuf;
|
||||
Py_UCS4 c = **inbuf;
|
||||
DBCHAR code;
|
||||
Py_ssize_t insize;
|
||||
|
||||
|
@ -103,26 +103,24 @@ DECODER(big5hkscs)
|
|||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t decoded;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
TRYMAP_DEC(big5, writer, c, IN2) {
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_DEC(big5hkscs, decoded, c, IN2)
|
||||
TRYMAP_DEC_CHAR(big5hkscs, decoded, c, IN2)
|
||||
{
|
||||
int s = BH2S(c, IN2);
|
||||
const unsigned char *hintbase;
|
||||
|
@ -146,25 +144,25 @@ DECODER(big5hkscs)
|
|||
return MBERR_INTERNAL;
|
||||
|
||||
if (hintbase[s >> 3] & (1 << (s & 7))) {
|
||||
WRITEUCS4(decoded | 0x20000)
|
||||
NEXT_IN(2)
|
||||
OUTCHAR(decoded | 0x20000);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else {
|
||||
OUT1(decoded)
|
||||
NEXT(2, 1)
|
||||
OUTCHAR(decoded);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
switch ((c << 8) | IN2) {
|
||||
case 0x8862: WRITE2(0x00ca, 0x0304); break;
|
||||
case 0x8864: WRITE2(0x00ca, 0x030c); break;
|
||||
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
|
||||
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
|
||||
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
|
||||
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
|
||||
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
|
||||
case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
|
||||
default: return 1;
|
||||
}
|
||||
|
||||
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
|
||||
NEXT_IN(2); /* all decoded codepoints are pairs, above. */
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -102,8 +102,8 @@
|
|||
/*-*- internal data structures -*-*/
|
||||
|
||||
typedef int (*iso2022_init_func)(void);
|
||||
typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
|
||||
typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
|
||||
typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
|
||||
typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);
|
||||
|
||||
struct iso2022_designation {
|
||||
unsigned char mark;
|
||||
|
@ -158,7 +158,7 @@ ENCODER(iso2022)
|
|||
while (inleft > 0) {
|
||||
const struct iso2022_designation *dsg;
|
||||
DBCHAR encoded;
|
||||
ucs4_t c = **inbuf;
|
||||
Py_UCS4 c = **inbuf;
|
||||
Py_ssize_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -196,9 +196,9 @@ ENCODER(iso2022)
|
|||
length = 2;
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
if (length == 2) {
|
||||
ucs4_t u4in[2];
|
||||
u4in[0] = (ucs4_t)IN1;
|
||||
u4in[1] = (ucs4_t)IN2;
|
||||
Py_UCS4 u4in[2];
|
||||
u4in[0] = (Py_UCS4)IN1;
|
||||
u4in[1] = (Py_UCS4)IN2;
|
||||
encoded = dsg->encoder(u4in, &length);
|
||||
} else
|
||||
encoded = dsg->encoder(&c, &length);
|
||||
|
@ -277,7 +277,7 @@ ENCODER(iso2022)
|
|||
WRITE2(encoded >> 8, encoded & 0xff)
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
NEXT_IN(insize)
|
||||
NEXT_IN(insize);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -376,45 +376,43 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#define ISO8859_7_DECODE(c, assi) \
|
||||
if ((c) < 0xa0) (assi) = (c); \
|
||||
else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
|
||||
(assi) = (c); \
|
||||
else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
|
||||
(0xbffffd77L & (1L << ((c)-0xb4))))) \
|
||||
(assi) = 0x02d0 + (c); \
|
||||
else if ((c) == 0xa1) (assi) = 0x2018; \
|
||||
else if ((c) == 0xa2) (assi) = 0x2019; \
|
||||
else if ((c) == 0xaf) (assi) = 0x2015;
|
||||
#define ISO8859_7_DECODE(c, writer) \
|
||||
if ((c) < 0xa0) OUTCHAR(c); \
|
||||
else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
|
||||
OUTCHAR(c); \
|
||||
else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
|
||||
(0xbffffd77L & (1L << ((c)-0xb4))))) \
|
||||
OUTCHAR(0x02d0 + (c)); \
|
||||
else if ((c) == 0xa1) OUTCHAR(0x2018); \
|
||||
else if ((c) == 0xa2) OUTCHAR(0x2019); \
|
||||
else if ((c) == 0xaf) OUTCHAR(0x2015);
|
||||
|
||||
static Py_ssize_t
|
||||
iso2022processg2(const void *config, MultibyteCodec_State *state,
|
||||
const unsigned char **inbuf, Py_ssize_t *inleft,
|
||||
Py_UNICODE **outbuf, Py_ssize_t *outleft)
|
||||
_PyUnicodeWriter *writer)
|
||||
{
|
||||
/* not written to use encoder, decoder functions because only few
|
||||
* encodings use G2 designations in CJKCodecs */
|
||||
if (STATE_G2 == CHARSET_ISO8859_1) {
|
||||
if (IN3 < 0x80)
|
||||
OUT1(IN3 + 0x80)
|
||||
OUTCHAR(IN3 + 0x80);
|
||||
else
|
||||
return 3;
|
||||
}
|
||||
else if (STATE_G2 == CHARSET_ISO8859_7) {
|
||||
ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
|
||||
ISO8859_7_DECODE(IN3 ^ 0x80, writer)
|
||||
else return 3;
|
||||
}
|
||||
else if (STATE_G2 == CHARSET_ASCII) {
|
||||
if (IN3 & 0x80) return 3;
|
||||
else **outbuf = IN3;
|
||||
else OUTCHAR(IN3);
|
||||
}
|
||||
else
|
||||
return MBERR_INTERNAL;
|
||||
|
||||
(*inbuf) += 3;
|
||||
*inleft -= 3;
|
||||
(*outbuf) += 1;
|
||||
*outleft -= 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -429,8 +427,8 @@ DECODER(iso2022)
|
|||
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
|
||||
/* ESC throughout mode:
|
||||
* for non-iso2022 escape sequences */
|
||||
WRITE1(c) /* assume as ISO-8859-1 */
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c); /* assume as ISO-8859-1 */
|
||||
NEXT_IN(1);
|
||||
if (IS_ESCEND(c)) {
|
||||
STATE_CLEARFLAG(F_ESCTHROUGHOUT)
|
||||
}
|
||||
|
@ -449,32 +447,32 @@ DECODER(iso2022)
|
|||
else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
|
||||
REQUIRE_INBUF(3)
|
||||
err = iso2022processg2(config, state,
|
||||
inbuf, &inleft, outbuf, &outleft);
|
||||
inbuf, &inleft, writer);
|
||||
if (err != 0)
|
||||
return err;
|
||||
}
|
||||
else {
|
||||
WRITE1(ESC)
|
||||
OUTCHAR(ESC);
|
||||
STATE_SETFLAG(F_ESCTHROUGHOUT)
|
||||
NEXT(1, 1)
|
||||
NEXT_IN(1);
|
||||
}
|
||||
break;
|
||||
case SI:
|
||||
if (CONFIG_ISSET(NO_SHIFT))
|
||||
goto bypass;
|
||||
STATE_CLEARFLAG(F_SHIFTED)
|
||||
NEXT_IN(1)
|
||||
NEXT_IN(1);
|
||||
break;
|
||||
case SO:
|
||||
if (CONFIG_ISSET(NO_SHIFT))
|
||||
goto bypass;
|
||||
STATE_SETFLAG(F_SHIFTED)
|
||||
NEXT_IN(1)
|
||||
NEXT_IN(1);
|
||||
break;
|
||||
case LF:
|
||||
STATE_CLEARFLAG(F_SHIFTED)
|
||||
WRITE1(LF)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(LF);
|
||||
NEXT_IN(1);
|
||||
break;
|
||||
default:
|
||||
if (c < 0x20) /* C0 */
|
||||
|
@ -484,7 +482,7 @@ DECODER(iso2022)
|
|||
else {
|
||||
const struct iso2022_designation *dsg;
|
||||
unsigned char charset;
|
||||
ucs4_t decoded;
|
||||
Py_UCS4 decoded;
|
||||
|
||||
if (STATE_GETFLAG(F_SHIFTED))
|
||||
charset = STATE_G1;
|
||||
|
@ -492,8 +490,8 @@ DECODER(iso2022)
|
|||
charset = STATE_G0;
|
||||
|
||||
if (charset == CHARSET_ASCII) {
|
||||
bypass: WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
bypass: OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -518,17 +516,15 @@ bypass: WRITE1(c)
|
|||
return dsg->width;
|
||||
|
||||
if (decoded < 0x10000) {
|
||||
WRITE1(decoded)
|
||||
NEXT_OUT(1)
|
||||
OUTCHAR(decoded);
|
||||
}
|
||||
else if (decoded < 0x30000) {
|
||||
WRITEUCS4(decoded)
|
||||
OUTCHAR(decoded);
|
||||
}
|
||||
else { /* JIS X 0213 pairs */
|
||||
WRITE2(decoded >> 16, decoded & 0xffff)
|
||||
NEXT_OUT(2)
|
||||
OUTCHAR2(decoded >> 16, decoded & 0xffff);
|
||||
}
|
||||
NEXT_IN(dsg->width)
|
||||
NEXT_IN(dsg->width);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -577,18 +573,18 @@ ksx1001_init(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
ksx1001_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
TRYMAP_DEC(ksx1001, u, data[0], data[1])
|
||||
Py_UCS4 u;
|
||||
TRYMAP_DEC_CHAR(ksx1001, u, data[0], data[1])
|
||||
return u;
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
assert(*length == 1);
|
||||
|
@ -613,20 +609,20 @@ jisx0208_init(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0208_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
Py_UCS4 u;
|
||||
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
|
||||
return 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, u, data[0], data[1])
|
||||
else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1])
|
||||
return u;
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
assert(*length == 1);
|
||||
|
@ -654,18 +650,18 @@ jisx0212_init(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0212_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
TRYMAP_DEC(jisx0212, u, data[0], data[1])
|
||||
Py_UCS4 u;
|
||||
TRYMAP_DEC_CHAR(jisx0212, u, data[0], data[1])
|
||||
return u;
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
assert(*length == 1);
|
||||
|
@ -705,30 +701,30 @@ jisx0213_init(void)
|
|||
}
|
||||
|
||||
#define config ((void *)2000)
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0213_2000_1_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
Py_UCS4 u;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
|
||||
else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
|
||||
return 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
|
||||
else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
|
||||
u |= 0x20000;
|
||||
else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
return u;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0213_2000_2_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
|
||||
TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
|
||||
Py_UCS4 u;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
|
||||
TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
|
||||
u |= 0x20000;
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
|
@ -736,28 +732,28 @@ jisx0213_2000_2_decoder(const unsigned char *data)
|
|||
}
|
||||
#undef config
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0213_2004_1_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
Py_UCS4 u;
|
||||
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
|
||||
return 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
|
||||
else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
|
||||
u |= 0x20000;
|
||||
else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
return u;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0213_2004_2_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
|
||||
Py_UCS4 u;
|
||||
TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
|
||||
u |= 0x20000;
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
|
@ -765,7 +761,7 @@ jisx0213_2004_2_decoder(const unsigned char *data)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
|
||||
jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
|
||||
{
|
||||
DBCHAR coded;
|
||||
|
||||
|
@ -819,7 +815,7 @@ jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
|
||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||
|
@ -831,7 +827,7 @@ jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
Py_ssize_t ilength = *length;
|
||||
|
@ -854,7 +850,7 @@ jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
|
||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||
|
@ -866,7 +862,7 @@ jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded = jisx0213_encoder(data, length, NULL);
|
||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||
|
@ -878,7 +874,7 @@ jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
Py_ssize_t ilength = *length;
|
||||
|
@ -901,7 +897,7 @@ jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
|
|||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded = jisx0213_encoder(data, length, NULL);
|
||||
if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
|
||||
|
@ -912,17 +908,17 @@ jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
|
|||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0201_r_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
JISX0201_R_DECODE(*data, u)
|
||||
Py_UCS4 u;
|
||||
JISX0201_R_DECODE_CHAR(*data, u)
|
||||
else return MAP_UNMAPPABLE;
|
||||
return u;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
JISX0201_R_ENCODE(*data, coded)
|
||||
|
@ -930,17 +926,17 @@ jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
|
|||
return coded;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
jisx0201_k_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
JISX0201_K_DECODE(*data ^ 0x80, u)
|
||||
Py_UCS4 u;
|
||||
JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
|
||||
else return MAP_UNMAPPABLE;
|
||||
return u;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
JISX0201_K_ENCODE(*data, coded)
|
||||
|
@ -961,18 +957,18 @@ gb2312_init(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
gb2312_decoder(const unsigned char *data)
|
||||
{
|
||||
ucs4_t u;
|
||||
TRYMAP_DEC(gb2312, u, data[0], data[1])
|
||||
Py_UCS4 u;
|
||||
TRYMAP_DEC_CHAR(gb2312, u, data[0], data[1])
|
||||
return u;
|
||||
else
|
||||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
DBCHAR coded;
|
||||
assert(*length == 1);
|
||||
|
@ -986,14 +982,14 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
|
|||
}
|
||||
|
||||
|
||||
static ucs4_t
|
||||
static Py_UCS4
|
||||
dummy_decoder(const unsigned char *data)
|
||||
{
|
||||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
||||
static DBCHAR
|
||||
dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
|
||||
dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length)
|
||||
{
|
||||
return MAP_UNMAPPABLE;
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
ENCODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
|
@ -66,8 +66,8 @@ ENCODER(cp932)
|
|||
}
|
||||
else if (c >= 0xe000 && c < 0xe758) {
|
||||
/* User-defined area */
|
||||
c1 = (Py_UNICODE)(c - 0xe000) / 188;
|
||||
c2 = (Py_UNICODE)(c - 0xe000) % 188;
|
||||
c1 = (Py_UCS4)(c - 0xe000) / 188;
|
||||
c2 = (Py_UCS4)(c - 0xe000) % 188;
|
||||
OUT1(c1 + 0xf0)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
}
|
||||
|
@ -85,31 +85,30 @@ DECODER(cp932)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
if (c <= 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xa0 && c <= 0xdf) {
|
||||
if (c == 0xa0)
|
||||
OUT1(0xf8f0) /* half-width katakana */
|
||||
OUTCHAR(0xf8f0); /* half-width katakana */
|
||||
else
|
||||
OUT1(0xfec0 + c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(0xfec0 + c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xfd/* && c <= 0xff*/) {
|
||||
/* Windows compatibility */
|
||||
OUT1(0xf8f1 - 0xfd + c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(0xf8f1 - 0xfd + c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
|
||||
TRYMAP_DEC(cp932ext, writer, c, c2);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 1;
|
||||
|
@ -119,21 +118,21 @@ DECODER(cp932)
|
|||
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
TRYMAP_DEC(jisx0208, writer, c, c2);
|
||||
else return 1;
|
||||
}
|
||||
else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) ||
|
||||
(c2 >= 0x80 && c2 <= 0xfc))
|
||||
OUT1(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
|
||||
OUTCHAR(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -147,7 +146,7 @@ DECODER(cp932)
|
|||
ENCODER(euc_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
Py_ssize_t insize;
|
||||
|
||||
|
@ -235,13 +234,11 @@ DECODER(euc_jis_2004)
|
|||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
Py_UCS4 code;
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -252,8 +249,8 @@ DECODER(euc_jis_2004)
|
|||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
OUTCHAR(0xfec0 + c2);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
@ -266,16 +263,16 @@ DECODER(euc_jis_2004)
|
|||
c3 = IN3 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(3)
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
|
||||
else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
NEXT_IN(3);
|
||||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
|
||||
else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
|
||||
else return 1;
|
||||
NEXT(3, 1)
|
||||
NEXT_IN(3);
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
@ -285,23 +282,23 @@ DECODER(euc_jis_2004)
|
|||
c2 = IN2 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 1 */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
|
||||
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
|
||||
else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
|
||||
else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
|
||||
else TRYMAP_DEC(jisx0208, writer, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
|
||||
else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
|
||||
OUTCHAR2(code >> 16, code & 0xffff);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else return 1;
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -316,7 +313,7 @@ DECODER(euc_jis_2004)
|
|||
ENCODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -369,11 +366,9 @@ DECODER(euc_jp)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -384,8 +379,8 @@ DECODER(euc_jp)
|
|||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
OUTCHAR(0xfec0 + c2);
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
@ -397,8 +392,8 @@ DECODER(euc_jp)
|
|||
c2 = IN2;
|
||||
c3 = IN3;
|
||||
/* JIS X 0212 */
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
|
||||
NEXT(3, 1)
|
||||
TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
|
||||
NEXT_IN(3);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
@ -412,13 +407,13 @@ DECODER(euc_jp)
|
|||
#ifndef STRICT_BUILD
|
||||
if (c == 0xa1 && c2 == 0xc0)
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
OUTCHAR(0xff3c);
|
||||
else
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf,
|
||||
TRYMAP_DEC(jisx0208, writer,
|
||||
c ^ 0x80, c2 ^ 0x80) ;
|
||||
else return 1;
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -433,7 +428,7 @@ DECODER(euc_jp)
|
|||
ENCODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
|
@ -488,14 +483,12 @@ DECODER(shift_jis)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
JISX0201_R_DECODE(c, writer)
|
||||
#else
|
||||
if (c < 0x80) **outbuf = c;
|
||||
if (c < 0x80) OUTCHAR(c);
|
||||
#endif
|
||||
else JISX0201_K_DECODE(c, **outbuf)
|
||||
else JISX0201_K_DECODE(c, writer)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
unsigned char c1, c2;
|
||||
|
||||
|
@ -512,13 +505,13 @@ DECODER(shift_jis)
|
|||
#ifndef STRICT_BUILD
|
||||
if (c1 == 0x21 && c2 == 0x40) {
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUT1(0xff3c)
|
||||
NEXT(2, 1)
|
||||
OUTCHAR(0xff3c);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT(2, 1)
|
||||
TRYMAP_DEC(jisx0208, writer, c1, c2) {
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
|
@ -527,7 +520,7 @@ DECODER(shift_jis)
|
|||
else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
NEXT_IN(1); /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -541,7 +534,7 @@ DECODER(shift_jis)
|
|||
ENCODER(shift_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code = NOCHAR;
|
||||
int c1, c2;
|
||||
Py_ssize_t insize;
|
||||
|
@ -636,11 +629,10 @@ DECODER(shift_jis_2004)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
JISX0201_DECODE(c, **outbuf)
|
||||
JISX0201_DECODE(c, writer)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
|
||||
unsigned char c1, c2;
|
||||
ucs4_t code;
|
||||
Py_UCS4 code;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
@ -654,50 +646,47 @@ DECODER(shift_jis_2004)
|
|||
|
||||
if (c1 < 0x5e) { /* Plane 1 */
|
||||
c1 += 0x21;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
|
||||
c1, c2)
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
else TRYMAP_DEC(jisx0208, writer, c1, c2) {
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, writer,
|
||||
c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT_OUT(2)
|
||||
else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
|
||||
OUTCHAR2(code >> 16, code & 0xffff);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
NEXT_IN(2)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else { /* Plane 2 */
|
||||
if (c1 >= 0x67) c1 += 0x07;
|
||||
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
|
||||
else c1 -= 0x3d;
|
||||
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
|
||||
c1, c2)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
|
||||
c1, c2) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, writer,
|
||||
c1, c2) {
|
||||
} else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
|
||||
OUTCHAR(EMPBASE | code);
|
||||
NEXT_IN(2);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
NEXT_IN(1); /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -34,7 +34,7 @@ static const unsigned char u2cgk_jongseong[28] = {
|
|||
ENCODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -104,11 +104,9 @@ DECODER(euc_kr)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -145,11 +143,11 @@ DECODER(euc_kr)
|
|||
if (cho == NONE || jung == NONE || jong == NONE)
|
||||
return 1;
|
||||
|
||||
OUT1(0xac00 + cho*588 + jung*28 + jong);
|
||||
NEXT(8, 1)
|
||||
OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
|
||||
NEXT_IN(8);
|
||||
}
|
||||
else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
@ -167,7 +165,7 @@ DECODER(euc_kr)
|
|||
ENCODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -197,20 +195,18 @@ DECODER(cp949)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
|
||||
TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, writer, c, IN2);
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -251,7 +247,7 @@ static const DBCHAR u2johabjamo[] = {
|
|||
ENCODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -350,11 +346,9 @@ DECODER(johab)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -381,33 +375,33 @@ DECODER(johab)
|
|||
if (i_cho == FILL) {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3000)
|
||||
OUTCHAR(0x3000);
|
||||
else
|
||||
OUT1(0x3100 |
|
||||
johabjamo_jongseong[c_jong])
|
||||
OUTCHAR(0x3100 |
|
||||
johabjamo_jongseong[c_jong]);
|
||||
}
|
||||
else {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 |
|
||||
johabjamo_jungseong[c_jung])
|
||||
OUTCHAR(0x3100 |
|
||||
johabjamo_jungseong[c_jung]);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 |
|
||||
johabjamo_choseong[c_cho])
|
||||
OUTCHAR(0x3100 |
|
||||
johabjamo_choseong[c_cho]);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
OUT1(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong))
|
||||
OUTCHAR(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong));
|
||||
}
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
} else {
|
||||
/* KS X 1001 except hangul jamos and syllables */
|
||||
if (c == 0xdf || c > 0xf9 ||
|
||||
|
@ -424,9 +418,9 @@ DECODER(johab)
|
|||
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
|
||||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
|
||||
TRYMAP_DEC(ksx1001, writer, t1, t2);
|
||||
else return 1;
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
ENCODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
Py_UCS4 c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -43,17 +43,15 @@ DECODER(big5)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
TRYMAP_DEC(big5, writer, c, IN2) {
|
||||
NEXT_IN(2);
|
||||
}
|
||||
else return 1;
|
||||
}
|
||||
|
@ -69,7 +67,7 @@ DECODER(big5)
|
|||
ENCODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
Py_UCS4 c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
|
@ -97,21 +95,19 @@ DECODER(cp950)
|
|||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
OUTCHAR(c);
|
||||
NEXT_IN(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
|
||||
else TRYMAP_DEC(big5, **outbuf, c, IN2);
|
||||
TRYMAP_DEC(cp950ext, writer, c, IN2);
|
||||
else TRYMAP_DEC(big5, writer, c, IN2);
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
NEXT_IN(2);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -10,15 +10,24 @@
|
|||
JISX0201_R_ENCODE(c, assi) \
|
||||
else JISX0201_K_ENCODE(c, assi)
|
||||
|
||||
#define JISX0201_R_DECODE(c, assi) \
|
||||
#define JISX0201_R_DECODE_CHAR(c, assi) \
|
||||
if ((c) < 0x5c) (assi) = (c); \
|
||||
else if ((c) == 0x5c) (assi) = 0x00a5; \
|
||||
else if ((c) < 0x7e) (assi) = (c); \
|
||||
else if ((c) == 0x7e) (assi) = 0x203e; \
|
||||
else if ((c) == 0x7f) (assi) = 0x7f;
|
||||
#define JISX0201_K_DECODE(c, assi) \
|
||||
#define JISX0201_R_DECODE(c, writer) \
|
||||
if ((c) < 0x5c) OUTCHAR(c); \
|
||||
else if ((c) == 0x5c) OUTCHAR(0x00a5); \
|
||||
else if ((c) < 0x7e) OUTCHAR(c); \
|
||||
else if ((c) == 0x7e) OUTCHAR(0x203e); \
|
||||
else if ((c) == 0x7f) OUTCHAR(0x7f);
|
||||
#define JISX0201_K_DECODE(c, writer) \
|
||||
if ((c) >= 0xa1 && (c) <= 0xdf) \
|
||||
(assi) = 0xfec0 + (c);
|
||||
#define JISX0201_DECODE(c, assi) \
|
||||
JISX0201_R_DECODE(c, assi) \
|
||||
else JISX0201_K_DECODE(c, assi)
|
||||
OUTCHAR(0xfec0 + (c));
|
||||
#define JISX0201_K_DECODE_CHAR(c, assi) \
|
||||
if ((c) >= 0xa1 && (c) <= 0xdf) \
|
||||
(assi) = 0xfec0 + (c);
|
||||
#define JISX0201_DECODE(c, writer) \
|
||||
JISX0201_R_DECODE(c, writer) \
|
||||
else JISX0201_K_DECODE(c, writer)
|
||||
|
|
|
@ -33,7 +33,7 @@ struct dbcs_index {
|
|||
typedef struct dbcs_index decode_map;
|
||||
|
||||
struct widedbcs_index {
|
||||
const ucs4_t *map;
|
||||
const Py_UCS4 *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct widedbcs_index widedecode_map;
|
||||
|
@ -56,7 +56,7 @@ struct dbcs_map {
|
|||
};
|
||||
|
||||
struct pair_encodemap {
|
||||
ucs4_t uniseq;
|
||||
Py_UCS4 uniseq;
|
||||
DBCHAR code;
|
||||
};
|
||||
|
||||
|
@ -86,7 +86,7 @@ static const struct dbcs_map *mapping_list;
|
|||
static Py_ssize_t encoding##_decode( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
const unsigned char **inbuf, Py_ssize_t inleft, \
|
||||
Py_UNICODE **outbuf, Py_ssize_t outleft)
|
||||
_PyUnicodeWriter *writer)
|
||||
#define DECODER_RESET(encoding) \
|
||||
static Py_ssize_t encoding##_decode_reset( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
|
@ -101,13 +101,15 @@ static const struct dbcs_map *mapping_list;
|
|||
#endif
|
||||
|
||||
#define NEXT_IN(i) \
|
||||
(*inbuf) += (i); \
|
||||
(inleft) -= (i);
|
||||
do { \
|
||||
(*inbuf) += (i); \
|
||||
(inleft) -= (i); \
|
||||
} while (0)
|
||||
#define NEXT_OUT(o) \
|
||||
(*outbuf) += (o); \
|
||||
(outleft) -= (o);
|
||||
#define NEXT(i, o) \
|
||||
NEXT_IN(i) NEXT_OUT(o)
|
||||
NEXT_IN(i); NEXT_OUT(o)
|
||||
|
||||
#define REQUIRE_INBUF(n) \
|
||||
if (inleft < (n)) \
|
||||
|
@ -121,6 +123,23 @@ static const struct dbcs_map *mapping_list;
|
|||
#define IN3 ((*inbuf)[2])
|
||||
#define IN4 ((*inbuf)[3])
|
||||
|
||||
/* Append the single code point `c` to the decoder's `writer`.
   Expands to a statement that makes the *enclosing* function return
   MBERR_TOOSMALL when _PyUnicodeWriter_WriteChar() reports failure (< 0).
   Requires a variable named `writer` to be in scope at the expansion site. */
#define OUTCHAR(c)                                                      \
    do {                                                                \
        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) {              \
            return MBERR_TOOSMALL;                                      \
        }                                                               \
    } while (0)
|
||||
|
||||
/* Append the two code points `c1` and `c2`, in that order, to the decoder's
   `writer`.

   Each argument is evaluated exactly once: it is captured in a local
   (_c1 / _c2) and only the locals are used afterwards, so arguments with
   side effects are safe.  Room for both characters is reserved with one
   _PyUnicodeWriter_Prepare() call, using the larger of the two as the
   maximum character.  If that call fails (< 0) the *enclosing* function
   returns MBERR_TOOSMALL.

   Requires a variable named `writer` to be in scope at the expansion site. */
#define OUTCHAR2(c1, c2)                                                    \
    do {                                                                    \
        Py_UCS4 _c1 = (c1);                                                 \
        Py_UCS4 _c2 = (c2);                                                 \
        /* use the captured _c2, not the raw macro argument */              \
        if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0)      \
            return MBERR_TOOSMALL;                                          \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);      \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2);  \
        writer->pos += 2;                                                   \
    } while (0)
|
||||
|
||||
#define OUT1(c) ((*outbuf)[0]) = (c);
|
||||
#define OUT2(c) ((*outbuf)[1]) = (c);
|
||||
#define OUT3(c) ((*outbuf)[2]) = (c);
|
||||
|
@ -145,19 +164,6 @@ static const struct dbcs_map *mapping_list;
|
|||
(*outbuf)[2] = (c3); \
|
||||
(*outbuf)[3] = (c4);
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
# define WRITEUCS4(c) \
|
||||
REQUIRE_OUTBUF(2) \
|
||||
(*outbuf)[0] = Py_UNICODE_HIGH_SURROGATE(c); \
|
||||
(*outbuf)[1] = Py_UNICODE_LOW_SURROGATE(c); \
|
||||
NEXT_OUT(2)
|
||||
#else
|
||||
# define WRITEUCS4(c) \
|
||||
REQUIRE_OUTBUF(1) \
|
||||
**outbuf = (Py_UNICODE)(c); \
|
||||
NEXT_OUT(1)
|
||||
#endif
|
||||
|
||||
#define _TRYMAP_ENC(m, assi, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
|
@ -167,24 +173,41 @@ static const struct dbcs_map *mapping_list;
|
|||
#define TRYMAP_ENC(charset, assi, uni) \
|
||||
if TRYMAP_ENC_COND(charset, assi, uni)
|
||||
|
||||
#define _TRYMAP_DEC(m, assi, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != UNIINV)
|
||||
#define TRYMAP_DEC(charset, assi, c1, c2) \
|
||||
if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
|
||||
Py_LOCAL_INLINE(int)
|
||||
_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
|
||||
{
|
||||
if (c == UNIINV || _PyUnicodeWriter_WriteChar(writer, c) < 0)
|
||||
return UNIINV;
|
||||
else
|
||||
return c;
|
||||
}
|
||||
|
||||
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && \
|
||||
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
|
||||
/* Decode-map lookup conditions.

   _TRYMAP_DEC(m, writer, val)
       True when map entry `m` has a table, `val` lies within
       [m->bottom, m->top], and _TRYMAP_DEC_WRITE() on the table entry
       does not return UNIINV.  On success the character has already
       been written to `writer`.

   _TRYMAP_DEC_CHAR(m, assi, val)
       Same range checks, but the table entry is assigned to `assi`
       instead of being written; true when that value is not UNIINV.

   TRYMAP_DEC / TRYMAP_DEC_CHAR
       Open an `if` on the condition above for `charset##_decmap[c1]`,
       so the caller supplies the statement (and optional `else`) that
       follows. */
#define _TRYMAP_DEC(m, writer, val)                                         \
    ((m)->map != NULL &&                                                    \
     (val) >= (m)->bottom &&                                                \
     (val)<= (m)->top &&                                                    \
     _TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)

#define _TRYMAP_DEC_CHAR(m, assi, val)                                      \
    ((m)->map != NULL &&                                                    \
     (val) >= (m)->bottom &&                                                \
     (val)<= (m)->top &&                                                    \
     ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)

#define TRYMAP_DEC(charset, writer, c1, c2)                                 \
    if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2)

#define TRYMAP_DEC_CHAR(charset, assi, c1, c2)                              \
    if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2)
|
||||
|
||||
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && \
|
||||
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
|
||||
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
|
||||
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
|
||||
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
|
||||
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
|
||||
assplane, asshi, asslo, (uni) & 0xff)
|
||||
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
|
||||
if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
|
||||
#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
|
||||
if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
#define DECODE_SURROGATE(c) \
|
||||
|
@ -323,7 +346,7 @@ find_pairencmap(ucs2_t body, ucs2_t modifier,
|
|||
const struct pair_encodemap *haystack, int haystacksize)
|
||||
{
|
||||
int pos, min, max;
|
||||
ucs4_t value = body << 16 | modifier;
|
||||
Py_UCS4 value = body << 16 | modifier;
|
||||
|
||||
min = 0;
|
||||
max = haystacksize;
|
||||
|
|
|
@ -38,6 +38,9 @@
|
|||
((c1) == 0x7E && (c2) == 0x7E))) \
|
||||
return EMULATE_JISX0213_2000_DECODE_INVALID;
|
||||
|
||||
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
|
||||
#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) \
|
||||
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
|
||||
OUTCHAR(0x9B1D);
|
||||
#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) \
|
||||
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
|
||||
(assi) = 0x9B1D;
|
||||
|
|
|
@ -4049,7 +4049,7 @@ __gb18030ext_encmap+3126,0,100},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
|||
|
||||
|
||||
static const struct _gb18030_to_unibmp_ranges {
|
||||
Py_UNICODE first, last;
|
||||
Py_UCS4 first, last;
|
||||
DBCHAR base;
|
||||
} gb18030_to_unibmp_ranges[] = {
|
||||
{128,163,0},{165,166,36},{169,175,38},{178,182,45},{184,214,50},{216,223,81},{
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
static const struct widedbcs_index *jisx0213_pair_decmap;
|
||||
static const struct pair_encodemap *jisx0213_pair_encmap;
|
||||
#else
|
||||
static const ucs4_t __jisx0213_pair_decmap[49] = {
|
||||
static const Py_UCS4 __jisx0213_pair_decmap[49] = {
|
||||
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
|
||||
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
|
||||
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
|
||||
|
|
|
@ -17,8 +17,8 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
const unsigned char *inbuf, *inbuf_top, *inbuf_end;
|
||||
Py_UNICODE *outbuf, *outbuf_end;
|
||||
PyObject *excobj, *outobj;
|
||||
PyObject *excobj;
|
||||
_PyUnicodeWriter writer;
|
||||
} MultibyteDecodeBuffer;
|
||||
|
||||
PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
|
||||
|
@ -197,29 +197,6 @@ expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
|
|||
goto errorexit; \
|
||||
}
|
||||
|
||||
static int
|
||||
expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
|
||||
{
|
||||
Py_ssize_t orgpos, orgsize;
|
||||
|
||||
orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
|
||||
orgsize = PyUnicode_GET_SIZE(buf->outobj);
|
||||
if (PyUnicode_Resize(&buf->outobj, orgsize + (
|
||||
esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
|
||||
return -1;
|
||||
|
||||
buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
|
||||
buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
|
||||
+ PyUnicode_GET_SIZE(buf->outobj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define REQUIRE_DECODEBUFFER(buf, s) { \
|
||||
if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
|
||||
if (expand_decodebuffer(buf, s) == -1) \
|
||||
goto errorexit; \
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* MultibyteCodec object
|
||||
|
@ -374,7 +351,7 @@ multibytecodec_decerror(MultibyteCodec *codec,
|
|||
PyObject *errors, Py_ssize_t e)
|
||||
{
|
||||
PyObject *retobj = NULL, *retuni = NULL;
|
||||
Py_ssize_t retunisize, newpos;
|
||||
Py_ssize_t newpos;
|
||||
const char *reason;
|
||||
Py_ssize_t esize, start, end;
|
||||
|
||||
|
@ -385,7 +362,6 @@ multibytecodec_decerror(MultibyteCodec *codec,
|
|||
else {
|
||||
switch (e) {
|
||||
case MBERR_TOOSMALL:
|
||||
REQUIRE_DECODEBUFFER(buf, -1);
|
||||
return 0; /* retry it */
|
||||
case MBERR_TOOFEW:
|
||||
reason = "incomplete multibyte sequence";
|
||||
|
@ -403,8 +379,9 @@ multibytecodec_decerror(MultibyteCodec *codec,
|
|||
}
|
||||
|
||||
if (errors == ERROR_REPLACE) {
|
||||
REQUIRE_DECODEBUFFER(buf, 1);
|
||||
*buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
|
||||
if (_PyUnicodeWriter_WriteChar(&buf->writer,
|
||||
Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
|
||||
goto errorexit;
|
||||
}
|
||||
if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
|
||||
buf->inbuf += esize;
|
||||
|
@ -447,15 +424,8 @@ multibytecodec_decerror(MultibyteCodec *codec,
|
|||
goto errorexit;
|
||||
}
|
||||
|
||||
if (PyUnicode_AsUnicode(retuni) == NULL)
|
||||
if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
|
||||
goto errorexit;
|
||||
retunisize = PyUnicode_GET_SIZE(retuni);
|
||||
if (retunisize > 0) {
|
||||
REQUIRE_DECODEBUFFER(buf, retunisize);
|
||||
memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni),
|
||||
retunisize * Py_UNICODE_SIZE);
|
||||
buf->outbuf += retunisize;
|
||||
}
|
||||
|
||||
newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
|
||||
if (newpos < 0 && !PyErr_Occurred())
|
||||
|
@ -617,10 +587,10 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
|
|||
{
|
||||
MultibyteCodec_State state;
|
||||
MultibyteDecodeBuffer buf;
|
||||
PyObject *errorcb;
|
||||
PyObject *errorcb, *res;
|
||||
Py_buffer pdata;
|
||||
const char *data, *errors = NULL;
|
||||
Py_ssize_t datalen, finalsize;
|
||||
Py_ssize_t datalen;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode",
|
||||
codeckwarglist, &pdata, &errors))
|
||||
|
@ -640,29 +610,22 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
|
|||
return make_tuple(PyUnicode_New(0, 0), 0);
|
||||
}
|
||||
|
||||
_PyUnicodeWriter_Init(&buf.writer, datalen);
|
||||
buf.excobj = NULL;
|
||||
buf.inbuf = buf.inbuf_top = (unsigned char *)data;
|
||||
buf.inbuf_end = buf.inbuf_top + datalen;
|
||||
buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
|
||||
if (buf.outobj == NULL)
|
||||
goto errorexit;
|
||||
buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
|
||||
if (buf.outbuf == NULL)
|
||||
goto errorexit;
|
||||
buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
|
||||
|
||||
if (self->codec->decinit != NULL &&
|
||||
self->codec->decinit(&state, self->codec->config) != 0)
|
||||
goto errorexit;
|
||||
|
||||
while (buf.inbuf < buf.inbuf_end) {
|
||||
Py_ssize_t inleft, outleft, r;
|
||||
Py_ssize_t inleft, r;
|
||||
|
||||
inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
|
||||
outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
|
||||
|
||||
r = self->codec->decode(&state, self->codec->config,
|
||||
&buf.inbuf, inleft, &buf.outbuf, outleft);
|
||||
&buf.inbuf, inleft, &buf.writer);
|
||||
if (r == 0)
|
||||
break;
|
||||
else if (multibytecodec_decerror(self->codec, &state,
|
||||
|
@ -670,23 +633,20 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
|
|||
goto errorexit;
|
||||
}
|
||||
|
||||
finalsize = (Py_ssize_t)(buf.outbuf -
|
||||
PyUnicode_AS_UNICODE(buf.outobj));
|
||||
|
||||
if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
|
||||
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
|
||||
goto errorexit;
|
||||
res = _PyUnicodeWriter_Finish(&buf.writer);
|
||||
if (res == NULL)
|
||||
goto errorexit;
|
||||
|
||||
PyBuffer_Release(&pdata);
|
||||
Py_XDECREF(buf.excobj);
|
||||
ERROR_DECREF(errorcb);
|
||||
return make_tuple(buf.outobj, datalen);
|
||||
return make_tuple(res, datalen);
|
||||
|
||||
errorexit:
|
||||
PyBuffer_Release(&pdata);
|
||||
ERROR_DECREF(errorcb);
|
||||
Py_XDECREF(buf.excobj);
|
||||
Py_XDECREF(buf.outobj);
|
||||
_PyUnicodeWriter_Dealloc(&buf.writer);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -859,17 +819,7 @@ decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
|
|||
{
|
||||
buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
|
||||
buf->inbuf_end = buf->inbuf_top + size;
|
||||
if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
|
||||
buf->outobj = PyUnicode_FromUnicode(NULL, size);
|
||||
if (buf->outobj == NULL)
|
||||
return -1;
|
||||
buf->outbuf = PyUnicode_AsUnicode(buf->outobj);
|
||||
if (buf->outbuf == NULL)
|
||||
return -1;
|
||||
buf->outbuf_end = buf->outbuf +
|
||||
PyUnicode_GET_SIZE(buf->outobj);
|
||||
}
|
||||
|
||||
_PyUnicodeWriter_Init(&buf->writer, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -878,14 +828,13 @@ decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
|
|||
MultibyteDecodeBuffer *buf)
|
||||
{
|
||||
while (buf->inbuf < buf->inbuf_end) {
|
||||
Py_ssize_t inleft, outleft;
|
||||
Py_ssize_t inleft;
|
||||
Py_ssize_t r;
|
||||
|
||||
inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
|
||||
outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
|
||||
|
||||
r = ctx->codec->decode(&ctx->state, ctx->codec->config,
|
||||
&buf->inbuf, inleft, &buf->outbuf, outleft);
|
||||
&buf->inbuf, inleft, &buf->writer);
|
||||
if (r == 0 || r == MBERR_TOOFEW)
|
||||
break;
|
||||
else if (multibytecodec_decerror(ctx->codec, &ctx->state,
|
||||
|
@ -1058,8 +1007,9 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
|
|||
MultibyteDecodeBuffer buf;
|
||||
char *data, *wdata = NULL;
|
||||
Py_buffer pdata;
|
||||
Py_ssize_t wsize, finalsize = 0, size, origpending;
|
||||
Py_ssize_t wsize, size, origpending;
|
||||
int final = 0;
|
||||
PyObject *res;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode",
|
||||
incrementalkwarglist, &pdata, &final))
|
||||
|
@ -1067,7 +1017,8 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
|
|||
data = pdata.buf;
|
||||
size = pdata.len;
|
||||
|
||||
buf.outobj = buf.excobj = NULL;
|
||||
_PyUnicodeWriter_Init(&buf.writer, 1);
|
||||
buf.excobj = NULL;
|
||||
origpending = self->pendingsize;
|
||||
|
||||
if (self->pendingsize == 0) {
|
||||
|
@ -1109,23 +1060,22 @@ mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
|
|||
goto errorexit;
|
||||
}
|
||||
|
||||
finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
|
||||
if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
|
||||
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
|
||||
goto errorexit;
|
||||
res = _PyUnicodeWriter_Finish(&buf.writer);
|
||||
if (res == NULL)
|
||||
goto errorexit;
|
||||
|
||||
PyBuffer_Release(&pdata);
|
||||
if (wdata != data)
|
||||
PyMem_Del(wdata);
|
||||
Py_XDECREF(buf.excobj);
|
||||
return buf.outobj;
|
||||
return res;
|
||||
|
||||
errorexit:
|
||||
PyBuffer_Release(&pdata);
|
||||
if (wdata != NULL && wdata != data)
|
||||
PyMem_Del(wdata);
|
||||
Py_XDECREF(buf.excobj);
|
||||
Py_XDECREF(buf.outobj);
|
||||
_PyUnicodeWriter_Dealloc(&buf.writer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -1265,13 +1215,14 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
|||
const char *method, Py_ssize_t sizehint)
|
||||
{
|
||||
MultibyteDecodeBuffer buf;
|
||||
PyObject *cres;
|
||||
Py_ssize_t rsize, finalsize = 0;
|
||||
PyObject *cres, *res;
|
||||
Py_ssize_t rsize;
|
||||
|
||||
if (sizehint == 0)
|
||||
return PyUnicode_New(0, 0);
|
||||
|
||||
buf.outobj = buf.excobj = NULL;
|
||||
_PyUnicodeWriter_Init(&buf.writer, 1);
|
||||
buf.excobj = NULL;
|
||||
cres = NULL;
|
||||
|
||||
for (;;) {
|
||||
|
@ -1340,29 +1291,27 @@ mbstreamreader_iread(MultibyteStreamReaderObject *self,
|
|||
goto errorexit;
|
||||
}
|
||||
|
||||
finalsize = (Py_ssize_t)(buf.outbuf -
|
||||
PyUnicode_AS_UNICODE(buf.outobj));
|
||||
Py_DECREF(cres);
|
||||
cres = NULL;
|
||||
|
||||
if (sizehint < 0 || finalsize != 0 || rsize == 0)
|
||||
if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
|
||||
break;
|
||||
|
||||
sizehint = 1; /* read 1 more byte and retry */
|
||||
}
|
||||
|
||||
if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
|
||||
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
|
||||
goto errorexit;
|
||||
res = _PyUnicodeWriter_Finish(&buf.writer);
|
||||
if (res == NULL)
|
||||
goto errorexit;
|
||||
|
||||
Py_XDECREF(cres);
|
||||
Py_XDECREF(buf.excobj);
|
||||
return buf.outobj;
|
||||
return res;
|
||||
|
||||
errorexit:
|
||||
Py_XDECREF(cres);
|
||||
Py_XDECREF(buf.excobj);
|
||||
Py_XDECREF(buf.outobj);
|
||||
_PyUnicodeWriter_Dealloc(&buf.writer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,12 +10,6 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef uint32_t
|
||||
typedef uint32_t ucs4_t;
|
||||
#else
|
||||
typedef unsigned int ucs4_t;
|
||||
#endif
|
||||
|
||||
#ifdef uint16_t
|
||||
typedef uint16_t ucs2_t, DBCHAR;
|
||||
#else
|
||||
|
@ -27,7 +21,7 @@ typedef union {
|
|||
int i;
|
||||
unsigned char c[8];
|
||||
ucs2_t u2[4];
|
||||
ucs4_t u4[2];
|
||||
Py_UCS4 u4[2];
|
||||
} MultibyteCodec_State;
|
||||
|
||||
typedef int (*mbcodec_init)(const void *config);
|
||||
|
@ -44,7 +38,7 @@ typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
|
|||
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
const unsigned char **inbuf, Py_ssize_t inleft,
|
||||
Py_UNICODE **outbuf, Py_ssize_t outleft);
|
||||
_PyUnicodeWriter *writer);
|
||||
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
|
||||
|
|
|
@ -12947,6 +12947,16 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
|
||||
{
|
||||
if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)
|
||||
return -1;
|
||||
PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
|
||||
writer->pos++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue