CJK codecs: add newlines for readability

This commit is contained in:
Victor Stinner 2013-10-29 00:09:41 +01:00
parent bd97ac35f7
commit 146a2ed0f2
7 changed files with 188 additions and 91 deletions

View File

@ -33,10 +33,13 @@
OUTCHAR(decoded);
#define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
else if ((code) == 0x2015) (assi) = 0xa844; \
else if ((code) == 0x00b7) (assi) = 0xa1a4; \
else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code));
if ((code) == 0x2014) \
(assi) = 0xa1aa; \
else if ((code) == 0x2015) \
(assi) = 0xa844; \
else if ((code) == 0x00b7) \
(assi) = 0xa1a4; \
else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code))
/*
* GB2312 codec
@ -58,8 +61,10 @@ ENCODER(gb2312)
return 1;
REQUIRE_OUTBUF(2)
if (TRYMAP_ENC(gbcommon, code, c));
else return 1;
if (TRYMAP_ENC(gbcommon, code, c))
;
else
return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
@ -89,7 +94,8 @@ DECODER(gb2312)
OUTCHAR(decoded);
NEXT_IN(2);
}
else return 1;
else
return 1;
}
return 0;
@ -118,7 +124,9 @@ ENCODER(gbk)
REQUIRE_OUTBUF(2)
GBK_ENCODE(c, code)
else return 1;
;
else
return 1;
OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000)
@ -146,7 +154,8 @@ DECODER(gbk)
REQUIRE_INBUF(2)
GBK_DECODE(c, INBYTE2, writer)
else return 1;
else
return 1;
NEXT_IN(2);
}
@ -192,7 +201,9 @@ ENCODER(gb18030)
REQUIRE_OUTBUF(2)
GBK_ENCODE(c, code)
else if (TRYMAP_ENC(gb18030ext, code, c));
;
else if (TRYMAP_ENC(gb18030ext, code, c))
;
else {
const struct _gb18030_to_unibmp_ranges *utrrange;
@ -292,7 +303,8 @@ DECODER(gb18030)
GBK_DECODE(c, c2, writer)
else if (TRYMAP_DEC(gb18030ext, decoded, c, c2))
OUTCHAR(decoded);
else return 1;
else
return 1;
NEXT_IN(2);
}
@ -343,8 +355,10 @@ ENCODER(hz)
if (c > 0xFFFF)
return 1;
if (TRYMAP_ENC(gbcommon, code, c));
else return 1;
if (TRYMAP_ENC(gbcommon, code, c))
;
else
return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;

View File

@ -81,14 +81,18 @@ ENCODER(big5hkscs)
}
}
}
else if (TRYMAP_ENC(big5, code, c));
else return 1;
else if (TRYMAP_ENC(big5, code, c))
;
else
return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff));
else return insize;
if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
;
else
return insize;
}
else
return insize;

View File

@ -318,11 +318,14 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
}
else {
charset = INBYTE3;
if (INBYTE2 == '(') designation = 0;
else if (INBYTE2 == ')') designation = 1;
if (INBYTE2 == '(')
designation = 0;
else if (INBYTE2 == ')')
designation = 1;
else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
designation = 2;
else return 3;
else
return 3;
}
break;
case 4:
@ -330,9 +333,12 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
return 4;
charset = INBYTE4 | CHARSET_DBCS;
if (INBYTE3 == '(') designation = 0;
else if (INBYTE3 == ')') designation = 1;
else return 4;
if (INBYTE3 == '(')
designation = 0;
else if (INBYTE3 == ')')
designation = 1;
else
return 4;
break;
case 6: /* designation with prefix */
if (CONFIG_ISSET(USE_JISX0208_EXT) &&
@ -365,16 +371,20 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
return 0;
}
#define ISO8859_7_DECODE(c, writer) \
if ((c) < 0xa0) OUTCHAR(c); \
else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
OUTCHAR(c); \
else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
(0xbffffd77L & (1L << ((c)-0xb4))))) \
OUTCHAR(0x02d0 + (c)); \
else if ((c) == 0xa1) OUTCHAR(0x2018); \
else if ((c) == 0xa2) OUTCHAR(0x2019); \
else if ((c) == 0xaf) OUTCHAR(0x2015);
/*
 * ISO8859_7_DECODE(c, writer)
 *
 * Expands to an open-ended if / else-if chain that calls OUTCHAR() for the
 * byte value c.  Branches are tested in this order:
 *   - c < 0xa0:                                   OUTCHAR(c)
 *   - c < 0xc0 and bit (c - 0xa0) of 0x288f3bc9:  OUTCHAR(c)
 *   - 0xb4 <= c <= 0xfe and either c >= 0xd4 or
 *     bit (c - 0xb4) of 0xbffffd77:               OUTCHAR(0x02d0 + c)
 *   - c == 0xa1 / 0xa2 / 0xaf:                    OUTCHAR(0x2018 / 0x2019 / 0x2015)
 *
 * There is deliberately no final "else": the expansion stops after the last
 * branch so the call site can append its own "else ..." for values that match
 * none of the branches.  c is evaluated several times, so pass an expression
 * without side effects.
 *
 * `writer` does not appear in the expansion itself; presumably OUTCHAR() uses
 * a variable of that name from the enclosing scope -- confirm against the
 * OUTCHAR definition.
 *
 * NOTE(review): both shift counts can reach 31 (c == 0xbf and c == 0xd3).
 * Where long is 32 bits wide, 1L << 31 is not representable in a signed long;
 * unsigned constants (1UL, ...UL) would avoid relying on that -- confirm.
 */
#define ISO8859_7_DECODE(c, writer) \
if ((c) < 0xa0) \
OUTCHAR(c); \
else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
OUTCHAR(c); \
else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
(0xbffffd77L & (1L << ((c)-0xb4))))) \
OUTCHAR(0x02d0 + (c)); \
else if ((c) == 0xa1) \
OUTCHAR(0x2018); \
else if ((c) == 0xa2) \
OUTCHAR(0x2019); \
else if ((c) == 0xaf) \
OUTCHAR(0x2015);
static Py_ssize_t
iso2022processg2(const void *config, MultibyteCodec_State *state,
@ -391,11 +401,14 @@ iso2022processg2(const void *config, MultibyteCodec_State *state,
}
else if (STATE_G2 == CHARSET_ISO8859_7) {
ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
else return 3;
else
return 3;
}
else if (STATE_G2 == CHARSET_ASCII) {
if (INBYTE3 & 0x80) return 3;
else OUTCHAR(INBYTE3);
if (INBYTE3 & 0x80)
return 3;
else
OUTCHAR(INBYTE3);
}
else
return MBERR_INTERNAL;
@ -698,11 +711,14 @@ jisx0213_2000_1_decoder(const unsigned char *data)
EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
;
else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]))
;
else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
u |= 0x20000;
else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
;
else
return MAP_UNMAPPABLE;
return u;
@ -713,7 +729,8 @@ jisx0213_2000_2_decoder(const unsigned char *data)
{
Py_UCS4 u;
EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]));
if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]))
;
else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
u |= 0x20000;
else
@ -728,11 +745,14 @@ jisx0213_2004_1_decoder(const unsigned char *data)
Py_UCS4 u;
if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
return 0xff3c;
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
;
else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]))
;
else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
u |= 0x20000;
else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]));
else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
;
else
return MAP_UNMAPPABLE;
return u;
@ -742,7 +762,8 @@ static Py_UCS4
jisx0213_2004_2_decoder(const unsigned char *data)
{
Py_UCS4 u;
if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]));
if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]))
;
else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
u |= 0x20000;
else
@ -902,7 +923,8 @@ jisx0201_r_decoder(const unsigned char *data)
{
Py_UCS4 u;
JISX0201_R_DECODE_CHAR(*data, u)
else return MAP_UNMAPPABLE;
else
return MAP_UNMAPPABLE;
return u;
}
@ -911,7 +933,8 @@ jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_R_ENCODE(*data, coded)
else return MAP_UNMAPPABLE;
else
return MAP_UNMAPPABLE;
return coded;
}
@ -920,7 +943,8 @@ jisx0201_k_decoder(const unsigned char *data)
{
Py_UCS4 u;
JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
else return MAP_UNMAPPABLE;
else
return MAP_UNMAPPABLE;
return u;
}
@ -929,7 +953,8 @@ jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
{
DBCHAR coded;
JISX0201_K_ENCODE(*data, coded)
else return MAP_UNMAPPABLE;
else
return MAP_UNMAPPABLE;
return coded - 0x80;
}

View File

@ -197,7 +197,8 @@ ENCODER(euc_jis_2004)
}
}
}
else if (TRYMAP_ENC(jisxcommon, code, c));
else if (TRYMAP_ENC(jisxcommon, code, c))
;
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITEBYTE2(0x8e, c - 0xfec0)
@ -215,8 +216,10 @@ ENCODER(euc_jis_2004)
}
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else if (TRYMAP_ENC(jisx0213_emp, code, c & 0xffff));
else return insize;
else if (TRYMAP_ENC(jisx0213_emp, code, c & 0xffff))
;
else
return insize;
}
else
return insize;
@ -278,7 +281,8 @@ DECODER(euc_jis_2004)
}
else if (TRYMAP_DEC(jisx0212, decoded, c2, c3))
OUTCHAR(decoded);
else return 1;
else
return 1;
NEXT_IN(3);
}
else {
@ -290,8 +294,10 @@ DECODER(euc_jis_2004)
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
else if (c == 0x21 && c2 == 0x40)
OUTCHAR(0xff3c);
else if (c == 0x22 && c2 == 0x32)
OUTCHAR(0xff5e);
else if (TRYMAP_DEC(jisx0208, decoded, c, c2))
OUTCHAR(decoded);
else if (TRYMAP_DEC(jisx0213_1_bmp, decoded, c, c2))
@ -306,7 +312,8 @@ DECODER(euc_jis_2004)
NEXT_IN(2);
continue;
}
else return 1;
else
return 1;
NEXT_IN(2);
}
}
@ -334,7 +341,8 @@ ENCODER(euc_jp)
if (c > 0xFFFF)
return 1;
if (TRYMAP_ENC(jisxcommon, code, c));
if (TRYMAP_ENC(jisxcommon, code, c))
;
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITEBYTE2(0x8e, c - 0xfec0)
@ -448,9 +456,12 @@ ENCODER(shift_jis)
#ifdef STRICT_BUILD
JISX0201_R_ENCODE(c, code)
#else
if (c < 0x80) code = c;
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
if (c < 0x80)
code = c;
else if (c == 0x00a5)
code = 0x5c; /* YEN SIGN */
else if (c == 0x203e)
code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
else if (c > 0xFFFF)
@ -469,7 +480,8 @@ ENCODER(shift_jis)
REQUIRE_OUTBUF(2)
if (code == NOCHAR) {
if (TRYMAP_ENC(jisxcommon, code, c));
if (TRYMAP_ENC(jisxcommon, code, c))
;
#ifndef STRICT_BUILD
else if (c == 0xff3c)
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
@ -502,7 +514,8 @@ DECODER(shift_jis)
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, writer)
#else
if (c < 0x80) OUTCHAR(c);
if (c < 0x80)
OUTCHAR(c);
#endif
else JISX0201_K_DECODE(c, writer)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
@ -608,12 +621,15 @@ ENCODER(shift_jis_2004)
if (code & 0x8000)
return 1;
}
else return 1;
else
return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else if (TRYMAP_ENC(jisx0213_emp, code, c&0xffff));
else return insize;
else if (TRYMAP_ENC(jisx0213_emp, code, c&0xffff))
;
else
return insize;
}
else
return insize;
@ -623,14 +639,20 @@ ENCODER(shift_jis_2004)
c2 = (code & 0xff) - 0x21;
if (c1 & 0x80) { /* Plane 2 */
if (c1 >= 0xee) c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
else c1 -= 0x43;
if (c1 >= 0xee)
c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8)
c1 -= 0x49;
else
c1 -= 0x43;
}
else /* Plane 1 */
else {
/* Plane 1 */
c1 -= 0x21;
}
if (c1 & 1) c2 += 0x5e;
if (c1 & 1)
c2 += 0x5e;
c1 >>= 1;
OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
@ -678,9 +700,12 @@ DECODER(shift_jis_2004)
NEXT_IN(2);
}
else { /* Plane 2 */
if (c1 >= 0x67) c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
else c1 -= 0x3d;
if (c1 >= 0x67)
c1 += 0x07;
else if (c1 >= 0x63 || c1 == 0x5f)
c1 -= 0x37;
else
c1 -= 0x3d;
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
c1, c2)

View File

@ -47,8 +47,10 @@ ENCODER(euc_kr)
return 1;
REQUIRE_OUTBUF(2)
if (TRYMAP_ENC(cp949, code, c));
else return 1;
if (TRYMAP_ENC(cp949, code, c))
;
else
return 1;
if ((code & 0x8000) == 0) {
/* KS X 1001 coded character */
@ -182,8 +184,10 @@ ENCODER(cp949)
return 1;
REQUIRE_OUTBUF(2)
if (TRYMAP_ENC(cp949, code, c));
else return 1;
if (TRYMAP_ENC(cp949, code, c))
;
else
return 1;
OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000)
@ -213,7 +217,8 @@ DECODER(cp949)
OUTCHAR(decoded);
else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2))
OUTCHAR(decoded);
else return 1;
else
return 1;
NEXT_IN(2);
}

View File

@ -29,8 +29,10 @@ ENCODER(big5)
REQUIRE_OUTBUF(2)
if (TRYMAP_ENC(big5, code, c));
else return 1;
if (TRYMAP_ENC(big5, code, c))
;
else
return 1;
OUTBYTE1(code >> 8)
OUTBYTE2(code & 0xFF)
@ -84,9 +86,12 @@ ENCODER(cp950)
return 1;
REQUIRE_OUTBUF(2)
if (TRYMAP_ENC(cp950ext, code, c));
else if (TRYMAP_ENC(big5, code, c));
else return 1;
if (TRYMAP_ENC(cp950ext, code, c))
;
else if (TRYMAP_ENC(big5, code, c))
;
else
return 1;
OUTBYTE1(code >> 8)
OUTBYTE2(code & 0xFF)

View File

@ -1,27 +1,46 @@
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) { \
(assi) = (c); \
else if ((c) == 0x00a5) (assi) = 0x5c; \
else if ((c) == 0x203e) (assi) = 0x7e;
} \
else if ((c) == 0x00a5) { \
(assi) = 0x5c; \
} \
else if ((c) == 0x203e) { \
(assi) = 0x7e; \
}
#define JISX0201_K_ENCODE(c, assi) \
if ((c) >= 0xff61 && (c) <= 0xff9f) \
(assi) = (c) - 0xfec0;
{ (assi) = (c) - 0xfec0; }
/*
 * JISX0201_ENCODE(c, assi)
 *
 * Tries JISX0201_R_ENCODE first and, if none of its branches matched, falls
 * through to JISX0201_K_ENCODE via "else".  The result is still an open
 * if / else-if chain with no final "else", so the call site is expected to
 * supply the fallback branch itself.
 */
#define JISX0201_ENCODE(c, assi) \
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
#define JISX0201_R_DECODE_CHAR(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
if ((c) < 0x5c) { \
(assi) = (c); \
} \
else if ((c) == 0x5c) { \
(assi) = 0x00a5; \
} \
else if ((c) < 0x7e) { \
(assi) = (c); \
} \
else if ((c) == 0x7e) { \
(assi) = 0x203e; \
} \
else if ((c) == 0x7f) { \
(assi) = 0x7f; \
}
/*
 * JISX0201_R_DECODE(c, writer)
 *
 * Open-ended if / else-if chain that calls OUTCHAR() for a byte c in the
 * range 0x00..0x7f:
 *   - 0x5c is emitted as 0x00a5 and 0x7e as 0x203e;
 *   - every other value up to and including 0x7f is emitted unchanged.
 * Values above 0x7f match no branch; the chain has no final "else", so the
 * call site continues it (e.g. with "else JISX0201_K_DECODE(...)").
 *
 * `writer` is not referenced in the expansion; presumably OUTCHAR() picks it
 * up from the enclosing scope -- confirm against the OUTCHAR definition.
 */
#define JISX0201_R_DECODE(c, writer) \
if ((c) < 0x5c) OUTCHAR(c); \
else if ((c) == 0x5c) OUTCHAR(0x00a5); \
else if ((c) < 0x7e) OUTCHAR(c); \
else if ((c) == 0x7e) OUTCHAR(0x203e); \
else if ((c) == 0x7f) OUTCHAR(0x7f);
/*
 * JISX0201_K_DECODE(c, writer)
 *
 * Single open "if" (no "else"): for a byte c in 0xa1..0xdf, calls
 * OUTCHAR(0xfec0 + c), i.e. a value in 0xff61..0xff9f.  Any other value
 * matches nothing, leaving the call site free to append its own "else".
 *
 * `writer` is not referenced in the expansion; presumably OUTCHAR() picks it
 * up from the enclosing scope -- confirm against the OUTCHAR definition.
 */
#define JISX0201_K_DECODE(c, writer) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
OUTCHAR(0xfec0 + (c));