CJK codecs: less magical macros, semicolon is now explicit

This commit is contained in:
Victor Stinner 2013-10-29 00:59:44 +01:00
parent 14c9fea60a
commit 28c63f7ffb
8 changed files with 115 additions and 101 deletions

View File

@ -40,14 +40,15 @@
OUTCHAR(decoded); \ OUTCHAR(decoded); \
} }
#define GBK_ENCODE(code, assi) \ #define GBK_ENCODE(code, assi) \
if ((code) == 0x2014) { \ if ((code) == 0x2014) { \
(assi) = 0xa1aa; \ (assi) = 0xa1aa; \
} else if ((code) == 0x2015) { \ } else if ((code) == 0x2015) { \
(assi) = 0xa844; \ (assi) = 0xa844; \
} else if ((code) == 0x00b7) { \ } else if ((code) == 0x00b7) { \
(assi) = 0xa1a4; \ (assi) = 0xa1a4; \
} else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code)) { \ } else if ((code) != 0x30fb && TRYMAP_ENC(gbcommon, assi, code)) { \
; \
} }
/* /*
@ -98,7 +99,7 @@ DECODER(gb2312)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (TRYMAP_DEC(gb2312, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) { if (TRYMAP_DEC(gb2312, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) {
OUTCHAR(decoded); OUTCHAR(decoded);
NEXT_IN(2); NEXT_IN(2);
@ -159,7 +160,7 @@ DECODER(gbk)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
GBK_DECODE(c, INBYTE2, writer) GBK_DECODE(c, INBYTE2, writer)
else else
@ -267,7 +268,7 @@ DECODER(gb18030)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
@ -275,7 +276,7 @@ DECODER(gb18030)
unsigned char c3, c4; unsigned char c3, c4;
Py_UCS4 lseq; Py_UCS4 lseq;
REQUIRE_INBUF(4) REQUIRE_INBUF(4);
c3 = INBYTE3; c3 = INBYTE3;
c4 = INBYTE4; c4 = INBYTE4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
@ -405,7 +406,7 @@ DECODER(hz)
if (c == '~') { if (c == '~') {
unsigned char c2 = INBYTE2; unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (c2 == '~') { if (c2 == '~') {
OUTCHAR('~'); OUTCHAR('~');
NEXT_IN(2); NEXT_IN(2);
@ -431,7 +432,7 @@ DECODER(hz)
NEXT_IN(1); NEXT_IN(1);
} }
else { /* GB mode */ else { /* GB mode */
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) { if (TRYMAP_DEC(gb2312, decoded, c, INBYTE2)) {
OUTCHAR(decoded); OUTCHAR(decoded);
NEXT_IN(2); NEXT_IN(2);

View File

@ -119,7 +119,7 @@ DECODER(big5hkscs)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) { if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) { if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {

View File

@ -73,7 +73,7 @@
#define F_SHIFTED 0x01 #define F_SHIFTED 0x01
#define F_ESCTHROUGHOUT 0x02 #define F_ESCTHROUGHOUT 0x02
#define STATE_SETG(dn, v) ((state)->c[dn]) = (v); #define STATE_SETG(dn, v) do { ((state)->c[dn]) = (v); } while (0)
#define STATE_GETG(dn) ((state)->c[dn]) #define STATE_GETG(dn) ((state)->c[dn])
#define STATE_G0 STATE_GETG(0) #define STATE_G0 STATE_GETG(0)
@ -85,10 +85,10 @@
#define STATE_SETG2(v) STATE_SETG(2, v) #define STATE_SETG2(v) STATE_SETG(2, v)
#define STATE_SETG3(v) STATE_SETG(3, v) #define STATE_SETG3(v) STATE_SETG(3, v)
#define STATE_SETFLAG(f) ((state)->c[4]) |= (f); #define STATE_SETFLAG(f) do { ((state)->c[4]) |= (f); } while (0)
#define STATE_GETFLAG(f) ((state)->c[4] & (f)) #define STATE_GETFLAG(f) ((state)->c[4] & (f))
#define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f); #define STATE_CLEARFLAG(f) do { ((state)->c[4]) &= ~(f); } while (0)
#define STATE_CLEARFLAGS() ((state)->c[4]) = 0; #define STATE_CLEARFLAGS() do { ((state)->c[4]) = 0; } while (0)
#define ISO2022_CONFIG ((const struct iso2022_config *)config) #define ISO2022_CONFIG ((const struct iso2022_config *)config)
#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag)) #define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
@ -132,9 +132,9 @@ CODEC_INIT(iso2022)
ENCODER_INIT(iso2022) ENCODER_INIT(iso2022)
{ {
STATE_CLEARFLAGS() STATE_CLEARFLAGS();
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII);
STATE_SETG1(CHARSET_ASCII) STATE_SETG1(CHARSET_ASCII);
return 0; return 0;
} }
@ -143,12 +143,12 @@ ENCODER_RESET(iso2022)
if (STATE_GETFLAG(F_SHIFTED)) { if (STATE_GETFLAG(F_SHIFTED)) {
WRITEBYTE1(SI); WRITEBYTE1(SI);
NEXT_OUT(1); NEXT_OUT(1);
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED);
} }
if (STATE_G0 != CHARSET_ASCII) { if (STATE_G0 != CHARSET_ASCII) {
WRITEBYTE3(ESC, '(', 'B'); WRITEBYTE3(ESC, '(', 'B');
NEXT_OUT(3); NEXT_OUT(3);
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII);
} }
return 0; return 0;
} }
@ -164,12 +164,12 @@ ENCODER(iso2022)
if (c < 0x80) { if (c < 0x80) {
if (STATE_G0 != CHARSET_ASCII) { if (STATE_G0 != CHARSET_ASCII) {
WRITEBYTE3(ESC, '(', 'B'); WRITEBYTE3(ESC, '(', 'B');
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII);
NEXT_OUT(3); NEXT_OUT(3);
} }
if (STATE_GETFLAG(F_SHIFTED)) { if (STATE_GETFLAG(F_SHIFTED)) {
WRITEBYTE1(SI); WRITEBYTE1(SI);
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED);
NEXT_OUT(1); NEXT_OUT(1);
} }
WRITEBYTE1((unsigned char)c); WRITEBYTE1((unsigned char)c);
@ -211,24 +211,24 @@ ENCODER(iso2022)
case 0: /* G0 */ case 0: /* G0 */
if (STATE_GETFLAG(F_SHIFTED)) { if (STATE_GETFLAG(F_SHIFTED)) {
WRITEBYTE1(SI); WRITEBYTE1(SI);
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED);
NEXT_OUT(1); NEXT_OUT(1);
} }
if (STATE_G0 != dsg->mark) { if (STATE_G0 != dsg->mark) {
if (dsg->width == 1) { if (dsg->width == 1) {
WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)); WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark));
STATE_SETG0(dsg->mark) STATE_SETG0(dsg->mark);
NEXT_OUT(3); NEXT_OUT(3);
} }
else if (dsg->mark == CHARSET_JISX0208) { else if (dsg->mark == CHARSET_JISX0208) {
WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)); WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark));
STATE_SETG0(dsg->mark) STATE_SETG0(dsg->mark);
NEXT_OUT(3); NEXT_OUT(3);
} }
else { else {
WRITEBYTE4(ESC, '$', '(', WRITEBYTE4(ESC, '$', '(',
ESCMARK(dsg->mark)); ESCMARK(dsg->mark));
STATE_SETG0(dsg->mark) STATE_SETG0(dsg->mark);
NEXT_OUT(4); NEXT_OUT(4);
} }
} }
@ -237,19 +237,18 @@ ENCODER(iso2022)
if (STATE_G1 != dsg->mark) { if (STATE_G1 != dsg->mark) {
if (dsg->width == 1) { if (dsg->width == 1) {
WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)); WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark));
STATE_SETG1(dsg->mark) STATE_SETG1(dsg->mark);
NEXT_OUT(3); NEXT_OUT(3);
} }
else { else {
WRITEBYTE4(ESC, '$', ')', WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark));
ESCMARK(dsg->mark)); STATE_SETG1(dsg->mark);
STATE_SETG1(dsg->mark)
NEXT_OUT(4); NEXT_OUT(4);
} }
} }
if (!STATE_GETFLAG(F_SHIFTED)) { if (!STATE_GETFLAG(F_SHIFTED)) {
WRITEBYTE1(SO); WRITEBYTE1(SO);
STATE_SETFLAG(F_SHIFTED) STATE_SETFLAG(F_SHIFTED);
NEXT_OUT(1); NEXT_OUT(1);
} }
break; break;
@ -274,17 +273,17 @@ ENCODER(iso2022)
DECODER_INIT(iso2022) DECODER_INIT(iso2022)
{ {
STATE_CLEARFLAGS() STATE_CLEARFLAGS();
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII);
STATE_SETG1(CHARSET_ASCII) STATE_SETG1(CHARSET_ASCII);
STATE_SETG2(CHARSET_ASCII) STATE_SETG2(CHARSET_ASCII);
return 0; return 0;
} }
DECODER_RESET(iso2022) DECODER_RESET(iso2022)
{ {
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII);
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED);
return 0; return 0;
} }
@ -303,8 +302,9 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
break; break;
} }
else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
(*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') {
i += 2; i += 2;
}
} }
if (i >= MAX_ESCSEQLEN) if (i >= MAX_ESCSEQLEN)
@ -358,14 +358,15 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
if (charset != CHARSET_ASCII) { if (charset != CHARSET_ASCII) {
const struct iso2022_designation *dsg; const struct iso2022_designation *dsg;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
if (dsg->mark == charset) if (dsg->mark == charset)
break; break;
}
if (!dsg->mark) if (!dsg->mark)
return esclen; return esclen;
} }
STATE_SETG(designation, charset) STATE_SETG(designation, charset);
*inleft -= esclen; *inleft -= esclen;
(*inbuf) += esclen; (*inbuf) += esclen;
return 0; return 0;
@ -433,14 +434,14 @@ DECODER(iso2022)
OUTCHAR(c); /* assume as ISO-8859-1 */ OUTCHAR(c); /* assume as ISO-8859-1 */
NEXT_IN(1); NEXT_IN(1);
if (IS_ESCEND(c)) { if (IS_ESCEND(c)) {
STATE_CLEARFLAG(F_ESCTHROUGHOUT) STATE_CLEARFLAG(F_ESCTHROUGHOUT);
} }
continue; continue;
} }
switch (c) { switch (c) {
case ESC: case ESC:
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (IS_ISO2022ESC(INBYTE2)) { if (IS_ISO2022ESC(INBYTE2)) {
err = iso2022processesc(config, state, err = iso2022processesc(config, state,
inbuf, &inleft); inbuf, &inleft);
@ -448,7 +449,7 @@ DECODER(iso2022)
return err; return err;
} }
else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
REQUIRE_INBUF(3) REQUIRE_INBUF(3);
err = iso2022processg2(config, state, err = iso2022processg2(config, state,
inbuf, &inleft, writer); inbuf, &inleft, writer);
if (err != 0) if (err != 0)
@ -456,24 +457,24 @@ DECODER(iso2022)
} }
else { else {
OUTCHAR(ESC); OUTCHAR(ESC);
STATE_SETFLAG(F_ESCTHROUGHOUT) STATE_SETFLAG(F_ESCTHROUGHOUT);
NEXT_IN(1); NEXT_IN(1);
} }
break; break;
case SI: case SI:
if (CONFIG_ISSET(NO_SHIFT)) if (CONFIG_ISSET(NO_SHIFT))
goto bypass; goto bypass;
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED);
NEXT_IN(1); NEXT_IN(1);
break; break;
case SO: case SO:
if (CONFIG_ISSET(NO_SHIFT)) if (CONFIG_ISSET(NO_SHIFT))
goto bypass; goto bypass;
STATE_SETFLAG(F_SHIFTED) STATE_SETFLAG(F_SHIFTED);
NEXT_IN(1); NEXT_IN(1);
break; break;
case LF: case LF:
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED);
OUTCHAR(LF); OUTCHAR(LF);
NEXT_IN(1); NEXT_IN(1);
break; break;
@ -493,38 +494,41 @@ DECODER(iso2022)
charset = STATE_G0; charset = STATE_G0;
if (charset == CHARSET_ASCII) { if (charset == CHARSET_ASCII) {
bypass: OUTCHAR(c); bypass:
NEXT_IN(1); OUTCHAR(c);
break; NEXT_IN(1);
} break;
}
if (dsgcache != NULL && if (dsgcache != NULL &&
dsgcache->mark == charset) dsgcache->mark == charset)
dsg = dsgcache; dsg = dsgcache;
else { else {
for (dsg = CONFIG_DESIGNATIONS; for (dsg = CONFIG_DESIGNATIONS;
dsg->mark != charset dsg->mark != charset
#ifdef Py_DEBUG #ifdef Py_DEBUG
&& dsg->mark != '\0' && dsg->mark != '\0'
#endif #endif
;dsg++) ; dsg++)
/* noop */; {
assert(dsg->mark != '\0'); /* noop */
dsgcache = dsg; }
} assert(dsg->mark != '\0');
dsgcache = dsg;
}
REQUIRE_INBUF(dsg->width) REQUIRE_INBUF(dsg->width);
decoded = dsg->decoder(*inbuf); decoded = dsg->decoder(*inbuf);
if (decoded == MAP_UNMAPPABLE) if (decoded == MAP_UNMAPPABLE)
return dsg->width; return dsg->width;
if (decoded < 0x10000) { if (decoded < 0x10000) {
OUTCHAR(decoded); OUTCHAR(decoded);
} }
else if (decoded < 0x30000) { else if (decoded < 0x30000) {
OUTCHAR(decoded); OUTCHAR(decoded);
} }
else { /* JIS X 0213 pairs */ else { /* JIS X 0213 pairs */
OUTCHAR2(decoded >> 16, decoded & 0xffff); OUTCHAR2(decoded >> 16, decoded & 0xffff);
} }
NEXT_IN(dsg->width); NEXT_IN(dsg->width);
@ -800,9 +804,10 @@ jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
else else
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
return coded; return coded;
case 2: /* second character of unicode pair */ case 2: /* second character of unicode pair */
coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
jisx0213_pair_encmap, JISX0213_ENCPAIRS); jisx0213_pair_encmap, JISX0213_ENCPAIRS);
if (coded == DBCINV) { if (coded == DBCINV) {
*length = 1; *length = 1;
coded = find_pairencmap((ucs2_t)data[0], 0, coded = find_pairencmap((ucs2_t)data[0], 0,
@ -812,14 +817,17 @@ jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
} }
else else
return coded; return coded;
case -1: /* flush unterminated */ case -1: /* flush unterminated */
*length = 1; *length = 1;
coded = find_pairencmap((ucs2_t)data[0], 0, coded = find_pairencmap((ucs2_t)data[0], 0,
jisx0213_pair_encmap, JISX0213_ENCPAIRS); jisx0213_pair_encmap, JISX0213_ENCPAIRS);
if (coded == DBCINV) if (coded == DBCINV)
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
else else
return coded; return coded;
break;
default: default:
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
} }

View File

@ -107,7 +107,7 @@ DECODER(cp932)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (TRYMAP_DEC(cp932ext, decoded, c, c2)) if (TRYMAP_DEC(cp932ext, decoded, c, c2))
@ -254,7 +254,7 @@ DECODER(euc_jis_2004)
/* JIS X 0201 half-width katakana */ /* JIS X 0201 half-width katakana */
unsigned char c2; unsigned char c2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) { if (c2 >= 0xa1 && c2 <= 0xdf) {
OUTCHAR(0xfec0 + c2); OUTCHAR(0xfec0 + c2);
@ -266,7 +266,7 @@ DECODER(euc_jis_2004)
else if (c == 0x8f) { else if (c == 0x8f) {
unsigned char c2, c3; unsigned char c2, c3;
REQUIRE_INBUF(3) REQUIRE_INBUF(3);
c2 = INBYTE2 ^ 0x80; c2 = INBYTE2 ^ 0x80;
c3 = INBYTE3 ^ 0x80; c3 = INBYTE3 ^ 0x80;
@ -288,7 +288,7 @@ DECODER(euc_jis_2004)
else { else {
unsigned char c2; unsigned char c2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c ^= 0x80; c ^= 0x80;
c2 = INBYTE2 ^ 0x80; c2 = INBYTE2 ^ 0x80;
@ -395,7 +395,7 @@ DECODER(euc_jp)
/* JIS X 0201 half-width katakana */ /* JIS X 0201 half-width katakana */
unsigned char c2; unsigned char c2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (c2 >= 0xa1 && c2 <= 0xdf) { if (c2 >= 0xa1 && c2 <= 0xdf) {
OUTCHAR(0xfec0 + c2); OUTCHAR(0xfec0 + c2);
@ -407,7 +407,7 @@ DECODER(euc_jp)
else if (c == 0x8f) { else if (c == 0x8f) {
unsigned char c2, c3; unsigned char c2, c3;
REQUIRE_INBUF(3) REQUIRE_INBUF(3);
c2 = INBYTE2; c2 = INBYTE2;
c3 = INBYTE3; c3 = INBYTE3;
/* JIS X 0212 */ /* JIS X 0212 */
@ -421,7 +421,7 @@ DECODER(euc_jp)
else { else {
unsigned char c2; unsigned char c2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
/* JIS X 0208 */ /* JIS X 0208 */
#ifndef STRICT_BUILD #ifndef STRICT_BUILD
@ -521,7 +521,7 @@ DECODER(shift_jis)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2; unsigned char c1, c2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1; return 1;
@ -638,7 +638,8 @@ ENCODER(shift_jis_2004)
c1 = code >> 8; c1 = code >> 8;
c2 = (code & 0xff) - 0x21; c2 = (code & 0xff) - 0x21;
if (c1 & 0x80) { /* Plane 2 */ if (c1 & 0x80) {
/* Plane 2 */
if (c1 >= 0xee) if (c1 >= 0xee)
c1 -= 0x87; c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8) else if (c1 >= 0xac || c1 == 0xa8)
@ -673,7 +674,7 @@ DECODER(shift_jis_2004)
unsigned char c1, c2; unsigned char c1, c2;
Py_UCS4 code, decoded; Py_UCS4 code, decoded;
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 1; return 1;

View File

@ -58,9 +58,10 @@ ENCODER(euc_kr)
OUTBYTE2((code & 0xFF) | 0x80); OUTBYTE2((code & 0xFF) | 0x80);
NEXT(1, 2); NEXT(1, 2);
} }
else { /* Mapping is found in CP949 extension, else {
* but we encode it in KS X 1001:1998 Annex 3, /* Mapping is found in CP949 extension,
* make-up sequence for EUC-KR. */ but we encode it in KS X 1001:1998 Annex 3,
make-up sequence for EUC-KR. */
REQUIRE_OUTBUF(8); REQUIRE_OUTBUF(8);
@ -115,14 +116,14 @@ DECODER(euc_kr)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (c == EUCKR_JAMO_FIRSTBYTE && if (c == EUCKR_JAMO_FIRSTBYTE &&
INBYTE2 == EUCKR_JAMO_FILLER) { INBYTE2 == EUCKR_JAMO_FILLER) {
/* KS X 1001:1998 Annex 3 make-up sequence */ /* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR cho, jung, jong; DBCHAR cho, jung, jong;
REQUIRE_INBUF(8) REQUIRE_INBUF(8);
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
@ -212,7 +213,7 @@ DECODER(cp949)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80)) if (TRYMAP_DEC(ksx1001, decoded, c ^ 0x80, INBYTE2 ^ 0x80))
OUTCHAR(decoded); OUTCHAR(decoded);
else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2)) else if (TRYMAP_DEC(cp949ext, decoded, c, INBYTE2))
@ -369,7 +370,7 @@ DECODER(johab)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
c2 = INBYTE2; c2 = INBYTE2;
if (c < 0xd8) { if (c < 0xd8) {

View File

@ -54,7 +54,7 @@ DECODER(big5)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) { if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
OUTCHAR(decoded); OUTCHAR(decoded);
NEXT_IN(2); NEXT_IN(2);
@ -113,7 +113,7 @@ DECODER(cp950)
continue; continue;
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2);
if (TRYMAP_DEC(cp950ext, decoded, c, INBYTE2)) if (TRYMAP_DEC(cp950ext, decoded, c, INBYTE2))
OUTCHAR(decoded); OUTCHAR(decoded);

View File

@ -113,8 +113,11 @@ static const struct dbcs_map *mapping_list;
} while (0) } while (0)
#define REQUIRE_INBUF(n) \ #define REQUIRE_INBUF(n) \
if (inleft < (n)) \ do { \
return MBERR_TOOFEW; if (inleft < (n)) \
return MBERR_TOOFEW; \
} while (0)
#define REQUIRE_OUTBUF(n) \ #define REQUIRE_OUTBUF(n) \
do { \ do { \
if (outleft < (n)) \ if (outleft < (n)) \

View File

@ -2,7 +2,7 @@
* standards. */ * standards. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID #ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1 # define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif #endif
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \ #define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \