cpython/Modules/cjkcodecs/_iso2022_jp_2.c

231 lines
6.6 KiB
C

/*
* _iso2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_jp_2.c,v 1.8 2003/12/31 05:46:55 perky Exp $
*/
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \
CHARSET_KSX1001, CHARSET_JISX0212, \
CHARSET_ISO8859_1, CHARSET_ISO8859_7
#define ISO2022_USE_G2_DESIGNATION yo!
#define ISO2022_USE_JISX0208EXT
#include "codeccommon.h"
#include "iso2022common.h"
#include "alg_jisx0201.h"
#include "alg_iso8859_1.h"
#include "alg_iso8859_7.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
ENCMAP(cp949)
DECMAP(ksx1001)
ENCMAP(gbcommon)
DECMAP(gb2312)
#define HAVE_ENCODER_INIT
ENCODER_INIT(iso2022_jp_2)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
STATE_SETG2(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
ENCODER_RESET(iso2022_jp_2)
{
if (STATE_GETG0(state) != CHARSET_ASCII) {
WRITE3(ESC, '(', 'B')
STATE_SETG0(state, CHARSET_ASCII)
NEXT_OUT(3)
}
return 0;
}
ENCODER(iso2022_jp_2)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
switch (STATE_GETG0(state)) {
case CHARSET_ASCII:
WRITE1((unsigned char)c)
NEXT(1, 1)
break;
case CHARSET_JISX0201_R:
JISX0201_R_ENCODE(c, code)
else { /* FALLTHROUGH (yay!) */
default:
WRITE3(ESC, '(', 'B')
NEXT_OUT(3)
STATE_SETG0(state, CHARSET_ASCII)
code = c;
}
WRITE1((unsigned char)code)
NEXT(1, 1)
break;
}
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
}
else UCS4INVALID(c)
else {
unsigned char charset;
charset = STATE_GETG0(state);
if (charset == CHARSET_JISX0201_R) {
code = DBCINV;
JISX0201_R_ENCODE(c, code)
if (code != DBCINV) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
}
TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) { /* MSB set: JIS X 0212 */
if (charset != CHARSET_JISX0212) {
WRITE4(ESC, '$', '(', 'D')
STATE_SETG0(state, CHARSET_JISX0212)
NEXT_OUT(4)
}
WRITE2((code >> 8) & 0x7f, code & 0x7f)
} else { /* MSB unset: JIS X 0208 */
jisx0208encode: if (charset != CHARSET_JISX0208) {
WRITE3(ESC, '$', 'B')
STATE_SETG0(state, CHARSET_JISX0208)
NEXT_OUT(3)
}
WRITE2(code >> 8, code & 0xff)
}
NEXT(1, 2)
} else TRYMAP_ENC(cp949, code, c) {
if (code & 0x8000) /* MSB set: CP949 */
return 2;
if (charset != CHARSET_KSX1001) {
WRITE4(ESC, '$', '(', 'C')
STATE_SETG0(state, CHARSET_KSX1001)
NEXT_OUT(4)
}
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
} else TRYMAP_ENC(gbcommon, code, c) {
if (code & 0x8000) /* MSB set: GBK */
return 2;
if (charset != CHARSET_GB2312) {
WRITE4(ESC, '$', '(', 'A')
STATE_SETG0(state, CHARSET_GB2312)
NEXT_OUT(4)
}
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
goto jisx0208encode;
} else {
JISX0201_R_ENCODE(c, code)
else {
/* There's no need to try to encode as ISO-8859-1 or
* ISO-8859-7 because JIS X 0212 includes them already.
*/
return 1;
}
/* if (charset == CHARSET_JISX0201_R) : already checked */
WRITE4(ESC, '(', 'J', (unsigned char)code)
STATE_SETG0(state, CHARSET_JISX0201_R)
NEXT(1, 4)
}
}
}
return 0;
}
#define HAVE_DECODER_INIT
DECODER_INIT(iso2022_jp_2)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
STATE_SETG2(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
DECODER_RESET(iso2022_jp_2)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
DECODER(iso2022_jp_2)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_JISX0212) {
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_KSX1001) {
TRYMAP_DEC(ksx1001, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_GB2312) {
TRYMAP_DEC(gb2312, **outbuf, c, c2);
else return 2;
} else
return MBERR_INTERNAL;
NEXT(2, 1)
} else if (charset == CHARSET_ASCII) {
RESERVE_OUTBUF(1)
OUT1(c)
NEXT(1, 1)
} else if (charset == CHARSET_JISX0201_R) {
RESERVE_OUTBUF(1)
JISX0201_R_DECODE(c, **outbuf)
else
return 1;
NEXT(1, 1)
} else
return MBERR_INTERNAL;
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
BEGIN_CODEC_REGISTRY(iso2022_jp_2)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
MAPOPEN(ko_KR)
IMPORTMAP_ENC(cp949)
IMPORTMAP_DEC(ksx1001)
MAPCLOSE()
MAPOPEN(zh_CN)
IMPORTMAP_ENC(gbcommon)
IMPORTMAP_DEC(gb2312)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_jp_2)