Bring CJKCodecs 1.1 into trunk. This completely reorganizes the source
and installed layouts to make maintenance simple and easy. It also adds four new codecs: big5hkscs, euc-jis-2004, shift-jis-2004 and iso2022-jp-2004.
This commit is contained in:
parent
264c659197
commit
2bb146f2f4
|
@ -548,6 +548,10 @@ exist:
|
|||
{big5-tw, csbig5}
|
||||
{Traditional Chinese}
|
||||
|
||||
\lineiii{big5hkscs}
|
||||
{big5-hkscs, hkscs}
|
||||
{Traditional Chinese}
|
||||
|
||||
\lineiii{cp037}
|
||||
{IBM037, IBM039}
|
||||
{English}
|
||||
|
@ -696,8 +700,12 @@ exist:
|
|||
{eucjp, ujis, u-jis}
|
||||
{Japanese}
|
||||
|
||||
\lineiii{euc_jis_2004}
|
||||
{jisx0213, eucjis2004}
|
||||
{Japanese}
|
||||
|
||||
\lineiii{euc_jisx0213}
|
||||
{jisx0213, eucjisx0213}
|
||||
{eucjisx0213}
|
||||
{Japanese}
|
||||
|
||||
\lineiii{euc_kr}
|
||||
|
@ -733,6 +741,10 @@ exist:
|
|||
{iso2022jp-2, iso-2022-jp-2}
|
||||
{Japanese, Korean, Simplified Chinese, Western Europe, Greek}
|
||||
|
||||
\lineiii{iso2022_jp_2004}
|
||||
{iso2022jp-2004, iso-2022-jp-2004}
|
||||
{Japanese}
|
||||
|
||||
\lineiii{iso2022_jp_3}
|
||||
{iso2022jp-3, iso-2022-jp-3}
|
||||
{Japanese}
|
||||
|
@ -841,6 +853,10 @@ exist:
|
|||
{csshiftjis, shiftjis, sjis, s_jis}
|
||||
{Japanese}
|
||||
|
||||
\lineiii{shift_jis_2004}
|
||||
{shiftjis2004, sjis_2004, s_jis_2004}
|
||||
{Japanese}
|
||||
|
||||
\lineiii{shift_jisx0213}
|
||||
{shiftjisx0213, sjisx0213, s_jisx0213}
|
||||
{Japanese}
|
||||
|
|
|
@ -39,6 +39,10 @@ aliases = {
|
|||
'big5_tw' : 'big5',
|
||||
'csbig5' : 'big5',
|
||||
|
||||
# big5hkscs codec
|
||||
'big5_hkscs' : 'big5hkscs',
|
||||
'hkscs' : 'big5hkscs',
|
||||
|
||||
# bz2_codec codec
|
||||
'bz2' : 'bz2_codec',
|
||||
|
||||
|
@ -197,8 +201,12 @@ aliases = {
|
|||
'950' : 'cp950',
|
||||
'ms950' : 'cp950',
|
||||
|
||||
# euc_jis_2004 codec
|
||||
'jisx0213' : 'euc_jis_2004',
|
||||
'eucjis2004' : 'euc_jis_2004',
|
||||
'euc_jis2004' : 'euc_jis_2004',
|
||||
|
||||
# euc_jisx0213 codec
|
||||
'jisx0213' : 'euc_jisx0213',
|
||||
'eucjisx0213' : 'euc_jisx0213',
|
||||
|
||||
# euc_jp codec
|
||||
|
@ -254,6 +262,10 @@ aliases = {
|
|||
'iso2022jp_2' : 'iso2022_jp_2',
|
||||
'iso_2022_jp_2' : 'iso2022_jp_2',
|
||||
|
||||
# iso2022_jp_2004 codec
|
||||
'iso_2022_jp_2004' : 'iso2022_jp_2004',
|
||||
'iso2022jp_2004' : 'iso2022_jp_2004',
|
||||
|
||||
# iso2022_jp_3 codec
|
||||
'iso2022jp_3' : 'iso2022_jp_3',
|
||||
'iso_2022_jp_3' : 'iso2022_jp_3',
|
||||
|
@ -416,6 +428,11 @@ aliases = {
|
|||
'sjis' : 'shift_jis',
|
||||
's_jis' : 'shift_jis',
|
||||
|
||||
# shift_jis_2004 codec
|
||||
'shiftjis2004' : 'shift_jis_2004',
|
||||
'sjis_2004' : 'shift_jis_2004',
|
||||
's_jis_2004' : 'shift_jis_2004',
|
||||
|
||||
# shift_jisx0213 codec
|
||||
'shiftjisx0213' : 'shift_jisx0213',
|
||||
'sjisx0213' : 'shift_jisx0213',
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# big5.py: Python Unicode Codec for BIG5
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: big5.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_big5 import codec
|
||||
import codecs
|
||||
import _codecs_tw, codecs
|
||||
|
||||
codec = _codecs_tw.getcodec('big5')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# big5hkscs.py: Python Unicode Codec for BIG5HKSCS
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $
|
||||
#
|
||||
|
||||
import _codecs_hk, codecs
|
||||
|
||||
codec = _codecs_hk.getcodec('big5hkscs')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
decode = codec.decode
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamReader.__init__(self, stream, errors)
|
||||
__codec = codec.StreamReader(stream, errors)
|
||||
self.read = __codec.read
|
||||
self.readline = __codec.readline
|
||||
self.readlines = __codec.readlines
|
||||
self.reset = __codec.reset
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamWriter.__init__(self, stream, errors)
|
||||
__codec = codec.StreamWriter(stream, errors)
|
||||
self.write = __codec.write
|
||||
self.writelines = __codec.writelines
|
||||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
|
@ -2,11 +2,12 @@
|
|||
# cp932.py: Python Unicode Codec for CP932
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: cp932.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_cp932 import codec
|
||||
import codecs
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('cp932')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# cp949.py: Python Unicode Codec for CP949
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: cp949.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_cp949 import codec
|
||||
import codecs
|
||||
import _codecs_kr, codecs
|
||||
|
||||
codec = _codecs_kr.getcodec('cp949')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# cp950.py: Python Unicode Codec for CP950
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: cp950.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_cp950 import codec
|
||||
import codecs
|
||||
import _codecs_tw, codecs
|
||||
|
||||
codec = _codecs_tw.getcodec('cp950')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
|
||||
#
|
||||
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('euc_jis_2004')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
decode = codec.decode
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamReader.__init__(self, stream, errors)
|
||||
__codec = codec.StreamReader(stream, errors)
|
||||
self.read = __codec.read
|
||||
self.readline = __codec.readline
|
||||
self.readlines = __codec.readlines
|
||||
self.reset = __codec.reset
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamWriter.__init__(self, stream, errors)
|
||||
__codec = codec.StreamWriter(stream, errors)
|
||||
self.write = __codec.write
|
||||
self.writelines = __codec.writelines
|
||||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
|
@ -2,11 +2,12 @@
|
|||
# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: euc_jisx0213.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_euc_jisx0213 import codec
|
||||
import codecs
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('euc_jisx0213')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# euc_jp.py: Python Unicode Codec for EUC_JP
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: euc_jp.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_euc_jp import codec
|
||||
import codecs
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('euc_jp')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# euc_kr.py: Python Unicode Codec for EUC_KR
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: euc_kr.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_euc_kr import codec
|
||||
import codecs
|
||||
import _codecs_kr, codecs
|
||||
|
||||
codec = _codecs_kr.getcodec('euc_kr')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# gb18030.py: Python Unicode Codec for GB18030
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: gb18030.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_gb18030 import codec
|
||||
import codecs
|
||||
import _codecs_cn, codecs
|
||||
|
||||
codec = _codecs_cn.getcodec('gb18030')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# gb2312.py: Python Unicode Codec for GB2312
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: gb2312.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_gb2312 import codec
|
||||
import codecs
|
||||
import _codecs_cn, codecs
|
||||
|
||||
codec = _codecs_cn.getcodec('gb2312')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# gbk.py: Python Unicode Codec for GBK
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: gbk.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_gbk import codec
|
||||
import codecs
|
||||
import _codecs_cn, codecs
|
||||
|
||||
codec = _codecs_cn.getcodec('gbk')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# hz.py: Python Unicode Codec for HZ
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: hz.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_hz import codec
|
||||
import codecs
|
||||
import _codecs_cn, codecs
|
||||
|
||||
codec = _codecs_cn.getcodec('hz')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#
|
||||
# iso2022_jp.py: Python Unicode Codec for ISO_2022_JP
|
||||
# iso2022_jp.py: Python Unicode Codec for ISO2022_JP
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_jp.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_iso2022_jp import codec
|
||||
import codecs
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_jp')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#
|
||||
# iso2022_jp_1.py: Python Unicode Codec for ISO_2022_JP_1
|
||||
# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_jp_1.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_iso2022_jp_1 import codec
|
||||
import codecs
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_jp_1')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#
|
||||
# iso2022_jp_2.py: Python Unicode Codec for ISO_2022_JP_2
|
||||
# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_jp_2.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_iso2022_jp_2 import codec
|
||||
import codecs
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_jp_2')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
|
||||
#
|
||||
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
decode = codec.decode
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamReader.__init__(self, stream, errors)
|
||||
__codec = codec.StreamReader(stream, errors)
|
||||
self.read = __codec.read
|
||||
self.readline = __codec.readline
|
||||
self.readlines = __codec.readlines
|
||||
self.reset = __codec.reset
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamWriter.__init__(self, stream, errors)
|
||||
__codec = codec.StreamWriter(stream, errors)
|
||||
self.write = __codec.write
|
||||
self.writelines = __codec.writelines
|
||||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
|
@ -1,12 +1,13 @@
|
|||
#
|
||||
# iso2022_jp_3.py: Python Unicode Codec for ISO_2022_JP_3
|
||||
# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_jp_3.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_iso2022_jp_3 import codec
|
||||
import codecs
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_jp_3')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#
|
||||
# iso2022_jp_ext.py: Python Unicode Codec for ISO_2022_JP_EXT
|
||||
# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_jp_ext.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_iso2022_jp_ext import codec
|
||||
import codecs
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#
|
||||
# iso2022_kr.py: Python Unicode Codec for ISO_2022_KR
|
||||
# iso2022_kr.py: Python Unicode Codec for ISO2022_KR
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: iso2022_kr.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_iso2022_kr import codec
|
||||
import codecs
|
||||
import _codecs_iso2022, codecs
|
||||
|
||||
codec = _codecs_iso2022.getcodec('iso2022_kr')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# johab.py: Python Unicode Codec for JOHAB
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: johab.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_johab import codec
|
||||
import codecs
|
||||
import _codecs_kr, codecs
|
||||
|
||||
codec = _codecs_kr.getcodec('johab')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
# shift_jis.py: Python Unicode Codec for SHIFT_JIS
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: shift_jis.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_shift_jis import codec
|
||||
import codecs
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('shift_jis')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#
|
||||
# shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
|
||||
#
|
||||
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('shift_jis_2004')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
decode = codec.decode
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamReader.__init__(self, stream, errors)
|
||||
__codec = codec.StreamReader(stream, errors)
|
||||
self.read = __codec.read
|
||||
self.readline = __codec.readline
|
||||
self.readlines = __codec.readlines
|
||||
self.reset = __codec.reset
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
|
||||
def __init__(self, stream, errors='strict'):
|
||||
codecs.StreamWriter.__init__(self, stream, errors)
|
||||
__codec = codec.StreamWriter(stream, errors)
|
||||
self.write = __codec.write
|
||||
self.writelines = __codec.writelines
|
||||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
|
@ -2,11 +2,12 @@
|
|||
# shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213
|
||||
#
|
||||
# Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
# $CJKCodecs: shift_jisx0213.py,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
|
||||
#
|
||||
|
||||
from _codecs_shift_jisx0213 import codec
|
||||
import codecs
|
||||
import _codecs_jp, codecs
|
||||
|
||||
codec = _codecs_jp.getcodec('shift_jisx0213')
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
encode = codec.encode
|
||||
|
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
|
|||
self.reset = __codec.reset
|
||||
|
||||
def getregentry():
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
|
|
@ -63,6 +63,9 @@ teststring = {
|
|||
"\x88\x91\xe5\x80\x91\xe6\x89\x80\x0a\xe8\xa6\x81\xe8\xa8\x8e\xe8"
|
||||
"\xab\x96\xe7\x9a\x84\xe5\x95\x8f\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98"
|
||||
"\xaf\x3a\x0a\x0a"),
|
||||
'big5hkscs': (
|
||||
"\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a",
|
||||
"\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a"),
|
||||
'cp949': (
|
||||
"\x8c\x63\xb9\xe6\xb0\xa2\xc7\xcf\x20\xbc\x84\xbd\xc3\xc4\xdd\xb6"
|
||||
"\xf3\x0a\x0a\xa8\xc0\xa8\xc0\xb3\xb3\x21\x21\x20\xec\xd7\xce\xfa"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecencodings_cn.py
|
||||
# Codec encoding tests for PRC encodings.
|
||||
#
|
||||
# $CJKCodecs: test_codecencodings_cn.py,v 1.1 2003/12/19 03:00:05 perky Exp $
|
||||
# $CJKCodecs: test_codecencodings_cn.py,v 1.2 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
@ -27,8 +27,8 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
tstring = test_multibytecodec_support.load_teststring('gbk')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# test_codecencodings_hk.py
|
||||
# Codec encoding tests for Hong Kong encodings.
|
||||
#
|
||||
# $CJKCodecs: test_codecencodings_hk.py,v 1.1 2004/07/10 17:35:20 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
import unittest
|
||||
|
||||
class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
encoding = 'big5hkscs'
|
||||
tstring = test_multibytecodec_support.load_teststring('big5hkscs')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
("abc\x80\x80\xc1\xc4", "strict", None),
|
||||
("abc\xc8", "strict", None),
|
||||
("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
|
||||
("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
|
||||
("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
suite = unittest.TestSuite()
|
||||
suite.addTest(unittest.makeSuite(Test_Big5HKSCS))
|
||||
test_support.run_suite(suite)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecencodings_jp.py
|
||||
# Codec encoding tests for Japanese encodings.
|
||||
#
|
||||
# $CJKCodecs: test_codecencodings_jp.py,v 1.2 2004/01/06 09:25:37 perky Exp $
|
||||
# $CJKCodecs: test_codecencodings_jp.py,v 1.3 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
@ -64,16 +64,6 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
|||
(u"\u203e", "strict", "\x7e"),
|
||||
)
|
||||
|
||||
class Test_EUC_JP_STRICT(test_multibytecodec_support.TestBase,
|
||||
unittest.TestCase):
|
||||
encoding = 'euc_jp_strict'
|
||||
tstring = test_multibytecodec_support.load_teststring('euc_jp')
|
||||
codectests = eucjp_commontests + (
|
||||
("\xa1\xc0\\", "strict", u"\\\\"),
|
||||
(u"\xa5", "strict", None),
|
||||
(u"\u203e", "strict", None),
|
||||
)
|
||||
|
||||
shiftjis_commonenctests = (
|
||||
("abc\x80\x80\x82\x84", "strict", None),
|
||||
("abc\xf8", "strict", None),
|
||||
|
@ -90,14 +80,6 @@ class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
|
||||
)
|
||||
|
||||
class Test_SJIS_STRICT(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
encoding = 'shift_jis_strict'
|
||||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||
codectests = shiftjis_commonenctests + (
|
||||
("\\\x7e", "replace", u"\xa5\u203e"),
|
||||
("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
|
||||
)
|
||||
|
||||
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
encoding = 'shift_jisx0213'
|
||||
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
|
||||
|
@ -123,9 +105,6 @@ def test_main():
|
|||
suite.addTest(unittest.makeSuite(Test_EUC_JISX0213))
|
||||
suite.addTest(unittest.makeSuite(Test_EUC_JP_COMPAT))
|
||||
suite.addTest(unittest.makeSuite(Test_SJIS_COMPAT))
|
||||
if test_multibytecodec_support.__cjkcodecs__:
|
||||
suite.addTest(unittest.makeSuite(Test_EUC_JP_STRICT))
|
||||
suite.addTest(unittest.makeSuite(Test_SJIS_STRICT))
|
||||
suite.addTest(unittest.makeSuite(Test_SJISX0213))
|
||||
test_support.run_suite(suite)
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecencodings_kr.py
|
||||
# Codec encoding tests for ROK encodings.
|
||||
#
|
||||
# $CJKCodecs: test_codecencodings_kr.py,v 1.1 2003/12/19 03:00:06 perky Exp $
|
||||
# $CJKCodecs: test_codecencodings_kr.py,v 1.2 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecencodings_tw.py
|
||||
# Codec encoding tests for ROC encodings.
|
||||
#
|
||||
# $CJKCodecs: test_codecencodings_tw.py,v 1.1 2003/12/19 03:00:06 perky Exp $
|
||||
# $CJKCodecs: test_codecencodings_tw.py,v 1.2 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecmaps_cn.py
|
||||
# Codec mapping tests for PRC encodings
|
||||
#
|
||||
# $CJKCodecs: test_codecmaps_cn.py,v 1.2 2004/01/17 12:47:19 perky Exp $
|
||||
# $CJKCodecs: test_codecmaps_cn.py,v 1.3 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# test_codecmaps_hk.py
|
||||
# Codec mapping tests for Hong Kong encodings
|
||||
#
|
||||
# $CJKCodecs: test_codecmaps_hk.py,v 1.1 2004/07/10 17:35:20 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
import unittest
|
||||
|
||||
class TestBig5HKSCSMap(test_multibytecodec_support.TestBase_Mapping,
|
||||
unittest.TestCase):
|
||||
encoding = 'big5hkscs'
|
||||
mapfilename = 'BIG5HKSCS.TXT'
|
||||
mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT'
|
||||
|
||||
def test_main():
|
||||
suite = unittest.TestSuite()
|
||||
suite.addTest(unittest.makeSuite(TestBig5HKSCSMap))
|
||||
test_support.run_suite(suite)
|
||||
|
||||
test_multibytecodec_support.register_skip_expected(TestBig5HKSCSMap)
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecmaps_jp.py
|
||||
# Codec mapping tests for Japanese encodings
|
||||
#
|
||||
# $CJKCodecs: test_codecmaps_jp.py,v 1.2 2004/01/17 12:47:19 perky Exp $
|
||||
# $CJKCodecs: test_codecmaps_jp.py,v 1.3 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
@ -48,15 +48,6 @@ class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
|
|||
('\x81_', u'\\'),
|
||||
]
|
||||
|
||||
|
||||
class TestSJISSTRICTMap(test_multibytecodec_support.TestBase_Mapping,
|
||||
unittest.TestCase):
|
||||
encoding = 'shift_jis_strict'
|
||||
mapfilename = 'SHIFTJIS.TXT'
|
||||
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \
|
||||
'/EASTASIA/JIS/SHIFTJIS.TXT'
|
||||
|
||||
|
||||
class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
|
||||
unittest.TestCase):
|
||||
encoding = 'euc_jisx0213'
|
||||
|
@ -76,8 +67,6 @@ def test_main():
|
|||
suite.addTest(unittest.makeSuite(TestCP932Map))
|
||||
suite.addTest(unittest.makeSuite(TestEUCJPCOMPATMap))
|
||||
suite.addTest(unittest.makeSuite(TestSJISCOMPATMap))
|
||||
if test_multibytecodec_support.__cjkcodecs__:
|
||||
suite.addTest(unittest.makeSuite(TestSJISSTRICTMap))
|
||||
suite.addTest(unittest.makeSuite(TestEUCJISX0213Map))
|
||||
suite.addTest(unittest.makeSuite(TestSJISX0213Map))
|
||||
test_support.run_suite(suite)
|
||||
|
@ -85,5 +74,6 @@ def test_main():
|
|||
test_multibytecodec_support.register_skip_expected(TestCP932Map,
|
||||
TestEUCJPCOMPATMap, TestSJISCOMPATMap, TestEUCJISX0213Map,
|
||||
TestSJISX0213Map)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecmaps_kr.py
|
||||
# Codec mapping tests for ROK encodings
|
||||
#
|
||||
# $CJKCodecs: test_codecmaps_kr.py,v 1.2 2004/01/17 12:47:19 perky Exp $
|
||||
# $CJKCodecs: test_codecmaps_kr.py,v 1.3 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_codecmaps_tw.py
|
||||
# Codec mapping tests for ROC encodings
|
||||
#
|
||||
# $CJKCodecs: test_codecmaps_tw.py,v 1.2 2004/01/17 12:47:19 perky Exp $
|
||||
# $CJKCodecs: test_codecmaps_tw.py,v 1.3 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_multibytecodec.py
|
||||
# Unit test for multibytecodec itself
|
||||
#
|
||||
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $
|
||||
# $CJKCodecs: test_multibytecodec.py,v 1.8 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
from test import test_support
|
||||
from test import test_multibytecodec_support
|
||||
|
@ -69,6 +69,12 @@ class Test_StreamWriter(unittest.TestCase):
|
|||
def test_str_decode(self):
    # Encoding a plain ASCII str with gb18030 must give back the same bytes.
    encoded = 'abcd'.encode('gb18030')
    self.assertEqual(encoded, 'abcd')
|
||||
|
||||
def test_streamwriter_strwrite(self):
    # Writing a plain str through a gb18030 StreamWriter must leave the
    # same ASCII text in the underlying stream.
    sink = StringIO.StringIO()
    writer = codecs.getwriter('gb18030')(sink)
    writer.write('abcd')
    self.assertEqual(sink.getvalue(), 'abcd')
|
||||
|
||||
def test_main():
|
||||
suite = unittest.TestSuite()
|
||||
suite.addTest(unittest.makeSuite(Test_StreamWriter))
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# test_multibytecodec_support.py
|
||||
# Common Unittest Routines for CJK codecs
|
||||
#
|
||||
# $CJKCodecs: test_multibytecodec_support.py,v 1.5 2004/01/17 12:47:19 perky Exp $
|
||||
# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
|
||||
|
||||
import sys, codecs, os.path
|
||||
import unittest
|
||||
|
@ -164,13 +164,8 @@ class TestBase_Mapping(unittest.TestCase):
|
|||
def __init__(self, *args, **kw):
|
||||
unittest.TestCase.__init__(self, *args, **kw)
|
||||
if not os.path.exists(self.mapfilename):
|
||||
parent = os.path.join(os.pardir, self.mapfilename)
|
||||
if not os.path.exists(parent):
|
||||
format = '%s not found, download from %s'
|
||||
raise test_support.TestSkipped(format %
|
||||
(self.mapfilename, self.mapfileurl))
|
||||
else:
|
||||
self.mapfilename = parent
|
||||
raise test_support.TestSkipped('%s not found, download from %s' %
|
||||
(self.mapfilename, self.mapfileurl))
|
||||
|
||||
def test_mapping_file(self):
|
||||
unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
|
||||
|
|
|
@ -34,6 +34,9 @@ Extension modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- Several new unicode codecs are added: big5hkscs, euc_jis_2004,
|
||||
iso2022_jp_2004, shift_jis_2004.
|
||||
|
||||
- Bug #788520. Queue.{get, get_nowait, put, put_nowait} have new
|
||||
implementations, exploiting Conditions (which didn't exist at the time
|
||||
Queue was introduced). A minor semantic change is that the Full and
|
||||
|
|
|
@ -484,42 +484,12 @@ GLHACK=-Dclear=__GLclear
|
|||
# multibytecodec is required for all the other CJK codec modules
|
||||
#_multibytecodec cjkcodecs/multibytecodec.c
|
||||
|
||||
# mapdata modules are required to support their respective dependent codecs
|
||||
#_codecs_mapdata_ja_JP cjkcodecs/mapdata_ja_JP.c
|
||||
#_codecs_mapdata_ko_KR cjkcodecs/mapdata_ko_KR.c
|
||||
#_codecs_mapdata_zh_CN cjkcodecs/mapdata_zh_CN.c
|
||||
#_codecs_mapdata_zh_TW cjkcodecs/mapdata_zh_TW.c
|
||||
|
||||
# ja_JP codecs
|
||||
#_codecs_cp932 cjkcodecs/_cp932.c
|
||||
#_codecs_euc_jisx0213 cjkcodecs/_euc_jisx0213.c
|
||||
#_codecs_euc_jp cjkcodecs/_euc_jp.c
|
||||
#_codecs_iso2022_jp cjkcodecs/_iso2022_jp.c
|
||||
#_codecs_iso2022_jp_1 cjkcodecs/_iso2022_jp_1.c
|
||||
#_codecs_iso2022_jp_3 cjkcodecs/_iso2022_jp_3.c
|
||||
#_codecs_iso2022_jp_ext cjkcodecs/_iso2022_jp_ext.c
|
||||
#_codecs_shift_jis cjkcodecs/_shift_jis.c
|
||||
#_codecs_shift_jisx0213 cjkcodecs/_shift_jisx0213.c
|
||||
|
||||
# ko_KR codecs
|
||||
#_codecs_cp949 cjkcodecs/_cp949.c
|
||||
#_codecs_euc_kr cjkcodecs/_euc_kr.c
|
||||
#_codecs_iso2022_kr cjkcodecs/_iso2022_kr.c
|
||||
#_codecs_johab cjkcodecs/_johab.c
|
||||
|
||||
# zh_CN codecs
|
||||
#_codecs_gb18030 cjkcodecs/_gb18030.c
|
||||
#_codecs_gb2312 cjkcodecs/_gb2312.c
|
||||
#_codecs_gbk cjkcodecs/_gbk.c
|
||||
#_codecs_hz cjkcodecs/_hz.c
|
||||
|
||||
# zh_TW codecs
|
||||
#_codecs_big5 cjkcodecs/_big5.c
|
||||
#_codecs_cp950 cjkcodecs/_cp950.c
|
||||
|
||||
# international codecs
|
||||
#_codecs_iso2022_jp_2 cjkcodecs/_iso2022_jp_2.c # requires ja_JP, ko_KR, zh_CN
|
||||
|
||||
#_codecs_cn cjkcodecs/_codecs_cn.c
|
||||
#_codecs_hk cjkcodecs/_codecs_hk.c
|
||||
#_codecs_iso2022 cjkcodecs/_codecs_iso2022.c
|
||||
#_codecs_jp cjkcodecs/_codecs_jp.c
|
||||
#_codecs_kr cjkcodecs/_codecs_kr.c
|
||||
#_codecs_tw cjkcodecs/_codecs_tw.c
|
||||
|
||||
# Example -- included for reference only:
|
||||
# xx xxmodule.c
|
||||
|
|
|
@ -2,7 +2,7 @@ Notes on cjkcodecs
|
|||
-------------------
|
||||
This directory contains source files for cjkcodecs extension modules.
|
||||
They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs)
|
||||
as of Jan 17 2004 currently.
|
||||
as of Jul 18 2004 currently.
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
/*
|
||||
* _big5.c: the Big5 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _big5.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(big5)
|
||||
DECMAP(big5)
|
||||
|
||||
ENCODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
RESERVE_OUTBUF(1)
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
(*outbuf)[0] = code >> 8;
|
||||
(*outbuf)[1] = code & 0xFF;
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(big5)
|
||||
MAPOPEN(zh_TW)
|
||||
IMPORTMAP_ENCDEC(big5)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(big5)
|
|
@ -0,0 +1,434 @@
|
|||
/*
|
||||
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _codecs_cn.c,v 1.8 2004/07/07 14:59:26 perky Exp $
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_cn.h"
|
||||
|
||||
#define GBK_PREDECODE(dc1, dc2, assi) \
|
||||
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
|
||||
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
|
||||
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7;
|
||||
#define GBK_PREENCODE(code, assi) \
|
||||
if ((code) == 0x2014) (assi) = 0xa1aa; \
|
||||
else if ((code) == 0x2015) (assi) = 0xa844; \
|
||||
else if ((code) == 0x00b7) (assi) = 0xa1a4;
|
||||
|
||||
/*
|
||||
* GB2312 codec
|
||||
*/
|
||||
|
||||
ENCODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GBK codec
|
||||
*/
|
||||
|
||||
ENCODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
GBK_PREENCODE(c, code)
|
||||
else TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
GBK_PREDECODE(c, IN2, **outbuf)
|
||||
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GB18030 codec
|
||||
*/
|
||||
|
||||
ENCODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
if (c > 0x10FFFF)
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
return 2; /* surrogates pair */
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
else if (c >= 0x10000) {
|
||||
ucs4_t tc = c - 0x10000;
|
||||
|
||||
REQUIRE_OUTBUF(4)
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)(tc + 0x90))
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
NEXT(2, 4) /* surrogates pair */
|
||||
#else
|
||||
NEXT(1, 4)
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
GBK_PREENCODE(c, code)
|
||||
else TRYMAP_ENC(gbcommon, code, c);
|
||||
else TRYMAP_ENC(gb18030ext, code, c);
|
||||
else {
|
||||
const struct _gb18030_to_unibmp_ranges *utrrange;
|
||||
|
||||
REQUIRE_OUTBUF(4)
|
||||
|
||||
for (utrrange = gb18030_to_unibmp_ranges;
|
||||
utrrange->first != 0;
|
||||
utrrange++)
|
||||
if (utrrange->first <= c &&
|
||||
c <= utrrange->last) {
|
||||
Py_UNICODE tc;
|
||||
|
||||
tc = c - utrrange->first +
|
||||
utrrange->base;
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)tc + 0x81)
|
||||
|
||||
NEXT(1, 4)
|
||||
break;
|
||||
}
|
||||
|
||||
if (utrrange->first == 0) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"unicode mapping invalid");
|
||||
return 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
c2 = IN2;
|
||||
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
|
||||
const struct _gb18030_to_unibmp_ranges *utr;
|
||||
unsigned char c3, c4;
|
||||
ucs4_t lseq;
|
||||
|
||||
REQUIRE_INBUF(4)
|
||||
c3 = IN3;
|
||||
c4 = IN4;
|
||||
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
|
||||
return 4;
|
||||
c -= 0x81; c2 -= 0x30;
|
||||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
if (c < 4) { /* U+0080 - U+FFFF */
|
||||
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
if (lseq < 39420) {
|
||||
for (utr = gb18030_to_unibmp_ranges;
|
||||
lseq >= (utr + 1)->base;
|
||||
utr++) ;
|
||||
OUT1(utr->first - utr->base + lseq)
|
||||
NEXT(4, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c >= 15) { /* U+10000 - U+10FFFF */
|
||||
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
|
||||
* 1260 + (ucs4_t)c3 * 10 + c4;
|
||||
if (lseq <= 0x10FFFF) {
|
||||
WRITEUCS4(lseq);
|
||||
NEXT_IN(4)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
GBK_PREDECODE(c, c2, **outbuf)
|
||||
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
|
||||
else TRYMAP_DEC(gbkext, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HZ codec
|
||||
*/
|
||||
|
||||
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
WRITE2('~', '}')
|
||||
state->i = 0;
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i == 0) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
else {
|
||||
WRITE3('~', '}', (unsigned char)c)
|
||||
NEXT(1, 3)
|
||||
state->i = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
WRITE4('~', '{', code >> 8, code & 0xff)
|
||||
NEXT(1, 4)
|
||||
state->i = 1;
|
||||
}
|
||||
else {
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
if (c == '~') {
|
||||
unsigned char c2 = IN2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
if (c2 == '~') {
|
||||
WRITE1('~')
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else if (c2 == '\n')
|
||||
; /* line-continuation */
|
||||
else
|
||||
return 2;
|
||||
NEXT(2, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
else { /* GB mode */
|
||||
REQUIRE_INBUF(2)
|
||||
REQUIRE_OUTBUF(1)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(gb2312)
|
||||
MAPPING_DECONLY(gbkext)
|
||||
MAPPING_ENCONLY(gbcommon)
|
||||
MAPPING_ENCDEC(gb18030ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(gb2312)
|
||||
CODEC_STATELESS(gbk)
|
||||
CODEC_STATELESS(gb18030)
|
||||
CODEC_STATEFUL(hz)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(cn)
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _codecs_hk.c,v 1.3 2004/07/07 14:59:26 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_IMPORTED_MAPS
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_hk.h"
|
||||
|
||||
/*
|
||||
* BIG5HKSCS codec
|
||||
*/
|
||||
|
||||
static const encode_map *big5_encmap = NULL;
|
||||
static const decode_map *big5_decmap = NULL;
|
||||
|
||||
CODEC_INIT(big5hkscs)
|
||||
{
|
||||
static int initialized = 0;
|
||||
|
||||
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
|
||||
return -1;
|
||||
initialized = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(big5hkscs)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = **inbuf;
|
||||
DBCHAR code;
|
||||
int insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
REQUIRE_OUTBUF(1)
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
if (c < 0x10000) {
|
||||
TRYMAP_ENC(big5hkscs_bmp, code, c);
|
||||
else TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
}
|
||||
else if (c < 0x20000)
|
||||
return insize;
|
||||
else if (c < 0x30000) {
|
||||
TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
|
||||
else return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
|
||||
|
||||
DECODER(big5hkscs)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t decoded;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
|
||||
goto hkscsdec;
|
||||
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
|
||||
int s = BH2S(c, IN2);
|
||||
const unsigned char *hintbase;
|
||||
|
||||
assert(0x88 <= c && c <= 0xfe);
|
||||
assert(0x40 <= IN2 && IN2 <= 0xfe);
|
||||
|
||||
if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
|
||||
hintbase = big5hkscs_phint_0;
|
||||
s -= BH2S(0x88, 0x40);
|
||||
}
|
||||
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
|
||||
hintbase = big5hkscs_phint_11939;
|
||||
s -= BH2S(0xc6, 0xa1);
|
||||
}
|
||||
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
|
||||
hintbase = big5hkscs_phint_21733;
|
||||
s -= BH2S(0xf9, 0xd6);
|
||||
}
|
||||
else
|
||||
return MBERR_INTERNAL;
|
||||
|
||||
if (hintbase[s >> 3] & (1 << (s & 7))) {
|
||||
WRITEUCS4(decoded | 0x20000)
|
||||
NEXT_IN(2)
|
||||
}
|
||||
else {
|
||||
OUT1(decoded)
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(big5hkscs)
|
||||
MAPPING_ENCONLY(big5hkscs_bmp)
|
||||
MAPPING_ENCONLY(big5hkscs_nonbmp)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS_WINIT(big5hkscs)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(hk)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,731 @@
|
|||
/*
|
||||
* _codecs_jp.c: Codecs collection for Japanese encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _codecs_jp.c,v 1.14 2004/07/07 17:54:47 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_jp.h"
|
||||
#include "mappings_jisx0213_pair.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "emu_jisx0213_2000.h"
|
||||
|
||||
/*
|
||||
* CP932 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
if (c <= 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
WRITE1(c - 0xfec0)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xf8f0 && c <= 0xf8f3) {
|
||||
/* Windows compatability */
|
||||
REQUIRE_OUTBUF(1)
|
||||
if (c == 0xf8f0)
|
||||
OUT1(0xa0)
|
||||
else
|
||||
OUT1(c - 0xfef1 + 0xfd)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(cp932ext, code, c) {
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
}
|
||||
else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
|
||||
/* JIS X 0208 */
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
}
|
||||
else if (c >= 0xe000 && c < 0xe758) {
|
||||
/* User-defined area */
|
||||
c1 = (Py_UNICODE)(c - 0xe000) / 188;
|
||||
c2 = (Py_UNICODE)(c - 0xe000) % 188;
|
||||
OUT1(c1 + 0xf0)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
if (c <= 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xa0 && c <= 0xdf) {
|
||||
if (c == 0xa0)
|
||||
OUT1(0xf8f0) /* half-width katakana */
|
||||
else
|
||||
OUT1(0xfec0 + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xfd/* && c <= 0xff*/) {
|
||||
/* Windows compatibility */
|
||||
OUT1(0xf8f1 - 0xfd + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
}
|
||||
else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) ||
|
||||
(c2 >= 0x80 && c2 <= 0xfc))
|
||||
OUT1(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EUC-JIS-2004 codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
int insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (c <= 0xFFFF) {
|
||||
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, (*inbuf)[1],
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
else if (c == 0xff3c)
|
||||
/* F/W REVERSE SOLIDUS (see NOTES) */
|
||||
code = 0x2140;
|
||||
else if (c == 0xff5e)
|
||||
/* F/W TILDE (see NOTES) */
|
||||
code = 0x2232;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c >> 16 == EMPBASE >> 16) {
|
||||
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* Codeset 2 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 3)
|
||||
} else {
|
||||
/* Codeset 1 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3)
|
||||
c2 = IN2 ^ 0x80;
|
||||
c3 = IN3 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(3)
|
||||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
|
||||
else return 3;
|
||||
NEXT(3, 1)
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c ^= 0x80;
|
||||
c2 = IN2 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 1 */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
|
||||
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
continue;
|
||||
}
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EUC-JP codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else if (c == 0xa5) { /* YEN SIGN */
|
||||
WRITE1(0x5c);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c == 0x203e) { /* OVERLINE */
|
||||
WRITE1(0x7e);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* JIS X 0212 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 3)
|
||||
} else {
|
||||
/* JIS X 0208 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3)
|
||||
c2 = IN2;
|
||||
c3 = IN3;
|
||||
/* JIS X 0212 */
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
|
||||
NEXT(3, 1)
|
||||
}
|
||||
else
|
||||
return 3;
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
/* JIS X 0208 */
|
||||
#ifndef STRICT_BUILD
|
||||
if (c == 0xa1 && c2 == 0xc0)
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf,
|
||||
c ^ 0x80, c2 ^ 0x80) ;
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SHIFT_JIS codec
|
||||
*/
|
||||
|
||||
ENCODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
#else
|
||||
if (c < 0x80) code = c;
|
||||
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
|
||||
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
|
||||
#endif
|
||||
else JISX0201_K_ENCODE(c, code)
|
||||
else UCS4INVALID(c)
|
||||
else code = NOCHAR;
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
OUT1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
if (code == NOCHAR) {
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c)
|
||||
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
#else
|
||||
if (c < 0x80) **outbuf = c;
|
||||
#endif
|
||||
else JISX0201_K_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
unsigned char c1, c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
#ifndef STRICT_BUILD
|
||||
if (c1 == 0x21 && c2 == 0x40) {
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUT1(0xff3c)
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SHIFT_JIS-2004 codec
|
||||
*/
|
||||
|
||||
ENCODER(shift_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code = NOCHAR;
|
||||
int c1, c2;
|
||||
size_t insize;
|
||||
|
||||
JISX0201_ENCODE(c, code)
|
||||
else DECODE_SURROGATE(c)
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (code == NOCHAR) {
|
||||
if (c <= 0xffff) {
|
||||
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap
|
||||
((ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, IN2,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
/* abandon JIS X 0212 codes */
|
||||
if (code & 0x8000)
|
||||
return 1;
|
||||
}
|
||||
else return 1;
|
||||
}
|
||||
else if (c >> 16 == EMPBASE >> 16) {
|
||||
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
|
||||
else return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = (code & 0xff) - 0x21;
|
||||
|
||||
if (c1 & 0x80) { /* Plane 2 */
|
||||
if (c1 >= 0xee) c1 -= 0x87;
|
||||
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
|
||||
else c1 -= 0x43;
|
||||
}
|
||||
else /* Plane 1 */
|
||||
c1 -= 0x21;
|
||||
|
||||
if (c1 & 1) c2 += 0x5e;
|
||||
c1 >>= 1;
|
||||
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
|
||||
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
|
||||
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
JISX0201_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
|
||||
unsigned char c1, c2 = IN2;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
if (c1 < 0x5e) { /* Plane 1 */
|
||||
c1 += 0x21;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
|
||||
c1, c2)
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
|
||||
c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
NEXT_IN(2)
|
||||
}
|
||||
else { /* Plane 2 */
|
||||
if (c1 >= 0x67) c1 += 0x07;
|
||||
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
|
||||
else c1 -= 0x3d;
|
||||
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
|
||||
c1, c2)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
|
||||
c1, c2) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(jisx0208)
|
||||
MAPPING_DECONLY(jisx0212)
|
||||
MAPPING_ENCONLY(jisxcommon)
|
||||
MAPPING_DECONLY(jisx0213_1_bmp)
|
||||
MAPPING_DECONLY(jisx0213_2_bmp)
|
||||
MAPPING_ENCONLY(jisx0213_bmp)
|
||||
MAPPING_DECONLY(jisx0213_1_emp)
|
||||
MAPPING_DECONLY(jisx0213_2_emp)
|
||||
MAPPING_ENCONLY(jisx0213_emp)
|
||||
MAPPING_ENCDEC(jisx0213_pair)
|
||||
MAPPING_ENCDEC(cp932ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(shift_jis)
|
||||
CODEC_STATELESS(cp932)
|
||||
CODEC_STATELESS(euc_jp)
|
||||
CODEC_STATELESS(shift_jis_2004)
|
||||
CODEC_STATELESS(euc_jis_2004)
|
||||
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
|
||||
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(jp)
|
|
@ -0,0 +1,356 @@
|
|||
/*
|
||||
* _codecs_kr.c: Codecs collection for Korean encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _codecs_kr.c,v 1.8 2004/07/07 14:59:26 perky Exp $
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_kr.h"
|
||||
|
||||
/*
|
||||
* EUC-KR codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CP949 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2(code & 0xFF) /* MSB set: CP949 */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* JOHAB codec
|
||||
*/
|
||||
|
||||
static const unsigned char u2johabidx_choseong[32] = {
|
||||
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14,
|
||||
};
|
||||
static const unsigned char u2johabidx_jungseong[32] = {
|
||||
0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x1a, 0x1b, 0x1c, 0x1d,
|
||||
};
|
||||
static const unsigned char u2johabidx_jongseong[32] = {
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
|
||||
};
|
||||
static const DBCHAR u2johabjamo[] = {
|
||||
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
|
||||
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
|
||||
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
|
||||
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
|
||||
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
|
||||
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
|
||||
0x8741, 0x8761, 0x8781, 0x87a1,
|
||||
};
|
||||
|
||||
ENCODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
if (c >= 0xac00 && c <= 0xd7a3) {
|
||||
c -= 0xac00;
|
||||
code = 0x8000 |
|
||||
(u2johabidx_choseong[c / 588] << 10) |
|
||||
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
|
||||
u2johabidx_jongseong[c % 28];
|
||||
}
|
||||
else if (c >= 0x3131 && c <= 0x3163)
|
||||
code = u2johabjamo[c - 0x3131];
|
||||
else TRYMAP_ENC(cp949, code, c) {
|
||||
unsigned char c1, c2, t2;
|
||||
unsigned short t1;
|
||||
|
||||
assert((code & 0x8000) == 0);
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
if (((c1 >= 0x21 && c1 <= 0x2c) ||
|
||||
(c1 >= 0x4a && c1 <= 0x7d)) &&
|
||||
(c2 >= 0x21 && c2 <= 0x7e)) {
|
||||
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
|
||||
(c1 - 0x21 + 0x197));
|
||||
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
OUT1(t1 >> 1)
|
||||
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Johab -> Unicode lookup tables used by DECODER(johab).  Each table
 * is indexed by a 5-bit field taken from the two input bytes.
 *   NONE - the field value is invalid
 *   FILL - the field is the "empty" filler for that position
 * Both markers are #undef'd again after the decoder.
 */
#define FILL 0xfd
#define NONE 0xff

/* Component indices used to compose a U+AC00-based syllable. */
static const unsigned char johabidx_choseong[32] = {
    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};

/* Low byte OR-ed with 0x3100 when a single component stands alone. */
static const unsigned char johabjamo_choseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
|
||||
|
||||
DECODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
if (c < 0xd8) {
|
||||
/* johab hangul */
|
||||
unsigned char c_cho, c_jung, c_jong;
|
||||
unsigned char i_cho, i_jung, i_jong;
|
||||
|
||||
c_cho = (c >> 2) & 0x1f;
|
||||
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
|
||||
c_jong = c2 & 0x1f;
|
||||
|
||||
i_cho = johabidx_choseong[c_cho];
|
||||
i_jung = johabidx_jungseong[c_jung];
|
||||
i_jong = johabidx_jongseong[c_jong];
|
||||
|
||||
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
|
||||
return 2;
|
||||
|
||||
/* we don't use U+1100 hangul jamo yet. */
|
||||
if (i_cho == FILL) {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3000)
|
||||
else
|
||||
OUT1(0x3100 |
|
||||
johabjamo_jongseong[c_jong])
|
||||
}
|
||||
else {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 |
|
||||
johabjamo_jungseong[c_jung])
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 |
|
||||
johabjamo_choseong[c_cho])
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
OUT1(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong))
|
||||
}
|
||||
NEXT(2, 1)
|
||||
} else {
|
||||
/* KS X 1001 except hangul jamos and syllables */
|
||||
if (c == 0xdf || c > 0xf9 ||
|
||||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
|
||||
(c2 & 0x7f) == 0x7f ||
|
||||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
|
||||
return 2;
|
||||
else {
|
||||
unsigned char t1, t2;
|
||||
|
||||
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
|
||||
2 * c - 0x197);
|
||||
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
|
||||
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
|
||||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
#undef FILL
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(ksx1001)
|
||||
MAPPING_ENCONLY(cp949)
|
||||
MAPPING_DECONLY(cp949ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(euc_kr)
|
||||
CODEC_STATELESS(cp949)
|
||||
CODEC_STATELESS(johab)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(kr)
|
|
@ -0,0 +1,133 @@
|
|||
/*
|
||||
* _codecs_tw.c: Codecs collection for Taiwan's encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _codecs_tw.c,v 1.10 2004/07/07 14:59:26 perky Exp $
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_tw.h"
|
||||
|
||||
/*
|
||||
* BIG5 codec
|
||||
*/
|
||||
|
||||
ENCODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
REQUIRE_OUTBUF(1)
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CP950 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp950ext, code, c);
|
||||
else TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
|
||||
else TRYMAP_DEC(big5, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_ENCDEC(big5)
|
||||
MAPPING_ENCDEC(cp950ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(big5)
|
||||
CODEC_STATELESS(cp950)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(tw)
|
|
@ -0,0 +1,560 @@
|
|||
/*
|
||||
* _codecs_unicode.c: Codecs collection for Unicode encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _codecs_unicode.c,v 1.5 2004/06/27 21:41:15 perky Exp $
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
|
||||
/*
|
||||
* UTF-7 codec
|
||||
*/
|
||||
|
||||
/*
 * Character classes for the 128 ASCII code points as used by the
 * UTF-7 codec.  0 means the character belongs to none of the sets.
 */
#define SET_DIRECT      1
#define SET_OPTIONAL    2
#define SET_WHITESPACE  3

/* Short aliases, local to the table below. */
#define _D SET_DIRECT
#define _O SET_OPTIONAL
#define _W SET_WHITESPACE
static const char utf7_sets[128] = {
    /* 0x00 */
     0,  0,  0,  0,  0,  0,  0,  0,  0, _W, _W,  0,  0, _W,  0,  0,
    /* 0x10 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */
    _W, _O, _O, _O, _O, _O, _O, _D, _D, _D, _O,  0, _D, _D, _D,  0,
    /* 0x30 */
    _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _O, _O, _O, _O, _D,
    /* 0x40 */
    _O, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D,
    /* 0x50 */
    _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _O,  0, _O, _O, _O,
    /* 0x60 */
    _O, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D,
    /* 0x70 */
    _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _O, _O, _O,  0,  0,
};
#undef _W
#undef _O
#undef _D
|
||||
|
||||
/* B64(n): base64 alphabet character for the low six bits of n. */
#define B64(n)  ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \
                "0123456789+/"[(n) & 0x3f])
/* B64CHAR(c): true when c is a base64 alphabet character. */
#define B64CHAR(c) (((c) >= 'A' && (c) <= 'Z') || \
                    ((c) >= 'a' && (c) <= 'z') || \
                    ((c) >= '0' && (c) <= '9') || \
                    (c) == '+' || (c) == '/')
/* UB64(c): six-bit value of base64 character c (c must satisfy
 * B64CHAR). */
#define UB64(c) ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \
                 (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4)

/* Both macros index utf7_sets directly: callers must ensure c < 128. */
#define UTF7_DENCODABLE_COMPATIBLE(c) (utf7_sets[c] != 0)
#define UTF7_DENCODABLE_STRICT(c) (utf7_sets[c] == SET_DIRECT || \
                                   utf7_sets[c] == SET_WHITESPACE)

/*
 * Encoder state layout (bytes of state->c):
 *   c[0]  pending bits not yet written as a base64 character
 *   c[2]  non-zero while inside a '+'...'-' shifted run
 *   c[3]  stage: 0 when no bits are pending, else 1 or 2
 * The statement macros carry their own trailing ';' and are used
 * without one.
 */
#define ESTATE_INITIALIZE(state)    \
    ESTATE_SETSTAGE(state, 0)       \
    ESTATE_CLEARSHIFTED(state)

#define ESTATE_SETPENDING(state, v) (state)->c[0] = (v);
#define ESTATE_GETPENDING(state)    (state)->c[0]

#define ESTATE_SETSHIFTED(state)    (state)->c[2] = 1;
#define ESTATE_ISSHIFTED(state)     ((state)->c[2])
#define ESTATE_CLEARSHIFTED(state)  (state)->c[2] = 0;

#define ESTATE_SETSTAGE(state, v)   (state)->c[3] = (v);
#define ESTATE_GETSTAGE(state)      ((state)->c[3])
|
||||
|
||||
ENCODER_INIT(utf_7)
|
||||
{
|
||||
ESTATE_INITIALIZE(state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER_RESET(utf_7)
|
||||
{
|
||||
if (ESTATE_ISSHIFTED(state)) {
|
||||
if (ESTATE_GETSTAGE(state) != 0) {
|
||||
unsigned char oc;
|
||||
|
||||
oc = B64(ESTATE_GETPENDING(state));
|
||||
WRITE2(oc, '-')
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
else {
|
||||
WRITE1('-')
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
ESTATE_CLEARSHIFTED(state)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(utf_7)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c1 = IN1, c2 = 0;
|
||||
size_t insize = 1;
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
if (c1 >> 10 == 0xd800 >> 10) { /* high surrogate */
|
||||
REQUIRE_INBUF(2)
|
||||
if (IN2 >> 10 != 0xdc00 >> 10) /* low surrogate */
|
||||
return 2; /* invalid surrogate pair */
|
||||
c2 = IN2;
|
||||
insize = 2;
|
||||
}
|
||||
#else
|
||||
if (c1 > 0x10ffff) /* UTF-16 unencodable */
|
||||
return 1;
|
||||
else if (c1 > 0xffff) {
|
||||
c2 = 0xdc00 | ((c1 - 0x10000) & 0x3ff);
|
||||
c1 = 0xd800 | ((c1 - 0x10000) >> 10);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;;) {
|
||||
unsigned char oc1, oc2, oc3;
|
||||
|
||||
if (ESTATE_ISSHIFTED(state)) {
|
||||
if (c1 < 128 && UTF7_DENCODABLE_STRICT(c1)) {
|
||||
if (ESTATE_GETSTAGE(state) != 0) {
|
||||
oc1 = B64(ESTATE_GETPENDING(
|
||||
state));
|
||||
WRITE3(oc1, '-',
|
||||
(unsigned char)c1)
|
||||
NEXT_OUT(3)
|
||||
} else {
|
||||
WRITE2('-',
|
||||
(unsigned char)c1)
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
ESTATE_CLEARSHIFTED(state)
|
||||
} else {
|
||||
switch (ESTATE_GETSTAGE(state)) {
|
||||
case 0:
|
||||
oc1 = c1 >> 10;
|
||||
oc2 = (c1 >> 4) & 0x3f;
|
||||
WRITE2(B64(oc1), B64(oc2))
|
||||
ESTATE_SETPENDING(state,
|
||||
(c1 & 0x0f) << 2)
|
||||
ESTATE_SETSTAGE(state, 2)
|
||||
NEXT_OUT(2)
|
||||
break;
|
||||
case 1:
|
||||
oc1 = ESTATE_GETPENDING(state)
|
||||
| (c1 >> 12);
|
||||
oc2 = (c1 >> 6) & 0x3f;
|
||||
oc3 = c1 & 0x3f;
|
||||
WRITE3(B64(oc1), B64(oc2),
|
||||
B64(oc3))
|
||||
ESTATE_SETSTAGE(state, 0)
|
||||
NEXT_OUT(3)
|
||||
break;
|
||||
case 2:
|
||||
oc1 = ESTATE_GETPENDING(state)
|
||||
| (c1 >> 14);
|
||||
oc2 = (c1 >> 8) & 0x3f;
|
||||
oc3 = (c1 >> 2) & 0x3f;
|
||||
WRITE3(B64(oc1), B64(oc2),
|
||||
B64(oc3))
|
||||
ESTATE_SETPENDING(state,
|
||||
(c1 & 0x03) << 4)
|
||||
ESTATE_SETSTAGE(state, 1)
|
||||
NEXT_OUT(3)
|
||||
break;
|
||||
default:
|
||||
return MBERR_INTERNAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (c1 < 128 && UTF7_DENCODABLE_STRICT(c1)) {
|
||||
WRITE1((unsigned char)c1)
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
else if (c1 == '+') {
|
||||
WRITE2('+', '-')
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
else {
|
||||
oc1 = c1 >> 10;
|
||||
oc2 = (c1 >> 4) & 0x3f;
|
||||
WRITE3('+', B64(oc1), B64(oc2))
|
||||
ESTATE_SETPENDING(state,
|
||||
(c1 & 0x0f) << 2)
|
||||
ESTATE_SETSTAGE(state, 2)
|
||||
ESTATE_SETSHIFTED(state)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
}
|
||||
|
||||
if (c2 != 0) {
|
||||
c1 = c2;
|
||||
c2 = 0;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
NEXT_IN(insize)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reset every decoder field except the pending-bits byte c[2]. */
#define DSTATE_INITIALIZE(state)        \
    DSTATE_SETBSTAGE(state, 0)          \
    DSTATE_CLEARSHIFTED(state)          \
    DSTATE_SETULENGTH(state, 0)         \
    DSTATE_SETUPENDING1(state, 0)       \
    DSTATE_SETUPENDING2(state, 0)

/* XXX: Type-mixed usage of a state union may not be so portable.
 * If you see any problem with this on your platform, please let
 * me know. */

/*
 * Decoder state layout:
 *   c[0]   non-zero while inside a shifted run
 *   c[1]   base64 stage (0..3)
 *   c[2]   bits left over from the previous base64 character
 *   c[3]   number of bytes collected for the pending UTF-16 unit(s)
 *   u2[2]  first pending 16-bit unit
 *   u2[3]  second pending 16-bit unit
 * The statement macros carry their own trailing ';'.
 */
#define DSTATE_SETSHIFTED(state)        (state)->c[0] = 1;
#define DSTATE_ISSHIFTED(state)         ((state)->c[0])
#define DSTATE_CLEARSHIFTED(state)      (state)->c[0] = 0;

#define DSTATE_SETBSTAGE(state, v)      (state)->c[1] = (v);
#define DSTATE_GETBSTAGE(state)         ((state)->c[1])

#define DSTATE_SETBPENDING(state, v)    (state)->c[2] = (v);
#define DSTATE_GETBPENDING(state)       ((state)->c[2])

#define DSTATE_SETULENGTH(state, v)     (state)->c[3] = (v);
#define DSTATE_GETULENGTH(state)        ((state)->c[3])

#define DSTATE_SETUPENDING1(state, v)   (state)->u2[2] = (v);
#define DSTATE_GETUPENDING1(state)      (state)->u2[2]

#define DSTATE_SETUPENDING2(state, v)   (state)->u2[3] = (v);
#define DSTATE_GETUPENDING2(state)      (state)->u2[3]

/* Append byte v to the pending units: bytes 0-1 fill u2[2], bytes 2-3
 * fill u2[3]; the even byte of each unit is the high half. */
#define DSTATE_UAPPEND(state, v)                                \
    (state)->u2[(state)->c[3] > 1 ? 3 : 2] |=                   \
        ((state)->c[3] & 1) ? (v) : ((ucs2_t)(v)) << 8;         \
    (state)->c[3]++;
|
||||
|
||||
DECODER_INIT(utf_7)
|
||||
{
|
||||
DSTATE_INITIALIZE(state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
utf_7_flush(MultibyteCodec_State *state,
|
||||
Py_UNICODE **outbuf, size_t *outleft)
|
||||
{
|
||||
switch (DSTATE_GETULENGTH(state)) {
|
||||
case 2: {
|
||||
ucs2_t uc;
|
||||
|
||||
uc = DSTATE_GETUPENDING1(state);
|
||||
#if Py_UNICODE_SIZE == 4
|
||||
if (uc >> 10 == 0xd800 >> 10)
|
||||
return MBERR_TOOFEW;
|
||||
#endif
|
||||
OUT1(uc)
|
||||
(*outbuf)++;
|
||||
(*outleft)--;
|
||||
DSTATE_SETULENGTH(state, 0)
|
||||
DSTATE_SETUPENDING1(state, 0)
|
||||
break;
|
||||
}
|
||||
#if Py_UNICODE_SIZE == 4
|
||||
case 4:
|
||||
if (DSTATE_GETUPENDING2(state) >> 10 != 0xdc00 >> 10)
|
||||
return 1;
|
||||
OUT1(0x10000 + (((ucs4_t)DSTATE_GETUPENDING1(state) - 0xd800)
|
||||
<< 10) + (DSTATE_GETUPENDING2(state) - 0xdc00))
|
||||
(*outbuf)++;
|
||||
(*outleft)--;
|
||||
DSTATE_SETULENGTH(state, 0)
|
||||
DSTATE_SETUPENDING1(state, 0)
|
||||
DSTATE_SETUPENDING2(state, 0)
|
||||
break;
|
||||
#endif
|
||||
case 0: /* FALLTHROUGH */
|
||||
case 1: /* FALLTHROUGH */
|
||||
case 3:
|
||||
return MBERR_TOOFEW;
|
||||
default:
|
||||
return MBERR_INTERNAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_RESET(utf_7)
|
||||
{
|
||||
DSTATE_INITIALIZE(state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(utf_7)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
int r;
|
||||
|
||||
if (!DSTATE_ISSHIFTED(state)) {
|
||||
if (c == '+') {
|
||||
REQUIRE_INBUF(2)
|
||||
if (inleft >= 2 && IN2 == '-') {
|
||||
WRITE1('+')
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else {
|
||||
DSTATE_SETSHIFTED(state)
|
||||
NEXT_IN(1)
|
||||
}
|
||||
}
|
||||
else if (c < 128 && UTF7_DENCODABLE_COMPATIBLE(c)) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (B64CHAR(c)) {
|
||||
unsigned char tb;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
c = UB64(c);
|
||||
assert(DSTATE_GETULENGTH(state) < 4);
|
||||
|
||||
switch (DSTATE_GETBSTAGE(state)) {
|
||||
case 0:
|
||||
DSTATE_SETBPENDING(state, c << 2)
|
||||
DSTATE_SETBSTAGE(state, 1)
|
||||
break;
|
||||
case 1:
|
||||
tb = DSTATE_GETBPENDING(state) | (c >> 4);
|
||||
DSTATE_SETBPENDING(state, c << 4)
|
||||
DSTATE_SETBSTAGE(state, 2)
|
||||
DSTATE_UAPPEND(state, tb)
|
||||
break;
|
||||
case 2:
|
||||
tb = DSTATE_GETBPENDING(state) | (c >> 2);
|
||||
DSTATE_SETBPENDING(state, c << 6)
|
||||
DSTATE_SETBSTAGE(state, 3)
|
||||
DSTATE_UAPPEND(state, tb)
|
||||
break;
|
||||
case 3:
|
||||
tb = DSTATE_GETBPENDING(state) | c;
|
||||
DSTATE_SETBSTAGE(state, 0)
|
||||
DSTATE_UAPPEND(state, tb)
|
||||
break;
|
||||
}
|
||||
|
||||
r = utf_7_flush(state, outbuf, &outleft);
|
||||
if (r != 0 && r != MBERR_TOOFEW)
|
||||
return r;
|
||||
NEXT_IN(1)
|
||||
}
|
||||
else if (c == '-' || UTF7_DENCODABLE_COMPATIBLE(c)) {
|
||||
if (DSTATE_GETBSTAGE(state) != 0) {
|
||||
DSTATE_UAPPEND(state, DSTATE_GETBSTAGE(state))
|
||||
DSTATE_SETBSTAGE(state, 0)
|
||||
}
|
||||
r = utf_7_flush(state, outbuf, &outleft);
|
||||
if (r != 0 && r != MBERR_TOOFEW)
|
||||
return r;
|
||||
DSTATE_CLEARSHIFTED(state)
|
||||
|
||||
if (c != '-') {
|
||||
WRITE1(c)
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
NEXT_IN(1)
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UTF-8 codec
|
||||
*/
|
||||
|
||||
ENCODER(utf_8)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = **inbuf;
|
||||
size_t outsize, insize = 1;
|
||||
|
||||
if (c < 0x80) outsize = 1;
|
||||
else if (c < 0x800) outsize = 2;
|
||||
else {
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */
|
||||
if (inleft < 2) {
|
||||
if (!(flags & MBENC_FLUSH))
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else if ((*inbuf)[1] >> 10 == 0xdc00 >> 10) {
|
||||
/* low surrogate */
|
||||
c = 0x10000 + ((c - 0xd800) << 10) +
|
||||
((ucs4_t)((*inbuf)[1]) - 0xdc00);
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (c < 0x10000) outsize = 3;
|
||||
else if (c < 0x200000) outsize = 4;
|
||||
else if (c < 0x4000000) outsize = 5;
|
||||
else outsize = 6;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(outsize)
|
||||
|
||||
switch (outsize) {
|
||||
case 6:
|
||||
(*outbuf)[5] = 0x80 | (c & 0x3f);
|
||||
c = c >> 6;
|
||||
c |= 0x4000000;
|
||||
/* FALLTHROUGH */
|
||||
case 5:
|
||||
(*outbuf)[4] = 0x80 | (c & 0x3f);
|
||||
c = c >> 6;
|
||||
c |= 0x200000;
|
||||
/* FALLTHROUGH */
|
||||
case 4:
|
||||
(*outbuf)[3] = 0x80 | (c & 0x3f);
|
||||
c = c >> 6;
|
||||
c |= 0x10000;
|
||||
/* FALLTHROUGH */
|
||||
case 3:
|
||||
(*outbuf)[2] = 0x80 | (c & 0x3f);
|
||||
c = c >> 6;
|
||||
c |= 0x800;
|
||||
/* FALLTHROUGH */
|
||||
case 2:
|
||||
(*outbuf)[1] = 0x80 | (c & 0x3f);
|
||||
c = c >> 6;
|
||||
c |= 0xc0;
|
||||
/* FALLTHROUGH */
|
||||
case 1:
|
||||
(*outbuf)[0] = c;
|
||||
}
|
||||
|
||||
NEXT(insize, outsize)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(utf_8)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
(*outbuf)[0] = (unsigned char)c;
|
||||
NEXT(1, 1)
|
||||
}
|
||||
else if (c < 0xc2) {
|
||||
return 1;
|
||||
}
|
||||
else if (c < 0xe0) {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = (*inbuf)[1];
|
||||
if (!((c2 ^ 0x80) < 0x40))
|
||||
return 2;
|
||||
**outbuf = ((Py_UNICODE)(c & 0x1f) << 6) |
|
||||
(Py_UNICODE)(c2 ^ 0x80);
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else if (c < 0xf0) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3)
|
||||
c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
|
||||
if (!((c2 ^ 0x80) < 0x40 &&
|
||||
(c3 ^ 0x80) < 0x40 && (c >= 0xe1 || c2 >= 0xa0)))
|
||||
return 3;
|
||||
**outbuf = ((Py_UNICODE)(c & 0x0f) << 12)
|
||||
| ((Py_UNICODE)(c2 ^ 0x80) << 6)
|
||||
| (Py_UNICODE)(c3 ^ 0x80);
|
||||
NEXT(3, 1)
|
||||
}
|
||||
else if (c < 0xf8) {
|
||||
unsigned char c2, c3, c4;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_INBUF(4)
|
||||
c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
|
||||
c4 = (*inbuf)[3];
|
||||
if (!((c2 ^ 0x80) < 0x40 &&
|
||||
(c3 ^ 0x80) < 0x40 && (c4 ^ 0x80) < 0x40 &&
|
||||
(c >= 0xf1 || c2 >= 0x90)))
|
||||
return 4;
|
||||
code = ((ucs4_t)(c & 0x07) << 18)
|
||||
| ((ucs4_t)(c2 ^ 0x80) << 12)
|
||||
| ((ucs4_t)(c3 ^ 0x80) << 6)
|
||||
| (ucs4_t)(c4 ^ 0x80);
|
||||
WRITEUCS4(code)
|
||||
NEXT_IN(4)
|
||||
}
|
||||
else if (c < 0xfc) {
|
||||
unsigned char c2, c3, c4, c5;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_INBUF(5)
|
||||
c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
|
||||
c4 = (*inbuf)[3]; c5 = (*inbuf)[4];
|
||||
if (!((c2 ^ 0x80) < 0x40 &&
|
||||
(c3 ^ 0x80) < 0x40 && (c4 ^ 0x80) < 0x40 &&
|
||||
(c5 ^ 0x80) < 0x40 && (c >= 0xf9 || c2 >= 0x88)))
|
||||
return 5;
|
||||
code = ((ucs4_t)(c & 0x03) << 24)
|
||||
| ((ucs4_t)(c2 ^ 0x80) << 18)
|
||||
| ((ucs4_t)(c3 ^ 0x80) << 12)
|
||||
| ((ucs4_t)(c4 ^ 0x80) << 6)
|
||||
| (ucs4_t)(c5 ^ 0x80);
|
||||
WRITEUCS4(code)
|
||||
NEXT_IN(5)
|
||||
}
|
||||
else if (c < 0xff) {
|
||||
unsigned char c2, c3, c4, c5, c6;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_INBUF(6)
|
||||
c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
|
||||
c4 = (*inbuf)[3]; c5 = (*inbuf)[4];
|
||||
c6 = (*inbuf)[5];
|
||||
if (!((c2 ^ 0x80) < 0x40 &&
|
||||
(c3 ^ 0x80) < 0x40 && (c4 ^ 0x80) < 0x40 &&
|
||||
(c5 ^ 0x80) < 0x40 && (c6 ^ 0x80) < 0x40 &&
|
||||
(c >= 0xfd || c2 >= 0x84)))
|
||||
return 6;
|
||||
code = ((ucs4_t)(c & 0x01) << 30)
|
||||
| ((ucs4_t)(c2 ^ 0x80) << 24)
|
||||
| ((ucs4_t)(c3 ^ 0x80) << 18)
|
||||
| ((ucs4_t)(c4 ^ 0x80) << 12)
|
||||
| ((ucs4_t)(c5 ^ 0x80) << 6)
|
||||
| (ucs4_t)(c6 ^ 0x80);
|
||||
WRITEUCS4(code)
|
||||
NEXT_IN(6)
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Mapping-table list for this module: nothing is listed between the
 * BEGIN/END markers. */
BEGIN_MAPPINGS_LIST
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
/* Codec list for this module: utf_7 is listed through CODEC_STATEFUL and
 * utf_8 through CODEC_STATELESS.
 * NOTE(review): the list macros are defined outside this chunk; presumably
 * the stateful form also hooks up init/reset functions -- confirm. */
BEGIN_CODECS_LIST
|
||||
CODEC_STATEFUL(utf_7)
|
||||
CODEC_STATELESS(utf_8)
|
||||
END_CODECS_LIST
|
||||
|
||||
/* Names the module "unicode" (macro defined outside this chunk). */
I_AM_A_MODULE_FOR(unicode)
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* _cp932.c: the CP932 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _cp932.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
ENCMAP(cp932ext)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(cp932ext)
|
||||
|
||||
ENCODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
if (c <= 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
WRITE1(c - 0xfec0)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xf8f0 && c <= 0xf8f3) {
|
||||
/* Windows compatability */
|
||||
RESERVE_OUTBUF(1)
|
||||
if (c == 0xf8f0)
|
||||
OUT1(0xa0)
|
||||
else
|
||||
OUT1(c - 0xfef1 + 0xfd)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(cp932ext, code, c) {
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
} else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
|
||||
/* JIS X 0208 */
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
} else if (c >= 0xe000 && c < 0xe758) {
|
||||
/* User-defined area */
|
||||
c1 = (Py_UNICODE)(c - 0xe000) / 188;
|
||||
c2 = (Py_UNICODE)(c - 0xe000) % 188;
|
||||
OUT1(c1 + 0xf0)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
} else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
if (c <= 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xa0 && c <= 0xdf) {
|
||||
if (c == 0xa0)
|
||||
OUT1(0xf8f0) /* half-width katakana */
|
||||
else
|
||||
OUT1(0xfec0 + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xfd/* && c <= 0xff*/) {
|
||||
/* Windows compatibility */
|
||||
OUT1(0xf8f1 - 0xfd + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc))
|
||||
OUT1(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
|
||||
else
|
||||
return 2;
|
||||
} else
|
||||
return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the cp932 codec.  Inside the ja_JP MAPOPEN/MAPCLOSE
 * pair it imports the tables declared with ENCMAP/DECMAP at the top of this
 * file: jisx0208 (decode), cp932ext (both directions), jisxcommon (encode).
 * NOTE(review): the registry macros come from codecentry.h, which is not
 * visible here. */
BEGIN_CODEC_REGISTRY(cp932)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENCDEC(cp932ext)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(cp932)
|
|
@ -1,73 +0,0 @@
|
|||
/*
|
||||
* _cp949.c: the CP949 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _cp949.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
DECMAP(cp949ext)
|
||||
|
||||
ENCODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2(code & 0xFF) /* MSB set: CP949 */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the cp949 codec.  Inside the ko_KR MAPOPEN/MAPCLOSE
 * pair it imports ksx1001 and cp949ext for decoding and cp949 for encoding,
 * matching the ENCMAP/DECMAP declarations at the top of this file.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(cp949)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_DEC(cp949ext)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(cp949)
|
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* _cp950.c: the CP950 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _cp950.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(big5)
|
||||
ENCMAP(cp950ext)
|
||||
DECMAP(big5)
|
||||
DECMAP(cp950ext)
|
||||
|
||||
ENCODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp950ext, code, c);
|
||||
else TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
|
||||
else TRYMAP_DEC(big5, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the cp950 codec.  Inside the zh_TW MAPOPEN/MAPCLOSE
 * pair it imports big5 and cp950ext for both encoding and decoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(cp950)
|
||||
MAPOPEN(zh_TW)
|
||||
IMPORTMAP_ENCDEC(big5)
|
||||
IMPORTMAP_ENCDEC(cp950ext)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(cp950)
|
|
@ -1,180 +0,0 @@
|
|||
/*
|
||||
* _euc_jisx0213.c: the EUC-JISX0213 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _euc_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#include "codeccommon.h"
|
||||
#include "map_jisx0213_pairs.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
ENCMAP(jisx0213_bmp)
|
||||
DECMAP(jisx0213_1_bmp)
|
||||
DECMAP(jisx0213_2_bmp)
|
||||
ENCMAP(jisx0213_emp)
|
||||
DECMAP(jisx0213_1_emp)
|
||||
DECMAP(jisx0213_2_emp)
|
||||
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
/*
 * EUC-JISX0213 encoder.
 *
 * ASCII is copied through.  For other input, `insize` records how many
 * input units the character occupies (set by GET_INSIZE, or forced to 2
 * when a two-character pair is matched).  BMP characters are tried against
 * jisx0213_bmp, then jisxcommon, then the half-width katakana range and two
 * full-width special cases; characters in the plane selected by EMPBASE are
 * tried against jisx0213_emp.  A code with the MSB set is written as a
 * three-byte 0x8f sequence, otherwise as two bytes with the high bits set.
 *
 * Returns 0 at end of input, MBERR_TOOFEW when a possible pair cannot be
 * resolved without more input (and MBENC_FLUSH is not set), and 1 or
 * `insize` for an unencodable character.
 * NOTE(review): DECODE_SURROGATE and GET_INSIZE are defined outside this
 * chunk; presumably they combine a UTF-16 surrogate pair into `c` and
 * report its width -- confirm.
 */
ENCODER(euc_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
int insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (c <= 0xFFFF) {
|
||||
/* try 0213 first because it might have MULTIC */
|
||||
TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
/* MULTIC: the result depends on the following input character */
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
/* no next character: only resolve it alone when flushing */
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap((ucs2_t)c, 0,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
return MBERR_TOOFEW;
|
||||
} else {
|
||||
/* try (c, next) as a pair, then fall back to c alone */
code = find_pairencmap((ucs2_t)c, (*inbuf)[1],
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap((ucs2_t)c, 0,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
} else TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
} else if (c == 0xff3c)
|
||||
/* F/W REVERSE SOLIDUS (see NOTES.euc-jisx0213) */
|
||||
code = 0x2140;
|
||||
else if (c == 0xff5e)
|
||||
/* F/W TILDE (see NOTES.euc-jisx0213) */
|
||||
code = 0x2232;
|
||||
else
|
||||
return 1;
|
||||
} else if (c >> 16 == EMPBASE >> 16) {
|
||||
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
} else
|
||||
return insize;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* Codeset 2 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 3)
|
||||
} else {
|
||||
/* Codeset 1 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * EUC-JISX0213 decoder.
 *
 * ASCII is copied through.  Lead byte 0x8e introduces a two-byte half-width
 * katakana; 0x8f introduces a three-byte sequence tried against
 * jisx0213_2_bmp, jisx0213_2_emp and jisx0212; any other lead byte starts a
 * two-byte sequence tried against two hard-coded full-width characters,
 * jisx0208, jisx0213_1_bmp, jisx0213_1_emp and finally jisx0213_pair (which
 * yields two output units).
 *
 * Returns 0 at end of input, or 2 / 3 for an undecodable two- or three-byte
 * sequence.
 * NOTE(review): PUTUCS4 is defined outside this chunk; each use is followed
 * by NEXT_IN only, so presumably it advances the output itself -- confirm.
 */
DECODER(euc_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t code;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
RESERVE_INBUF(3)
|
||||
c2 = IN2 ^ 0x80;
|
||||
c3 = IN3 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES.euc-jisx0213) */
|
||||
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(3)
|
||||
continue;
|
||||
} else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
|
||||
else return 3;
|
||||
NEXT(3, 1)
|
||||
} else {
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c ^= 0x80;
|
||||
c2 = IN2 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 1 */
|
||||
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
/* pair entry: high 16 bits, then low 16 bits, as two output units */
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
continue;
|
||||
} else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the euc_jisx0213 codec.  Inside the ja_JP
 * MAPOPEN/MAPCLOSE pair it imports every table declared with ENCMAP/DECMAP
 * at the top of this file (jisxcommon, jisx0208, jisx0212 and the
 * jisx0213 BMP/EMP tables).
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(euc_jisx0213)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisx0213_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_1_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_2_bmp)
|
||||
IMPORTMAP_ENC(jisx0213_emp)
|
||||
IMPORTMAP_DEC(jisx0213_1_emp)
|
||||
IMPORTMAP_DEC(jisx0213_2_emp)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(euc_jisx0213)
|
|
@ -1,127 +0,0 @@
|
|||
/*
|
||||
* _euc_jp.c: the EUC-JP codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _euc_jp.c,v 1.5 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
|
||||
/*
 * EUC-JP encoder.
 *
 * ASCII is copied through.  Other characters are looked up in jisxcommon;
 * failing that, U+FF61..U+FF9F is written as 0x8e plus one byte and, unless
 * STRICT_BUILD is defined, three extra characters are accepted (full-width
 * reverse solidus, yen sign, overline).  A code with the MSB set is written
 * as a three-byte 0x8f sequence, otherwise as two bytes with the high bits
 * set.
 *
 * Returns 0 at end of input, 1 for an unencodable character.
 */
ENCODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
/* the branches below extend the same else-chain in non-strict builds */
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else if (c == 0xa5) { /* YEN SIGN */
|
||||
WRITE1(0x5c);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c == 0x203e) { /* OVERLINE */
|
||||
WRITE1(0x7e);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* JIS X 0212 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 3)
|
||||
} else {
|
||||
/* JIS X 0208 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * EUC-JP decoder.
 *
 * ASCII is copied through.  Lead byte 0x8e introduces a two-byte half-width
 * katakana, 0x8f a three-byte JIS X 0212 character, and anything else a
 * two-byte JIS X 0208 character (with 0xa1 0xc0 special-cased to U+FF3C
 * unless STRICT_BUILD is defined).
 *
 * Returns 0 at end of input, or 2 / 3 for an undecodable two- or three-byte
 * sequence.
 */
DECODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
RESERVE_INBUF(3)
|
||||
c2 = IN2;
|
||||
c3 = IN3;
|
||||
/* JIS X 0212 */
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
|
||||
NEXT(3, 1)
|
||||
} else
|
||||
return 3;
|
||||
} else {
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
/* JIS X 0208 */
|
||||
/* in non-strict builds the special case heads the if/else chain below */
#ifndef STRICT_BUILD
|
||||
if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ;
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the euc_jp codec.  Inside the ja_JP MAPOPEN/MAPCLOSE
 * pair it imports jisx0208 and jisx0212 for decoding and jisxcommon for
 * encoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(euc_jp)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(euc_jp)
|
|
@ -1,71 +0,0 @@
|
|||
/*
|
||||
* _euc_kr.c: the EUC-KR codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _euc_kr.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
|
||||
ENCODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the euc_kr codec.  Inside the ko_KR MAPOPEN/MAPCLOSE
 * pair it imports ksx1001 for decoding and cp949 for encoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(euc_kr)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(euc_kr)
|
|
@ -1,181 +0,0 @@
|
|||
/*
|
||||
* _gb18030.c: the GB18030 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _gb18030.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "tweak_gbk.h"
|
||||
#include "map_gb18030uni.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
ENCMAP(gb18030ext)
|
||||
DECMAP(gb2312)
|
||||
DECMAP(gbkext)
|
||||
DECMAP(gb18030ext)
|
||||
|
||||
/*
 * GB18030 encoder.
 *
 * ASCII is copied through.  Code points from 0x10000 to 0x10FFFF are written
 * as an arithmetic four-byte sequence.  BMP characters are tried against
 * GBK_PREENCODE, gbcommon and gb18030ext (two-byte output); any that miss
 * are written as a four-byte sequence computed from the
 * gb18030_to_unibmp_ranges table.
 *
 * Returns 0 at end of input; 1 (or 2 for a surrogate pair when
 * Py_UNICODE_SIZE == 2) for a code point above 0x10FFFF; 1 with a
 * RuntimeError set when no range covers the character.
 * NOTE(review): DECODE_SURROGATE and GBK_PREENCODE are defined outside this
 * chunk -- confirm their exact behaviour there.
 */
ENCODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
if (c > 0x10FFFF)
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
return 2; /* surrogates pair */
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
else if (c >= 0x10000) {
|
||||
ucs4_t tc = c - 0x10000;
|
||||
|
||||
RESERVE_OUTBUF(4)
|
||||
|
||||
/* mixed radix 10/126/10, filled from the last byte to the first */
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)(tc + 0x90))
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
NEXT(2, 4) /* surrogates pair */
|
||||
#else
|
||||
NEXT(1, 4)
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
GBK_PREENCODE(c, code)
|
||||
else TRYMAP_ENC(gbcommon, code, c);
|
||||
else TRYMAP_ENC(gb18030ext, code, c);
|
||||
else {
|
||||
const struct _gb18030_to_unibmp_ranges *utrrange;
|
||||
|
||||
RESERVE_OUTBUF(4)
|
||||
|
||||
/* the table is terminated by an entry whose `first` is 0 */
for (utrrange = gb18030_to_unibmp_ranges;
|
||||
utrrange->first != 0;
|
||||
utrrange++)
|
||||
if (utrrange->first <= c && c <= utrrange->last) {
|
||||
Py_UNICODE tc;
|
||||
|
||||
tc = c - utrrange->first + utrrange->base;
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)tc + 0x81)
|
||||
|
||||
NEXT(1, 4)
|
||||
break;
|
||||
}
|
||||
|
||||
/* reached the terminator: no range covered c */
if (utrrange->first == 0) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"unicode mapping invalid");
|
||||
return 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
c2 = IN2;
|
||||
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
|
||||
const struct _gb18030_to_unibmp_ranges *utr;
|
||||
unsigned char c3, c4;
|
||||
ucs4_t lseq;
|
||||
|
||||
RESERVE_INBUF(4)
|
||||
c3 = IN3;
|
||||
c4 = IN4;
|
||||
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
|
||||
return 4;
|
||||
c -= 0x81; c2 -= 0x30;
|
||||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
if (c < 4) { /* U+0080 - U+FFFF */
|
||||
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
if (lseq < 39420) {
|
||||
for (utr = gb18030_to_unibmp_ranges;
|
||||
lseq >= (utr + 1)->base;
|
||||
utr++) ;
|
||||
OUT1(utr->first - utr->base + lseq)
|
||||
NEXT(4, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c >= 15) { /* U+10000 - U+10FFFF */
|
||||
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
if (lseq <= 0x10FFFF) {
|
||||
PUTUCS4(lseq);
|
||||
NEXT_IN(4)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
GBK_PREDECODE(c, c2, **outbuf)
|
||||
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
|
||||
else TRYMAP_DEC(gbkext, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the gb18030 codec.  Inside the zh_CN MAPOPEN/MAPCLOSE
 * pair it imports gb2312 and gbkext for decoding, gbcommon for encoding and
 * gb18030ext for both directions.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(gb18030)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_DEC(gbkext)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
IMPORTMAP_ENCDEC(gb18030ext)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(gb18030)
|
|
@ -1,69 +0,0 @@
|
|||
/*
|
||||
* _gb2312.c: the GB2312 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _gb2312.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
|
||||
ENCODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the gb2312 codec.  Inside the zh_CN MAPOPEN/MAPCLOSE
 * pair it imports gb2312 for decoding and gbcommon for encoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(gb2312)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(gb2312)
|
|
@ -1,78 +0,0 @@
|
|||
/*
|
||||
* _gbk.c: the GBK codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _gbk.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "tweak_gbk.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
DECMAP(gbkext)
|
||||
|
||||
ENCODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
GBK_PREENCODE(c, code)
|
||||
else TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
GBK_PREDECODE(c, IN2, **outbuf)
|
||||
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the gbk codec.  Inside the zh_CN MAPOPEN/MAPCLOSE pair
 * it imports gb2312 and gbkext for decoding and gbcommon for encoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(gbk)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_DEC(gbkext)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(gbk)
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* _hz.c: the HZ codec (RFC1843)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _hz.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
WRITE2('~', '}')
|
||||
state->i = 0;
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i == 0) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
} else {
|
||||
WRITE3('~', '}', (unsigned char)c)
|
||||
NEXT(1, 3)
|
||||
state->i = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
WRITE4('~', '{', code >> 8, code & 0xff)
|
||||
NEXT(1, 4)
|
||||
state->i = 1;
|
||||
} else {
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
if (c == '~') {
|
||||
unsigned char c2 = IN2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
if (c2 == '~') {
|
||||
WRITE1('~')
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
} else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else if (c2 == '\n')
|
||||
; /* line-continuation */
|
||||
else
|
||||
return 2;
|
||||
NEXT(2, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
} else { /* GB mode */
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the hz codec.  Inside the zh_CN MAPOPEN/MAPCLOSE pair
 * it imports gb2312 for decoding and gbcommon for encoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(hz)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(hz)
|
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* _iso2022_jp.c: the ISO-2022-JP codec (RFC1468)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp.c,v 1.7 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, CHARSET_JISX0208_O
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
/* Initialise the ISO-2022-JP encoder state: clear the flags and designate
 * ASCII to both G0 and G1.  Always returns 0.
 * NOTE(review): the STATE_ macros come from iso2022common.h (not visible
 * here). */
ENCODER_INIT(iso2022_jp)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
/* Flush the ISO-2022-JP encoder: if G0 is not ASCII, emit ESC ( B and
 * record ASCII as the G0 designation.  Returns 0 on success.
 * NOTE(review): RESERVE_OUTBUF / WRITE3 presumably return early when fewer
 * than 3 output bytes are available -- confirm in codeccommon.h. */
ENCODER_RESET(iso2022_jp)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(3)
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ISO-2022-JP changes designations instead of shifting-out */
|
||||
|
||||
/*
 * ISO-2022-JP encoder.
 *
 * The current G0 designation decides how a character is written:
 *   - c < 0x80: written directly when G0 is ASCII; when G0 is JIS X 0201
 *     Roman it is written through JISX0201_R_ENCODE if that succeeds;
 *     otherwise ESC ( B is emitted first and G0 becomes ASCII;
 *   - other characters: tried as JIS X 0201 Roman when that is the current
 *     G0, then against jisxcommon (JIS X 0212 entries are rejected), with
 *     ESC $ B emitted when G0 is not already JIS X 0208; U+FF3C is forced to
 *     0x2140; finally JIS X 0201 Roman is tried with an ESC ( J switch.
 *
 * Returns 0 at end of input, 1 for an unencodable character.
 * NOTE(review): the STATE_ and JISX0201_R_ENCODE macros are defined outside
 * this chunk; JISX0201_R_ENCODE is used both as the head of an if/else chain
 * and as a plain statement -- confirm its expansion before restructuring.
 */
ENCODER(iso2022_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
/* the `default:` label sits inside this else body, so other charsets
 * enter here directly and switch G0 back to ASCII */
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
/* DBCINV is the sentinel for "macro did not assign a code" */
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
/* also the goto target for the U+FF3C special case below */
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', (unsigned char)code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
/* Initialise the ISO-2022-JP decoder state: clear the flags and designate
 * ASCII to both G0 and G1.  Always returns 0.
 * NOTE(review): the STATE_ macros come from iso2022common.h (not visible
 * here). */
DECODER_INIT(iso2022_jp)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
/* Reset the ISO-2022-JP decoder by clearing the F_SHIFTED flag; the G0/G1
 * designations are not touched here.  Always returns 0. */
DECODER_RESET(iso2022_jp)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-JP decoder.
 *
 * For each input unit the active charset is fetched with ISO2022_GETCHARSET
 * and the data is decoded accordingly: double-byte charsets go through the
 * jisx0208 table (0x21 0x40 is special-cased to U+FF3C), ASCII is copied,
 * and JIS X 0201 Roman goes through JISX0201_R_DECODE.
 *
 * Returns 0 at end of input, 1 or 2 for undecodable data and MBERR_INTERNAL
 * for an unexpected charset value.
 * NOTE(review): ISO2022_LOOP_BEGIN / ISO2022_LOOP_END come from
 * iso2022common.h (not visible here); presumably they supply the loop, the
 * variable `c` and the escape-sequence handling -- confirm.
 */
DECODER(iso2022_jp)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
/* all double byte character sets are in JIS X 0208 here.
|
||||
* this means that we don't distinguish :1978 from :1983. */
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (c == 0x21 && c2 == 0x40) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
NEXT(2, 1)
|
||||
} else TRYMAP_DEC(jisx0208, **outbuf, c, c2) {
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
/* Registry entry for the iso2022_jp codec.  Inside the ja_JP
 * MAPOPEN/MAPCLOSE pair it imports jisx0208 for decoding and jisxcommon for
 * encoding.
 * NOTE(review): macro bodies live in codecentry.h (not visible here). */
BEGIN_CODEC_REGISTRY(iso2022_jp)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp)
|
|
@ -1,185 +0,0 @@
|
|||
/*
|
||||
* _iso2022_jp_1.c: the ISO-2022-JP-1 codec (RFC2237)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_1.c,v 1.8 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
|
||||
CHARSET_JISX0208_O, CHARSET_JISX0212
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
/* Encoder init hook for ISO-2022-JP-1: clears all state flags and sets both
 * G0 and G1 to CHARSET_ASCII.  Always returns 0. */
ENCODER_INIT(iso2022_jp_1)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_1)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(3)
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ISO-2022-JP-1 changes designations instead of shifting-out */
|
||||
|
||||
ENCODER(iso2022_jp_1)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) { /* MSB set: JIS X 0212 */
|
||||
if (charset != CHARSET_JISX0212) {
|
||||
WRITE4(ESC, '$', '(', 'D')
|
||||
STATE_SETG0(state, CHARSET_JISX0212)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: JIS X 0208 */
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', (unsigned char)code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
/* Decoder init hook for ISO-2022-JP-1: clears all state flags and sets both
 * G0 and G1 to CHARSET_ASCII.  Always returns 0. */
DECODER_INIT(iso2022_jp_1)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
/* Decoder reset hook for ISO-2022-JP-1: clears only the F_SHIFTED flag; the
 * G0/G1 designations are not touched here.  Always returns 0. */
DECODER_RESET(iso2022_jp_1)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-JP-1 decoder body.
 *
 * The loop and the current byte `c` come from ISO2022_LOOP_BEGIN/END
 * (defined outside this section).  Double-byte input is decoded through the
 * jisx0208 or jisx0212 table depending on the active charset; single bytes
 * are ASCII or JIS X 0201 Roman.
 *
 * Returns 0 when the loop ends, 2 or 1 for an unmappable two-byte or one-byte
 * unit, and MBERR_INTERNAL for a charset with no branch below.
 */
DECODER(iso2022_jp_1)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
/* Both JIS X 0208 designations share the same decode table. */
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
|
||||
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_JISX0212) {
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
/* Every successful double-byte branch above falls through to here. */
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_1)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_1)
|
|
@ -1,230 +0,0 @@
|
|||
/*
|
||||
* _iso2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_2.c,v 1.8 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
/* Character sets the ISO-2022-JP-2 codec may designate.  Each set is listed
 * once (CHARSET_JISX0212 used to appear twice in this list). */
#define ISO2022_DESIGNATIONS \
        CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
        CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \
        CHARSET_KSX1001, \
        CHARSET_ISO8859_1, CHARSET_ISO8859_7
|
||||
#define ISO2022_USE_G2_DESIGNATION yo!
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "alg_iso8859_1.h"
|
||||
#include "alg_iso8859_7.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
/* Encoder init hook for ISO-2022-JP-2: clears all state flags and sets G0,
 * G1 and G2 to CHARSET_ASCII.  Always returns 0. */
ENCODER_INIT(iso2022_jp_2)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
STATE_SETG2(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_2)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_jp_2)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) { /* MSB set: JIS X 0212 */
|
||||
if (charset != CHARSET_JISX0212) {
|
||||
WRITE4(ESC, '$', '(', 'D')
|
||||
STATE_SETG0(state, CHARSET_JISX0212)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: JIS X 0208 */
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(1, 2)
|
||||
} else TRYMAP_ENC(cp949, code, c) {
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 2;
|
||||
if (charset != CHARSET_KSX1001) {
|
||||
WRITE4(ESC, '$', '(', 'C')
|
||||
STATE_SETG0(state, CHARSET_KSX1001)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else TRYMAP_ENC(gbcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 2;
|
||||
if (charset != CHARSET_GB2312) {
|
||||
WRITE4(ESC, '$', '(', 'A')
|
||||
STATE_SETG0(state, CHARSET_GB2312)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else {
|
||||
/* There's no need to try to encode as ISO-8859-1 or
|
||||
* ISO-8859-7 because JIS X 0212 includes them already.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', (unsigned char)code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
/* Decoder init hook for ISO-2022-JP-2: clears all state flags and sets G0,
 * G1 and G2 to CHARSET_ASCII.  Always returns 0. */
DECODER_INIT(iso2022_jp_2)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
STATE_SETG2(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
/* Decoder reset hook for ISO-2022-JP-2: clears only the F_SHIFTED flag; the
 * G0/G1/G2 designations are not touched here.  Always returns 0. */
DECODER_RESET(iso2022_jp_2)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-JP-2 decoder body.
 *
 * The loop and the current byte `c` come from ISO2022_LOOP_BEGIN/END
 * (defined outside this section).  Double-byte input is decoded through the
 * jisx0208, jisx0212, ksx1001 or gb2312 table according to the active
 * charset; single bytes are ASCII or JIS X 0201 Roman.
 *
 * Returns 0 when the loop ends, 2 or 1 for an unmappable two-byte or one-byte
 * unit, and MBERR_INTERNAL for a charset with no branch below.
 *
 * NOTE(review): CHARSET_ISO8859_1 and CHARSET_ISO8859_7 are designated for
 * this codec but have no branch here; presumably the loop macros handle
 * them -- confirm in iso2022common.h.
 */
DECODER(iso2022_jp_2)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
|
||||
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_JISX0212) {
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_KSX1001) {
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_GB2312) {
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
/* Every successful double-byte branch above falls through to here. */
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_2)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
MAPCLOSE()
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_2)
|
|
@ -1,211 +0,0 @@
|
|||
/*
|
||||
* _iso2022_jp_3.c: the ISO-2022-JP-3 codec (JIS X 0213)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_3.c,v 1.7 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0208, CHARSET_JISX0213_1, CHARSET_JISX0213_2
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "map_jisx0213_pairs.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
ENCMAP(jisx0213_bmp)
|
||||
DECMAP(jisx0213_1_bmp)
|
||||
DECMAP(jisx0213_2_bmp)
|
||||
ENCMAP(jisx0213_emp)
|
||||
DECMAP(jisx0213_1_emp)
|
||||
DECMAP(jisx0213_2_emp)
|
||||
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
/* Encoder init hook for ISO-2022-JP-3: clears all state flags and sets both
 * G0 and G1 to CHARSET_ASCII.  Always returns 0. */
ENCODER_INIT(iso2022_jp_3)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_3)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-JP-3 encoder body.
 *
 * Code points below 0x80 are written as one byte, preceded by ESC ( B when
 * G0 is not ASCII.  Other characters are resolved to a 16-bit `code`
 * through the jisx0213_bmp / jisxcommon / jisx0213_emp maps and written as
 * two bytes under the JIS X 0213 plane 1 (ESC $ ( O) or plane 2
 * (ESC $ ( P) designation, selected by the MSB of `code`.
 *
 * Returns 0 on success, 1 or `insize` when the current character cannot be
 * encoded, and MBERR_TOOFEW when a possible two-character pair needs more
 * input and MBENC_FLUSH is not set.
 */
ENCODER(iso2022_jp_3)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char charset;
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
size_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
default:
|
||||
WRITE4(ESC, '(', 'B', c)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT(1, 4)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* insize is the number of input units c occupies; presumably 2 for a
 * surrogate pair folded by DECODE_SURROGATE -- TODO confirm. */
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (c <= 0xffff) {
|
||||
TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
/* MULTIC marks a character whose encoding depends on the
 * character that follows it; resolve it via the pair table. */
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
/* No following character available: only encode c on its
 * own when flushing, otherwise ask for more input. */
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap((ucs2_t)c, 0,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
return MBERR_TOOFEW;
|
||||
} else {
|
||||
/* Try (c, next) as a pair first, then c alone. */
code = find_pairencmap((ucs2_t)c, IN2,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap((ucs2_t)c, 0,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
} else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000)
|
||||
return 1; /* avoid JIS X 0212 codes */
|
||||
} else if (c == 0xff3c) /* F/W REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else
|
||||
return 1;
|
||||
/* Only the plane whose upper 16 bits match EMPBASE has a map. */
} else if (c >> 16 == EMPBASE >> 16) {
|
||||
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
} else
|
||||
return insize;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (code & 0x8000) { /* MSB set: Plane 2 */
|
||||
if (charset != CHARSET_JISX0213_2) {
|
||||
WRITE4(ESC, '$', '(', 'P')
|
||||
STATE_SETG0(state, CHARSET_JISX0213_2)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: Plane 1 */
|
||||
if (charset != CHARSET_JISX0213_1) {
|
||||
WRITE4(ESC, '$', '(', 'O')
|
||||
STATE_SETG0(state, CHARSET_JISX0213_1)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
/* Decoder init hook for ISO-2022-JP-3: clears all state flags and sets both
 * G0 and G1 to CHARSET_ASCII.  Always returns 0. */
DECODER_INIT(iso2022_jp_3)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
/* Decoder reset hook for ISO-2022-JP-3: clears only the F_SHIFTED flag; the
 * G0/G1 designations are not touched here.  Always returns 0. */
DECODER_RESET(iso2022_jp_3)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-JP-3 decoder body.
 *
 * The loop and the current byte `c` come from ISO2022_LOOP_BEGIN/END
 * (defined outside this section).  Plane 1 input is tried against the
 * jisx0208, jisx0213_1_bmp, jisx0213_1_emp and jisx0213_pair tables in that
 * order; plane 2 input against jisx0213_2_bmp and jisx0213_2_emp.
 *
 * Returns 0 when the loop ends, 2 for an unmappable two-byte unit, and
 * MBERR_INTERNAL for a charset with no branch below.
 *
 * NOTE(review): ISO2022_DESIGNATIONS for this codec also lists
 * CHARSET_JISX0208, but no branch handles it, so such input would reach the
 * MBERR_INTERNAL return unless the loop macros remap it -- confirm in
 * iso2022common.h.
 */
DECODER(iso2022_jp_3)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
ucs4_t code;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0213_1) {
|
||||
/* 0x2140 is special-cased to U+FF3C before any table lookup. */
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
/* PUTUCS4 is followed only by NEXT_IN, so it presumably advances
 * the output itself -- TODO confirm. */
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
/* A pair entry packs two output units: high and low 16 bits. */
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
continue;
|
||||
} else return 2;
|
||||
} else if (charset == CHARSET_JISX0213_2) {
|
||||
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
} else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
/* Reached only by the branches that stored one unit in **outbuf. */
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_3)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
IMPORTMAP_ENC(jisx0213_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_1_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_2_bmp)
|
||||
IMPORTMAP_ENC(jisx0213_emp)
|
||||
IMPORTMAP_DEC(jisx0213_1_emp)
|
||||
IMPORTMAP_DEC(jisx0213_2_emp)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_3)
|
|
@ -1,200 +0,0 @@
|
|||
/*
|
||||
* _iso2022_jp_ext.c: the ISO-2022-JP-EXT codec (RFC2237 + alpha)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_ext.c,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0201_K, \
|
||||
CHARSET_JISX0208, CHARSET_JISX0208_O, CHARSET_JISX0212
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
/* Encoder init hook for ISO-2022-JP-EXT: clears all state flags and sets
 * both G0 and G1 to CHARSET_ASCII.  Always returns 0. */
ENCODER_INIT(iso2022_jp_ext)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_ext)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(3)
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_jp_ext)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) { /* MSB set: JIS X 0212 */
|
||||
if (charset != CHARSET_JISX0212) {
|
||||
WRITE4(ESC, '$', '(', 'D')
|
||||
STATE_SETG0(state, CHARSET_JISX0212)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: JIS X 0208 */
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code < 0x80) { /* JIS X 0201 Roman */
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', (unsigned char)code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
} else { /* JIS X 0201 Katakana */
|
||||
if (charset != CHARSET_JISX0201_K) {
|
||||
WRITE3(ESC, '(', 'I')
|
||||
STATE_SETG0(state, CHARSET_JISX0201_K)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE1(code - 0x80)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
/* Decoder init hook for ISO-2022-JP-EXT: clears all state flags and sets
 * both G0 and G1 to CHARSET_ASCII.  Always returns 0. */
DECODER_INIT(iso2022_jp_ext)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
/* Decoder reset hook for ISO-2022-JP-EXT: clears only the F_SHIFTED flag;
 * the G0/G1 designations are not touched here.  Always returns 0. */
DECODER_RESET(iso2022_jp_ext)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-JP-EXT decoder body.
 *
 * The loop and the current byte `c` come from ISO2022_LOOP_BEGIN/END
 * (defined outside this section).  Double-byte input is decoded through the
 * jisx0208 or jisx0212 table; single bytes are ASCII, JIS X 0201 Roman or
 * JIS X 0201 Katakana.
 *
 * Returns 0 when the loop ends, 2 or 1 for an unmappable two-byte or one-byte
 * unit, and MBERR_INTERNAL for a charset with no branch below.
 */
DECODER(iso2022_jp_ext)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
|
||||
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_JISX0212) {
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
/* Every successful double-byte branch above falls through to here. */
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_K) {
|
||||
RESERVE_OUTBUF(1)
|
||||
/* The byte is passed to the Katakana decode macro with bit 7 toggled;
 * the encoder writes these codes as (code - 0x80). */
JISX0201_K_DECODE(c ^ 0x80, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_ext)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_ext)
|
|
@ -1,131 +0,0 @@
|
|||
/*
|
||||
* _iso2022_kr.c: the ISO-2022-KR codec (RFC1557)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_kr.c,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_KSX1001
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
/* Encoder init hook for ISO-2022-KR: clears all state flags and sets both
 * G0 and G1 to CHARSET_ASCII.  Always returns 0. */
ENCODER_INIT(iso2022_kr)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_kr)
|
||||
{
|
||||
if (STATE_GETFLAG(state, F_SHIFTED)) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(SI)
|
||||
NEXT_OUT(1)
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (STATE_GETFLAG(state, F_SHIFTED)) {
|
||||
WRITE2(SI, (unsigned char)c)
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
NEXT(1, 2)
|
||||
} else {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
} else UCS4INVALID(c)
|
||||
else {
|
||||
if (STATE_GETG1(state) != CHARSET_KSX1001) {
|
||||
WRITE4(ESC, '$', ')', 'C')
|
||||
STATE_SETG1(state, CHARSET_KSX1001)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
|
||||
if (!STATE_GETFLAG(state, F_SHIFTED)) {
|
||||
WRITE1(SO)
|
||||
STATE_SETFLAG(state, F_SHIFTED)
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
|
||||
TRYMAP_ENC(cp949, code, c) {
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 1;
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
/* Decoder init hook for ISO-2022-KR: clears all state flags and sets both
 * G0 and G1 to CHARSET_ASCII.  Always returns 0. */
DECODER_INIT(iso2022_kr)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
/* Decoder reset hook for ISO-2022-KR: clears only the F_SHIFTED flag; the
 * G0/G1 designations are not touched here.  Always returns 0. */
DECODER_RESET(iso2022_kr)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ISO-2022-KR decoder body.
 *
 * The loop and the current byte `c` come from ISO2022_LOOP_BEGIN/END
 * (defined outside this section).  Double-byte input is decoded through the
 * ksx1001 table; every other byte is copied to the output unchanged.
 *
 * Returns 0 when the loop ends, 1 when the trail byte has its high bit set,
 * and 2 when the byte pair is not in the table.
 */
DECODER(iso2022_kr)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
/* all double byte character sets are in KS X 1001 here */
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
/* A trail byte >= 0x80 is rejected; only the lead byte is reported. */
if (c2 >= 0x80)
|
||||
return 1;
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c, c2) {
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c);
|
||||
NEXT(1, 1)
|
||||
}
|
||||
ISO2022_LOOP_END
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_kr)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_kr)
|
|
@ -1,223 +0,0 @@
|
|||
/*
|
||||
* _johab.c: the Johab codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _johab.c,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
|
||||
static const unsigned char u2johabidx_choseong[32] = {
|
||||
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14,
|
||||
};
|
||||
static const unsigned char u2johabidx_jungseong[32] = {
|
||||
0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x1a, 0x1b, 0x1c, 0x1d,
|
||||
};
|
||||
static const unsigned char u2johabidx_jongseong[32] = {
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
|
||||
};
|
||||
static const DBCHAR u2johabjamo[] = {
|
||||
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
|
||||
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
|
||||
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
|
||||
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
|
||||
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
|
||||
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
|
||||
0x8741, 0x8761, 0x8781, 0x87a1,
|
||||
};
|
||||
|
||||
/*
 * Johab encoder body.
 *
 * Code points below 0x80 are written as one byte.  Hangul syllables
 * (U+AC00..U+D7A3) are composed arithmetically from the three u2johabidx_*
 * tables, compatibility jamo (U+3131..U+3163) come from u2johabjamo, and
 * everything else goes through the cp949 map followed by a row/cell
 * transformation into Johab lead/trail bytes.
 *
 * Returns 0 on success and 1 when the current character cannot be encoded.
 */
ENCODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
/* Every remaining path writes exactly two bytes with OUT1/OUT2. */
RESERVE_OUTBUF(2)
|
||||
|
||||
if (c >= 0xac00 && c <= 0xd7a3) {
|
||||
/* Split the syllable index into initial (c / 588), medial
 * ((c / 28) % 21) and final (c % 28) parts, then pack the three 5-bit
 * Johab indices under a set MSB. */
c -= 0xac00;
|
||||
code = 0x8000 |
|
||||
(u2johabidx_choseong[c / 588] << 10) |
|
||||
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
|
||||
u2johabidx_jongseong[c % 28];
|
||||
} else if (c >= 0x3131 && c <= 0x3163)
|
||||
code = u2johabjamo[c - 0x3131];
|
||||
else TRYMAP_ENC(cp949, code, c) {
|
||||
unsigned char c1, c2, t2;
|
||||
unsigned short t1;
|
||||
|
||||
/* Debug-build check only: the code is expected to have no MSB here. */
assert((code & 0x8000) == 0);
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
/* Only rows 0x21..0x2c and 0x4a..0x7d are convertible. */
if (((c1 >= 0x21 && c1 <= 0x2c) || (c1 >= 0x4a && c1 <= 0x7d))
|
||||
&& (c2 >= 0x21 && c2 <= 0x7e)) {
|
||||
/* Two source rows share one lead byte (t1 >> 1); odd t1 selects the
 * upper half of the trail range via the 0x5e offset. */
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197));
|
||||
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
OUT1(t1 >> 1)
|
||||
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
} else
|
||||
return 1;
|
||||
} else
|
||||
return 1;
|
||||
|
||||
/* Reached by the syllable and jamo branches only. */
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define FILL 0xfd
|
||||
#define NONE 0xff
|
||||
|
||||
static const unsigned char johabidx_choseong[32] = {
|
||||
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
|
||||
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
|
||||
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
|
||||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabidx_jungseong[32] = {
|
||||
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
|
||||
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
||||
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
|
||||
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabidx_jongseong[32] = {
|
||||
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
|
||||
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
|
||||
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
|
||||
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
|
||||
};
|
||||
|
||||
static const unsigned char johabjamo_choseong[32] = {
|
||||
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
|
||||
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
|
||||
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
|
||||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabjamo_jungseong[32] = {
|
||||
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
|
||||
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
||||
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
|
||||
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabjamo_jongseong[32] = {
|
||||
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
|
||||
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
|
||||
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
|
||||
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
|
||||
};
|
||||
|
||||
/*
 * Johab decoder body.
 *
 * Bytes below 0x80 are copied through.  Lead bytes below 0xd8 are treated as
 * Johab hangul: three 5-bit fields are unpacked and translated through the
 * johabidx_* / johabjamo_* tables.  Other lead bytes are converted to a
 * row/cell pair and looked up in the ksx1001 table.
 *
 * Returns 0 on success and 2 when a two-byte sequence is invalid.
 */
DECODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
if (c < 0xd8) {
|
||||
/* johab hangul */
|
||||
unsigned char c_cho, c_jung, c_jong;
|
||||
unsigned char i_cho, i_jung, i_jong;
|
||||
|
||||
/* Unpack the 5-bit initial / medial / final fields from the 16-bit
 * value (c << 8) | c2 (bits 14-10, 9-5 and 4-0). */
c_cho = (c >> 2) & 0x1f;
|
||||
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
|
||||
c_jong = c2 & 0x1f;
|
||||
|
||||
i_cho = johabidx_choseong[c_cho];
|
||||
i_jung = johabidx_jungseong[c_jung];
|
||||
i_jong = johabidx_jongseong[c_jong];
|
||||
|
||||
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
|
||||
return 2;
|
||||
|
||||
/* we don't use U+1100 hangul jamo yet. */
|
||||
/* FILL marks an empty slot.  Exactly one non-FILL slot yields a
 * U+31xx compatibility jamo, all three FILL yields U+3000, and an
 * initial plus a medial yields a syllable; anything else is rejected. */
if (i_cho == FILL) {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3000)
|
||||
else
|
||||
OUT1(0x3100 | johabjamo_jongseong[c_jong])
|
||||
} else {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 | johabjamo_jungseong[c_jung])
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 | johabjamo_choseong[c_cho])
|
||||
else
|
||||
return 2;
|
||||
} else
|
||||
OUT1(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong))
|
||||
}
|
||||
NEXT(2, 1)
|
||||
} else {
|
||||
/* KS X 1001 except hangul jamos and syllables */
|
||||
/* NOTE(review): c == 0xd8 is not rejected here; 2 * (c - 0xd9) is then
 * negative and wraps when stored in the unsigned char t1 below, so the
 * outcome depends on the table lookup failing -- confirm. */
if (c == 0xdf || c > 0xf9 ||
|
||||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
|
||||
(c2 & 0x7f) == 0x7f ||
|
||||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
|
||||
return 2;
|
||||
else {
|
||||
unsigned char t1, t2;
|
||||
|
||||
/* Each lead byte covers two rows: t2 >= 0x5e selects the second
 * row, and both values are rebased to start at 0x21. */
t1 = (c < 0xe0 ? 2 * (c - 0xd9) : 2 * c - 0x197);
|
||||
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
|
||||
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
|
||||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
#undef FILL
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(johab)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(johab)
|
|
@ -1,121 +0,0 @@
|
|||
/*
|
||||
* _shift_jis.c: the SHIFT-JIS codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _shift_jis.c,v 1.4 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
|
||||
ENCODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
#else
|
||||
if (c < 0x80) code = c;
|
||||
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
|
||||
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
|
||||
#endif
|
||||
else JISX0201_K_ENCODE(c, code)
|
||||
else UCS4INVALID(c)
|
||||
else code = NOCHAR;
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
OUT1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
if (code == NOCHAR) {
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c)
|
||||
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
#else
|
||||
if (c < 0x80) **outbuf = c;
|
||||
#endif
|
||||
else JISX0201_K_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
|
||||
unsigned char c1, c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
#ifndef STRICT_BUILD
|
||||
if (c1 == 0x21 && c2 == 0x40) {
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUT1(0xff3c)
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
} else
|
||||
return 2;
|
||||
} else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(shift_jis)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(shift_jis)
|
|
@ -1,169 +0,0 @@
|
|||
/*
|
||||
* _shift_jisx0213.c: the SHIFT-JISX0213 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _shift_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#include "codeccommon.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "map_jisx0213_pairs.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
ENCMAP(jisx0213_bmp)
|
||||
DECMAP(jisx0213_1_bmp)
|
||||
DECMAP(jisx0213_2_bmp)
|
||||
ENCMAP(jisx0213_emp)
|
||||
DECMAP(jisx0213_1_emp)
|
||||
DECMAP(jisx0213_2_emp)
|
||||
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
ENCODER(shift_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code = NOCHAR;
|
||||
int c1, c2;
|
||||
size_t insize;
|
||||
|
||||
JISX0201_ENCODE(c, code)
|
||||
else DECODE_SURROGATE(c)
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (code == NOCHAR) {
|
||||
if (c <= 0xffff) {
|
||||
TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap((ucs2_t)c, 0,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
return MBERR_TOOFEW;
|
||||
} else {
|
||||
code = find_pairencmap((ucs2_t)c, IN2,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap((ucs2_t)c, 0,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
} else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000)
|
||||
return 1; /* abandon JIS X 0212 codes */
|
||||
} else return 1;
|
||||
} else if (c >> 16 == EMPBASE >> 16) {
|
||||
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
} else
|
||||
return insize;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = (code & 0xff) - 0x21;
|
||||
|
||||
if (c1 & 0x80) { /* Plane 2 */
|
||||
if (c1 >= 0xee) c1 -= 0x87;
|
||||
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
|
||||
else c1 -= 0x43;
|
||||
} else /* Plane 1 */
|
||||
c1 -= 0x21;
|
||||
|
||||
if (c1 & 1) c2 += 0x5e;
|
||||
c1 >>= 1;
|
||||
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
|
||||
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
|
||||
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
|
||||
unsigned char c1, c2 = IN2;
|
||||
ucs4_t code;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
if (c1 < 0x5e) { /* Plane 1 */
|
||||
c1 += 0x21;
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
} else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
} else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
} else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT_OUT(2)
|
||||
} else
|
||||
return 2;
|
||||
NEXT_IN(2)
|
||||
} else { /* Plane 2 */
|
||||
if (c1 >= 0x67) c1 += 0x07;
|
||||
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
|
||||
else c1 -= 0x3d;
|
||||
|
||||
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
} else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
} else
|
||||
return 2;
|
||||
NEXT_IN(2)
|
||||
}
|
||||
continue;
|
||||
} else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(shift_jisx0213)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
IMPORTMAP_ENC(jisx0213_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_1_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_2_bmp)
|
||||
IMPORTMAP_ENC(jisx0213_emp)
|
||||
IMPORTMAP_DEC(jisx0213_1_emp)
|
||||
IMPORTMAP_DEC(jisx0213_2_emp)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(shift_jisx0213)
|
|
@ -1,12 +0,0 @@
|
|||
/*
|
||||
* alg_iso8859_1.h: Encoder/Decoder macros for ISO8859-1
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: alg_iso8859_1.h,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO8859_1_ENCODE(c, assi) \
|
||||
if ((c) <= 0xff) (assi) = (c);
|
||||
|
||||
#define ISO8859_1_DECODE(c, assi) \
|
||||
if (1/*(c) <= 0xff*/) (assi) = (c);
|
|
@ -1,32 +0,0 @@
|
|||
/*
|
||||
* alg_iso8859_7.h: Encoder/Decoder macros for ISO8859-7
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: alg_iso8859_7.h,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
* 0x288f3bc9 and 0xbffffd77 are magic bitmasks whose set bits indicate the
|
||||
* availability of a mapping at each offset difference (0 and 0x2d0).
|
||||
*/
|
||||
|
||||
#define ISO8859_7_ENCODE(c, assi) \
|
||||
if ((c) <= 0xa0) (assi) = (c); \
|
||||
else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
|
||||
(assi) = (c); \
|
||||
else if ((c) >= 0x0384 && (c) <= 0x03ce && ((c) >= 0x03a4 ||\
|
||||
(0xbffffd77L & (1L << ((c)-0x0384))))) \
|
||||
(assi) = (c) - 0x02d0; \
|
||||
else if ((c)>>1 == 0x2018>>1) (assi) = (c) - 0x1f77; \
|
||||
else if ((c) == 0x2015) (assi) = 0xaf;
|
||||
|
||||
#define ISO8859_7_DECODE(c, assi) \
|
||||
if ((c) < 0xa0) (assi) = (c); \
|
||||
else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
|
||||
(assi) = (c); \
|
||||
else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
|
||||
(0xbffffd77L & (1L << ((c)-0xb4))))) \
|
||||
(assi) = 0x02d0 + (c); \
|
||||
else if ((c) == 0xa1) (assi) = 0x2018; \
|
||||
else if ((c) == 0xa2) (assi) = 0x2019; \
|
||||
else if ((c) == 0xaf) (assi) = 0x2015;
|
|
@ -1,27 +1,26 @@
|
|||
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2003/11/27 16:42:20 perky Exp $ */
|
||||
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2004/06/29 05:42:08 perky Exp $ */
|
||||
|
||||
#define JISX0201_R_ENCODE(c, assi) \
|
||||
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
|
||||
(assi) = (c); \
|
||||
else if ((c) == 0x00a5) (assi) = 0x5c; \
|
||||
else if ((c) == 0x203e) (assi) = 0x7e;
|
||||
#define JISX0201_K_ENCODE(c, assi) \
|
||||
if ((c) >= 0xff61 && (c) <= 0xff9f) \
|
||||
(assi) = (c) - 0xfec0;
|
||||
#define JISX0201_ENCODE(c, assi) \
|
||||
JISX0201_R_ENCODE(c, assi) \
|
||||
else JISX0201_K_ENCODE(c, assi)
|
||||
|
||||
#define JISX0201_R_DECODE(c, assi) \
|
||||
if ((c) < 0x5c) (assi) = (c); \
|
||||
else if ((c) == 0x5c) (assi) = 0x00a5; \
|
||||
else if ((c) < 0x7e) (assi) = (c); \
|
||||
else if ((c) == 0x7e) (assi) = 0x203e; \
|
||||
else if ((c) == 0x7f) (assi) = 0x7f;
|
||||
#define JISX0201_K_DECODE(c, assi) \
|
||||
if ((c) >= 0xa1 && (c) <= 0xdf) \
|
||||
(assi) = 0xfec0 + (c);
|
||||
#define JISX0201_DECODE(c, assi) \
|
||||
JISX0201_R_DECODE(c, assi) \
|
||||
else JISX0201_K_DECODE(c, assi)
|
||||
#define JISX0201_R_ENCODE(c, assi) \
|
||||
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
|
||||
(assi) = (c); \
|
||||
else if ((c) == 0x00a5) (assi) = 0x5c; \
|
||||
else if ((c) == 0x203e) (assi) = 0x7e;
|
||||
#define JISX0201_K_ENCODE(c, assi) \
|
||||
if ((c) >= 0xff61 && (c) <= 0xff9f) \
|
||||
(assi) = (c) - 0xfec0;
|
||||
#define JISX0201_ENCODE(c, assi) \
|
||||
JISX0201_R_ENCODE(c, assi) \
|
||||
else JISX0201_K_ENCODE(c, assi)
|
||||
|
||||
#define JISX0201_R_DECODE(c, assi) \
|
||||
if ((c) < 0x5c) (assi) = (c); \
|
||||
else if ((c) == 0x5c) (assi) = 0x00a5; \
|
||||
else if ((c) < 0x7e) (assi) = (c); \
|
||||
else if ((c) == 0x7e) (assi) = 0x203e; \
|
||||
else if ((c) == 0x7f) (assi) = 0x7f;
|
||||
#define JISX0201_K_DECODE(c, assi) \
|
||||
if ((c) >= 0xa1 && (c) <= 0xdf) \
|
||||
(assi) = 0xfec0 + (c);
|
||||
#define JISX0201_DECODE(c, assi) \
|
||||
JISX0201_R_DECODE(c, assi) \
|
||||
else JISX0201_K_DECODE(c, assi)
|
||||
|
|
|
@ -0,0 +1,399 @@
|
|||
/*
|
||||
* cjkcodecs.h: common header for cjkcodecs
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: cjkcodecs.h,v 1.5 2004/07/06 17:05:24 perky Exp $
|
||||
*/
|
||||
|
||||
#ifndef _CJKCODECS_H_
|
||||
#define _CJKCODECS_H_
|
||||
|
||||
#include "Python.h"
|
||||
#include "multibytecodec.h"
|
||||
|
||||
|
||||
#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER
|
||||
#define NOCHAR 0xFFFF
|
||||
#define MULTIC 0xFFFE
|
||||
#define DBCINV 0xFFFD
|
||||
|
||||
/* shorter macros to save source size of mapping tables */
|
||||
#define U UNIINV
|
||||
#define N NOCHAR
|
||||
#define M MULTIC
|
||||
#define D DBCINV
|
||||
|
||||
struct dbcs_index {
|
||||
const ucs2_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct dbcs_index decode_map;
|
||||
|
||||
struct widedbcs_index {
|
||||
const ucs4_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct widedbcs_index widedecode_map;
|
||||
|
||||
struct unim_index {
|
||||
const DBCHAR *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct unim_index encode_map;
|
||||
|
||||
struct unim_index_bytebased {
|
||||
const unsigned char *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
|
||||
struct dbcs_map {
|
||||
const char *charset;
|
||||
const struct unim_index *encmap;
|
||||
const struct dbcs_index *decmap;
|
||||
};
|
||||
|
||||
struct pair_encodemap {
|
||||
ucs4_t uniseq;
|
||||
DBCHAR code;
|
||||
};
|
||||
|
||||
static const MultibyteCodec codec_list[];
|
||||
static const struct dbcs_map mapping_list[];
|
||||
|
||||
#define CODEC_INIT(encoding) \
|
||||
static int encoding##_codec_init(const void *config)
|
||||
|
||||
#define ENCODER_INIT(encoding) \
|
||||
static int encoding##_encode_init( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
#define ENCODER(encoding) \
|
||||
static int encoding##_encode( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
const Py_UNICODE **inbuf, size_t inleft, \
|
||||
unsigned char **outbuf, size_t outleft, int flags)
|
||||
#define ENCODER_RESET(encoding) \
|
||||
static int encoding##_encode_reset( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
unsigned char **outbuf, size_t outleft)
|
||||
|
||||
#define DECODER_INIT(encoding) \
|
||||
static int encoding##_decode_init( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
#define DECODER(encoding) \
|
||||
static int encoding##_decode( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
const unsigned char **inbuf, size_t inleft, \
|
||||
Py_UNICODE **outbuf, size_t outleft)
|
||||
#define DECODER_RESET(encoding) \
|
||||
static int encoding##_decode_reset( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
|
||||
#if Py_UNICODE_SIZE == 4
|
||||
#define UCS4INVALID(code) \
|
||||
if ((code) > 0xFFFF) \
|
||||
return 1;
|
||||
#else
|
||||
#define UCS4INVALID(code) \
|
||||
if (0) ;
|
||||
#endif
|
||||
|
||||
#define NEXT_IN(i) \
|
||||
(*inbuf) += (i); \
|
||||
(inleft) -= (i);
|
||||
#define NEXT_OUT(o) \
|
||||
(*outbuf) += (o); \
|
||||
(outleft) -= (o);
|
||||
#define NEXT(i, o) \
|
||||
NEXT_IN(i) NEXT_OUT(o)
|
||||
|
||||
#define REQUIRE_INBUF(n) \
|
||||
if (inleft < (n)) \
|
||||
return MBERR_TOOFEW;
|
||||
#define REQUIRE_OUTBUF(n) \
|
||||
if (outleft < (n)) \
|
||||
return MBERR_TOOSMALL;
|
||||
|
||||
#define IN1 ((*inbuf)[0])
|
||||
#define IN2 ((*inbuf)[1])
|
||||
#define IN3 ((*inbuf)[2])
|
||||
#define IN4 ((*inbuf)[3])
|
||||
|
||||
#define OUT1(c) ((*outbuf)[0]) = (c);
|
||||
#define OUT2(c) ((*outbuf)[1]) = (c);
|
||||
#define OUT3(c) ((*outbuf)[2]) = (c);
|
||||
#define OUT4(c) ((*outbuf)[3]) = (c);
|
||||
|
||||
#define WRITE1(c1) \
|
||||
REQUIRE_OUTBUF(1) \
|
||||
(*outbuf)[0] = (c1);
|
||||
#define WRITE2(c1, c2) \
|
||||
REQUIRE_OUTBUF(2) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2);
|
||||
#define WRITE3(c1, c2, c3) \
|
||||
REQUIRE_OUTBUF(3) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3);
|
||||
#define WRITE4(c1, c2, c3, c4) \
|
||||
REQUIRE_OUTBUF(4) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3); \
|
||||
(*outbuf)[3] = (c4);
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
# define WRITEUCS4(c) \
|
||||
REQUIRE_OUTBUF(2) \
|
||||
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
|
||||
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
|
||||
NEXT_OUT(2)
|
||||
#else
|
||||
# define WRITEUCS4(c) \
|
||||
REQUIRE_OUTBUF(1) \
|
||||
**outbuf = (Py_UNICODE)(c); \
|
||||
NEXT_OUT(1)
|
||||
#endif
|
||||
|
||||
#define _TRYMAP_ENC(m, assi, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != NOCHAR)
|
||||
#define TRYMAP_ENC(charset, assi, uni) \
|
||||
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
|
||||
#define _TRYMAP_DEC(m, assi, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != UNIINV)
|
||||
#define TRYMAP_DEC(charset, assi, c1, c2) \
|
||||
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
|
||||
|
||||
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && \
|
||||
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
|
||||
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
|
||||
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
|
||||
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
|
||||
_TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
|
||||
assplane, asshi, asslo, (uni) & 0xff)
|
||||
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
|
||||
_TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
#define DECODE_SURROGATE(c) \
|
||||
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
|
||||
REQUIRE_INBUF(2) \
|
||||
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
|
||||
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
|
||||
((ucs4_t)(IN2) - 0xdc00); \
|
||||
} \
|
||||
}
|
||||
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
|
||||
#else
|
||||
#define DECODE_SURROGATE(c) {;}
|
||||
#define GET_INSIZE(c) 1
|
||||
#endif
|
||||
|
||||
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map mapping_list[] = {
|
||||
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
|
||||
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
|
||||
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
|
||||
#define END_MAPPINGS_LIST {"", NULL, NULL} };
|
||||
|
||||
#define BEGIN_CODECS_LIST static const MultibyteCodec codec_list[] = {
|
||||
#define _STATEFUL_METHODS(enc) \
|
||||
enc##_encode, \
|
||||
enc##_encode_init, \
|
||||
enc##_encode_reset, \
|
||||
enc##_decode, \
|
||||
enc##_decode_init, \
|
||||
enc##_decode_reset,
|
||||
#define _STATELESS_METHODS(enc) \
|
||||
enc##_encode, NULL, NULL, \
|
||||
enc##_decode, NULL, NULL,
|
||||
#define CODEC_STATEFUL(enc) { \
|
||||
#enc, NULL, NULL, \
|
||||
_STATEFUL_METHODS(enc) \
|
||||
},
|
||||
#define CODEC_STATELESS(enc) { \
|
||||
#enc, NULL, NULL, \
|
||||
_STATELESS_METHODS(enc) \
|
||||
},
|
||||
#define CODEC_STATELESS_WINIT(enc) { \
|
||||
#enc, NULL, \
|
||||
enc##_codec_init, \
|
||||
_STATELESS_METHODS(enc) \
|
||||
},
|
||||
#define END_CODECS_LIST {"", NULL,} };
|
||||
|
||||
static PyObject *
|
||||
getmultibytecodec(void)
|
||||
{
|
||||
static PyObject *cofunc = NULL;
|
||||
|
||||
if (cofunc == NULL) {
|
||||
PyObject *mod = PyImport_ImportModule("_multibytecodec");
|
||||
if (mod == NULL)
|
||||
return NULL;
|
||||
cofunc = PyObject_GetAttrString(mod, "__create_codec");
|
||||
Py_DECREF(mod);
|
||||
}
|
||||
return cofunc;
|
||||
}
|
||||
|
||||
/*
 * Module-level "getcodec" function.
 *
 * Looks the requested encoding name up in codec_list, wraps the matching
 * MultibyteCodec entry in a CObject and hands it to the codec factory
 * obtained from getmultibytecodec().  Returns whatever the factory
 * returns, or NULL with an exception set (TypeError for a non-string
 * name, LookupError for an unknown encoding).
 *
 * self is unused.  When NO_METH_O is defined the second argument is the
 * argument tuple and the encoding object is unpacked from it.
 */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
    PyObject *factory, *wrapped, *result;
    const MultibyteCodec *entry;
    const char *wanted;
#ifdef NO_METH_O
    PyObject *args = encoding;

    if (!PyArg_ParseTuple(args, "O:getcodec", &encoding))
        return NULL;
#endif

    if (!PyString_Check(encoding)) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding name must be a string.");
        return NULL;
    }

    factory = getmultibytecodec();
    if (factory == NULL)
        return NULL;

    /* codec_list is terminated by an entry whose name is empty */
    wanted = PyString_AS_STRING(encoding);
    entry = codec_list;
    while (entry->encoding[0] != '\0' &&
           strcmp(entry->encoding, wanted) != 0)
        entry++;

    if (entry->encoding[0] == '\0') {
        PyErr_SetString(PyExc_LookupError,
                        "no such codec is supported.");
        return NULL;
    }

    wrapped = PyCObject_FromVoidPtr((void *)entry, NULL);
    if (wrapped == NULL)
        return NULL;

#if PY_VERSION_HEX >= 0x02020000
    result = PyObject_CallFunctionObjArgs(factory, wrapped, NULL);
#else
    result = PyObject_CallFunction(factory, "O", wrapped);
#endif
    Py_DECREF(wrapped);

    return result;
}
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
#ifndef NO_METH_O
|
||||
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
|
||||
#else
|
||||
{"getcodec", (PyCFunction)getcodec, METH_VARARGS, ""},
|
||||
#endif
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
static int
|
||||
register_maps(PyObject *module)
|
||||
{
|
||||
const struct dbcs_map *h;
|
||||
|
||||
for (h = mapping_list; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
int r;
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
r = PyModule_AddObject(module, mhname,
|
||||
PyCObject_FromVoidPtr((void *)h, NULL));
|
||||
if (r == -1)
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
|
||||
static DBCHAR
|
||||
find_pairencmap(ucs2_t body, ucs2_t modifier,
|
||||
const struct pair_encodemap *haystack, int haystacksize)
|
||||
{
|
||||
int pos, min, max;
|
||||
ucs4_t value = body << 16 | modifier;
|
||||
|
||||
min = 0;
|
||||
max = haystacksize;
|
||||
|
||||
for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
|
||||
if (value < haystack[pos].uniseq) {
|
||||
if (max == pos) break;
|
||||
else max = pos;
|
||||
}
|
||||
else if (value > haystack[pos].uniseq) {
|
||||
if (min == pos) break;
|
||||
else min = pos;
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
||||
if (value == haystack[pos].uniseq)
|
||||
return haystack[pos].code;
|
||||
else
|
||||
return DBCINV;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USING_IMPORTED_MAPS
|
||||
#define IMPORT_MAP(locale, charset, encmap, decmap) \
|
||||
importmap("_codecs_" #locale, "__map_" #charset, \
|
||||
(const void**)encmap, (const void**)decmap)
|
||||
|
||||
static int
|
||||
importmap(const char *modname, const char *symbol,
|
||||
const void **encmap, const void **decmap)
|
||||
{
|
||||
PyObject *o, *mod;
|
||||
|
||||
mod = PyImport_ImportModule((char *)modname);
|
||||
if (mod == NULL)
|
||||
return -1;
|
||||
|
||||
o = PyObject_GetAttrString(mod, (char*)symbol);
|
||||
if (o == NULL)
|
||||
goto errorexit;
|
||||
else if (!PyCObject_Check(o)) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"map data must be a CObject.");
|
||||
goto errorexit;
|
||||
}
|
||||
else {
|
||||
struct dbcs_map *map;
|
||||
map = PyCObject_AsVoidPtr(o);
|
||||
if (encmap != NULL)
|
||||
*encmap = map->encmap;
|
||||
if (decmap != NULL)
|
||||
*decmap = map->decmap;
|
||||
Py_DECREF(o);
|
||||
}
|
||||
|
||||
Py_DECREF(mod);
|
||||
return 0;
|
||||
|
||||
errorexit:
|
||||
Py_DECREF(mod);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Define the module init function init_codecs_<loc>(): it creates the
 * "_codecs_<loc>" module with the __methods table and registers the
 * mapping tables on it.  Registration is skipped when module creation
 * fails, so register_maps() is never handed a NULL module.
 */
#define I_AM_A_MODULE_FOR(loc)                                          \
    void                                                                \
    init_codecs_##loc(void)                                             \
    {                                                                   \
        PyObject *m = Py_InitModule("_codecs_" #loc, __methods);        \
        if (m != NULL)                                                  \
            (void)register_maps(m);                                     \
    }
|
||||
|
||||
#endif
|
|
@ -1,57 +0,0 @@
|
|||
/*
|
||||
* cjkcommon.h: Common Constants and Macros for CJK Character Sets
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: cjkcommon.h,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#ifndef _CJKCOMMON_H_
|
||||
#define _CJKCOMMON_H_
|
||||
|
||||
#ifdef uint32_t
|
||||
typedef uint32_t ucs4_t;
|
||||
#else
|
||||
typedef unsigned int ucs4_t;
|
||||
#endif
|
||||
|
||||
#ifdef uint16_t
|
||||
typedef uint16_t ucs2_t, DBCHAR;
|
||||
#else
|
||||
typedef unsigned short ucs2_t, DBCHAR;
|
||||
#endif
|
||||
|
||||
#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER
|
||||
#define NOCHAR 0xFFFF
|
||||
#define MULTIC 0xFFFE
|
||||
#define DBCINV 0xFFFD
|
||||
|
||||
struct dbcs_index {
|
||||
const ucs2_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct dbcs_index decode_map;
|
||||
|
||||
struct widedbcs_index {
|
||||
const ucs4_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct widedbcs_index widedecode_map;
|
||||
|
||||
struct unim_index {
|
||||
const DBCHAR *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct unim_index encode_map;
|
||||
|
||||
struct dbcs_map {
|
||||
const char *charset;
|
||||
const struct unim_index *encmap;
|
||||
const struct dbcs_index *decmap;
|
||||
};
|
||||
|
||||
struct pair_encodemap {
|
||||
ucs4_t uniseq;
|
||||
DBCHAR code;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,144 +0,0 @@
|
|||
/*
|
||||
* codeccommon.h: Common Codec Routines
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: codeccommon.h,v 1.4 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "multibytecodec.h"
|
||||
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
#define STRICT_SUFX "_strict"
|
||||
#else
|
||||
#define STRICT_SUFX
|
||||
#endif
|
||||
|
||||
#define ENCMAP(encoding) \
|
||||
static const encode_map *encoding##encmap;
|
||||
#define DECMAP(encoding) \
|
||||
static const decode_map *encoding##decmap;
|
||||
|
||||
#define ENCODER_INIT(encoding) \
|
||||
static int encoding##_encode_init( \
|
||||
MultibyteCodec_State *state)
|
||||
#define ENCODER(encoding) \
|
||||
static int encoding##_encode( \
|
||||
MultibyteCodec_State *state, \
|
||||
const Py_UNICODE **inbuf, size_t inleft, \
|
||||
unsigned char **outbuf, size_t outleft, int flags)
|
||||
#define ENCODER_RESET(encoding) \
|
||||
static int encoding##_encode_reset( \
|
||||
MultibyteCodec_State *state, \
|
||||
unsigned char **outbuf, size_t outleft)
|
||||
|
||||
#define DECODER_INIT(encoding) \
|
||||
static int encoding##_decode_init( \
|
||||
MultibyteCodec_State *state)
|
||||
#define DECODER(encoding) \
|
||||
static int encoding##_decode( \
|
||||
MultibyteCodec_State *state, \
|
||||
const unsigned char **inbuf, size_t inleft, \
|
||||
Py_UNICODE **outbuf, size_t outleft)
|
||||
#define DECODER_RESET(encoding) \
|
||||
static int encoding##_decode_reset( \
|
||||
MultibyteCodec_State *state)
|
||||
|
||||
#if Py_UNICODE_SIZE == 4
|
||||
#define UCS4INVALID(code) \
|
||||
if ((code) > 0xFFFF) \
|
||||
return 1;
|
||||
#else
|
||||
#define UCS4INVALID(code) \
|
||||
if (0) ;
|
||||
#endif
|
||||
|
||||
#define NEXT_IN(i) \
|
||||
(*inbuf) += (i); \
|
||||
(inleft) -= (i);
|
||||
#define NEXT_OUT(o) \
|
||||
(*outbuf) += (o); \
|
||||
(outleft) -= (o);
|
||||
#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o)
|
||||
|
||||
#define RESERVE_INBUF(n) \
|
||||
if (inleft < (n)) \
|
||||
return MBERR_TOOFEW;
|
||||
#define RESERVE_OUTBUF(n) \
|
||||
if (outleft < (n)) \
|
||||
return MBERR_TOOSMALL;
|
||||
|
||||
#define IN1 ((*inbuf)[0])
|
||||
#define IN2 ((*inbuf)[1])
|
||||
#define IN3 ((*inbuf)[2])
|
||||
#define IN4 ((*inbuf)[3])
|
||||
|
||||
#define OUT1(c) ((*outbuf)[0]) = (c);
|
||||
#define OUT2(c) ((*outbuf)[1]) = (c);
|
||||
#define OUT3(c) ((*outbuf)[2]) = (c);
|
||||
#define OUT4(c) ((*outbuf)[3]) = (c);
|
||||
|
||||
#define WRITE1(c1) \
|
||||
RESERVE_OUTBUF(1) \
|
||||
(*outbuf)[0] = (c1);
|
||||
#define WRITE2(c1, c2) \
|
||||
RESERVE_OUTBUF(2) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2);
|
||||
#define WRITE3(c1, c2, c3) \
|
||||
RESERVE_OUTBUF(3) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3);
|
||||
#define WRITE4(c1, c2, c3, c4) \
|
||||
RESERVE_OUTBUF(4) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3); \
|
||||
(*outbuf)[3] = (c4);
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
# define PUTUCS4(c) \
|
||||
RESERVE_OUTBUF(2) \
|
||||
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
|
||||
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
|
||||
NEXT_OUT(2)
|
||||
#else
|
||||
# define PUTUCS4(c) \
|
||||
RESERVE_OUTBUF(1) \
|
||||
**outbuf = (Py_UNICODE)(c); \
|
||||
NEXT_OUT(1)
|
||||
#endif
|
||||
|
||||
#define _TRYMAP_ENC(m, assi, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != NOCHAR)
|
||||
#define TRYMAP_ENC(charset, assi, uni) \
|
||||
_TRYMAP_ENC(&charset##encmap[(uni) >> 8], assi, (uni) & 0xff)
|
||||
#define _TRYMAP_DEC(m, assi, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != UNIINV)
|
||||
#define TRYMAP_DEC(charset, assi, c1, c2) \
|
||||
_TRYMAP_DEC(&charset##decmap[c1], assi, c2)
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
#define DECODE_SURROGATE(c) \
|
||||
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
|
||||
RESERVE_INBUF(2) \
|
||||
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
|
||||
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
|
||||
((ucs4_t)(IN2) - 0xdc00); \
|
||||
} \
|
||||
}
|
||||
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
|
||||
#else
|
||||
#define DECODE_SURROGATE(c) {;}
|
||||
#define GET_INSIZE(c) 1
|
||||
#endif
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
|
||||
static DBCHAR find_pairencmap(ucs2_t , ucs2_t, struct pair_encodemap *, int);
|
||||
#endif
|
|
@ -1,177 +0,0 @@
|
|||
/*
|
||||
* codecentry.h: Common Codec Entry Routines
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: codecentry.h,v 1.5 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#ifdef HAVE_ENCODER_INIT
|
||||
#define ENCODER_INIT_FUNC(encoding) encoding##_encode_init
|
||||
#else
|
||||
#define ENCODER_INIT_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ENCODER_RESET
|
||||
#define ENCODER_RESET_FUNC(encoding) encoding##_encode_reset
|
||||
#else
|
||||
#define ENCODER_RESET_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_DECODER_INIT
|
||||
#define DECODER_INIT_FUNC(encoding) encoding##_decode_init
|
||||
#else
|
||||
#define DECODER_INIT_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_DECODER_RESET
|
||||
#define DECODER_RESET_FUNC(encoding) encoding##_decode_reset
|
||||
#else
|
||||
#define DECODER_RESET_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
#define BEGIN_CODEC_REGISTRY(encoding) \
|
||||
__BEGIN_CODEC_REGISTRY(encoding, init_codecs_##encoding##_strict)
|
||||
#else
|
||||
#define BEGIN_CODEC_REGISTRY(encoding) \
|
||||
__BEGIN_CODEC_REGISTRY(encoding, init_codecs_##encoding)
|
||||
#endif
|
||||
|
||||
#define __BEGIN_CODEC_REGISTRY(encoding, initname) \
|
||||
static MultibyteCodec __codec = { \
|
||||
#encoding STRICT_SUFX, \
|
||||
encoding##_encode, \
|
||||
ENCODER_INIT_FUNC(encoding), \
|
||||
ENCODER_RESET_FUNC(encoding), \
|
||||
encoding##_decode, \
|
||||
DECODER_INIT_FUNC(encoding), \
|
||||
DECODER_RESET_FUNC(encoding), \
|
||||
}; \
|
||||
\
|
||||
static struct PyMethodDef __methods[] = { \
|
||||
{NULL, NULL}, \
|
||||
}; \
|
||||
\
|
||||
void \
|
||||
initname(void) \
|
||||
{ \
|
||||
PyObject *codec; \
|
||||
PyObject *m = NULL, *mod = NULL, *o = NULL; \
|
||||
\
|
||||
m = Py_InitModule("_codecs_" #encoding STRICT_SUFX, __methods);
|
||||
|
||||
#define MAPOPEN(locale) \
|
||||
mod = PyImport_ImportModule("_codecs_mapdata_" #locale);\
|
||||
if (mod == NULL) goto errorexit; \
|
||||
if (
|
||||
#define IMPORTMAP_ENCDEC(charset) \
|
||||
importmap(mod, "__map_" #charset, &charset##encmap, \
|
||||
&charset##decmap) ||
|
||||
#define IMPORTMAP_ENC(charset) \
|
||||
importmap(mod, "__map_" #charset, &charset##encmap, \
|
||||
NULL) ||
|
||||
#define IMPORTMAP_DEC(charset) \
|
||||
importmap(mod, "__map_" #charset, NULL, \
|
||||
&charset##decmap) ||
|
||||
#define MAPCLOSE() \
|
||||
0) goto errorexit; \
|
||||
Py_DECREF(mod);
|
||||
|
||||
/*
 * Closes the init function opened by BEGIN_CODEC_REGISTRY.
 *
 * Looks up `__create_codec` in the `_multibytecodec` module, calls it (via
 * createcodec) with the static `__codec` descriptor, and stores the result
 * in the new module as the attribute "codec".  If an exception is pending
 * after that, initialization is aborted with Py_FatalError().  `encoding`
 * is used only to build the fatal error message.
 *
 * The `errorexit` label releases only the references this function owns
 * (`mod` and `o`).  `m` comes from Py_InitModule(), which returns a borrowed
 * reference, so it must not be released here.
 */
#define END_CODEC_REGISTRY(encoding)                                    \
        mod = PyImport_ImportModule("_multibytecodec");                 \
        if (mod == NULL)                                                \
            goto errorexit;                                             \
        o = PyObject_GetAttrString(mod, "__create_codec");              \
        if (o == NULL || !PyCallable_Check(o))                          \
            goto errorexit;                                             \
                                                                        \
        codec = createcodec(o, &__codec);                               \
        if (codec == NULL)                                              \
            goto errorexit;                                             \
        PyModule_AddObject(m, "codec", codec);                          \
        Py_DECREF(o); Py_DECREF(mod);                                   \
                                                                        \
        if (PyErr_Occurred())                                           \
            Py_FatalError("can't initialize the _" #encoding            \
                          STRICT_SUFX " module");                       \
                                                                        \
        return;                                                         \
                                                                        \
    errorexit:                                                          \
        Py_XDECREF(mod);                                                \
        Py_XDECREF(o);                                                  \
    }
|
||||
|
||||
/* Complete registry with nothing between BEGIN_CODEC_REGISTRY and
 * END_CODEC_REGISTRY, i.e. no MAPOPEN/MAPCLOSE groups. */
#define CODEC_REGISTRY(encoding)                                        \
    BEGIN_CODEC_REGISTRY(encoding)                                      \
    END_CODEC_REGISTRY(encoding)
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
|
||||
static DBCHAR
|
||||
find_pairencmap(ucs2_t body, ucs2_t modifier,
|
||||
struct pair_encodemap *haystack, int haystacksize)
|
||||
{
|
||||
int pos, min, max;
|
||||
ucs4_t value = body << 16 | modifier;
|
||||
|
||||
min = 0;
|
||||
max = haystacksize;
|
||||
|
||||
for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
|
||||
if (value < haystack[pos].uniseq) {
|
||||
if (max == pos) break;
|
||||
else max = pos;
|
||||
} else if (value > haystack[pos].uniseq) {
|
||||
if (min == pos) break;
|
||||
else min = pos;
|
||||
} else
|
||||
break;
|
||||
|
||||
if (value == haystack[pos].uniseq)
|
||||
return haystack[pos].code;
|
||||
else
|
||||
return DBCINV;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef CODEC_WITHOUT_MAPS
|
||||
static int
|
||||
importmap(PyObject *mod, const char *symbol,
|
||||
const struct unim_index **encmap, const struct dbcs_index **decmap)
|
||||
{
|
||||
PyObject *o;
|
||||
|
||||
o = PyObject_GetAttrString(mod, (char*)symbol);
|
||||
if (o == NULL)
|
||||
return -1;
|
||||
else if (!PyCObject_Check(o)) {
|
||||
PyErr_SetString(PyExc_ValueError, "map data must be a CObject.");
|
||||
return -1;
|
||||
} else {
|
||||
struct dbcs_map *map;
|
||||
map = PyCObject_AsVoidPtr(o);
|
||||
if (encmap != NULL)
|
||||
*encmap = map->encmap;
|
||||
if (decmap != NULL)
|
||||
*decmap = map->decmap;
|
||||
Py_DECREF(o);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static PyObject *
|
||||
createcodec(PyObject *cofunc, MultibyteCodec *codec)
|
||||
{
|
||||
PyObject *args, *r;
|
||||
|
||||
args = PyTuple_New(1);
|
||||
if (args == NULL) return NULL;
|
||||
PyTuple_SET_ITEM(args, 0, PyCObject_FromVoidPtr(codec, NULL));
|
||||
|
||||
r = PyObject_CallObject(cofunc, args);
|
||||
Py_DECREF(args);
|
||||
|
||||
return r;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/* $CJKCodecs: emu_jisx0213_2000.h,v 1.3 2004/07/08 02:53:37 perky Exp $ */
|
||||
|
||||
/* These routines may be quite inefficient, but they are used only to
|
||||
 * emulate old standards. */
|
||||
|
||||
/* Value returned by the encode hooks below when a character is rejected.
 * It can be overridden by defining it before this point. */
#if !defined(EMULATE_JISX0213_2000_ENCODE_INVALID)
#  define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif

/*
 * Encode hook for BMP characters.  It has an effect only when the
 * enclosing function's `config` equals (void *)2000:
 *   - the ten code points listed below make the enclosing function return
 *     EMULATE_JISX0213_2000_ENCODE_INVALID;
 *   - U+9B1D is assigned the code 0x8000 | 0x7d3b.
 * The expansion ends in an `else if` statement, so an `else` may follow
 * the invocation directly.
 */
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c)                       \
    if (config == (void *)2000 && (                                     \
            (c) == 0x9B1C || (c) == 0x4FF1 ||                           \
            (c) == 0x525D || (c) == 0x541E ||                           \
            (c) == 0x5653 || (c) == 0x59F8 ||                           \
            (c) == 0x5C5B || (c) == 0x5E77 ||                           \
            (c) == 0x7626 || (c) == 0x7E6B))                            \
        return EMULATE_JISX0213_2000_ENCODE_INVALID;                    \
    else if (config == (void *)2000 && (c) == 0x9B1D)                   \
        (assi) = 0x8000 | 0x7d3b;

/* Encode hook for characters beyond the BMP: when `config` equals
 * (void *)2000, U+20B9F is rejected.  `assi` is not used. */
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c)                       \
    if (config == (void *)2000 && (c) == 0x20B9F)                       \
        return EMULATE_JISX0213_2000_ENCODE_INVALID;
|
||||
|
||||
/* Value returned by the decode hook below when a byte pair is rejected.
 * It can be overridden by defining it before this point. */
#if !defined(EMULATE_JISX0213_2000_DECODE_INVALID)
#  define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif

/*
 * Decode hook for plane 1.  When the enclosing function's `config` equals
 * (void *)2000, the ten (c1, c2) pairs listed below make the enclosing
 * function return EMULATE_JISX0213_2000_DECODE_INVALID.  `assi` is not
 * used.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2)               \
    if (config == (void *)2000 &&                                       \
            (((c1) == 0x2E && (c2) == 0x21) ||                          \
             ((c1) == 0x2F && (c2) == 0x7E) ||                          \
             ((c1) == 0x4F && (c2) == 0x54) ||                          \
             ((c1) == 0x4F && (c2) == 0x7E) ||                          \
             ((c1) == 0x74 && (c2) == 0x27) ||                          \
             ((c1) == 0x7E && (c2) == 0x7A) ||                          \
             ((c1) == 0x7E && (c2) == 0x7B) ||                          \
             ((c1) == 0x7E && (c2) == 0x7C) ||                          \
             ((c1) == 0x7E && (c2) == 0x7D) ||                          \
             ((c1) == 0x7E && (c2) == 0x7E)))                           \
        return EMULATE_JISX0213_2000_DECODE_INVALID;

/* Decode hook for plane 2: when `config` equals (void *)2000, the pair
 * (0x7D, 0x3B) is assigned U+9B1D. */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2)               \
    if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B)         \
        (assi) = 0x9B1D;
|
|
@ -1,256 +0,0 @@
|
|||
/*
|
||||
* iso2022common.h: Common Codec Routines for ISO-2022 codecs.
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: iso2022common.h,v 1.8 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
/* This ISO-2022 implementation is intended to comply with ECMA-43 Level 1
|
||||
 * rather than with the RFCs themselves. */
|
||||
|
||||
/* Control bytes that drive ISO-2022 state changes. */
#define ESC             0x1b
#define SO              0x0e
#define SI              0x0f

/* Upper bound on the bytes scanned when looking for the end of one escape
 * sequence. */
#define MAX_ESCSEQLEN   16

/* Non-zero when c can terminate an escape sequence: 'A'..'Z' or '@'. */
#define IS_ESCEND(c)                                                    \
    (((c) >= 'A' && (c) <= 'Z') || (c) == '@')

/* Non-zero when c2, the byte after ESC, starts a sequence handled here.
 * This is not a full list of ISO-2022 escape sequence headers, but it is
 * enough to implement the CJK instances of ISO-2022. */
#define IS_ISO2022ESC(c2)                                               \
    ((c2) == '(' || (c2) == ')' || (c2) == '$' ||                       \
     (c2) == '.' || (c2) == '&')
|
||||
|
||||
/* STATE
|
||||
|
||||
state->c[0-3]
|
||||
|
||||
00000000
|
||||
||^^^^^|
|
||||
|+-----+---- G0-3 Character Set
|
||||
+----------- Is G0-3 double byte?
|
||||
|
||||
state->c[4]
|
||||
|
||||
00000000
|
||||
||
|
||||
|+---- Locked-Shift?
|
||||
+----- ESC Throughout
|
||||
*/
|
||||
|
||||
/*
 * Charset codes kept in the G0-G3 state slots.  The low seven bits are an
 * ASCII letter (or '@') identifying the set; bit 7 (CHARSET_DOUBLEBYTE) is
 * set for double-byte sets.
 */
#define CHARSET_DOUBLEBYTE      0x80

/* Single-byte sets. */
#define CHARSET_ASCII           'B'
#define CHARSET_ISO8859_1       'A'
#define CHARSET_ISO8859_7       'F'
#define CHARSET_JISX0201_R      'J'
#define CHARSET_JISX0201_K      'I'

/* Double-byte sets. */
#define CHARSET_KSX1001         (CHARSET_DOUBLEBYTE | 'C')
#define CHARSET_JISX0208        (CHARSET_DOUBLEBYTE | 'B')
#define CHARSET_JISX0208_O      (CHARSET_DOUBLEBYTE | '@')
#define CHARSET_JISX0212        (CHARSET_DOUBLEBYTE | 'D')
#define CHARSET_JISX0213_1      (CHARSET_DOUBLEBYTE | 'O')
#define CHARSET_JISX0213_2      (CHARSET_DOUBLEBYTE | 'P')
#define CHARSET_GB2312          (CHARSET_DOUBLEBYTE | 'A')
#define CHARSET_GB2312_8565     (CHARSET_DOUBLEBYTE | 'E')

/* Split a charset code into its identifying byte and its double-byte bit. */
#define CHARSET_DESIGN(c)       ((c) & 0x7f)
#define CHARSET_ISDBCS(c)       ((c) & 0x80)
|
||||
|
||||
/* Flag bits stored in state->c[4]. */
#define F_SHIFTED               0x01
#define F_ESCTHROUGHOUT         0x02

/*
 * Accessors for the designation slots state->c[0..3] (G0..G3).
 * NOTE: the setter macros end in their own ';'.  Call sites in this file
 * invoke them without a trailing semicolon.
 */
#define STATE_SETG(dn, s, v)    ((s)->c[dn]) = (v);
#define STATE_GETG(dn, s)       ((s)->c[dn])

#define STATE_GETG0(s)          STATE_GETG(0, s)
#define STATE_GETG1(s)          STATE_GETG(1, s)
#define STATE_GETG2(s)          STATE_GETG(2, s)
#define STATE_GETG3(s)          STATE_GETG(3, s)

#define STATE_SETG0(s, v)       STATE_SETG(0, s, v)
#define STATE_SETG1(s, v)       STATE_SETG(1, s, v)
#define STATE_SETG2(s, v)       STATE_SETG(2, s, v)
#define STATE_SETG3(s, v)       STATE_SETG(3, s, v)

/* Accessors for the flag byte state->c[4]; the mutators also end in ';'. */
#define STATE_GETFLAG(s, f)     ((s)->c[4] & (f))
#define STATE_SETFLAG(s, f)     ((s)->c[4]) |= (f);
#define STATE_CLEARFLAG(s, f)   ((s)->c[4]) &= ~(f);
#define STATE_CLEARFLAGS(s)     ((s)->c[4]) = 0;
|
||||
|
||||
/*
 * Select the charset that applies to the input byte c1.
 *
 * Makes the enclosing function return 1 when c1 has its high bit set.
 * Otherwise it stores into `charset` the G1 designation when the F_SHIFTED
 * flag is set, or the G0 designation when it is not.  It reads the
 * enclosing function's `state`.
 *
 * The byte tested is the macro argument c1; the previous definition
 * ignored c1 and tested whatever variable named `c` was in scope at the
 * call site.
 */
#define ISO2022_GETCHARSET(charset, c1)                                 \
    if ((c1) >= 0x80)                                                   \
        return 1;                                                       \
    if (STATE_GETFLAG(state, F_SHIFTED)) /* G1 */                       \
        (charset) = STATE_GETG1(state);                                 \
    else /* G0 */                                                       \
        (charset) = STATE_GETG0(state);
|
||||
|
||||
/*
 * SS2_ROUTINE: optional branch spliced between `else` and the fallback
 * `{ ... }` block of the ESC case in ISO2022_BASECASES.
 *
 * With ISO2022_USE_G2_DESIGNATION defined, it handles "ESC N <byte>" by
 * decoding <byte> with the charset designated to G2 (ISO-8859-1,
 * ISO-8859-7 or ASCII), writing one output unit and consuming three input
 * bytes.  An undecodable byte makes the enclosing function return 3; any
 * other G2 designation returns MBERR_INTERNAL.  Without the option it
 * expands to nothing.
 *
 * Output space is reserved before the decoded unit is stored, matching the
 * other write sites in this header.
 *
 * The G2 handling is hard-coded for iso-2022-jp-2 for now; it will need to
 * be generalized when more encodings designate G2.
 */
#ifdef ISO2022_USE_G2_DESIGNATION
#define SS2_ROUTINE                                                     \
    if (IN2 == 'N') { /* SS2 */                                         \
        RESERVE_INBUF(3)                                                \
        RESERVE_OUTBUF(1)                                               \
        if (STATE_GETG2(state) == CHARSET_ISO8859_1) {                  \
            ISO8859_1_DECODE(IN3 ^ 0x80, **outbuf)                      \
            else return 3;                                              \
        } else if (STATE_GETG2(state) == CHARSET_ISO8859_7) {           \
            ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)                      \
            else return 3;                                              \
        } else if (STATE_GETG2(state) == CHARSET_ASCII) {               \
            if (IN3 & 0x80) return 3;                                   \
            else **outbuf = IN3;                                        \
        } else                                                          \
            return MBERR_INTERNAL;                                      \
        NEXT(3, 1)                                                      \
    } else
#else
#define SS2_ROUTINE
#endif
|
||||
|
||||
/*
 * SHIFT_CASES: `case` labels for the SI and SO control bytes, spliced into
 * the decoder's switch by ISO2022_BASECASES.  SI clears F_SHIFTED and SO
 * sets it; each consumes one input byte.  Defining ISO2022_NO_SHIFT removes
 * both cases (for compatibility with JapaneseCodecs).
 */
#ifdef ISO2022_NO_SHIFT
#  define SHIFT_CASES
#else
#  define SHIFT_CASES                                                   \
    case SI:                                                            \
        STATE_CLEARFLAG(state, F_SHIFTED)                               \
        NEXT_IN(1)                                                      \
        break;                                                          \
    case SO:                                                            \
        STATE_SETFLAG(state, F_SHIFTED)                                 \
        NEXT_IN(1)                                                      \
        break;
#endif
|
||||
|
||||
/*
 * `case` labels shared by the ISO-2022 decoders: ESC, the optional SI/SO
 * cases from SHIFT_CASES, and '\n'.
 *
 * ESC: needs at least two input bytes.  A recognized designation header is
 * handed to iso2022processesc(), and a non-zero result is returned to the
 * caller.  Otherwise, after the optional SS2 branch, the ESC is copied to
 * the output and F_ESCTHROUGHOUT is set so that the following bytes are
 * passed through by ISO2022_ESCTHROUGHOUT.
 *
 * '\n': clears F_SHIFTED and copies the newline to the output.
 *
 * Output space is reserved before the ESC is stored, and before the flag is
 * set, so a full output buffer neither overruns nor changes the state.  The
 * parameter c1 is not used.
 */
#define ISO2022_BASECASES(c1)                                           \
    case ESC:                                                           \
        RESERVE_INBUF(2)                                                \
        if (IS_ISO2022ESC(IN2)) {                                       \
            int err;                                                    \
            err = iso2022processesc(state, inbuf, &inleft);             \
            if (err != 0)                                               \
                return err;                                             \
        } else SS2_ROUTINE {                                            \
            RESERVE_OUTBUF(1)                                           \
            STATE_SETFLAG(state, F_ESCTHROUGHOUT)                       \
            OUT1(ESC)                                                   \
            NEXT(1, 1)                                                  \
        }                                                               \
        break;                                                          \
    SHIFT_CASES                                                         \
    case '\n':                                                          \
        STATE_CLEARFLAG(state, F_SHIFTED)                               \
        WRITE1('\n')                                                    \
        NEXT(1, 1)                                                      \
        break;
|
||||
|
||||
/*
 * Pass-through for escape sequences that are not ISO-2022 designations.
 * While F_ESCTHROUGHOUT is set, each byte c is copied to the output
 * unchanged (treated as ISO-8859-1) and the enclosing loop continues.  The
 * flag is cleared once a byte satisfying IS_ESCEND has been copied.
 */
#define ISO2022_ESCTHROUGHOUT(c)                                        \
    if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) {                        \
        RESERVE_OUTBUF(1)                                               \
        OUT1(c)                                                         \
        NEXT(1, 1)                                                      \
        if (IS_ESCEND(c)) {                                             \
            STATE_CLEARFLAG(state, F_ESCTHROUGHOUT)                     \
        }                                                               \
        continue;                                                       \
    }
|
||||
|
||||
/*
 * Skeleton of an ISO-2022 decode loop.
 *
 * ISO2022_LOOP_BEGIN opens `while (inleft > 0)`, reads the next byte into
 * `c`, applies the ESC pass-through, and switches on `c` with the shared
 * base cases.  In the default case, C0 control bytes (< 0x20) are copied
 * to the output, bytes >= 0x80 make the enclosing function return 1, and
 * everything else falls into an `else {` block that the codec fills in.
 * ISO2022_LOOP_END closes that else block, the switch, and the while loop.
 */
#define ISO2022_LOOP_BEGIN                                              \
    while (inleft > 0) {                                                \
        unsigned char c = IN1;                                          \
        ISO2022_ESCTHROUGHOUT(c)                                        \
        switch(c) {                                                     \
        ISO2022_BASECASES(c)                                            \
        default:                                                        \
            if (c < 0x20) { /* C0 */                                    \
                RESERVE_OUTBUF(1)                                       \
                OUT1(c)                                                 \
                NEXT(1, 1)                                              \
            } else if (c >= 0x80)                                       \
                return 1;                                               \
            else {

#define ISO2022_LOOP_END                                                \
            }                                                           \
        }                                                               \
    }
|
||||
|
||||
static int
|
||||
iso2022processesc(MultibyteCodec_State *state,
|
||||
const unsigned char **inbuf, size_t *inleft)
|
||||
{
|
||||
unsigned char charset, designation;
|
||||
size_t i, esclen;
|
||||
|
||||
for (i = 1;i < MAX_ESCSEQLEN;i++) {
|
||||
if (i >= *inleft)
|
||||
return MBERR_TOOFEW;
|
||||
if (IS_ESCEND((*inbuf)[i])) {
|
||||
esclen = i + 1;
|
||||
break;
|
||||
}
|
||||
#ifdef ISO2022_USE_JISX0208EXT
|
||||
else if (i+1 < *inleft && (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
|
||||
i += 2;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (i >= MAX_ESCSEQLEN)
|
||||
return 1; /* unterminated escape sequence */
|
||||
|
||||
switch (esclen) {
|
||||
case 3:
|
||||
if (IN2 == '$') {
|
||||
charset = IN3 | CHARSET_DOUBLEBYTE;
|
||||
designation = 0;
|
||||
} else {
|
||||
charset = IN3;
|
||||
if (IN2 == '(') designation = 0;
|
||||
else if (IN2 == ')') designation = 1;
|
||||
#ifdef ISO2022_USE_G2_DESIGNATION
|
||||
else if (IN2 == '.') designation = 2;
|
||||
#endif
|
||||
else return 3;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (IN2 != '$')
|
||||
return 4;
|
||||
|
||||
charset = IN4 | CHARSET_DOUBLEBYTE;
|
||||
if (IN3 == '(') designation = 0;
|
||||
else if (IN3 == ')') designation = 1;
|
||||
else return 4;
|
||||
break;
|
||||
#ifdef ISO2022_USE_JISX0208EXT
|
||||
case 6: /* designation with prefix */
|
||||
if ((*inbuf)[3] == ESC && (*inbuf)[4] == '$' && (*inbuf)[5] == 'B') {
|
||||
charset = 'B' | CHARSET_DOUBLEBYTE;
|
||||
designation = 0;
|
||||
} else
|
||||
return 6;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return esclen;
|
||||
}
|
||||
|
||||
{ /* raise error when the charset is not designated for this encoding */
|
||||
const unsigned char dsgs[] = {ISO2022_DESIGNATIONS, '\x00'};
|
||||
|
||||
for (i = 0; dsgs[i] != '\x00'; i++)
|
||||
if (dsgs[i] == charset)
|
||||
break;
|
||||
|
||||
if (dsgs[i] == '\x00')
|
||||
return esclen;
|
||||
}
|
||||
|
||||
STATE_SETG(designation, state, charset)
|
||||
*inleft -= esclen;
|
||||
(*inbuf) += esclen;
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,630 +0,0 @@
|
|||
/*
|
||||
* $CJKCodecs: map_cp950ext.h,v 1.1.1.1 2003/09/24 17:45:47 perky Exp $
|
||||
*/
|
||||
|
||||
static const ucs2_t __cp950ext_decmap[224] = {
|
||||
0x2027, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, 0xfe51, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x00af, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0xff5e, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x2295, 0x2299, 0x2215,
|
||||
0xfe68, UNIINV, 0xffe5, UNIINV, 0xffe0, 0xffe1, 0x20ac, 0x7881,
|
||||
0x92b9, 0x88cf, 0x58bb, 0x6052, 0x7ca7, 0x5afa, 0x2554, 0x2566,
|
||||
0x2557, 0x2560, 0x256c, 0x2563, 0x255a, 0x2569, 0x255d, 0x2552,
|
||||
0x2564, 0x2555, 0x255e, 0x256a, 0x2561, 0x2558, 0x2567, 0x255b,
|
||||
0x2553, 0x2565, 0x2556, 0x255f, 0x256b, 0x2562, 0x2559, 0x2568,
|
||||
0x255c, 0x2551, 0x2550, 0x256d, 0x256e, 0x2570, 0x256f, 0x2593,
|
||||
};
|
||||
|
||||
static const struct dbcs_index cp950ext_decmap[256] = {
|
||||
/* 0x00 */ {0, 0, 0},
|
||||
/* 0x01 */ {0, 0, 0},
|
||||
/* 0x02 */ {0, 0, 0},
|
||||
/* 0x03 */ {0, 0, 0},
|
||||
/* 0x04 */ {0, 0, 0},
|
||||
/* 0x05 */ {0, 0, 0},
|
||||
/* 0x06 */ {0, 0, 0},
|
||||
/* 0x07 */ {0, 0, 0},
|
||||
/* 0x08 */ {0, 0, 0},
|
||||
/* 0x09 */ {0, 0, 0},
|
||||
/* 0x0A */ {0, 0, 0},
|
||||
/* 0x0B */ {0, 0, 0},
|
||||
/* 0x0C */ {0, 0, 0},
|
||||
/* 0x0D */ {0, 0, 0},
|
||||
/* 0x0E */ {0, 0, 0},
|
||||
/* 0x0F */ {0, 0, 0},
|
||||
/* 0x10 */ {0, 0, 0},
|
||||
/* 0x11 */ {0, 0, 0},
|
||||
/* 0x12 */ {0, 0, 0},
|
||||
/* 0x13 */ {0, 0, 0},
|
||||
/* 0x14 */ {0, 0, 0},
|
||||
/* 0x15 */ {0, 0, 0},
|
||||
/* 0x16 */ {0, 0, 0},
|
||||
/* 0x17 */ {0, 0, 0},
|
||||
/* 0x18 */ {0, 0, 0},
|
||||
/* 0x19 */ {0, 0, 0},
|
||||
/* 0x1A */ {0, 0, 0},
|
||||
/* 0x1B */ {0, 0, 0},
|
||||
/* 0x1C */ {0, 0, 0},
|
||||
/* 0x1D */ {0, 0, 0},
|
||||
/* 0x1E */ {0, 0, 0},
|
||||
/* 0x1F */ {0, 0, 0},
|
||||
/* 0x20 */ {0, 0, 0},
|
||||
/* 0x21 */ {0, 0, 0},
|
||||
/* 0x22 */ {0, 0, 0},
|
||||
/* 0x23 */ {0, 0, 0},
|
||||
/* 0x24 */ {0, 0, 0},
|
||||
/* 0x25 */ {0, 0, 0},
|
||||
/* 0x26 */ {0, 0, 0},
|
||||
/* 0x27 */ {0, 0, 0},
|
||||
/* 0x28 */ {0, 0, 0},
|
||||
/* 0x29 */ {0, 0, 0},
|
||||
/* 0x2A */ {0, 0, 0},
|
||||
/* 0x2B */ {0, 0, 0},
|
||||
/* 0x2C */ {0, 0, 0},
|
||||
/* 0x2D */ {0, 0, 0},
|
||||
/* 0x2E */ {0, 0, 0},
|
||||
/* 0x2F */ {0, 0, 0},
|
||||
/* 0x30 */ {0, 0, 0},
|
||||
/* 0x31 */ {0, 0, 0},
|
||||
/* 0x32 */ {0, 0, 0},
|
||||
/* 0x33 */ {0, 0, 0},
|
||||
/* 0x34 */ {0, 0, 0},
|
||||
/* 0x35 */ {0, 0, 0},
|
||||
/* 0x36 */ {0, 0, 0},
|
||||
/* 0x37 */ {0, 0, 0},
|
||||
/* 0x38 */ {0, 0, 0},
|
||||
/* 0x39 */ {0, 0, 0},
|
||||
/* 0x3A */ {0, 0, 0},
|
||||
/* 0x3B */ {0, 0, 0},
|
||||
/* 0x3C */ {0, 0, 0},
|
||||
/* 0x3D */ {0, 0, 0},
|
||||
/* 0x3E */ {0, 0, 0},
|
||||
/* 0x3F */ {0, 0, 0},
|
||||
/* 0x40 */ {0, 0, 0},
|
||||
/* 0x41 */ {0, 0, 0},
|
||||
/* 0x42 */ {0, 0, 0},
|
||||
/* 0x43 */ {0, 0, 0},
|
||||
/* 0x44 */ {0, 0, 0},
|
||||
/* 0x45 */ {0, 0, 0},
|
||||
/* 0x46 */ {0, 0, 0},
|
||||
/* 0x47 */ {0, 0, 0},
|
||||
/* 0x48 */ {0, 0, 0},
|
||||
/* 0x49 */ {0, 0, 0},
|
||||
/* 0x4A */ {0, 0, 0},
|
||||
/* 0x4B */ {0, 0, 0},
|
||||
/* 0x4C */ {0, 0, 0},
|
||||
/* 0x4D */ {0, 0, 0},
|
||||
/* 0x4E */ {0, 0, 0},
|
||||
/* 0x4F */ {0, 0, 0},
|
||||
/* 0x50 */ {0, 0, 0},
|
||||
/* 0x51 */ {0, 0, 0},
|
||||
/* 0x52 */ {0, 0, 0},
|
||||
/* 0x53 */ {0, 0, 0},
|
||||
/* 0x54 */ {0, 0, 0},
|
||||
/* 0x55 */ {0, 0, 0},
|
||||
/* 0x56 */ {0, 0, 0},
|
||||
/* 0x57 */ {0, 0, 0},
|
||||
/* 0x58 */ {0, 0, 0},
|
||||
/* 0x59 */ {0, 0, 0},
|
||||
/* 0x5A */ {0, 0, 0},
|
||||
/* 0x5B */ {0, 0, 0},
|
||||
/* 0x5C */ {0, 0, 0},
|
||||
/* 0x5D */ {0, 0, 0},
|
||||
/* 0x5E */ {0, 0, 0},
|
||||
/* 0x5F */ {0, 0, 0},
|
||||
/* 0x60 */ {0, 0, 0},
|
||||
/* 0x61 */ {0, 0, 0},
|
||||
/* 0x62 */ {0, 0, 0},
|
||||
/* 0x63 */ {0, 0, 0},
|
||||
/* 0x64 */ {0, 0, 0},
|
||||
/* 0x65 */ {0, 0, 0},
|
||||
/* 0x66 */ {0, 0, 0},
|
||||
/* 0x67 */ {0, 0, 0},
|
||||
/* 0x68 */ {0, 0, 0},
|
||||
/* 0x69 */ {0, 0, 0},
|
||||
/* 0x6A */ {0, 0, 0},
|
||||
/* 0x6B */ {0, 0, 0},
|
||||
/* 0x6C */ {0, 0, 0},
|
||||
/* 0x6D */ {0, 0, 0},
|
||||
/* 0x6E */ {0, 0, 0},
|
||||
/* 0x6F */ {0, 0, 0},
|
||||
/* 0x70 */ {0, 0, 0},
|
||||
/* 0x71 */ {0, 0, 0},
|
||||
/* 0x72 */ {0, 0, 0},
|
||||
/* 0x73 */ {0, 0, 0},
|
||||
/* 0x74 */ {0, 0, 0},
|
||||
/* 0x75 */ {0, 0, 0},
|
||||
/* 0x76 */ {0, 0, 0},
|
||||
/* 0x77 */ {0, 0, 0},
|
||||
/* 0x78 */ {0, 0, 0},
|
||||
/* 0x79 */ {0, 0, 0},
|
||||
/* 0x7A */ {0, 0, 0},
|
||||
/* 0x7B */ {0, 0, 0},
|
||||
/* 0x7C */ {0, 0, 0},
|
||||
/* 0x7D */ {0, 0, 0},
|
||||
/* 0x7E */ {0, 0, 0},
|
||||
/* 0x7F */ {0, 0, 0},
|
||||
/* 0x80 */ {0, 0, 0},
|
||||
/* 0x81 */ {0, 0, 0},
|
||||
/* 0x82 */ {0, 0, 0},
|
||||
/* 0x83 */ {0, 0, 0},
|
||||
/* 0x84 */ {0, 0, 0},
|
||||
/* 0x85 */ {0, 0, 0},
|
||||
/* 0x86 */ {0, 0, 0},
|
||||
/* 0x87 */ {0, 0, 0},
|
||||
/* 0x88 */ {0, 0, 0},
|
||||
/* 0x89 */ {0, 0, 0},
|
||||
/* 0x8A */ {0, 0, 0},
|
||||
/* 0x8B */ {0, 0, 0},
|
||||
/* 0x8C */ {0, 0, 0},
|
||||
/* 0x8D */ {0, 0, 0},
|
||||
/* 0x8E */ {0, 0, 0},
|
||||
/* 0x8F */ {0, 0, 0},
|
||||
/* 0x90 */ {0, 0, 0},
|
||||
/* 0x91 */ {0, 0, 0},
|
||||
/* 0x92 */ {0, 0, 0},
|
||||
/* 0x93 */ {0, 0, 0},
|
||||
/* 0x94 */ {0, 0, 0},
|
||||
/* 0x95 */ {0, 0, 0},
|
||||
/* 0x96 */ {0, 0, 0},
|
||||
/* 0x97 */ {0, 0, 0},
|
||||
/* 0x98 */ {0, 0, 0},
|
||||
/* 0x99 */ {0, 0, 0},
|
||||
/* 0x9A */ {0, 0, 0},
|
||||
/* 0x9B */ {0, 0, 0},
|
||||
/* 0x9C */ {0, 0, 0},
|
||||
/* 0x9D */ {0, 0, 0},
|
||||
/* 0x9E */ {0, 0, 0},
|
||||
/* 0x9F */ {0, 0, 0},
|
||||
/* 0xA0 */ {0, 0, 0},
|
||||
/* 0xA1 */ {__cp950ext_decmap+0, 0x45, 0xf3},
|
||||
/* 0xA2 */ {__cp950ext_decmap+175, 0x41, 0x47},
|
||||
/* 0xA3 */ {__cp950ext_decmap+182, 0xe1, 0xe1},
|
||||
/* 0xA4 */ {0, 0, 0},
|
||||
/* 0xA5 */ {0, 0, 0},
|
||||
/* 0xA6 */ {0, 0, 0},
|
||||
/* 0xA7 */ {0, 0, 0},
|
||||
/* 0xA8 */ {0, 0, 0},
|
||||
/* 0xA9 */ {0, 0, 0},
|
||||
/* 0xAA */ {0, 0, 0},
|
||||
/* 0xAB */ {0, 0, 0},
|
||||
/* 0xAC */ {0, 0, 0},
|
||||
/* 0xAD */ {0, 0, 0},
|
||||
/* 0xAE */ {0, 0, 0},
|
||||
/* 0xAF */ {0, 0, 0},
|
||||
/* 0xB0 */ {0, 0, 0},
|
||||
/* 0xB1 */ {0, 0, 0},
|
||||
/* 0xB2 */ {0, 0, 0},
|
||||
/* 0xB3 */ {0, 0, 0},
|
||||
/* 0xB4 */ {0, 0, 0},
|
||||
/* 0xB5 */ {0, 0, 0},
|
||||
/* 0xB6 */ {0, 0, 0},
|
||||
/* 0xB7 */ {0, 0, 0},
|
||||
/* 0xB8 */ {0, 0, 0},
|
||||
/* 0xB9 */ {0, 0, 0},
|
||||
/* 0xBA */ {0, 0, 0},
|
||||
/* 0xBB */ {0, 0, 0},
|
||||
/* 0xBC */ {0, 0, 0},
|
||||
/* 0xBD */ {0, 0, 0},
|
||||
/* 0xBE */ {0, 0, 0},
|
||||
/* 0xBF */ {0, 0, 0},
|
||||
/* 0xC0 */ {0, 0, 0},
|
||||
/* 0xC1 */ {0, 0, 0},
|
||||
/* 0xC2 */ {0, 0, 0},
|
||||
/* 0xC3 */ {0, 0, 0},
|
||||
/* 0xC4 */ {0, 0, 0},
|
||||
/* 0xC5 */ {0, 0, 0},
|
||||
/* 0xC6 */ {0, 0, 0},
|
||||
/* 0xC7 */ {0, 0, 0},
|
||||
/* 0xC8 */ {0, 0, 0},
|
||||
/* 0xC9 */ {0, 0, 0},
|
||||
/* 0xCA */ {0, 0, 0},
|
||||
/* 0xCB */ {0, 0, 0},
|
||||
/* 0xCC */ {0, 0, 0},
|
||||
/* 0xCD */ {0, 0, 0},
|
||||
/* 0xCE */ {0, 0, 0},
|
||||
/* 0xCF */ {0, 0, 0},
|
||||
/* 0xD0 */ {0, 0, 0},
|
||||
/* 0xD1 */ {0, 0, 0},
|
||||
/* 0xD2 */ {0, 0, 0},
|
||||
/* 0xD3 */ {0, 0, 0},
|
||||
/* 0xD4 */ {0, 0, 0},
|
||||
/* 0xD5 */ {0, 0, 0},
|
||||
/* 0xD6 */ {0, 0, 0},
|
||||
/* 0xD7 */ {0, 0, 0},
|
||||
/* 0xD8 */ {0, 0, 0},
|
||||
/* 0xD9 */ {0, 0, 0},
|
||||
/* 0xDA */ {0, 0, 0},
|
||||
/* 0xDB */ {0, 0, 0},
|
||||
/* 0xDC */ {0, 0, 0},
|
||||
/* 0xDD */ {0, 0, 0},
|
||||
/* 0xDE */ {0, 0, 0},
|
||||
/* 0xDF */ {0, 0, 0},
|
||||
/* 0xE0 */ {0, 0, 0},
|
||||
/* 0xE1 */ {0, 0, 0},
|
||||
/* 0xE2 */ {0, 0, 0},
|
||||
/* 0xE3 */ {0, 0, 0},
|
||||
/* 0xE4 */ {0, 0, 0},
|
||||
/* 0xE5 */ {0, 0, 0},
|
||||
/* 0xE6 */ {0, 0, 0},
|
||||
/* 0xE7 */ {0, 0, 0},
|
||||
/* 0xE8 */ {0, 0, 0},
|
||||
/* 0xE9 */ {0, 0, 0},
|
||||
/* 0xEA */ {0, 0, 0},
|
||||
/* 0xEB */ {0, 0, 0},
|
||||
/* 0xEC */ {0, 0, 0},
|
||||
/* 0xED */ {0, 0, 0},
|
||||
/* 0xEE */ {0, 0, 0},
|
||||
/* 0xEF */ {0, 0, 0},
|
||||
/* 0xF0 */ {0, 0, 0},
|
||||
/* 0xF1 */ {0, 0, 0},
|
||||
/* 0xF2 */ {0, 0, 0},
|
||||
/* 0xF3 */ {0, 0, 0},
|
||||
/* 0xF4 */ {0, 0, 0},
|
||||
/* 0xF5 */ {0, 0, 0},
|
||||
/* 0xF6 */ {0, 0, 0},
|
||||
/* 0xF7 */ {0, 0, 0},
|
||||
/* 0xF8 */ {0, 0, 0},
|
||||
/* 0xF9 */ {__cp950ext_decmap+183, 0xd6, 0xfe},
|
||||
/* 0xFA */ {0, 0, 0},
|
||||
/* 0xFB */ {0, 0, 0},
|
||||
/* 0xFC */ {0, 0, 0},
|
||||
/* 0xFD */ {0, 0, 0},
|
||||
/* 0xFE */ {0, 0, 0},
|
||||
/* 0xFF */ {0, 0, 0},
|
||||
};
|
||||
|
||||
static const DBCHAR __cp950ext_encmap[581] = {
|
||||
0xa1c2, 0xa145, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa3e1, 0xa241,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1f2,
|
||||
NOCHAR, NOCHAR, NOCHAR, 0xa1f3, 0xf9f8, 0xf9e6, 0xf9ef, 0xf9dd,
|
||||
0xf9e8, 0xf9f1, 0xf9df, 0xf9ec, 0xf9f5, 0xf9e3, 0xf9ee, 0xf9f7,
|
||||
0xf9e5, NOCHAR, 0xf9f2, 0xf9e0, NOCHAR, 0xf9f4, 0xf9e2, 0xf9e7,
|
||||
0xf9f0, 0xf9de, 0xf9ed, 0xf9f6, 0xf9e4, NOCHAR, 0xf9f3, 0xf9e1,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xf9fe, 0xf9d9,
|
||||
0xf9dc, 0xf9da, 0xf9d6, 0xf9db, 0xf9d8, 0xf9d7, 0xa14e, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa242, 0xa1fe, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, 0xa240, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1e3, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa246,
|
||||
0xa247, NOCHAR, NOCHAR, NOCHAR, 0xa244,
|
||||
};
|
||||
|
||||
static const struct unim_index cp950ext_encmap[256] = {
|
||||
/* 0x00 */ {__cp950ext_encmap+0, 0xaf, 0xaf},
|
||||
/* 0x01 */ {0, 0, 0},
|
||||
/* 0x02 */ {0, 0, 0},
|
||||
/* 0x03 */ {0, 0, 0},
|
||||
/* 0x04 */ {0, 0, 0},
|
||||
/* 0x05 */ {0, 0, 0},
|
||||
/* 0x06 */ {0, 0, 0},
|
||||
/* 0x07 */ {0, 0, 0},
|
||||
/* 0x08 */ {0, 0, 0},
|
||||
/* 0x09 */ {0, 0, 0},
|
||||
/* 0x0A */ {0, 0, 0},
|
||||
/* 0x0B */ {0, 0, 0},
|
||||
/* 0x0C */ {0, 0, 0},
|
||||
/* 0x0D */ {0, 0, 0},
|
||||
/* 0x0E */ {0, 0, 0},
|
||||
/* 0x0F */ {0, 0, 0},
|
||||
/* 0x10 */ {0, 0, 0},
|
||||
/* 0x11 */ {0, 0, 0},
|
||||
/* 0x12 */ {0, 0, 0},
|
||||
/* 0x13 */ {0, 0, 0},
|
||||
/* 0x14 */ {0, 0, 0},
|
||||
/* 0x15 */ {0, 0, 0},
|
||||
/* 0x16 */ {0, 0, 0},
|
||||
/* 0x17 */ {0, 0, 0},
|
||||
/* 0x18 */ {0, 0, 0},
|
||||
/* 0x19 */ {0, 0, 0},
|
||||
/* 0x1A */ {0, 0, 0},
|
||||
/* 0x1B */ {0, 0, 0},
|
||||
/* 0x1C */ {0, 0, 0},
|
||||
/* 0x1D */ {0, 0, 0},
|
||||
/* 0x1E */ {0, 0, 0},
|
||||
/* 0x1F */ {0, 0, 0},
|
||||
/* 0x20 */ {__cp950ext_encmap+1, 0x27, 0xac},
|
||||
/* 0x21 */ {0, 0, 0},
|
||||
/* 0x22 */ {__cp950ext_encmap+135, 0x15, 0x99},
|
||||
/* 0x23 */ {0, 0, 0},
|
||||
/* 0x24 */ {0, 0, 0},
|
||||
/* 0x25 */ {__cp950ext_encmap+268, 0x51, 0x93},
|
||||
/* 0x26 */ {0, 0, 0},
|
||||
/* 0x27 */ {0, 0, 0},
|
||||
/* 0x28 */ {0, 0, 0},
|
||||
/* 0x29 */ {0, 0, 0},
|
||||
/* 0x2A */ {0, 0, 0},
|
||||
/* 0x2B */ {0, 0, 0},
|
||||
/* 0x2C */ {0, 0, 0},
|
||||
/* 0x2D */ {0, 0, 0},
|
||||
/* 0x2E */ {0, 0, 0},
|
||||
/* 0x2F */ {0, 0, 0},
|
||||
/* 0x30 */ {0, 0, 0},
|
||||
/* 0x31 */ {0, 0, 0},
|
||||
/* 0x32 */ {0, 0, 0},
|
||||
/* 0x33 */ {0, 0, 0},
|
||||
/* 0x34 */ {0, 0, 0},
|
||||
/* 0x35 */ {0, 0, 0},
|
||||
/* 0x36 */ {0, 0, 0},
|
||||
/* 0x37 */ {0, 0, 0},
|
||||
/* 0x38 */ {0, 0, 0},
|
||||
/* 0x39 */ {0, 0, 0},
|
||||
/* 0x3A */ {0, 0, 0},
|
||||
/* 0x3B */ {0, 0, 0},
|
||||
/* 0x3C */ {0, 0, 0},
|
||||
/* 0x3D */ {0, 0, 0},
|
||||
/* 0x3E */ {0, 0, 0},
|
||||
/* 0x3F */ {0, 0, 0},
|
||||
/* 0x40 */ {0, 0, 0},
|
||||
/* 0x41 */ {0, 0, 0},
|
||||
/* 0x42 */ {0, 0, 0},
|
||||
/* 0x43 */ {0, 0, 0},
|
||||
/* 0x44 */ {0, 0, 0},
|
||||
/* 0x45 */ {0, 0, 0},
|
||||
/* 0x46 */ {0, 0, 0},
|
||||
/* 0x47 */ {0, 0, 0},
|
||||
/* 0x48 */ {0, 0, 0},
|
||||
/* 0x49 */ {0, 0, 0},
|
||||
/* 0x4A */ {0, 0, 0},
|
||||
/* 0x4B */ {0, 0, 0},
|
||||
/* 0x4C */ {0, 0, 0},
|
||||
/* 0x4D */ {0, 0, 0},
|
||||
/* 0x4E */ {0, 0, 0},
|
||||
/* 0x4F */ {0, 0, 0},
|
||||
/* 0x50 */ {0, 0, 0},
|
||||
/* 0x51 */ {0, 0, 0},
|
||||
/* 0x52 */ {0, 0, 0},
|
||||
/* 0x53 */ {0, 0, 0},
|
||||
/* 0x54 */ {0, 0, 0},
|
||||
/* 0x55 */ {0, 0, 0},
|
||||
/* 0x56 */ {0, 0, 0},
|
||||
/* 0x57 */ {0, 0, 0},
|
||||
/* 0x58 */ {__cp950ext_encmap+335, 0xbb, 0xbb},
|
||||
/* 0x59 */ {0, 0, 0},
|
||||
/* 0x5A */ {__cp950ext_encmap+336, 0xfa, 0xfa},
|
||||
/* 0x5B */ {0, 0, 0},
|
||||
/* 0x5C */ {0, 0, 0},
|
||||
/* 0x5D */ {0, 0, 0},
|
||||
/* 0x5E */ {0, 0, 0},
|
||||
/* 0x5F */ {0, 0, 0},
|
||||
/* 0x60 */ {__cp950ext_encmap+337, 0x52, 0x52},
|
||||
/* 0x61 */ {0, 0, 0},
|
||||
/* 0x62 */ {0, 0, 0},
|
||||
/* 0x63 */ {0, 0, 0},
|
||||
/* 0x64 */ {0, 0, 0},
|
||||
/* 0x65 */ {0, 0, 0},
|
||||
/* 0x66 */ {0, 0, 0},
|
||||
/* 0x67 */ {0, 0, 0},
|
||||
/* 0x68 */ {0, 0, 0},
|
||||
/* 0x69 */ {0, 0, 0},
|
||||
/* 0x6A */ {0, 0, 0},
|
||||
/* 0x6B */ {0, 0, 0},
|
||||
/* 0x6C */ {0, 0, 0},
|
||||
/* 0x6D */ {0, 0, 0},
|
||||
/* 0x6E */ {0, 0, 0},
|
||||
/* 0x6F */ {0, 0, 0},
|
||||
/* 0x70 */ {0, 0, 0},
|
||||
/* 0x71 */ {0, 0, 0},
|
||||
/* 0x72 */ {0, 0, 0},
|
||||
/* 0x73 */ {0, 0, 0},
|
||||
/* 0x74 */ {0, 0, 0},
|
||||
/* 0x75 */ {0, 0, 0},
|
||||
/* 0x76 */ {0, 0, 0},
|
||||
/* 0x77 */ {0, 0, 0},
|
||||
/* 0x78 */ {__cp950ext_encmap+338, 0x81, 0x81},
|
||||
/* 0x79 */ {0, 0, 0},
|
||||
/* 0x7A */ {0, 0, 0},
|
||||
/* 0x7B */ {0, 0, 0},
|
||||
/* 0x7C */ {__cp950ext_encmap+339, 0xa7, 0xa7},
|
||||
/* 0x7D */ {0, 0, 0},
|
||||
/* 0x7E */ {0, 0, 0},
|
||||
/* 0x7F */ {0, 0, 0},
|
||||
/* 0x80 */ {0, 0, 0},
|
||||
/* 0x81 */ {0, 0, 0},
|
||||
/* 0x82 */ {0, 0, 0},
|
||||
/* 0x83 */ {0, 0, 0},
|
||||
/* 0x84 */ {0, 0, 0},
|
||||
/* 0x85 */ {0, 0, 0},
|
||||
/* 0x86 */ {0, 0, 0},
|
||||
/* 0x87 */ {0, 0, 0},
|
||||
/* 0x88 */ {__cp950ext_encmap+340, 0xcf, 0xcf},
|
||||
/* 0x89 */ {0, 0, 0},
|
||||
/* 0x8A */ {0, 0, 0},
|
||||
/* 0x8B */ {0, 0, 0},
|
||||
/* 0x8C */ {0, 0, 0},
|
||||
/* 0x8D */ {0, 0, 0},
|
||||
/* 0x8E */ {0, 0, 0},
|
||||
/* 0x8F */ {0, 0, 0},
|
||||
/* 0x90 */ {0, 0, 0},
|
||||
/* 0x91 */ {0, 0, 0},
|
||||
/* 0x92 */ {__cp950ext_encmap+341, 0xb9, 0xb9},
|
||||
/* 0x93 */ {0, 0, 0},
|
||||
/* 0x94 */ {0, 0, 0},
|
||||
/* 0x95 */ {0, 0, 0},
|
||||
/* 0x96 */ {0, 0, 0},
|
||||
/* 0x97 */ {0, 0, 0},
|
||||
/* 0x98 */ {0, 0, 0},
|
||||
/* 0x99 */ {0, 0, 0},
|
||||
/* 0x9A */ {0, 0, 0},
|
||||
/* 0x9B */ {0, 0, 0},
|
||||
/* 0x9C */ {0, 0, 0},
|
||||
/* 0x9D */ {0, 0, 0},
|
||||
/* 0x9E */ {0, 0, 0},
|
||||
/* 0x9F */ {0, 0, 0},
|
||||
/* 0xA0 */ {0, 0, 0},
|
||||
/* 0xA1 */ {0, 0, 0},
|
||||
/* 0xA2 */ {0, 0, 0},
|
||||
/* 0xA3 */ {0, 0, 0},
|
||||
/* 0xA4 */ {0, 0, 0},
|
||||
/* 0xA5 */ {0, 0, 0},
|
||||
/* 0xA6 */ {0, 0, 0},
|
||||
/* 0xA7 */ {0, 0, 0},
|
||||
/* 0xA8 */ {0, 0, 0},
|
||||
/* 0xA9 */ {0, 0, 0},
|
||||
/* 0xAA */ {0, 0, 0},
|
||||
/* 0xAB */ {0, 0, 0},
|
||||
/* 0xAC */ {0, 0, 0},
|
||||
/* 0xAD */ {0, 0, 0},
|
||||
/* 0xAE */ {0, 0, 0},
|
||||
/* 0xAF */ {0, 0, 0},
|
||||
/* 0xB0 */ {0, 0, 0},
|
||||
/* 0xB1 */ {0, 0, 0},
|
||||
/* 0xB2 */ {0, 0, 0},
|
||||
/* 0xB3 */ {0, 0, 0},
|
||||
/* 0xB4 */ {0, 0, 0},
|
||||
/* 0xB5 */ {0, 0, 0},
|
||||
/* 0xB6 */ {0, 0, 0},
|
||||
/* 0xB7 */ {0, 0, 0},
|
||||
/* 0xB8 */ {0, 0, 0},
|
||||
/* 0xB9 */ {0, 0, 0},
|
||||
/* 0xBA */ {0, 0, 0},
|
||||
/* 0xBB */ {0, 0, 0},
|
||||
/* 0xBC */ {0, 0, 0},
|
||||
/* 0xBD */ {0, 0, 0},
|
||||
/* 0xBE */ {0, 0, 0},
|
||||
/* 0xBF */ {0, 0, 0},
|
||||
/* 0xC0 */ {0, 0, 0},
|
||||
/* 0xC1 */ {0, 0, 0},
|
||||
/* 0xC2 */ {0, 0, 0},
|
||||
/* 0xC3 */ {0, 0, 0},
|
||||
/* 0xC4 */ {0, 0, 0},
|
||||
/* 0xC5 */ {0, 0, 0},
|
||||
/* 0xC6 */ {0, 0, 0},
|
||||
/* 0xC7 */ {0, 0, 0},
|
||||
/* 0xC8 */ {0, 0, 0},
|
||||
/* 0xC9 */ {0, 0, 0},
|
||||
/* 0xCA */ {0, 0, 0},
|
||||
/* 0xCB */ {0, 0, 0},
|
||||
/* 0xCC */ {0, 0, 0},
|
||||
/* 0xCD */ {0, 0, 0},
|
||||
/* 0xCE */ {0, 0, 0},
|
||||
/* 0xCF */ {0, 0, 0},
|
||||
/* 0xD0 */ {0, 0, 0},
|
||||
/* 0xD1 */ {0, 0, 0},
|
||||
/* 0xD2 */ {0, 0, 0},
|
||||
/* 0xD3 */ {0, 0, 0},
|
||||
/* 0xD4 */ {0, 0, 0},
|
||||
/* 0xD5 */ {0, 0, 0},
|
||||
/* 0xD6 */ {0, 0, 0},
|
||||
/* 0xD7 */ {0, 0, 0},
|
||||
/* 0xD8 */ {0, 0, 0},
|
||||
/* 0xD9 */ {0, 0, 0},
|
||||
/* 0xDA */ {0, 0, 0},
|
||||
/* 0xDB */ {0, 0, 0},
|
||||
/* 0xDC */ {0, 0, 0},
|
||||
/* 0xDD */ {0, 0, 0},
|
||||
/* 0xDE */ {0, 0, 0},
|
||||
/* 0xDF */ {0, 0, 0},
|
||||
/* 0xE0 */ {0, 0, 0},
|
||||
/* 0xE1 */ {0, 0, 0},
|
||||
/* 0xE2 */ {0, 0, 0},
|
||||
/* 0xE3 */ {0, 0, 0},
|
||||
/* 0xE4 */ {0, 0, 0},
|
||||
/* 0xE5 */ {0, 0, 0},
|
||||
/* 0xE6 */ {0, 0, 0},
|
||||
/* 0xE7 */ {0, 0, 0},
|
||||
/* 0xE8 */ {0, 0, 0},
|
||||
/* 0xE9 */ {0, 0, 0},
|
||||
/* 0xEA */ {0, 0, 0},
|
||||
/* 0xEB */ {0, 0, 0},
|
||||
/* 0xEC */ {0, 0, 0},
|
||||
/* 0xED */ {0, 0, 0},
|
||||
/* 0xEE */ {0, 0, 0},
|
||||
/* 0xEF */ {0, 0, 0},
|
||||
/* 0xF0 */ {0, 0, 0},
|
||||
/* 0xF1 */ {0, 0, 0},
|
||||
/* 0xF2 */ {0, 0, 0},
|
||||
/* 0xF3 */ {0, 0, 0},
|
||||
/* 0xF4 */ {0, 0, 0},
|
||||
/* 0xF5 */ {0, 0, 0},
|
||||
/* 0xF6 */ {0, 0, 0},
|
||||
/* 0xF7 */ {0, 0, 0},
|
||||
/* 0xF8 */ {0, 0, 0},
|
||||
/* 0xF9 */ {0, 0, 0},
|
||||
/* 0xFA */ {0, 0, 0},
|
||||
/* 0xFB */ {0, 0, 0},
|
||||
/* 0xFC */ {0, 0, 0},
|
||||
/* 0xFD */ {0, 0, 0},
|
||||
/* 0xFE */ {__cp950ext_encmap+342, 0x51, 0x68},
|
||||
/* 0xFF */ {__cp950ext_encmap+366, 0x0f, 0xe5},
|
||||
};
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,218 +0,0 @@
|
|||
/*
|
||||
* $CJKCodecs: map_gb18030uni.h,v 1.1.1.1 2003/09/24 17:45:51 perky Exp $
|
||||
*/
|
||||
|
||||
|
||||
static const struct _gb18030_to_unibmp_ranges {
|
||||
Py_UNICODE first, last;
|
||||
DBCHAR base;
|
||||
} gb18030_to_unibmp_ranges[] = {
|
||||
{ 0x0080, 0x00a3, 0x0000 },
|
||||
{ 0x00a5, 0x00a6, 0x0024 },
|
||||
{ 0x00a9, 0x00af, 0x0026 },
|
||||
{ 0x00b2, 0x00b6, 0x002d },
|
||||
{ 0x00b8, 0x00d6, 0x0032 },
|
||||
{ 0x00d8, 0x00df, 0x0051 },
|
||||
{ 0x00e2, 0x00e7, 0x0059 },
|
||||
{ 0x00eb, 0x00eb, 0x005f },
|
||||
{ 0x00ee, 0x00f1, 0x0060 },
|
||||
{ 0x00f4, 0x00f6, 0x0064 },
|
||||
{ 0x00f8, 0x00f8, 0x0067 },
|
||||
{ 0x00fb, 0x00fb, 0x0068 },
|
||||
{ 0x00fd, 0x0100, 0x0069 },
|
||||
{ 0x0102, 0x0112, 0x006d },
|
||||
{ 0x0114, 0x011a, 0x007e },
|
||||
{ 0x011c, 0x012a, 0x0085 },
|
||||
{ 0x012c, 0x0143, 0x0094 },
|
||||
{ 0x0145, 0x0147, 0x00ac },
|
||||
{ 0x0149, 0x014c, 0x00af },
|
||||
{ 0x014e, 0x016a, 0x00b3 },
|
||||
{ 0x016c, 0x01cd, 0x00d0 },
|
||||
{ 0x01cf, 0x01cf, 0x0132 },
|
||||
{ 0x01d1, 0x01d1, 0x0133 },
|
||||
{ 0x01d3, 0x01d3, 0x0134 },
|
||||
{ 0x01d5, 0x01d5, 0x0135 },
|
||||
{ 0x01d7, 0x01d7, 0x0136 },
|
||||
{ 0x01d9, 0x01d9, 0x0137 },
|
||||
{ 0x01db, 0x01db, 0x0138 },
|
||||
{ 0x01dd, 0x01f8, 0x0139 },
|
||||
{ 0x01fa, 0x0250, 0x0155 },
|
||||
{ 0x0252, 0x0260, 0x01ac },
|
||||
{ 0x0262, 0x02c6, 0x01bb },
|
||||
{ 0x02c8, 0x02c8, 0x0220 },
|
||||
{ 0x02cc, 0x02d8, 0x0221 },
|
||||
{ 0x02da, 0x0390, 0x022e },
|
||||
{ 0x03a2, 0x03a2, 0x02e5 },
|
||||
{ 0x03aa, 0x03b0, 0x02e6 },
|
||||
{ 0x03c2, 0x03c2, 0x02ed },
|
||||
{ 0x03ca, 0x0400, 0x02ee },
|
||||
{ 0x0402, 0x040f, 0x0325 },
|
||||
{ 0x0450, 0x0450, 0x0333 },
|
||||
{ 0x0452, 0x200f, 0x0334 },
|
||||
{ 0x2011, 0x2012, 0x1ef2 },
|
||||
{ 0x2017, 0x2017, 0x1ef4 },
|
||||
{ 0x201a, 0x201b, 0x1ef5 },
|
||||
{ 0x201e, 0x2024, 0x1ef7 },
|
||||
{ 0x2027, 0x202f, 0x1efe },
|
||||
{ 0x2031, 0x2031, 0x1f07 },
|
||||
{ 0x2034, 0x2034, 0x1f08 },
|
||||
{ 0x2036, 0x203a, 0x1f09 },
|
||||
{ 0x203c, 0x20ab, 0x1f0e },
|
||||
{ 0x20ad, 0x2102, 0x1f7e },
|
||||
{ 0x2104, 0x2104, 0x1fd4 },
|
||||
{ 0x2106, 0x2108, 0x1fd5 },
|
||||
{ 0x210a, 0x2115, 0x1fd8 },
|
||||
{ 0x2117, 0x2120, 0x1fe4 },
|
||||
{ 0x2122, 0x215f, 0x1fee },
|
||||
{ 0x216c, 0x216f, 0x202c },
|
||||
{ 0x217a, 0x218f, 0x2030 },
|
||||
{ 0x2194, 0x2195, 0x2046 },
|
||||
{ 0x219a, 0x2207, 0x2048 },
|
||||
{ 0x2209, 0x220e, 0x20b6 },
|
||||
{ 0x2210, 0x2210, 0x20bc },
|
||||
{ 0x2212, 0x2214, 0x20bd },
|
||||
{ 0x2216, 0x2219, 0x20c0 },
|
||||
{ 0x221b, 0x221c, 0x20c4 },
|
||||
{ 0x2221, 0x2222, 0x20c6 },
|
||||
{ 0x2224, 0x2224, 0x20c8 },
|
||||
{ 0x2226, 0x2226, 0x20c9 },
|
||||
{ 0x222c, 0x222d, 0x20ca },
|
||||
{ 0x222f, 0x2233, 0x20cc },
|
||||
{ 0x2238, 0x223c, 0x20d1 },
|
||||
{ 0x223e, 0x2247, 0x20d6 },
|
||||
{ 0x2249, 0x224b, 0x20e0 },
|
||||
{ 0x224d, 0x2251, 0x20e3 },
|
||||
{ 0x2253, 0x225f, 0x20e8 },
|
||||
{ 0x2262, 0x2263, 0x20f5 },
|
||||
{ 0x2268, 0x226d, 0x20f7 },
|
||||
{ 0x2270, 0x2294, 0x20fd },
|
||||
{ 0x2296, 0x2298, 0x2122 },
|
||||
{ 0x229a, 0x22a4, 0x2125 },
|
||||
{ 0x22a6, 0x22be, 0x2130 },
|
||||
{ 0x22c0, 0x2311, 0x2149 },
|
||||
{ 0x2313, 0x245f, 0x219b },
|
||||
{ 0x246a, 0x2473, 0x22e8 },
|
||||
{ 0x249c, 0x24ff, 0x22f2 },
|
||||
{ 0x254c, 0x254f, 0x2356 },
|
||||
{ 0x2574, 0x2580, 0x235a },
|
||||
{ 0x2590, 0x2592, 0x2367 },
|
||||
{ 0x2596, 0x259f, 0x236a },
|
||||
{ 0x25a2, 0x25b1, 0x2374 },
|
||||
{ 0x25b4, 0x25bb, 0x2384 },
|
||||
{ 0x25be, 0x25c5, 0x238c },
|
||||
{ 0x25c8, 0x25ca, 0x2394 },
|
||||
{ 0x25cc, 0x25cd, 0x2397 },
|
||||
{ 0x25d0, 0x25e1, 0x2399 },
|
||||
{ 0x25e6, 0x2604, 0x23ab },
|
||||
{ 0x2607, 0x2608, 0x23ca },
|
||||
{ 0x260a, 0x263f, 0x23cc },
|
||||
{ 0x2641, 0x2641, 0x2402 },
|
||||
{ 0x2643, 0x2e80, 0x2403 },
|
||||
{ 0x2e82, 0x2e83, 0x2c41 },
|
||||
{ 0x2e85, 0x2e87, 0x2c43 },
|
||||
{ 0x2e89, 0x2e8a, 0x2c46 },
|
||||
{ 0x2e8d, 0x2e96, 0x2c48 },
|
||||
{ 0x2e98, 0x2ea6, 0x2c52 },
|
||||
{ 0x2ea8, 0x2ea9, 0x2c61 },
|
||||
{ 0x2eab, 0x2ead, 0x2c63 },
|
||||
{ 0x2eaf, 0x2eb2, 0x2c66 },
|
||||
{ 0x2eb4, 0x2eb5, 0x2c6a },
|
||||
{ 0x2eb8, 0x2eba, 0x2c6c },
|
||||
{ 0x2ebc, 0x2ec9, 0x2c6f },
|
||||
{ 0x2ecb, 0x2fef, 0x2c7d },
|
||||
{ 0x2ffc, 0x2fff, 0x2da2 },
|
||||
{ 0x3004, 0x3004, 0x2da6 },
|
||||
{ 0x3018, 0x301c, 0x2da7 },
|
||||
{ 0x301f, 0x3020, 0x2dac },
|
||||
{ 0x302a, 0x303d, 0x2dae },
|
||||
{ 0x303f, 0x3040, 0x2dc2 },
|
||||
{ 0x3094, 0x309a, 0x2dc4 },
|
||||
{ 0x309f, 0x30a0, 0x2dcb },
|
||||
{ 0x30f7, 0x30fb, 0x2dcd },
|
||||
{ 0x30ff, 0x3104, 0x2dd2 },
|
||||
{ 0x312a, 0x321f, 0x2dd8 },
|
||||
{ 0x322a, 0x3230, 0x2ece },
|
||||
{ 0x3232, 0x32a2, 0x2ed5 },
|
||||
{ 0x32a4, 0x338d, 0x2f46 },
|
||||
{ 0x3390, 0x339b, 0x3030 },
|
||||
{ 0x339f, 0x33a0, 0x303c },
|
||||
{ 0x33a2, 0x33c3, 0x303e },
|
||||
{ 0x33c5, 0x33cd, 0x3060 },
|
||||
{ 0x33cf, 0x33d0, 0x3069 },
|
||||
{ 0x33d3, 0x33d4, 0x306b },
|
||||
{ 0x33d6, 0x3446, 0x306d },
|
||||
{ 0x3448, 0x3472, 0x30de },
|
||||
{ 0x3474, 0x359d, 0x3109 },
|
||||
{ 0x359f, 0x360d, 0x3233 },
|
||||
{ 0x360f, 0x3619, 0x32a2 },
|
||||
{ 0x361b, 0x3917, 0x32ad },
|
||||
{ 0x3919, 0x396d, 0x35aa },
|
||||
{ 0x396f, 0x39ce, 0x35ff },
|
||||
{ 0x39d1, 0x39de, 0x365f },
|
||||
{ 0x39e0, 0x3a72, 0x366d },
|
||||
{ 0x3a74, 0x3b4d, 0x3700 },
|
||||
{ 0x3b4f, 0x3c6d, 0x37da },
|
||||
{ 0x3c6f, 0x3cdf, 0x38f9 },
|
||||
{ 0x3ce1, 0x4055, 0x396a },
|
||||
{ 0x4057, 0x415e, 0x3cdf },
|
||||
{ 0x4160, 0x4336, 0x3de7 },
|
||||
{ 0x4338, 0x43ab, 0x3fbe },
|
||||
{ 0x43ad, 0x43b0, 0x4032 },
|
||||
{ 0x43b2, 0x43dc, 0x4036 },
|
||||
{ 0x43de, 0x44d5, 0x4061 },
|
||||
{ 0x44d7, 0x464b, 0x4159 },
|
||||
{ 0x464d, 0x4660, 0x42ce },
|
||||
{ 0x4662, 0x4722, 0x42e2 },
|
||||
{ 0x4724, 0x4728, 0x43a3 },
|
||||
{ 0x472a, 0x477b, 0x43a8 },
|
||||
{ 0x477d, 0x478c, 0x43fa },
|
||||
{ 0x478e, 0x4946, 0x440a },
|
||||
{ 0x4948, 0x4979, 0x45c3 },
|
||||
{ 0x497b, 0x497c, 0x45f5 },
|
||||
{ 0x497e, 0x4981, 0x45f7 },
|
||||
{ 0x4984, 0x4984, 0x45fb },
|
||||
{ 0x4987, 0x499a, 0x45fc },
|
||||
{ 0x499c, 0x499e, 0x4610 },
|
||||
{ 0x49a0, 0x49b5, 0x4613 },
|
||||
{ 0x49b8, 0x4c76, 0x4629 },
|
||||
{ 0x4c78, 0x4c9e, 0x48e8 },
|
||||
{ 0x4ca4, 0x4d12, 0x490f },
|
||||
{ 0x4d1a, 0x4dad, 0x497e },
|
||||
{ 0x4daf, 0x4dff, 0x4a12 },
|
||||
{ 0x9fa6, 0xd7ff, 0x4a63 },
|
||||
{ 0xe76c, 0xe76c, 0x82bd },
|
||||
{ 0xe7c8, 0xe7c8, 0x82be },
|
||||
{ 0xe7e7, 0xe7f3, 0x82bf },
|
||||
{ 0xe815, 0xe815, 0x82cc },
|
||||
{ 0xe819, 0xe81d, 0x82cd },
|
||||
{ 0xe81f, 0xe825, 0x82d2 },
|
||||
{ 0xe827, 0xe82a, 0x82d9 },
|
||||
{ 0xe82d, 0xe830, 0x82dd },
|
||||
{ 0xe833, 0xe83a, 0x82e1 },
|
||||
{ 0xe83c, 0xe842, 0x82e9 },
|
||||
{ 0xe844, 0xe853, 0x82f0 },
|
||||
{ 0xe856, 0xe863, 0x8300 },
|
||||
{ 0xe865, 0xf92b, 0x830e },
|
||||
{ 0xf92d, 0xf978, 0x93d5 },
|
||||
{ 0xf97a, 0xf994, 0x9421 },
|
||||
{ 0xf996, 0xf9e6, 0x943c },
|
||||
{ 0xf9e8, 0xf9f0, 0x948d },
|
||||
{ 0xf9f2, 0xfa0b, 0x9496 },
|
||||
{ 0xfa10, 0xfa10, 0x94b0 },
|
||||
{ 0xfa12, 0xfa12, 0x94b1 },
|
||||
{ 0xfa15, 0xfa17, 0x94b2 },
|
||||
{ 0xfa19, 0xfa1e, 0x94b5 },
|
||||
{ 0xfa22, 0xfa22, 0x94bb },
|
||||
{ 0xfa25, 0xfa26, 0x94bc },
|
||||
{ 0xfa2a, 0xfe2f, 0x94be },
|
||||
{ 0xfe32, 0xfe32, 0x98c4 },
|
||||
{ 0xfe45, 0xfe48, 0x98c5 },
|
||||
{ 0xfe53, 0xfe53, 0x98c9 },
|
||||
{ 0xfe58, 0xfe58, 0x98ca },
|
||||
{ 0xfe67, 0xfe67, 0x98cb },
|
||||
{ 0xfe6c, 0xff00, 0x98cc },
|
||||
{ 0xff5f, 0xffdf, 0x9961 },
|
||||
{ 0xffe6, 0xffff, 0x99e2 },
|
||||
{ 0x0000, 0x0000, 0x99fc },
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,329 +0,0 @@
|
|||
/*
|
||||
* $CJKCodecs: map_jisx0213_pairs.h,v 1.2 2003/11/27 13:29:01 perky Exp $
|
||||
*/
|
||||
|
||||
static const ucs4_t __jisx0213_pairdecmap[49] = {
|
||||
0x304b309a, 0x304d309a, 0x304f309a, 0x3051309a,
|
||||
0x3053309a, 0x30ab309a, 0x30ad309a, 0x30af309a,
|
||||
0x30b1309a, 0x30b3309a, 0x30bb309a, 0x30c4309a,
|
||||
0x30c8309a, 0x31f7309a, 0x00e60300, UNIINV,
|
||||
UNIINV, UNIINV, 0x02540300, 0x02540301,
|
||||
0x028c0300, 0x028c0301, 0x02590300, 0x02590301,
|
||||
0x025a0300, 0x025a0301, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, 0x02e902e5,
|
||||
0x02e502e9,
|
||||
};
|
||||
|
||||
static const struct widedbcs_index jisx0213_pairdecmap[256] = {
|
||||
/* 0x00 */ {0, 0, 0},
|
||||
/* 0x01 */ {0, 0, 0},
|
||||
/* 0x02 */ {0, 0, 0},
|
||||
/* 0x03 */ {0, 0, 0},
|
||||
/* 0x04 */ {0, 0, 0},
|
||||
/* 0x05 */ {0, 0, 0},
|
||||
/* 0x06 */ {0, 0, 0},
|
||||
/* 0x07 */ {0, 0, 0},
|
||||
/* 0x08 */ {0, 0, 0},
|
||||
/* 0x09 */ {0, 0, 0},
|
||||
/* 0x0A */ {0, 0, 0},
|
||||
/* 0x0B */ {0, 0, 0},
|
||||
/* 0x0C */ {0, 0, 0},
|
||||
/* 0x0D */ {0, 0, 0},
|
||||
/* 0x0E */ {0, 0, 0},
|
||||
/* 0x0F */ {0, 0, 0},
|
||||
/* 0x10 */ {0, 0, 0},
|
||||
/* 0x11 */ {0, 0, 0},
|
||||
/* 0x12 */ {0, 0, 0},
|
||||
/* 0x13 */ {0, 0, 0},
|
||||
/* 0x14 */ {0, 0, 0},
|
||||
/* 0x15 */ {0, 0, 0},
|
||||
/* 0x16 */ {0, 0, 0},
|
||||
/* 0x17 */ {0, 0, 0},
|
||||
/* 0x18 */ {0, 0, 0},
|
||||
/* 0x19 */ {0, 0, 0},
|
||||
/* 0x1A */ {0, 0, 0},
|
||||
/* 0x1B */ {0, 0, 0},
|
||||
/* 0x1C */ {0, 0, 0},
|
||||
/* 0x1D */ {0, 0, 0},
|
||||
/* 0x1E */ {0, 0, 0},
|
||||
/* 0x1F */ {0, 0, 0},
|
||||
/* 0x20 */ {0, 0, 0},
|
||||
/* 0x21 */ {0, 0, 0},
|
||||
/* 0x22 */ {0, 0, 0},
|
||||
/* 0x23 */ {0, 0, 0},
|
||||
/* 0x24 */ {__jisx0213_pairdecmap+0, 0x77, 0x7b},
|
||||
/* 0x25 */ {__jisx0213_pairdecmap+5, 0x77, 0x7e},
|
||||
/* 0x26 */ {__jisx0213_pairdecmap+13, 0x78, 0x78},
|
||||
/* 0x27 */ {0, 0, 0},
|
||||
/* 0x28 */ {0, 0, 0},
|
||||
/* 0x29 */ {0, 0, 0},
|
||||
/* 0x2A */ {0, 0, 0},
|
||||
/* 0x2B */ {__jisx0213_pairdecmap+14, 0x44, 0x66},
|
||||
/* 0x2C */ {0, 0, 0},
|
||||
/* 0x2D */ {0, 0, 0},
|
||||
/* 0x2E */ {0, 0, 0},
|
||||
/* 0x2F */ {0, 0, 0},
|
||||
/* 0x30 */ {0, 0, 0},
|
||||
/* 0x31 */ {0, 0, 0},
|
||||
/* 0x32 */ {0, 0, 0},
|
||||
/* 0x33 */ {0, 0, 0},
|
||||
/* 0x34 */ {0, 0, 0},
|
||||
/* 0x35 */ {0, 0, 0},
|
||||
/* 0x36 */ {0, 0, 0},
|
||||
/* 0x37 */ {0, 0, 0},
|
||||
/* 0x38 */ {0, 0, 0},
|
||||
/* 0x39 */ {0, 0, 0},
|
||||
/* 0x3A */ {0, 0, 0},
|
||||
/* 0x3B */ {0, 0, 0},
|
||||
/* 0x3C */ {0, 0, 0},
|
||||
/* 0x3D */ {0, 0, 0},
|
||||
/* 0x3E */ {0, 0, 0},
|
||||
/* 0x3F */ {0, 0, 0},
|
||||
/* 0x40 */ {0, 0, 0},
|
||||
/* 0x41 */ {0, 0, 0},
|
||||
/* 0x42 */ {0, 0, 0},
|
||||
/* 0x43 */ {0, 0, 0},
|
||||
/* 0x44 */ {0, 0, 0},
|
||||
/* 0x45 */ {0, 0, 0},
|
||||
/* 0x46 */ {0, 0, 0},
|
||||
/* 0x47 */ {0, 0, 0},
|
||||
/* 0x48 */ {0, 0, 0},
|
||||
/* 0x49 */ {0, 0, 0},
|
||||
/* 0x4A */ {0, 0, 0},
|
||||
/* 0x4B */ {0, 0, 0},
|
||||
/* 0x4C */ {0, 0, 0},
|
||||
/* 0x4D */ {0, 0, 0},
|
||||
/* 0x4E */ {0, 0, 0},
|
||||
/* 0x4F */ {0, 0, 0},
|
||||
/* 0x50 */ {0, 0, 0},
|
||||
/* 0x51 */ {0, 0, 0},
|
||||
/* 0x52 */ {0, 0, 0},
|
||||
/* 0x53 */ {0, 0, 0},
|
||||
/* 0x54 */ {0, 0, 0},
|
||||
/* 0x55 */ {0, 0, 0},
|
||||
/* 0x56 */ {0, 0, 0},
|
||||
/* 0x57 */ {0, 0, 0},
|
||||
/* 0x58 */ {0, 0, 0},
|
||||
/* 0x59 */ {0, 0, 0},
|
||||
/* 0x5A */ {0, 0, 0},
|
||||
/* 0x5B */ {0, 0, 0},
|
||||
/* 0x5C */ {0, 0, 0},
|
||||
/* 0x5D */ {0, 0, 0},
|
||||
/* 0x5E */ {0, 0, 0},
|
||||
/* 0x5F */ {0, 0, 0},
|
||||
/* 0x60 */ {0, 0, 0},
|
||||
/* 0x61 */ {0, 0, 0},
|
||||
/* 0x62 */ {0, 0, 0},
|
||||
/* 0x63 */ {0, 0, 0},
|
||||
/* 0x64 */ {0, 0, 0},
|
||||
/* 0x65 */ {0, 0, 0},
|
||||
/* 0x66 */ {0, 0, 0},
|
||||
/* 0x67 */ {0, 0, 0},
|
||||
/* 0x68 */ {0, 0, 0},
|
||||
/* 0x69 */ {0, 0, 0},
|
||||
/* 0x6A */ {0, 0, 0},
|
||||
/* 0x6B */ {0, 0, 0},
|
||||
/* 0x6C */ {0, 0, 0},
|
||||
/* 0x6D */ {0, 0, 0},
|
||||
/* 0x6E */ {0, 0, 0},
|
||||
/* 0x6F */ {0, 0, 0},
|
||||
/* 0x70 */ {0, 0, 0},
|
||||
/* 0x71 */ {0, 0, 0},
|
||||
/* 0x72 */ {0, 0, 0},
|
||||
/* 0x73 */ {0, 0, 0},
|
||||
/* 0x74 */ {0, 0, 0},
|
||||
/* 0x75 */ {0, 0, 0},
|
||||
/* 0x76 */ {0, 0, 0},
|
||||
/* 0x77 */ {0, 0, 0},
|
||||
/* 0x78 */ {0, 0, 0},
|
||||
/* 0x79 */ {0, 0, 0},
|
||||
/* 0x7A */ {0, 0, 0},
|
||||
/* 0x7B */ {0, 0, 0},
|
||||
/* 0x7C */ {0, 0, 0},
|
||||
/* 0x7D */ {0, 0, 0},
|
||||
/* 0x7E */ {0, 0, 0},
|
||||
/* 0x7F */ {0, 0, 0},
|
||||
/* 0x80 */ {0, 0, 0},
|
||||
/* 0x81 */ {0, 0, 0},
|
||||
/* 0x82 */ {0, 0, 0},
|
||||
/* 0x83 */ {0, 0, 0},
|
||||
/* 0x84 */ {0, 0, 0},
|
||||
/* 0x85 */ {0, 0, 0},
|
||||
/* 0x86 */ {0, 0, 0},
|
||||
/* 0x87 */ {0, 0, 0},
|
||||
/* 0x88 */ {0, 0, 0},
|
||||
/* 0x89 */ {0, 0, 0},
|
||||
/* 0x8A */ {0, 0, 0},
|
||||
/* 0x8B */ {0, 0, 0},
|
||||
/* 0x8C */ {0, 0, 0},
|
||||
/* 0x8D */ {0, 0, 0},
|
||||
/* 0x8E */ {0, 0, 0},
|
||||
/* 0x8F */ {0, 0, 0},
|
||||
/* 0x90 */ {0, 0, 0},
|
||||
/* 0x91 */ {0, 0, 0},
|
||||
/* 0x92 */ {0, 0, 0},
|
||||
/* 0x93 */ {0, 0, 0},
|
||||
/* 0x94 */ {0, 0, 0},
|
||||
/* 0x95 */ {0, 0, 0},
|
||||
/* 0x96 */ {0, 0, 0},
|
||||
/* 0x97 */ {0, 0, 0},
|
||||
/* 0x98 */ {0, 0, 0},
|
||||
/* 0x99 */ {0, 0, 0},
|
||||
/* 0x9A */ {0, 0, 0},
|
||||
/* 0x9B */ {0, 0, 0},
|
||||
/* 0x9C */ {0, 0, 0},
|
||||
/* 0x9D */ {0, 0, 0},
|
||||
/* 0x9E */ {0, 0, 0},
|
||||
/* 0x9F */ {0, 0, 0},
|
||||
/* 0xA0 */ {0, 0, 0},
|
||||
/* 0xA1 */ {0, 0, 0},
|
||||
/* 0xA2 */ {0, 0, 0},
|
||||
/* 0xA3 */ {0, 0, 0},
|
||||
/* 0xA4 */ {0, 0, 0},
|
||||
/* 0xA5 */ {0, 0, 0},
|
||||
/* 0xA6 */ {0, 0, 0},
|
||||
/* 0xA7 */ {0, 0, 0},
|
||||
/* 0xA8 */ {0, 0, 0},
|
||||
/* 0xA9 */ {0, 0, 0},
|
||||
/* 0xAA */ {0, 0, 0},
|
||||
/* 0xAB */ {0, 0, 0},
|
||||
/* 0xAC */ {0, 0, 0},
|
||||
/* 0xAD */ {0, 0, 0},
|
||||
/* 0xAE */ {0, 0, 0},
|
||||
/* 0xAF */ {0, 0, 0},
|
||||
/* 0xB0 */ {0, 0, 0},
|
||||
/* 0xB1 */ {0, 0, 0},
|
||||
/* 0xB2 */ {0, 0, 0},
|
||||
/* 0xB3 */ {0, 0, 0},
|
||||
/* 0xB4 */ {0, 0, 0},
|
||||
/* 0xB5 */ {0, 0, 0},
|
||||
/* 0xB6 */ {0, 0, 0},
|
||||
/* 0xB7 */ {0, 0, 0},
|
||||
/* 0xB8 */ {0, 0, 0},
|
||||
/* 0xB9 */ {0, 0, 0},
|
||||
/* 0xBA */ {0, 0, 0},
|
||||
/* 0xBB */ {0, 0, 0},
|
||||
/* 0xBC */ {0, 0, 0},
|
||||
/* 0xBD */ {0, 0, 0},
|
||||
/* 0xBE */ {0, 0, 0},
|
||||
/* 0xBF */ {0, 0, 0},
|
||||
/* 0xC0 */ {0, 0, 0},
|
||||
/* 0xC1 */ {0, 0, 0},
|
||||
/* 0xC2 */ {0, 0, 0},
|
||||
/* 0xC3 */ {0, 0, 0},
|
||||
/* 0xC4 */ {0, 0, 0},
|
||||
/* 0xC5 */ {0, 0, 0},
|
||||
/* 0xC6 */ {0, 0, 0},
|
||||
/* 0xC7 */ {0, 0, 0},
|
||||
/* 0xC8 */ {0, 0, 0},
|
||||
/* 0xC9 */ {0, 0, 0},
|
||||
/* 0xCA */ {0, 0, 0},
|
||||
/* 0xCB */ {0, 0, 0},
|
||||
/* 0xCC */ {0, 0, 0},
|
||||
/* 0xCD */ {0, 0, 0},
|
||||
/* 0xCE */ {0, 0, 0},
|
||||
/* 0xCF */ {0, 0, 0},
|
||||
/* 0xD0 */ {0, 0, 0},
|
||||
/* 0xD1 */ {0, 0, 0},
|
||||
/* 0xD2 */ {0, 0, 0},
|
||||
/* 0xD3 */ {0, 0, 0},
|
||||
/* 0xD4 */ {0, 0, 0},
|
||||
/* 0xD5 */ {0, 0, 0},
|
||||
/* 0xD6 */ {0, 0, 0},
|
||||
/* 0xD7 */ {0, 0, 0},
|
||||
/* 0xD8 */ {0, 0, 0},
|
||||
/* 0xD9 */ {0, 0, 0},
|
||||
/* 0xDA */ {0, 0, 0},
|
||||
/* 0xDB */ {0, 0, 0},
|
||||
/* 0xDC */ {0, 0, 0},
|
||||
/* 0xDD */ {0, 0, 0},
|
||||
/* 0xDE */ {0, 0, 0},
|
||||
/* 0xDF */ {0, 0, 0},
|
||||
/* 0xE0 */ {0, 0, 0},
|
||||
/* 0xE1 */ {0, 0, 0},
|
||||
/* 0xE2 */ {0, 0, 0},
|
||||
/* 0xE3 */ {0, 0, 0},
|
||||
/* 0xE4 */ {0, 0, 0},
|
||||
/* 0xE5 */ {0, 0, 0},
|
||||
/* 0xE6 */ {0, 0, 0},
|
||||
/* 0xE7 */ {0, 0, 0},
|
||||
/* 0xE8 */ {0, 0, 0},
|
||||
/* 0xE9 */ {0, 0, 0},
|
||||
/* 0xEA */ {0, 0, 0},
|
||||
/* 0xEB */ {0, 0, 0},
|
||||
/* 0xEC */ {0, 0, 0},
|
||||
/* 0xED */ {0, 0, 0},
|
||||
/* 0xEE */ {0, 0, 0},
|
||||
/* 0xEF */ {0, 0, 0},
|
||||
/* 0xF0 */ {0, 0, 0},
|
||||
/* 0xF1 */ {0, 0, 0},
|
||||
/* 0xF2 */ {0, 0, 0},
|
||||
/* 0xF3 */ {0, 0, 0},
|
||||
/* 0xF4 */ {0, 0, 0},
|
||||
/* 0xF5 */ {0, 0, 0},
|
||||
/* 0xF6 */ {0, 0, 0},
|
||||
/* 0xF7 */ {0, 0, 0},
|
||||
/* 0xF8 */ {0, 0, 0},
|
||||
/* 0xF9 */ {0, 0, 0},
|
||||
/* 0xFA */ {0, 0, 0},
|
||||
/* 0xFB */ {0, 0, 0},
|
||||
/* 0xFC */ {0, 0, 0},
|
||||
/* 0xFD */ {0, 0, 0},
|
||||
/* 0xFE */ {0, 0, 0},
|
||||
/* 0xFF */ {0, 0, 0},
|
||||
};
|
||||
|
||||
#define JISX0213_ENCPAIRS 46
|
||||
static struct pair_encodemap jisx0213_pairencmap[JISX0213_ENCPAIRS] = {
|
||||
{ 0x00e60000, 0x295c },
|
||||
{ 0x00e60300, 0x2b44 },
|
||||
{ 0x02540000, 0x2b38 },
|
||||
{ 0x02540300, 0x2b48 },
|
||||
{ 0x02540301, 0x2b49 },
|
||||
{ 0x02590000, 0x2b30 },
|
||||
{ 0x02590300, 0x2b4c },
|
||||
{ 0x02590301, 0x2b4d },
|
||||
{ 0x025a0000, 0x2b43 },
|
||||
{ 0x025a0300, 0x2b4e },
|
||||
{ 0x025a0301, 0x2b4f },
|
||||
{ 0x028c0000, 0x2b37 },
|
||||
{ 0x028c0300, 0x2b4a },
|
||||
{ 0x028c0301, 0x2b4b },
|
||||
{ 0x02e50000, 0x2b60 },
|
||||
{ 0x02e502e9, 0x2b66 },
|
||||
{ 0x02e90000, 0x2b64 },
|
||||
{ 0x02e902e5, 0x2b65 },
|
||||
{ 0x304b0000, 0x242b },
|
||||
{ 0x304b309a, 0x2477 },
|
||||
{ 0x304d0000, 0x242d },
|
||||
{ 0x304d309a, 0x2478 },
|
||||
{ 0x304f0000, 0x242f },
|
||||
{ 0x304f309a, 0x2479 },
|
||||
{ 0x30510000, 0x2431 },
|
||||
{ 0x3051309a, 0x247a },
|
||||
{ 0x30530000, 0x2433 },
|
||||
{ 0x3053309a, 0x247b },
|
||||
{ 0x30ab0000, 0x252b },
|
||||
{ 0x30ab309a, 0x2577 },
|
||||
{ 0x30ad0000, 0x252d },
|
||||
{ 0x30ad309a, 0x2578 },
|
||||
{ 0x30af0000, 0x252f },
|
||||
{ 0x30af309a, 0x2579 },
|
||||
{ 0x30b10000, 0x2531 },
|
||||
{ 0x30b1309a, 0x257a },
|
||||
{ 0x30b30000, 0x2533 },
|
||||
{ 0x30b3309a, 0x257b },
|
||||
{ 0x30bb0000, 0x253b },
|
||||
{ 0x30bb309a, 0x257c },
|
||||
{ 0x30c40000, 0x2544 },
|
||||
{ 0x30c4309a, 0x257d },
|
||||
{ 0x30c80000, 0x2548 },
|
||||
{ 0x30c8309a, 0x257e },
|
||||
{ 0x31f70000, 0x2675 },
|
||||
{ 0x31f7309a, 0x2678 },
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,51 +0,0 @@
|
|||
/*
|
||||
* mapdata_ja_JP.c: Map Provider for Japanese Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_ja_JP.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_jisx0208.h"
|
||||
#include "map_jisx0212.h"
|
||||
#include "map_jisx0213.h"
|
||||
#include "map_jisxcommon.h"
|
||||
#include "map_cp932ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"jisx0208", NULL, jisx0208_decmap},
|
||||
{"jisx0212", NULL, jisx0212_decmap},
|
||||
{"jisxcommon", jisxcommon_encmap, NULL},
|
||||
{"jisx0213_1_bmp", NULL, jisx0213_1_bmp_decmap},
|
||||
{"jisx0213_2_bmp", NULL, jisx0213_2_bmp_decmap},
|
||||
{"jisx0213_bmp", jisx0213_bmp_encmap, NULL},
|
||||
{"jisx0213_1_emp", NULL, jisx0213_1_emp_decmap},
|
||||
{"jisx0213_2_emp", NULL, jisx0213_2_emp_decmap},
|
||||
{"jisx0213_emp", jisx0213_emp_encmap, NULL},
|
||||
{"cp932ext", cp932ext_encmap, cp932ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_ja_JP(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_ja_JP", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_ja_JP module");
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
/*
|
||||
* mapdata_ko_KR.c: Map Provider for Korean Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_ko_KR.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_ksx1001.h"
|
||||
#include "map_cp949.h"
|
||||
#include "map_cp949ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"ksx1001", NULL, ksx1001_decmap},
|
||||
{"cp949", cp949_encmap, NULL},
|
||||
{"cp949ext", NULL, cp949ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_ko_KR(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_ko_KR", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_ko_KR module");
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* mapdata_zh_CN.c: Map Provider for Simplified Chinese Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_zh_CN.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_gb2312.h"
|
||||
#include "map_gbkext.h"
|
||||
#include "map_gbcommon.h"
|
||||
#include "map_gb18030ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"gb2312", NULL, gb2312_decmap},
|
||||
{"gbkext", NULL, gbkext_decmap},
|
||||
{"gbcommon", gbcommon_encmap, NULL},
|
||||
{"gb18030ext", gb18030ext_encmap, gb18030ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_zh_CN(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_zh_CN", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_zh_CN module");
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
/*
|
||||
* mapdata_zh_TW.c: Map Provider for Traditional Chinese Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_zh_TW.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_big5.h"
|
||||
#include "map_cp950ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"big5", big5_encmap, big5_decmap},
|
||||
{"cp950ext", cp950ext_encmap, cp950ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_zh_TW(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_zh_TW", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_zh_TW module");
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue