Bring CJKCodecs 1.1 into trunk.  This completely reorganizes the source
and installed layouts to make maintenance simple and easy.  It also
adds four new codecs: big5hkscs, euc-jis-2004, shift-jis-2004 and
iso2022-jp-2004.
This commit is contained in:
Hye-Shik Chang 2004-07-18 03:06:29 +00:00
parent 264c659197
commit 2bb146f2f4
109 changed files with 34331 additions and 40239 deletions

View File

@ -548,6 +548,10 @@ exist:
{big5-tw, csbig5}
{Traditional Chinese}
\lineiii{big5hkscs}
{big5-hkscs, hkscs}
{Traditional Chinese}
\lineiii{cp037}
{IBM037, IBM039}
{English}
@ -696,8 +700,12 @@ exist:
{eucjp, ujis, u-jis}
{Japanese}
\lineiii{euc_jis_2004}
{jisx0213, eucjis2004}
{Japanese}
\lineiii{euc_jisx0213}
{jisx0213, eucjisx0213}
{eucjisx0213}
{Japanese}
\lineiii{euc_kr}
@ -733,6 +741,10 @@ exist:
{iso2022jp-2, iso-2022-jp-2}
{Japanese, Korean, Simplified Chinese, Western Europe, Greek}
\lineiii{iso2022_jp_2004}
{iso2022jp-2004, iso-2022-jp-2004}
{Japanese}
\lineiii{iso2022_jp_3}
{iso2022jp-3, iso-2022-jp-3}
{Japanese}
@ -841,6 +853,10 @@ exist:
{csshiftjis, shiftjis, sjis, s_jis}
{Japanese}
\lineiii{shift_jis_2004}
{shiftjis2004, sjis_2004, sjis2004}
{Japanese}
\lineiii{shift_jisx0213}
{shiftjisx0213, sjisx0213, s_jisx0213}
{Japanese}

View File

@ -39,6 +39,10 @@ aliases = {
'big5_tw' : 'big5',
'csbig5' : 'big5',
# big5hkscs codec
'big5_hkscs' : 'big5hkscs',
'hkscs' : 'big5hkscs',
# bz2_codec codec
'bz2' : 'bz2_codec',
@ -197,8 +201,12 @@ aliases = {
'950' : 'cp950',
'ms950' : 'cp950',
# euc_jis_2004 codec
'jisx0213' : 'euc_jis_2004',
'eucjis2004' : 'euc_jis_2004',
'euc_jis2004' : 'euc_jis_2004',
# euc_jisx0213 codec
'jisx0213' : 'euc_jisx0213',
'eucjisx0213' : 'euc_jisx0213',
# euc_jp codec
@ -254,6 +262,10 @@ aliases = {
'iso2022jp_2' : 'iso2022_jp_2',
'iso_2022_jp_2' : 'iso2022_jp_2',
# iso2022_jp_2004 codec
'iso_2022_jp_2004' : 'iso2022_jp_2004',
'iso2022jp_2004' : 'iso2022_jp_2004',
# iso2022_jp_3 codec
'iso2022jp_3' : 'iso2022_jp_3',
'iso_2022_jp_3' : 'iso2022_jp_3',
@ -416,6 +428,11 @@ aliases = {
'sjis' : 'shift_jis',
's_jis' : 'shift_jis',
# shift_jis_2004 codec
'shiftjis2004' : 'shift_jis_2004',
'sjis_2004' : 'shift_jis_2004',
's_jis_2004' : 'shift_jis_2004',
# shift_jisx0213 codec
'shiftjisx0213' : 'shift_jisx0213',
'sjisx0213' : 'shift_jisx0213',

View File

@ -2,11 +2,12 @@
# big5.py: Python Unicode Codec for BIG5
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: big5.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_big5 import codec
import codecs
import _codecs_tw, codecs
codec = _codecs_tw.getcodec('big5')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -0,0 +1,34 @@
#
# big5hkscs.py: Python Unicode Codec for BIG5HKSCS
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $
#
import _codecs_hk, codecs
codec = _codecs_hk.getcodec('big5hkscs')
class Codec(codecs.Codec):
    """Stateless BIG5HKSCS codec.

    Both entry points are taken unchanged from the module-level ``codec``
    object.
    """

    encode, decode = codec.encode, codec.decode
class StreamReader(Codec, codecs.StreamReader):
    """BIG5HKSCS stream reader backed by ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamReader.__init__(self, stream, errors)
        # Shadow the inherited reading methods on this instance with the
        # bound methods of a reader created for the same stream.
        backend = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(backend, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """BIG5HKSCS stream writer backed by ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # Shadow the inherited writing methods on this instance with the
        # bound methods of a writer created for the same stream.
        backend = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(backend, method))
def getregentry():
    """Return the 4-tuple (encode, decode, StreamReader, StreamWriter)."""
    encoder, decoder = codec.encode, codec.decode
    return (encoder, decoder, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# cp932.py: Python Unicode Codec for CP932
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: cp932.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_cp932 import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('cp932')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# cp949.py: Python Unicode Codec for CP949
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: cp949.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_cp949 import codec
import codecs
import _codecs_kr, codecs
codec = _codecs_kr.getcodec('cp949')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# cp950.py: Python Unicode Codec for CP950
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: cp950.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_cp950 import codec
import codecs
import _codecs_tw, codecs
codec = _codecs_tw.getcodec('cp950')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -0,0 +1,34 @@
#
# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
#
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('euc_jis_2004')
class Codec(codecs.Codec):
    """Stateless EUC_JIS_2004 codec.

    Both entry points are taken unchanged from the module-level ``codec``
    object.
    """

    encode, decode = codec.encode, codec.decode
class StreamReader(Codec, codecs.StreamReader):
    """EUC_JIS_2004 stream reader backed by ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamReader.__init__(self, stream, errors)
        # Shadow the inherited reading methods on this instance with the
        # bound methods of a reader created for the same stream.
        backend = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(backend, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """EUC_JIS_2004 stream writer backed by ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # Shadow the inherited writing methods on this instance with the
        # bound methods of a writer created for the same stream.
        backend = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(backend, method))
def getregentry():
    """Return the 4-tuple (encode, decode, StreamReader, StreamWriter)."""
    encoder, decoder = codec.encode, codec.decode
    return (encoder, decoder, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_jisx0213.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_euc_jisx0213 import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('euc_jisx0213')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# euc_jp.py: Python Unicode Codec for EUC_JP
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_jp.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_euc_jp import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('euc_jp')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# euc_kr.py: Python Unicode Codec for EUC_KR
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_kr.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_euc_kr import codec
import codecs
import _codecs_kr, codecs
codec = _codecs_kr.getcodec('euc_kr')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# gb18030.py: Python Unicode Codec for GB18030
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: gb18030.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_gb18030 import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('gb18030')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# gb2312.py: Python Unicode Codec for GB2312
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: gb2312.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_gb2312 import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('gb2312')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# gbk.py: Python Unicode Codec for GBK
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: gbk.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_gbk import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('gbk')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# hz.py: Python Unicode Codec for HZ
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: hz.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_hz import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('hz')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -1,12 +1,13 @@
#
# iso2022_jp.py: Python Unicode Codec for ISO_2022_JP
# iso2022_jp.py: Python Unicode Codec for ISO2022_JP
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -1,12 +1,13 @@
#
# iso2022_jp_1.py: Python Unicode Codec for ISO_2022_JP_1
# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_1.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_1 import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_1')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -1,12 +1,13 @@
#
# iso2022_jp_2.py: Python Unicode Codec for ISO_2022_JP_2
# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_2.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_2 import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_2')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -0,0 +1,34 @@
#
# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
#
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
class Codec(codecs.Codec):
    """Stateless ISO2022_JP_2004 codec.

    Both entry points are taken unchanged from the module-level ``codec``
    object.
    """

    encode, decode = codec.encode, codec.decode
class StreamReader(Codec, codecs.StreamReader):
    """ISO2022_JP_2004 stream reader backed by ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamReader.__init__(self, stream, errors)
        # Shadow the inherited reading methods on this instance with the
        # bound methods of a reader created for the same stream.
        backend = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(backend, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """ISO2022_JP_2004 stream writer backed by ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # Shadow the inherited writing methods on this instance with the
        # bound methods of a writer created for the same stream.
        backend = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(backend, method))
def getregentry():
    """Return the 4-tuple (encode, decode, StreamReader, StreamWriter)."""
    encoder, decoder = codec.encode, codec.decode
    return (encoder, decoder, StreamReader, StreamWriter)

View File

@ -1,12 +1,13 @@
#
# iso2022_jp_3.py: Python Unicode Codec for ISO_2022_JP_3
# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_3.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_3 import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_3')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -1,12 +1,13 @@
#
# iso2022_jp_ext.py: Python Unicode Codec for ISO_2022_JP_EXT
# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_ext.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_ext import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -1,12 +1,13 @@
#
# iso2022_kr.py: Python Unicode Codec for ISO_2022_KR
# iso2022_kr.py: Python Unicode Codec for ISO2022_KR
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_kr.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_kr import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_kr')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# johab.py: Python Unicode Codec for JOHAB
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: johab.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_johab import codec
import codecs
import _codecs_kr, codecs
codec = _codecs_kr.getcodec('johab')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# shift_jis.py: Python Unicode Codec for SHIFT_JIS
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: shift_jis.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_shift_jis import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('shift_jis')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -0,0 +1,34 @@
#
# shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
#
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('shift_jis_2004')
class Codec(codecs.Codec):
    """Stateless SHIFT_JIS_2004 codec.

    Both entry points are taken unchanged from the module-level ``codec``
    object.
    """

    encode, decode = codec.encode, codec.decode
class StreamReader(Codec, codecs.StreamReader):
    """SHIFT_JIS_2004 stream reader backed by ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamReader.__init__(self, stream, errors)
        # Shadow the inherited reading methods on this instance with the
        # bound methods of a reader created for the same stream.
        backend = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(backend, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """SHIFT_JIS_2004 stream writer backed by ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # Shadow the inherited writing methods on this instance with the
        # bound methods of a writer created for the same stream.
        backend = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(backend, method))
def getregentry():
    """Return the 4-tuple (encode, decode, StreamReader, StreamWriter)."""
    encoder, decoder = codec.encode, codec.decode
    return (encoder, decoder, StreamReader, StreamWriter)

View File

@ -2,11 +2,12 @@
# shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: shift_jisx0213.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_shift_jisx0213 import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('shift_jisx0213')
class Codec(codecs.Codec):
encode = codec.encode
@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)

View File

@ -63,6 +63,9 @@ teststring = {
"\x88\x91\xe5\x80\x91\xe6\x89\x80\x0a\xe8\xa6\x81\xe8\xa8\x8e\xe8"
"\xab\x96\xe7\x9a\x84\xe5\x95\x8f\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98"
"\xaf\x3a\x0a\x0a"),
'big5hkscs': (
"\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a",
"\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a"),
'cp949': (
"\x8c\x63\xb9\xe6\xb0\xa2\xc7\xcf\x20\xbc\x84\xbd\xc3\xc4\xdd\xb6"
"\xf3\x0a\x0a\xa8\xc0\xa8\xc0\xb3\xb3\x21\x21\x20\xec\xd7\xce\xfa"

View File

@ -3,7 +3,7 @@
# test_codecencodings_cn.py
# Codec encoding tests for PRC encodings.
#
# $CJKCodecs: test_codecencodings_cn.py,v 1.1 2003/12/19 03:00:05 perky Exp $
# $CJKCodecs: test_codecencodings_cn.py,v 1.2 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
@ -27,8 +27,8 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
tstring = test_multibytecodec_support.load_teststring('gbk')
codectests = (
# invalid bytes
("abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "strict", None),
("abc\xc8", "strict", None),
("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python
#
# test_codecencodings_hk.py
# Codec encoding tests for HongKong encodings.
#
# $CJKCodecs: test_codecencodings_hk.py,v 1.1 2004/07/10 17:35:20 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_Big5HKSCS(test_multibytecodec_support.TestBase,
                     unittest.TestCase):
    # Configuration consumed by the shared TestBase checks for big5hkscs.
    encoding = 'big5hkscs'
    tstring = test_multibytecodec_support.load_teststring(encoding)

    # Each entry looks like (input, error mode, expected result), with None
    # presumably marking input that must be rejected -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
    )
def test_main():
    # Wrap the Big5-HKSCS cases in a suite and pass it to
    # test_support.run_suite.
    cases = unittest.makeSuite(Test_Big5HKSCS)
    test_support.run_suite(unittest.TestSuite([cases]))
if __name__ == "__main__":
test_main()

View File

@ -3,7 +3,7 @@
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#
# $CJKCodecs: test_codecencodings_jp.py,v 1.2 2004/01/06 09:25:37 perky Exp $
# $CJKCodecs: test_codecencodings_jp.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
@ -64,16 +64,6 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
(u"\u203e", "strict", "\x7e"),
)
class Test_EUC_JP_STRICT(test_multibytecodec_support.TestBase,
unittest.TestCase):
encoding = 'euc_jp_strict'
tstring = test_multibytecodec_support.load_teststring('euc_jp')
codectests = eucjp_commontests + (
("\xa1\xc0\\", "strict", u"\\\\"),
(u"\xa5", "strict", None),
(u"\u203e", "strict", None),
)
shiftjis_commonenctests = (
("abc\x80\x80\x82\x84", "strict", None),
("abc\xf8", "strict", None),
@ -90,14 +80,6 @@ class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
)
class Test_SJIS_STRICT(test_multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jis_strict'
tstring = test_multibytecodec_support.load_teststring('shift_jis')
codectests = shiftjis_commonenctests + (
("\\\x7e", "replace", u"\xa5\u203e"),
("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
)
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jisx0213'
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
@ -123,9 +105,6 @@ def test_main():
suite.addTest(unittest.makeSuite(Test_EUC_JISX0213))
suite.addTest(unittest.makeSuite(Test_EUC_JP_COMPAT))
suite.addTest(unittest.makeSuite(Test_SJIS_COMPAT))
if test_multibytecodec_support.__cjkcodecs__:
suite.addTest(unittest.makeSuite(Test_EUC_JP_STRICT))
suite.addTest(unittest.makeSuite(Test_SJIS_STRICT))
suite.addTest(unittest.makeSuite(Test_SJISX0213))
test_support.run_suite(suite)

View File

@ -3,7 +3,7 @@
# test_codecencodings_kr.py
# Codec encoding tests for ROK encodings.
#
# $CJKCodecs: test_codecencodings_kr.py,v 1.1 2003/12/19 03:00:06 perky Exp $
# $CJKCodecs: test_codecencodings_kr.py,v 1.2 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support

View File

@ -3,7 +3,7 @@
# test_codecencodings_tw.py
# Codec encoding tests for ROC encodings.
#
# $CJKCodecs: test_codecencodings_tw.py,v 1.1 2003/12/19 03:00:06 perky Exp $
# $CJKCodecs: test_codecencodings_tw.py,v 1.2 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support

View File

@ -3,7 +3,7 @@
# test_codecmaps_cn.py
# Codec mapping tests for PRC encodings
#
# $CJKCodecs: test_codecmaps_cn.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_cn.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support

View File

@ -0,0 +1,25 @@
#!/usr/bin/env python
#
# test_codecmaps_hk.py
# Codec mapping tests for HongKong encodings
#
# $CJKCodecs: test_codecmaps_hk.py,v 1.1 2004/07/10 17:35:20 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestBig5HKSCSMap(
        test_multibytecodec_support.TestBase_Mapping, unittest.TestCase):
    # Mapping table for the big5hkscs codec.  mapfilename is the local file
    # name and mapfileurl is where that file can be downloaded from.
    mapfilename = 'BIG5HKSCS.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT'
    encoding = 'big5hkscs'
# Build a suite from TestBig5HKSCSMap and run it via test_support.run_suite.
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestBig5HKSCSMap))
test_support.run_suite(suite)
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# register_skip_expected() call below belongs to test_main() or runs at import
# time -- confirm against the real file before re-indenting.
test_multibytecodec_support.register_skip_expected(TestBig5HKSCSMap)
if __name__ == "__main__":
test_main()

View File

@ -3,7 +3,7 @@
# test_codecmaps_jp.py
# Codec mapping tests for Japanese encodings
#
# $CJKCodecs: test_codecmaps_jp.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_jp.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
@ -48,15 +48,6 @@ class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
('\x81_', u'\\'),
]
class TestSJISSTRICTMap(test_multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'shift_jis_strict'
mapfilename = 'SHIFTJIS.TXT'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \
'/EASTASIA/JIS/SHIFTJIS.TXT'
class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'euc_jisx0213'
@ -76,8 +67,6 @@ def test_main():
suite.addTest(unittest.makeSuite(TestCP932Map))
suite.addTest(unittest.makeSuite(TestEUCJPCOMPATMap))
suite.addTest(unittest.makeSuite(TestSJISCOMPATMap))
if test_multibytecodec_support.__cjkcodecs__:
suite.addTest(unittest.makeSuite(TestSJISSTRICTMap))
suite.addTest(unittest.makeSuite(TestEUCJISX0213Map))
suite.addTest(unittest.makeSuite(TestSJISX0213Map))
test_support.run_suite(suite)
@ -85,5 +74,6 @@ def test_main():
test_multibytecodec_support.register_skip_expected(TestCP932Map,
TestEUCJPCOMPATMap, TestSJISCOMPATMap, TestEUCJISX0213Map,
TestSJISX0213Map)
if __name__ == "__main__":
test_main()

View File

@ -3,7 +3,7 @@
# test_codecmaps_kr.py
# Codec mapping tests for ROK encodings
#
# $CJKCodecs: test_codecmaps_kr.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_kr.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support

View File

@ -3,7 +3,7 @@
# test_codecmaps_tw.py
# Codec mapping tests for ROC encodings
#
# $CJKCodecs: test_codecmaps_tw.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_tw.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support

View File

@ -3,7 +3,7 @@
# test_multibytecodec.py
# Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $
# $CJKCodecs: test_multibytecodec.py,v 1.8 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
@ -69,6 +69,12 @@ class Test_StreamWriter(unittest.TestCase):
def test_str_decode(self):
    # Encoding a plain ASCII str with gb18030 must give the same str back.
    encoded = 'abcd'.encode('gb18030')
    self.assertEqual(encoded, 'abcd')
def test_streamwriter_strwrite(self):
    # A plain str written through the gb18030 stream writer must reach the
    # underlying buffer unchanged.
    sink = StringIO.StringIO()
    codecs.getwriter('gb18030')(sink).write('abcd')
    self.assertEqual(sink.getvalue(), 'abcd')
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test_StreamWriter))

View File

@ -3,7 +3,7 @@
# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
#
# $CJKCodecs: test_multibytecodec_support.py,v 1.5 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
import sys, codecs, os.path
import unittest
@ -164,13 +164,8 @@ class TestBase_Mapping(unittest.TestCase):
def __init__(self, *args, **kw):
unittest.TestCase.__init__(self, *args, **kw)
if not os.path.exists(self.mapfilename):
parent = os.path.join(os.pardir, self.mapfilename)
if not os.path.exists(parent):
format = '%s not found, download from %s'
raise test_support.TestSkipped(format %
(self.mapfilename, self.mapfileurl))
else:
self.mapfilename = parent
raise test_support.TestSkipped('%s not found, download from %s' %
(self.mapfilename, self.mapfileurl))
def test_mapping_file(self):
unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))

View File

@ -34,6 +34,9 @@ Extension modules
Library
-------
- Several new unicode codecs are added: big5hkscs, euc_jis_2004,
iso2022_jp_2004, shift_jis_2004.
- Bug #788520. Queue.{get, get_nowait, put, put_nowait} have new
implementations, exploiting Conditions (which didn't exist at the time
Queue was introduced). A minor semantic change is that the Full and

View File

@ -484,42 +484,12 @@ GLHACK=-Dclear=__GLclear
# multibytecodec is required for all the other CJK codec modules
#_multibytecodec cjkcodecs/multibytecodec.c
# mapdata modules are required to support their respective dependent codecs
#_codecs_mapdata_ja_JP cjkcodecs/mapdata_ja_JP.c
#_codecs_mapdata_ko_KR cjkcodecs/mapdata_ko_KR.c
#_codecs_mapdata_zh_CN cjkcodecs/mapdata_zh_CN.c
#_codecs_mapdata_zh_TW cjkcodecs/mapdata_zh_TW.c
# ja_JP codecs
#_codecs_cp932 cjkcodecs/_cp932.c
#_codecs_euc_jisx0213 cjkcodecs/_euc_jisx0213.c
#_codecs_euc_jp cjkcodecs/_euc_jp.c
#_codecs_iso2022_jp cjkcodecs/_iso2022_jp.c
#_codecs_iso2022_jp_1 cjkcodecs/_iso2022_jp_1.c
#_codecs_iso2022_jp_3 cjkcodecs/_iso2022_jp_3.c
#_codecs_iso2022_jp_ext cjkcodecs/_iso2022_jp_ext.c
#_codecs_shift_jis cjkcodecs/_shift_jis.c
#_codecs_shift_jisx0213 cjkcodecs/_shift_jisx0213.c
# ko_KR codecs
#_codecs_cp949 cjkcodecs/_cp949.c
#_codecs_euc_kr cjkcodecs/_euc_kr.c
#_codecs_iso2022_kr cjkcodecs/_iso2022_kr.c
#_codecs_johab cjkcodecs/_johab.c
# zh_CN codecs
#_codecs_gb18030 cjkcodecs/_gb18030.c
#_codecs_gb2312 cjkcodecs/_gb2312.c
#_codecs_gbk cjkcodecs/_gbk.c
#_codecs_hz cjkcodecs/_hz.c
# zh_TW codecs
#_codecs_big5 cjkcodecs/_big5.c
#_codecs_cp950 cjkcodecs/_cp950.c
# international codecs
#_codecs_iso2022_jp_2 cjkcodecs/_iso2022_jp_2.c # requires ja_JP, ko_KR, zh_CN
#_codecs_cn cjkcodecs/_codecs_cn.c
#_codecs_hk cjkcodecs/_codecs_hk.c
#_codecs_iso2022 cjkcodecs/_codecs_iso2022.c
#_codecs_jp cjkcodecs/_codecs_jp.c
#_codecs_kr cjkcodecs/_codecs_kr.c
#_codecs_tw cjkcodecs/_codecs_tw.c
# Example -- included for reference only:
# xx xxmodule.c

View File

@ -2,7 +2,7 @@ Notes on cjkcodecs
-------------------
This directory contains source files for cjkcodecs extension modules.
They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs)
as of Jan 17 2004 currently.
as of Jul 18 2004 currently.

View File

@ -1,67 +0,0 @@
/*
* _big5.c: the Big5 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _big5.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(big5)
DECMAP(big5)
/*
 * Big5 encoder (pre-reorganization version).
 *
 * Characters below 0x80 are copied through as a single byte.  Every
 * other character is looked up in the big5 encode map and written as
 * two bytes, high byte first.
 *
 * Returns 0 once all input is consumed, or 1 when a character has no
 * Big5 mapping (presumably the number of offending input units --
 * confirm against codeccommon.h).
 * NOTE(review): RESERVE_OUTBUF, UCS4INVALID, TRYMAP_ENC and NEXT are
 * macros from codeccommon.h and may return early on their own.
 */
ENCODER(big5)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
/* ASCII passes through unchanged. */
if (c < 0x80) {
RESERVE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
/* TRYMAP_ENC opens an if-statement; the "else" below is its failure arm. */
TRYMAP_ENC(big5, code, c);
else return 1;
(*outbuf)[0] = code >> 8;
(*outbuf)[1] = code & 0xFF;
NEXT(1, 2)
}
return 0;
}
/*
 * Big5 decoder (pre-reorganization version).
 *
 * Bytes below 0x80 decode to themselves; anything else is treated as
 * the lead byte of a two-byte sequence and looked up in the big5
 * decode map.
 *
 * Returns 0 once all input is consumed, or 2 when the two-byte
 * sequence has no mapping.
 */
DECODER(big5)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
/* A second byte is needed from here on. */
RESERVE_INBUF(2)
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
} else return 2;
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the big5 codec (pre-reorganization layout).
 * NOTE(review): judging by the macro names, this opens the zh_TW map
 * data module and imports both the encode and decode big5 tables --
 * confirm against codecentry.h.
 */
BEGIN_CODEC_REGISTRY(big5)
MAPOPEN(zh_TW)
IMPORTMAP_ENCDEC(big5)
MAPCLOSE()
END_CODEC_REGISTRY(big5)

View File

@ -0,0 +1,434 @@
/*
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_cn.c,v 1.8 2004/07/07 14:59:26 perky Exp $
*/
#include "cjkcodecs.h"
#include "mappings_cn.h"
/*
 * GBK_PREDECODE(dc1, dc2, assi): hard-wired decode overrides applied
 * before the table lookups.  For the byte pairs A1AA, A844 and A1A4 it
 * assigns U+2014, U+2015 and U+00B7 respectively to `assi`.
 * The expansion is an open if/else-if chain: the caller is expected to
 * continue it with "else ...", which runs when no override matched.
 */
#define GBK_PREDECODE(dc1, dc2, assi) \
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7;
/*
 * GBK_PREENCODE(code, assi): the encode-side inverse of GBK_PREDECODE.
 * For U+2014, U+2015 and U+00B7 it assigns 0xa1aa, 0xa844 and 0xa1a4
 * respectively to `assi`.  Like GBK_PREDECODE it expands to an open
 * if/else-if chain that the caller continues with "else ...".
 */
#define GBK_PREENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
else if ((code) == 0x2015) (assi) = 0xa844; \
else if ((code) == 0x00b7) (assi) = 0xa1a4;
/*
* GB2312 codec
*/
/*
 * GB2312 encoder.
 *
 * ASCII is written as one byte.  Other characters are looked up in the
 * shared gbcommon encode map; entries whose top bit is set are rejected
 * as GBK-only, and the rest are written as two bytes, each with bit 7
 * set.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character
 * (presumably the number of offending input units -- confirm against
 * cjkcodecs.h).
 */
ENCODER(gb2312)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* TRYMAP_ENC opens an if-statement; "else" is the lookup-failed arm. */
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * GB2312 decoder.
 *
 * Bytes below 0x80 decode to themselves.  Otherwise the byte and its
 * successor have bit 7 flipped (^ 0x80) and are looked up in the
 * gb2312 decode map.
 *
 * Returns 0 when all input is consumed, 2 when the two-byte sequence
 * has no mapping.
 */
DECODER(gb2312)
{
while (inleft > 0) {
unsigned char c = **inbuf;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* GBK codec
*/
/*
 * GBK encoder.
 *
 * ASCII is written as one byte.  Other characters first go through the
 * GBK_PREENCODE overrides, then the gbcommon encode map.  The first
 * output byte always has bit 7 set; the second byte is written as-is
 * when the table entry's top bit marks it as GBK, and with bit 7 set
 * when it is a GB2312 entry.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(gbk)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* One if/else chain: overrides, then table lookup, then failure. */
GBK_PREENCODE(c, code)
else TRYMAP_ENC(gbcommon, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GBK decoder.
 *
 * Bytes below 0x80 decode to themselves.  Two-byte sequences are tried
 * in order against: the GBK_PREDECODE overrides, the gb2312 map (with
 * bit 7 of both bytes flipped), and the gbkext map (raw bytes).
 *
 * Returns 0 when all input is consumed, 2 when no table matches.
 */
DECODER(gbk)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* One if/else chain; the first successful lookup wins. */
GBK_PREDECODE(c, IN2, **outbuf)
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* GB18030 codec
*/
/*
 * GB18030 encoder.
 *
 * - ASCII: one byte.
 * - U+10000..U+10FFFF: four bytes computed arithmetically from
 *   (c - 0x10000) in mixed radix 10 / 126 / 10, first byte offset 0x90.
 * - BMP characters: GBK_PREENCODE overrides, then the gbcommon map,
 *   then the gb18030ext map, all giving two bytes.
 * - Remaining BMP characters: four bytes derived from the
 *   gb18030_to_unibmp_ranges table, first byte offset 0x81.
 *
 * Returns 0 when all input is consumed.  A non-zero return marks an
 * unencodable character; on narrow (Py_UNICODE_SIZE == 2) builds an
 * out-of-range surrogate pair returns 2 instead of 1.
 */
ENCODER(gb18030)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
/* NOTE(review): presumably folds a surrogate pair into one code point in c. */
DECODE_SURROGATE(c)
if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
return 2; /* surrogates pair */
#else
return 1;
#endif
else if (c >= 0x10000) {
/* Supplementary planes: purely arithmetic four-byte form. */
ucs4_t tc = c - 0x10000;
REQUIRE_OUTBUF(4)
/* Bytes are produced last-to-first by repeated division. */
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2
NEXT(2, 4) /* surrogates pair */
#else
NEXT(1, 4)
#endif
continue;
}
REQUIRE_OUTBUF(2)
/* One if/else chain: overrides, two tables, then the range fallback. */
GBK_PREENCODE(c, code)
else TRYMAP_ENC(gbcommon, code, c);
else TRYMAP_ENC(gb18030ext, code, c);
else {
const struct _gb18030_to_unibmp_ranges *utrrange;
REQUIRE_OUTBUF(4)
/* The range table is terminated by an entry whose first == 0. */
for (utrrange = gb18030_to_unibmp_ranges;
utrrange->first != 0;
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
Py_UNICODE tc;
/* Linear index of c within the four-byte BMP space. */
tc = c - utrrange->first +
utrrange->base;
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)tc + 0x81)
NEXT(1, 4)
break;
}
/* Reaching the terminator means no range covered c. */
if (utrrange->first == 0) {
PyErr_SetString(PyExc_RuntimeError,
"unicode mapping invalid");
return 1;
}
continue;
}
/* Two-byte result from one of the table lookups above. */
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GB18030 decoder.
 *
 * - Bytes below 0x80 decode to themselves.
 * - A second byte in '0'..'9' (0x30..0x39) introduces a four-byte
 *   sequence.  Its bytes are validated, reduced to zero-based digits
 *   and combined in mixed radix 10 / 126 / 10 into a linear index:
 *     first-byte index 0..3  -> BMP, resolved via
 *                               gb18030_to_unibmp_ranges;
 *     first-byte index >= 15 -> U+10000..U+10FFFF, computed directly.
 * - Any other two-byte sequence is tried against the GBK_PREDECODE
 *   overrides, then the gb2312, gbkext and gb18030ext maps.
 *
 * Returns 0 when all input is consumed, 4 for an invalid four-byte
 * sequence and 2 for an unmapped two-byte sequence.
 */
DECODER(gb18030)
{
    while (inleft > 0) {
        unsigned char c = IN1, c2;

        REQUIRE_OUTBUF(1)

        if (c < 0x80) {
            OUT1(c)
            NEXT(1, 1)
            continue;
        }

        REQUIRE_INBUF(2)

        c2 = IN2;
        if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
            const struct _gb18030_to_unibmp_ranges *utr;
            unsigned char c3, c4;
            ucs4_t lseq;

            REQUIRE_INBUF(4)
            c3 = IN3;
            c4 = IN4;
            /*
             * The first and third bytes each carry a radix-126 digit,
             * i.e. 0x81..0xfe.  Without the upper bound a 0xff byte
             * would yield digit 126 and silently alias another
             * (valid) sequence instead of being rejected.
             */
            if (c < 0x81 || c > 0xfe ||
                c3 < 0x81 || c3 > 0xfe ||
                c4 < 0x30 || c4 > 0x39)
                return 4;
            c -= 0x81;  c2 -= 0x30;
            c3 -= 0x81; c4 -= 0x30;

            if (c < 4) { /* U+0080 - U+FFFF */
                lseq = ((ucs4_t)c * 10 + c2) * 1260 +
                       (ucs4_t)c3 * 10 + c4;
                if (lseq < 39420) {
                    /* Advance to the last range whose base <= lseq. */
                    for (utr = gb18030_to_unibmp_ranges;
                         lseq >= (utr + 1)->base;
                         utr++) ;
                    OUT1(utr->first - utr->base + lseq)
                    NEXT(4, 1)
                    continue;
                }
            }
            else if (c >= 15) { /* U+10000 - U+10FFFF */
                lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
                       * 1260 + (ucs4_t)c3 * 10 + c4;
                if (lseq <= 0x10FFFF) {
                    WRITEUCS4(lseq);
                    NEXT_IN(4)
                    continue;
                }
            }
            /* Well-formed bytes, but outside both assigned ranges. */
            return 4;
        }

        /* One if/else chain; the first successful lookup wins. */
        GBK_PREDECODE(c, c2, **outbuf)
        else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
        else TRYMAP_DEC(gbkext, **outbuf, c, c2);
        else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
        else return 2;

        NEXT(2, 1)
    }

    return 0;
}
/*
* HZ codec
*/
/*
 * HZ encoder state setup: state->i = 0 selects ASCII mode
 * (1 means a "~{" GB section is currently open).  Always returns 0.
 */
ENCODER_INIT(hz)
{
state->i = 0;
return 0;
}
/*
 * HZ encoder reset: if a GB section is open (state->i != 0), emit the
 * closing "~}" escape and return to ASCII mode.  Always returns 0
 * unless WRITE2 bails out for lack of output space (macro behaviour
 * not visible here).
 */
ENCODER_RESET(hz)
{
if (state->i != 0) {
WRITE2('~', '}')
state->i = 0;
NEXT_OUT(2)
}
return 0;
}
/*
 * HZ encoder.
 *
 * state->i tracks the current mode: 0 = ASCII, 1 = inside "~{ ... ~}".
 * ASCII characters are written directly, preceded by "~}" if a GB
 * section is open.  Other characters are looked up in the gbcommon
 * map (GBK-only entries are rejected) and written as two 7-bit bytes,
 * preceded by "~{" if the encoder is still in ASCII mode.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(hz)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
WRITE1((unsigned char)c)
NEXT(1, 1)
}
else {
/* Leave GB mode before emitting the ASCII byte. */
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
state->i = 0;
}
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
if (state->i == 0) {
/* Enter GB mode and emit the first double-byte character. */
WRITE4('~', '{', code >> 8, code & 0xff)
NEXT(1, 4)
state->i = 1;
}
else {
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
}
}
return 0;
}
/*
 * HZ decoder state setup: start in ASCII mode (state->i = 0).
 * Always returns 0.
 */
DECODER_INIT(hz)
{
state->i = 0;
return 0;
}
/*
 * HZ decoder reset: drop back to ASCII mode (state->i = 0).
 * Always returns 0.
 */
DECODER_RESET(hz)
{
state->i = 0;
return 0;
}
DECODER(hz)
{
while (inleft > 0) {
unsigned char c = IN1;
if (c == '~') {
unsigned char c2 = IN2;
REQUIRE_INBUF(2)
if (c2 == '~') {
WRITE1('~')
NEXT(2, 1)
continue;
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 2;
NEXT(2, 0);
continue;
}
if (c & 0x80)
return 1;
if (state->i == 0) { /* ASCII mode */
WRITE1(c)
NEXT(1, 1)
}
else { /* GB mode */
REQUIRE_INBUF(2)
REQUIRE_OUTBUF(1)
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
return 2;
}
}
return 0;
}
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)
CODEC_STATEFUL(hz)
END_CODECS_LIST
I_AM_A_MODULE_FOR(cn)

View File

@ -0,0 +1,144 @@
/*
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_hk.c,v 1.3 2004/07/07 14:59:26 perky Exp $
*/
#define USING_IMPORTED_MAPS
#include "cjkcodecs.h"
#include "mappings_hk.h"
/*
* BIG5HKSCS codec
*/
/* Big5 tables borrowed from the "tw" codec module; filled in by CODEC_INIT. */
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;
/*
 * One-time initialisation for big5hkscs: imports the big5 encode and
 * decode maps from the "tw" module on first use.
 * Returns 0 on success, -1 if the import fails (in which case
 * `initialized` stays 0 so a later call retries).
 */
CODEC_INIT(big5hkscs)
{
static int initialized = 0;
/* IMPORT_MAP is only evaluated while not yet initialised. */
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
return -1;
initialized = 1;
return 0;
}
/*
 * Big5-HKSCS encoder.
 *
 * ASCII is written as one byte.  BMP characters are looked up in the
 * big5hkscs_bmp map first and in plain big5 second.  Characters in
 * 0x20000..0x2FFFF are looked up in big5hkscs_nonbmp by their low 16
 * bits.  Everything else is unencodable.  Output is always two bytes,
 * high byte first.
 *
 * Returns 0 when all input is consumed; otherwise 1 (BMP miss) or
 * `insize`, the number of input units the character occupied.
 */
ENCODER(big5hkscs)
{
while (inleft > 0) {
ucs4_t c = **inbuf;
DBCHAR code;
int insize;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
/* NOTE(review): presumably merges a surrogate pair into c; see cjkcodecs.h. */
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
REQUIRE_OUTBUF(2)
if (c < 0x10000) {
/* HKSCS additions take precedence over plain Big5. */
TRYMAP_ENC(big5hkscs_bmp, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
/* Plane 2: the table is keyed on the low 16 bits only. */
TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
else return insize;
}
else
return insize;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(insize, 2)
}
return 0;
}
/*
 * BH2S(c1, c2): linear index of a Big5-HKSCS byte pair, counting rows
 * from lead byte 0x88 with 0xfe - 0x40 + 1 trail-byte slots per row
 * starting at trail byte 0x40.
 */
#define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
/*
 * Big5-HKSCS decoder.
 *
 * Bytes below 0x80 decode to themselves.  Two-byte sequences are tried
 * against plain big5 first, except for C6A1..C8FE which go straight to
 * the HKSCS table.  For an HKSCS hit, a bit in one of three "phint"
 * bitmaps (selected by the pair's BH2S index) tells whether the
 * decoded value belongs to plane 2 (written as decoded | 0x20000) or
 * is a BMP character.
 *
 * Returns 0 when all input is consumed, 2 for an unmapped pair, and
 * MBERR_INTERNAL if an HKSCS hit falls outside all three hint ranges.
 */
DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t decoded;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* C6A1..C8FE: skip the big5 table and use HKSCS directly. */
if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
goto hkscsdec;
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
/* Label sits inside the else arm so the goto above lands on the HKSCS lookup. */
hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
int s = BH2S(c, IN2);
const unsigned char *hintbase;
assert(0x88 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe);
/* Pick the hint bitmap for this region and rebase s to its start. */
if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x88, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_11939;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21733;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
/* One bit per code: set means the character lives in plane 2. */
if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)
NEXT_IN(2)
}
else {
OUT1(decoded)
NEXT(2, 1)
}
}
else return 2;
}
return 0;
}
/*
 * Tables and codecs exported by this module.
 * NOTE(review): judging by the macro names, DECONLY/ENCONLY mark a
 * mapping as having only a decode or only an encode table -- confirm
 * against cjkcodecs.h.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
/* big5hkscs is the one codec here with a CODEC_INIT hook (it imports big5 from "tw"). */
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
I_AM_A_MODULE_FOR(hk)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,731 @@
/*
* _codecs_jp.c: Codecs collection for Japanese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_jp.c,v 1.14 2004/07/07 17:54:47 perky Exp $
*/
#define USING_BINARY_PAIR_SEARCH
#define EMPBASE 0x20000
#include "cjkcodecs.h"
#include "mappings_jp.h"
#include "mappings_jisx0213_pair.h"
#include "alg_jisx0201.h"
#include "emu_jisx0213_2000.h"
/*
* CP932 codec
*/
/*
 * CP932 (Windows Shift_JIS) encoder.
 *
 * Single-byte output:
 *   - U+0000..U+0080 are written unchanged;
 *   - U+FF61..U+FF9F (half-width katakana) map to 0xA1..0xDF;
 *   - U+F8F0 maps to 0xA0 and U+F8F1..U+F8F3 map to 0xFD..0xFF,
 *     mirroring what DECODER(cp932) produces for those bytes.
 * Double-byte output, tried in order:
 *   - the cp932ext table (code used as-is);
 *   - the jisxcommon table, JIS X 0208 entries only, converted to
 *     Shift_JIS byte layout;
 *   - U+E000..U+E757 (user-defined area), 188 characters per lead
 *     byte starting at 0xF0.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(cp932)
{
    while (inleft > 0) {
        Py_UNICODE c = IN1;
        DBCHAR code;
        unsigned char c1, c2;

        if (c <= 0x80) {
            WRITE1((unsigned char)c)
            NEXT(1, 1)
            continue;
        }
        else if (c >= 0xff61 && c <= 0xff9f) {
            WRITE1(c - 0xfec0)
            NEXT(1, 1)
            continue;
        }
        else if (c >= 0xf8f0 && c <= 0xf8f3) {
            /* Windows compatibility */
            REQUIRE_OUTBUF(1)
            if (c == 0xf8f0)
                OUT1(0xa0)
            else
                /*
                 * Exact inverse of the decoder's
                 * "0xf8f1 - 0xfd + c".  The previous constant
                 * (0xfef1) only gave the right byte because the
                 * result was truncated to 8 bits.
                 */
                OUT1(c - 0xf8f1 + 0xfd)
            NEXT(1, 1)
            continue;
        }

        UCS4INVALID(c)
        REQUIRE_OUTBUF(2)

        TRYMAP_ENC(cp932ext, code, c) {
            OUT1(code >> 8)
            OUT2(code & 0xff)
        }
        else TRYMAP_ENC(jisxcommon, code, c) {
            if (code & 0x8000) /* MSB set: JIS X 0212 */
                return 1;

            /* JIS X 0208: fold two 94-cell rows into one lead byte. */
            c1 = code >> 8;
            c2 = code & 0xff;
            c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
            c1 = (c1 - 0x21) >> 1;
            OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
            OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
        }
        else if (c >= 0xe000 && c < 0xe758) {
            /* User-defined area */
            c1 = (Py_UNICODE)(c - 0xe000) / 188;
            c2 = (Py_UNICODE)(c - 0xe000) % 188;
            OUT1(c1 + 0xf0)
            OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
        }
        else
            return 1;

        NEXT(1, 2)
    }

    return 0;
}
/*
 * CP932 (Windows Shift_JIS) decoder.
 *
 * Single bytes:
 *   - 0x00..0x80 decode to themselves;
 *   - 0xA0 decodes to U+F8F0, 0xA1..0xDF to U+FF61..U+FF9F;
 *   - 0xFD..0xFF decode to U+F8F1..U+F8F3.
 * Double bytes, tried in order:
 *   - the cp932ext table on the raw byte pair;
 *   - lead 0x81..0x9F / 0xE0..0xEA: converted from Shift_JIS layout to
 *     a JIS X 0208 row/cell and looked up in jisx0208;
 *   - lead 0xF0..0xF9: user-defined area starting at U+E000, 188
 *     characters per lead byte.
 *
 * Returns 0 when all input is consumed, 2 for an invalid or unmapped
 * byte pair.
 */
DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c <= 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
OUT1(0xf8f0) /* Windows compatibility */
else
OUT1(0xfec0 + c) /* half-width katakana */
NEXT(1, 1)
continue;
}
else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
OUT1(0xf8f1 - 0xfd + c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
/* Vendor extensions win over the standard JIS X 0208 mapping. */
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
/* Valid trail bytes are 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Undo the Shift_JIS folding to recover the JIS row (c) and cell (c2). */
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
}
else if (c >= 0xf0 && c <= 0xf9) {
/* User-defined area: computed, not table-driven. */
if ((c2 >= 0x40 && c2 <= 0x7e) ||
(c2 >= 0x80 && c2 <= 0xfc))
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
return 2;
}
else
return 2;
NEXT(2, 1)
}
return 0;
}
/*
* EUC-JIS-2004 codec
*/
/*
 * EUC-JIS-2004 encoder (also registered as euc_jisx0213, see the
 * codec list at the end of this file).
 *
 * ASCII is written as one byte.  BMP characters are tried, in order,
 * against: the JIS X 0213:2000 emulation hook, the jisx0213_bmp table
 * (including two-character "pair" sequences), the jisxcommon table,
 * half-width katakana (0x8E prefix), and two full-width fallbacks.
 * Characters in the plane selected by EMPBASE go through the
 * jisx0213_emp table keyed on their low 16 bits.
 *
 * A resulting code with the top bit set is written as three bytes
 * with a 0x8F prefix; otherwise as two bytes with bit 7 set on each.
 *
 * Returns 0 when all input is consumed, 1 or `insize` for an
 * unencodable character, and MBERR_TOOFEW when a possible pair needs
 * one more input character and MBENC_FLUSH is not set.
 */
ENCODER(euc_jis_2004)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
int insize;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
if (c <= 0xFFFF) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC: c may be the first half of a two-character sequence. */
if (code == MULTIC) {
if (inleft < 2) {
/* No lookahead available: at flush time encode c alone. */
if (flags & MBENC_FLUSH) {
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
/* Try c plus the following character as a pair. */
code = find_pairencmap(
(ucs2_t)c, (*inbuf)[1],
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
/* Not a pair: fall back to c on its own. */
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
}
else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
else if (c == 0xff3c)
/* F/W REVERSE SOLIDUS (see NOTES) */
code = 0x2140;
else if (c == 0xff5e)
/* F/W TILDE (see NOTES) */
code = 0x2232;
else
return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
/* Supplementary plane: table is keyed on the low 16 bits. */
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
}
else
return insize;
if (code & 0x8000) {
/* Codeset 2 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(insize, 3)
} else {
/* Codeset 1 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(insize, 2)
}
}
return 0;
}
/*
 * EUC-JIS-2004 decoder (also registered as euc_jisx0213).
 *
 * - Bytes below 0x80 decode to themselves.
 * - 0x8E + byte in 0xA1..0xDF: half-width katakana.
 * - 0x8F + two bytes: JIS X 0213 plane 2, falling back to JIS X 0212.
 * - Any other lead byte + one byte: JIS X 0213 plane 1, including the
 *   two full-width special cases, JIS X 0208, and entries that decode
 *   to a supplementary-plane character or to two code units.
 *
 * Returns 0 when all input is consumed; 2 or 3 (the sequence length)
 * for an invalid or unmapped sequence.
 */
DECODER(euc_jis_2004)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t code;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
}
else
return 2;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3)
/* Strip bit 7 to obtain the 7-bit row/cell values. */
c2 = IN2 ^ 0x80;
c3 = IN3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
/* Supplementary-plane result: written via WRITEUCS4, input advanced separately. */
WRITEUCS4(EMPBASE | code)
NEXT_IN(3)
continue;
}
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
else return 3;
NEXT(3, 1)
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c ^= 0x80;
c2 = IN2 ^ 0x80;
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
/* 0x2140 and 0x2232 decode to the full-width forms the encoder accepts. */
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
WRITEUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
}
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
/* Pair entry: two 16-bit units packed in one value. */
WRITE2(code >> 16, code & 0xffff)
NEXT(2, 2)
continue;
}
else return 2;
NEXT(2, 1)
}
}
return 0;
}
/*
* EUC-JP codec
*/
/*
 * EUC-JP encoder.
 *
 * ASCII is written as one byte.  Other characters are looked up in
 * jisxcommon; half-width katakana are written with a 0x8E prefix.
 * Unless STRICT_BUILD is defined, U+FF3C maps to 0x2140, and U+00A5 /
 * U+203E are written as the single bytes 0x5C / 0x7E.
 * A code with the top bit set (JIS X 0212) is written as three bytes
 * with a 0x8F prefix; otherwise (JIS X 0208) as two bytes with bit 7
 * set on each.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(euc_jp)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
/* The chain below continues across the #ifndef block. */
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
WRITE1(0x5c);
NEXT(1, 1)
continue;
} else if (c == 0x203e) { /* OVERLINE */
WRITE1(0x7e);
NEXT(1, 1)
continue;
}
#endif
else
return 1;
if (code & 0x8000) {
/* JIS X 0212 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(1, 3)
} else {
/* JIS X 0208 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(1, 2)
}
}
return 0;
}
/*
 * EUC-JP decoder.
 *
 * - Bytes below 0x80 decode to themselves.
 * - 0x8E + byte in 0xA1..0xDF: half-width katakana.
 * - 0x8F + two bytes: JIS X 0212 (bit 7 stripped before lookup).
 * - Otherwise two bytes: JIS X 0208 (bit 7 stripped before lookup);
 *   unless STRICT_BUILD is defined, A1C0 decodes to U+FF3C.
 *
 * Returns 0 when all input is consumed; 2 or 3 (the sequence length)
 * for an invalid or unmapped sequence.
 */
DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
}
else
return 2;
}
else if (c == 0x8f) {
unsigned char c2, c3;
REQUIRE_INBUF(3)
c2 = IN2;
c3 = IN3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
NEXT(3, 1)
}
else
return 3;
}
else {
unsigned char c2;
REQUIRE_INBUF(2)
c2 = IN2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0)
/* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else
#endif
/* In non-strict builds this lookup is the else arm of the check above. */
TRYMAP_DEC(jisx0208, **outbuf,
c ^ 0x80, c2 ^ 0x80) ;
else return 2;
NEXT(2, 1)
}
}
return 0;
}
/*
* SHIFT_JIS codec
*/
/*
 * Shift_JIS encoder.
 *
 * First classifies the character into a single-byte code where
 * possible (JIS X 0201 Roman in STRICT_BUILD; otherwise ASCII plus
 * U+00A5 -> 0x5C and U+203E -> 0x7E; then JIS X 0201 katakana) or
 * marks it NOCHAR.  Single-byte codes (< 0x80 or 0xA1..0xDF) are
 * written directly.  NOCHAR characters are looked up in jisxcommon
 * (JIS X 0212 entries rejected) and converted to Shift_JIS layout.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 * NOTE(review): the JISX0201_* macros and UCS4INVALID expand to
 * if-fragments chained with "else"; the exact branch structure depends
 * on alg_jisx0201.h / cjkcodecs.h.
 */
ENCODER(shift_jis)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
unsigned char c1, c2;
#ifdef STRICT_BUILD
JISX0201_R_ENCODE(c, code)
#else
if (c < 0x80) code = c;
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
else UCS4INVALID(c)
else code = NOCHAR;
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
REQUIRE_OUTBUF(1)
OUT1((unsigned char)code)
NEXT(1, 1)
continue;
}
REQUIRE_OUTBUF(2)
if (code == NOCHAR) {
TRYMAP_ENC(jisxcommon, code, c);
#ifndef STRICT_BUILD
else if (c == 0xff3c)
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
else
return 1;
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
}
/* Fold the JIS row/cell into Shift_JIS lead/trail bytes. */
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
NEXT(1, 2)
}
return 0;
}
/*
 * Shift_JIS decoder.
 *
 * Single bytes are handled by the JIS X 0201 macros (or plain ASCII
 * in non-strict builds) and consumed by the NEXT(1, 1) at the bottom
 * of the loop.  Lead bytes 0x81..0x9F / 0xE0..0xEA start a two-byte
 * sequence that is unfolded to a JIS X 0208 row/cell and looked up in
 * jisx0208; in non-strict builds 0x2140 decodes to U+FF3C.
 *
 * Returns 0 when all input is consumed, 2 for an invalid lead byte,
 * invalid trail byte or unmapped pair.
 */
DECODER(shift_jis)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, **outbuf)
#else
if (c < 0x80) **outbuf = c;
#endif
else JISX0201_K_DECODE(c, **outbuf)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
unsigned char c1, c2;
REQUIRE_INBUF(2)
c2 = IN2;
/* Valid trail bytes are 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Undo the Shift_JIS folding to recover the JIS row (c1) and cell (c2). */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
OUT1(0xff3c)
NEXT(2, 1)
continue;
}
#endif
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
NEXT(2, 1)
continue;
}
else
return 2;
}
else
return 2;
/* Only the single-byte branches fall through to here. */
NEXT(1, 1) /* JIS X 0201 */
}
return 0;
}
/*
* SHIFT_JIS-2004 codec
*/
/*
 * Shift_JIS-2004 encoder (also registered as shift_jisx0213).
 *
 * JIS X 0201 characters are written as a single byte.  BMP characters
 * are tried against the JIS X 0213:2000 emulation hook, the
 * jisx0213_bmp table (including two-character "pair" sequences) and
 * jisxcommon (JIS X 0212 entries rejected).  Characters in the plane
 * selected by EMPBASE go through jisx0213_emp keyed on the low 16
 * bits.  The resulting plane-1 or plane-2 code is then folded into
 * Shift_JIS lead/trail bytes.
 *
 * Returns 0 when all input is consumed, 1 or `insize` for an
 * unencodable character, and MBERR_TOOFEW when a possible pair needs
 * one more input character and MBENC_FLUSH is not set.
 */
ENCODER(shift_jis_2004)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code = NOCHAR;
int c1, c2;
size_t insize;
/* code stays NOCHAR unless JISX0201_ENCODE recognises c. */
JISX0201_ENCODE(c, code)
else DECODE_SURROGATE(c)
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
REQUIRE_OUTBUF(2)
insize = GET_INSIZE(c);
if (code == NOCHAR) {
if (c <= 0xffff) {
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
else TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC: c may be the first half of a two-character sequence. */
if (code == MULTIC) {
if (inleft < 2) {
/* No lookahead available: at flush time encode c alone. */
if (flags & MBENC_FLUSH) {
code = find_pairencmap
((ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
return MBERR_TOOFEW;
}
else {
/* Try c plus the following character as a pair. */
code = find_pairencmap(
(ucs2_t)c, IN2,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV) {
/* Not a pair: fall back to c on its own. */
code = find_pairencmap(
(ucs2_t)c, 0,
jisx0213_pair_encmap,
JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
}
else
insize = 2;
}
}
}
else TRYMAP_ENC(jisxcommon, code, c) {
/* abandon JIS X 0212 codes */
if (code & 0x8000)
return 1;
}
else return 1;
}
else if (c >> 16 == EMPBASE >> 16) {
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
else return insize;
}
else
return insize;
}
c1 = code >> 8;
c2 = (code & 0xff) - 0x21;
/* Map the row to a zero-based index; plane-2 rows are remapped piecewise. */
if (c1 & 0x80) { /* Plane 2 */
if (c1 >= 0xee) c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
else c1 -= 0x43;
}
else /* Plane 1 */
c1 -= 0x21;
/* Two rows share one lead byte; odd rows use the upper trail range. */
if (c1 & 1) c2 += 0x5e;
c1 >>= 1;
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
NEXT(insize, 2)
}
return 0;
}
/*
 * Shift_JIS-2004 decoder (also registered as shift_jisx0213).
 *
 * Single bytes recognised by JISX0201_DECODE are consumed by the
 * NEXT(1, 1) at the bottom of the loop.  Lead bytes 0x81..0x9F /
 * 0xE0..0xFC start a two-byte sequence which is unfolded into a
 * zero-based row (c1) and a cell (c2):
 *   - rows below 0x5e are JIS X 0213 plane 1, tried against the
 *     emulation hook, jisx0208, jisx0213_1_bmp, jisx0213_1_emp and
 *     jisx0213_pair;
 *   - the remaining rows are remapped to plane-2 row numbers and
 *     tried against the emulation hook, jisx0213_2_bmp and
 *     jisx0213_2_emp.
 *
 * Returns 0 when all input is consumed, 2 for an invalid lead byte,
 * invalid trail byte or unmapped pair.
 */
DECODER(shift_jis_2004)
{
    while (inleft > 0) {
        unsigned char c = IN1;

        REQUIRE_OUTBUF(1)
        JISX0201_DECODE(c, **outbuf)
        else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
            unsigned char c1, c2;
            ucs4_t code;

            /*
             * Confirm the trail byte exists *before* reading it;
             * reading IN2 first would touch memory past the end of
             * the input when the lead byte is the final byte.
             */
            REQUIRE_INBUF(2)
            c2 = IN2;
            /* Valid trail bytes are 0x40..0x7e and 0x80..0xfc. */
            if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
                return 2;

            /* Undo the Shift_JIS folding: c1 = row index, c2 = cell. */
            c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
            c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
            c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
            c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;

            if (c1 < 0x5e) { /* Plane 1 */
                c1 += 0x21;
                EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
                                c1, c2)
                else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
                    NEXT_OUT(1)
                }
                else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
                                c1, c2) {
                    NEXT_OUT(1)
                }
                else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
                    WRITEUCS4(EMPBASE | code)
                }
                else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
                    /* Pair entry: two 16-bit units in one value. */
                    WRITE2(code >> 16, code & 0xffff)
                    NEXT_OUT(2)
                }
                else
                    return 2;
                /* Output was advanced per branch; input here. */
                NEXT_IN(2)
            }
            else { /* Plane 2 */
                /* Inverse of the encoder's piecewise row remap. */
                if (c1 >= 0x67) c1 += 0x07;
                else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
                else c1 -= 0x3d;

                EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
                                c1, c2)
                else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
                                c1, c2) ;
                else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
                    WRITEUCS4(EMPBASE | code)
                    NEXT_IN(2)
                    continue;
                }
                else
                    return 2;
                NEXT(2, 1)
            }
            continue;
        }
        else
            return 2;

        /* Only the single-byte branch falls through to here. */
        NEXT(1, 1) /* JIS X 0201 */
    }

    return 0;
}
/*
 * Tables and codecs exported by this module.
 * NOTE(review): judging by the macro names, DECONLY / ENCONLY / ENCDEC
 * state which directions a mapping provides -- confirm against
 * cjkcodecs.h.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(jisx0208)
MAPPING_DECONLY(jisx0212)
MAPPING_ENCONLY(jisxcommon)
MAPPING_DECONLY(jisx0213_1_bmp)
MAPPING_DECONLY(jisx0213_2_bmp)
MAPPING_ENCONLY(jisx0213_bmp)
MAPPING_DECONLY(jisx0213_1_emp)
MAPPING_DECONLY(jisx0213_2_emp)
MAPPING_ENCONLY(jisx0213_emp)
MAPPING_ENCDEC(jisx0213_pair)
MAPPING_ENCDEC(cp932ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(shift_jis)
CODEC_STATELESS(cp932)
CODEC_STATELESS(euc_jp)
CODEC_STATELESS(shift_jis_2004)
CODEC_STATELESS(euc_jis_2004)
/*
 * The *_jisx0213 names reuse the *_2004 implementations but carry
 * (void *)2000 as their second field.
 * NOTE(review): presumably that value is what the
 * EMULATE_JISX0213_2000_* hooks test to select JIS X 0213:2000
 * behaviour -- confirm in emu_jisx0213_2000.h.
 */
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
END_CODECS_LIST
I_AM_A_MODULE_FOR(jp)

View File

@ -0,0 +1,356 @@
/*
* _codecs_kr.c: Codecs collection for Korean encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_kr.c,v 1.8 2004/07/07 14:59:26 perky Exp $
*/
#include "cjkcodecs.h"
#include "mappings_kr.h"
/*
* EUC-KR codec
*/
/*
 * EUC-KR encoder.
 *
 * ASCII is written as one byte.  Other characters are looked up in
 * the shared cp949 encode map; entries whose top bit is set are
 * rejected as CP949-only, and the rest are written as two bytes with
 * bit 7 set on each.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(euc_kr)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* TRYMAP_ENC opens an if-statement; "else" is the lookup-failed arm. */
TRYMAP_ENC(cp949, code, c);
else return 1;
if (code & 0x8000) /* MSB set: CP949 */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * EUC-KR decoder.
 *
 * Bytes below 0x80 decode to themselves.  Otherwise the byte and its
 * successor have bit 7 flipped and are looked up in the ksx1001 map.
 *
 * Returns 0 when all input is consumed, 2 for an unmapped pair.
 */
DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
} else return 2;
}
return 0;
}
/*
* CP949 codec
*/
/*
 * CP949 encoder.
 *
 * ASCII is written as one byte.  Other characters are looked up in
 * the cp949 encode map.  The first output byte always has bit 7 set;
 * the second byte is written as-is for CP949 extension entries (top
 * bit of the code set) and with bit 7 set for KS X 1001 entries.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(cp949)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2)
}
return 0;
}
/*
 * CP949 decoder.
 *
 * Bytes below 0x80 decode to themselves.  Two-byte sequences are
 * tried against ksx1001 (bit 7 of both bytes flipped) and then
 * against cp949ext (raw bytes).
 *
 * Returns 0 when all input is consumed, 2 for an unmapped pair.
 */
DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* One if/else chain; the first successful lookup wins. */
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* JOHAB codec
*/
/*
 * Encode-side tables for JOHAB, used by ENCODER(johab).
 *
 * For a syllable offset s = c - 0xac00 the encoder indexes:
 *   u2johabidx_choseong [s / 588]       -> 5-bit field shifted left 10
 *   u2johabidx_jungseong[(s / 28) % 21] -> 5-bit field shifted left 5
 *   u2johabidx_jongseong[s % 28]        -> low 5-bit field
 * The arrays are declared with 32 slots; entries beyond the listed
 * initialisers are zero.
 */
static const unsigned char u2johabidx_choseong[32] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
0x03, 0x04, 0x05, 0x06, 0x07,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* Complete JOHAB codes for U+3131..U+3163, indexed by (c - 0x3131). */
static const DBCHAR u2johabjamo[] = {
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
0x8741, 0x8761, 0x8781, 0x87a1,
};
/*
 * JOHAB encoder.
 *
 * - ASCII: one byte.
 * - U+AC00..U+D7A3 (hangul syllables): composed arithmetically from
 *   the three u2johabidx_* tables into a 16-bit code with the top bit
 *   set.
 * - U+3131..U+3163 (compatibility jamo): direct lookup in u2johabjamo.
 * - Anything else: looked up in the cp949 map; the KS X 1001 row/cell
 *   is accepted only for rows 0x21..0x2c and 0x4a..0x7d and is then
 *   converted to JOHAB's two-byte layout.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(johab)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
if (c >= 0xac00 && c <= 0xd7a3) {
/* Decompose the syllable index into initial/medial/final fields. */
c -= 0xac00;
code = 0x8000 |
(u2johabidx_choseong[c / 588] << 10) |
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
u2johabidx_jongseong[c % 28];
}
else if (c >= 0x3131 && c <= 0x3163)
code = u2johabjamo[c - 0x3131];
else TRYMAP_ENC(cp949, code, c) {
unsigned char c1, c2, t2;
unsigned short t1;
/* Hangul was handled above, so only KS X 1001 entries are expected here. */
assert((code & 0x8000) == 0);
c1 = code >> 8;
c2 = code & 0xff;
if (((c1 >= 0x21 && c1 <= 0x2c) ||
(c1 >= 0x4a && c1 <= 0x7d)) &&
(c2 >= 0x21 && c2 <= 0x7e)) {
/* Two KS X 1001 rows share one JOHAB lead byte (t1 >> 1). */
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUT1(t1 >> 1)
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
NEXT(1, 2)
continue;
}
else
return 1;
}
else
return 1;
/* Hangul and jamo paths: emit the 16-bit code, high byte first. */
OUT1(code >> 8)
OUT2(code & 0xff)
NEXT(1, 2)
}
return 0;
}
/*
 * Decode-side tables for JOHAB, used by DECODER(johab).  Each table is
 * indexed by one of the three 5-bit fields extracted from a two-byte
 * hangul code.
 *
 * FILL marks the "no component" (filler) value of a field; NONE marks
 * a field value that is invalid.
 */
#define FILL 0xfd
#define NONE 0xff
/* johabidx_*: field value -> component index used in the syllable formula. */
static const unsigned char johabidx_choseong[32] = {
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};
/* johabjamo_*: field value -> low byte that the decoder ORs with 0x3100 for a lone jamo. */
static const unsigned char johabjamo_choseong[32] = {
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
/*
 * JOHAB decoder.
 *
 * Bytes below 0x80 decode to themselves.  For a two-byte sequence:
 *   - lead byte < 0xD8: the 16 bits are split into three 5-bit fields
 *     (initial, medial, final).  All-filler gives U+3000; exactly one
 *     real component gives a compatibility jamo (0x3100 | table
 *     value); initial + medial (final optional) gives a precomposed
 *     syllable from U+AC00; other combinations are errors.
 *   - lead byte >= 0xD8: converted to a KS X 1001 row/cell and looked
 *     up in ksx1001.
 *
 * Returns 0 when all input is consumed, 2 for an invalid or unmapped
 * pair.
 */
DECODER(johab)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
if (c < 0xd8) {
/* johab hangul */
unsigned char c_cho, c_jung, c_jong;
unsigned char i_cho, i_jung, i_jong;
/* Bit layout of (c, c2): 1 | cho(5) | jung(5) | jong(5). */
c_cho = (c >> 2) & 0x1f;
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
c_jong = c2 & 0x1f;
i_cho = johabidx_choseong[c_cho];
i_jung = johabidx_jungseong[c_jung];
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
return 2;
/* we don't use U+1100 hangul jamo yet. */
if (i_cho == FILL) {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3000)
else
OUT1(0x3100 |
johabjamo_jongseong[c_jong])
}
else {
if (i_jong == FILL)
OUT1(0x3100 |
johabjamo_jungseong[c_jung])
else
return 2;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3100 |
johabjamo_choseong[c_cho])
else
return 2;
}
else
/* Full syllable: inverse of the encoder's /588, /28, %28 split. */
OUT1(0xac00 +
i_cho * 588 +
i_jung * 28 +
(i_jong == FILL ? 0 : i_jong))
}
NEXT(2, 1)
} else {
/* KS X 1001 except hangul jamos and syllables */
if (c == 0xdf || c > 0xf9 ||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
return 2;
else {
unsigned char t1, t2;
/* Unfold the JOHAB lead/trail bytes into a KS X 1001 row (t1) and cell (t2). */
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
2 * c - 0x197);
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
else return 2;
NEXT(2, 1)
}
}
}
return 0;
}
#undef NONE
#undef FILL
/*
 * Tables and codecs exported by this module.
 * NOTE(review): judging by the macro names, DECONLY / ENCONLY state
 * which direction a mapping provides -- confirm against cjkcodecs.h.
 * The cp949 encode map is shared by the euc_kr, cp949 and johab
 * encoders above.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)
END_CODECS_LIST
I_AM_A_MODULE_FOR(kr)

View File

@ -0,0 +1,133 @@
/*
* _codecs_tw.c: Codecs collection for Taiwan's encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_tw.c,v 1.10 2004/07/07 14:59:26 perky Exp $
*/
#include "cjkcodecs.h"
#include "mappings_tw.h"
/*
* BIG5 codec
*/
/*
 * Big5 encoder.
 *
 * ASCII is written as one byte.  Every other character is looked up
 * in the big5 encode map and written as two bytes, high byte first.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(big5)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* TRYMAP_ENC opens an if-statement; "else" is the lookup-failed arm. */
TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * Big5 decoder.
 *
 * Bytes below 0x80 decode to themselves; anything else is the lead
 * byte of a two-byte sequence looked up in the big5 decode map.
 *
 * Returns 0 when all input is consumed, 2 for an unmapped pair.
 */
DECODER(big5)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* CP950 codec
*/
/*
 * CP950 encoder.
 *
 * ASCII is written as one byte.  Other characters are looked up in
 * the cp950ext map first and the plain big5 map second, then written
 * as two bytes, high byte first.
 *
 * Returns 0 when all input is consumed, 1 for an unencodable character.
 */
ENCODER(cp950)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* The extension table takes precedence over plain Big5. */
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * CP950 decoder.
 *
 * Bytes below 0x80 decode to themselves.  Two-byte sequences are
 * looked up in cp950ext first and plain big5 second.
 *
 * Returns 0 when all input is consumed, 2 for an unmapped pair.
 */
DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* The extension table takes precedence over plain Big5. */
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
 * Tables and codecs exported by this module.  The big5 mapping listed
 * here is the one _codecs_hk.c imports via IMPORT_MAP(tw, big5, ...).
 * NOTE(review): judging by the macro name, ENCDEC means both encode
 * and decode tables are provided -- confirm against cjkcodecs.h.
 */
BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST
I_AM_A_MODULE_FOR(tw)

View File

@ -0,0 +1,560 @@
/*
* _codecs_unicode.c: Codecs collection for Unicode encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_unicode.c,v 1.5 2004/06/27 21:41:15 perky Exp $
*/
#include "cjkcodecs.h"
/*
* UTF-7 codec
*/
/*
 * Classification of the 128 ASCII code points for UTF-7, one entry
 * per code point: 0 (none), SET_DIRECT, SET_OPTIONAL or
 * SET_WHITESPACE.
 * NOTE(review): the set names presumably correspond to the character
 * sets defined by RFC 2152 -- confirm against the RFC.
 */
#define SET_DIRECT 1
#define SET_OPTIONAL 2
#define SET_WHITESPACE 3
/* Short aliases used only to keep the table readable; undefined right after it. */
#define _D SET_DIRECT
#define _O SET_OPTIONAL
#define _W SET_WHITESPACE
static const char utf7_sets[128] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, _W, _W, 0, 0, _W, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
_W, _O, _O, _O, _O, _O, _O, _D, _D, _D, _O, 0, _D, _D, _D, 0,
_D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _O, _O, _O, _O, _D,
_O, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D,
_D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _O, 0, _O, _O, _O,
_O, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D,
_D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _D, _O, _O, _O, 0, 0,
};
#undef _W
#undef _O
#undef _D
/* B64(n): base64 alphabet character for the low 6 bits of n. */
#define B64(n) ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \
"0123456789+/"[(n) & 0x3f])
/* B64CHAR(c): true if c belongs to the base64 alphabet. */
#define B64CHAR(c) (((c) >= 'A' && (c) <= 'Z') || \
((c) >= 'a' && (c) <= 'z') || \
((c) >= '0' && (c) <= '9') || \
(c) == '+' || (c) == '/')
/* UB64(c): 6-bit value of base64 character c; the result is only meaningful when B64CHAR(c) holds. */
#define UB64(c) ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \
(c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4)
/* Both macros index utf7_sets directly, so the caller must ensure c < 128. */
#define UTF7_DENCODABLE_COMPATIBLE(c) (utf7_sets[c] != 0)
#define UTF7_DENCODABLE_STRICT(c) (utf7_sets[c] == SET_DIRECT || \
utf7_sets[c] == SET_WHITESPACE)
/*
 * Encoder state accessors.  The state's byte array is used as:
 *   c[0] - pending base64 bits carried over to the next character
 *   c[2] - non-zero while inside a '+...' shifted (base64) section
 *   c[3] - stage counter (0, 1 or 2) of the base64 bit packing
 * The setter macros carry their own trailing ';'.
 */
#define ESTATE_INITIALIZE(state) \
ESTATE_SETSTAGE(state, 0) \
ESTATE_CLEARSHIFTED(state)
#define ESTATE_SETPENDING(state, v) (state)->c[0] = (v);
#define ESTATE_GETPENDING(state) (state)->c[0]
#define ESTATE_SETSHIFTED(state) (state)->c[2] = 1;
#define ESTATE_ISSHIFTED(state) ((state)->c[2])
#define ESTATE_CLEARSHIFTED(state) (state)->c[2] = 0;
#define ESTATE_SETSTAGE(state, v) (state)->c[3] = (v);
#define ESTATE_GETSTAGE(state) ((state)->c[3])
/*
 * UTF-7 encoder state setup: stage 0, not shifted.  Always returns 0.
 */
ENCODER_INIT(utf_7)
{
ESTATE_INITIALIZE(state)
return 0;
}
/*
 * UTF-7 encoder reset: if a shifted (base64) section is open, flush
 * any pending bits as one final base64 character, write the
 * terminating '-' and clear the shifted flag.  Returns 0.
 */
ENCODER_RESET(utf_7)
{
if (ESTATE_ISSHIFTED(state)) {
/* A non-zero stage means partial bits are still waiting in the state. */
if (ESTATE_GETSTAGE(state) != 0) {
unsigned char oc;
oc = B64(ESTATE_GETPENDING(state));
WRITE2(oc, '-')
NEXT_OUT(2)
}
else {
WRITE1('-')
NEXT_OUT(1)
}
ESTATE_CLEARSHIFTED(state)
}
return 0;
}
/*
 * UTF-7 encoder.
 *
 * Characters classed as direct or whitespace are copied as-is, '+' is
 * written as "+-", and everything else is written as base64 digits of
 * 16-bit units inside a run opened with '+' and closed with '-'.
 * Returns 0 when all input is consumed, a positive count for input that
 * cannot be encoded, or MBERR_INTERNAL on an impossible stage value.
 */
ENCODER(utf_7)
{
while (inleft > 0) {
/* c1 is the unit being encoded now; a non-zero c2 is a second unit
 * (low surrogate) that is encoded right after c1 in the inner loop. */
Py_UNICODE c1 = IN1, c2 = 0;
size_t insize = 1;
#if Py_UNICODE_SIZE == 2
/* 16-bit Py_UNICODE: a high surrogate must be followed by a low one;
 * both input units are consumed together. */
if (c1 >> 10 == 0xd800 >> 10) { /* high surrogate */
REQUIRE_INBUF(2)
if (IN2 >> 10 != 0xdc00 >> 10) /* low surrogate */
return 2; /* invalid surrogate pair */
c2 = IN2;
insize = 2;
}
#else
/* Wide Py_UNICODE: split code points above 0xffff into a surrogate
 * pair so that the base64 payload is always 16-bit units. */
if (c1 > 0x10ffff) /* UTF-16 unencodable */
return 1;
else if (c1 > 0xffff) {
c2 = 0xdc00 | ((c1 - 0x10000) & 0x3ff);
c1 = 0xd800 | ((c1 - 0x10000) >> 10);
}
#endif
/* Runs once for c1 and, if c2 is set, a second time for c2. */
for (;;) {
unsigned char oc1, oc2, oc3;
if (ESTATE_ISSHIFTED(state)) {
if (c1 < 128 && UTF7_DENCODABLE_STRICT(c1)) {
/* Leaving base64: flush the partial digit (if any), write
 * the '-' terminator, then the character itself. */
if (ESTATE_GETSTAGE(state) != 0) {
oc1 = B64(ESTATE_GETPENDING(
state));
WRITE3(oc1, '-',
(unsigned char)c1)
NEXT_OUT(3)
} else {
WRITE2('-',
(unsigned char)c1)
NEXT_OUT(2)
}
ESTATE_CLEARSHIFTED(state)
} else {
/* Pack the 16 bits of c1 into 6-bit digits.  The stage
 * tells how many bits are carried over from the previous
 * unit: stage 0 = none, stage 2 = 4 bits, stage 1 = 2 bits. */
switch (ESTATE_GETSTAGE(state)) {
case 0:
/* 12 bits out, low 4 bits kept pending */
oc1 = c1 >> 10;
oc2 = (c1 >> 4) & 0x3f;
WRITE2(B64(oc1), B64(oc2))
ESTATE_SETPENDING(state,
(c1 & 0x0f) << 2)
ESTATE_SETSTAGE(state, 2)
NEXT_OUT(2)
break;
case 1:
/* 2 pending bits + 16 new bits = 3 full digits */
oc1 = ESTATE_GETPENDING(state)
| (c1 >> 12);
oc2 = (c1 >> 6) & 0x3f;
oc3 = c1 & 0x3f;
WRITE3(B64(oc1), B64(oc2),
B64(oc3))
ESTATE_SETSTAGE(state, 0)
NEXT_OUT(3)
break;
case 2:
/* 4 pending bits + 14 new bits out, low 2 bits pending */
oc1 = ESTATE_GETPENDING(state)
| (c1 >> 14);
oc2 = (c1 >> 8) & 0x3f;
oc3 = (c1 >> 2) & 0x3f;
WRITE3(B64(oc1), B64(oc2),
B64(oc3))
ESTATE_SETPENDING(state,
(c1 & 0x03) << 4)
ESTATE_SETSTAGE(state, 1)
NEXT_OUT(3)
break;
default:
return MBERR_INTERNAL;
}
}
}
else {
if (c1 < 128 && UTF7_DENCODABLE_STRICT(c1)) {
WRITE1((unsigned char)c1)
NEXT_OUT(1)
}
else if (c1 == '+') {
/* literal plus sign */
WRITE2('+', '-')
NEXT_OUT(2)
}
else {
/* Open a base64 run; same bit layout as stage 0 above. */
oc1 = c1 >> 10;
oc2 = (c1 >> 4) & 0x3f;
WRITE3('+', B64(oc1), B64(oc2))
ESTATE_SETPENDING(state,
(c1 & 0x0f) << 2)
ESTATE_SETSTAGE(state, 2)
ESTATE_SETSHIFTED(state)
NEXT_OUT(3)
}
}
if (c2 != 0) {
c1 = c2;
c2 = 0;
}
else
break;
}
/* Input is advanced only after every unit of the character has
 * been written. */
NEXT_IN(insize)
}
return 0;
}
/* UTF-7 decoder state accessors.  c[0] = shifted flag, c[1] = base64
 * stage (0-3), c[2] = pending base64 bits, c[3] = number of bytes
 * collected so far for the pending UTF-16 unit(s); u2[2] and u2[3] hold
 * the first and second pending 16-bit units. */
/* Clear every field listed above. */
#define DSTATE_INITIALIZE(state) \
DSTATE_SETBSTAGE(state, 0) \
DSTATE_CLEARSHIFTED(state) \
DSTATE_SETULENGTH(state, 0) \
DSTATE_SETUPENDING1(state, 0) \
DSTATE_SETUPENDING2(state, 0)
/* XXX: Mixing differently typed members of the state union may not be
 * portable.  If you see any problem with this on your platform, please
 * let me know. */
#define DSTATE_SETSHIFTED(state) (state)->c[0] = 1;
#define DSTATE_ISSHIFTED(state) ((state)->c[0])
#define DSTATE_CLEARSHIFTED(state) (state)->c[0] = 0;
#define DSTATE_SETBSTAGE(state, v) (state)->c[1] = (v);
#define DSTATE_GETBSTAGE(state) ((state)->c[1])
#define DSTATE_SETBPENDING(state, v) (state)->c[2] = (v);
#define DSTATE_GETBPENDING(state) ((state)->c[2])
#define DSTATE_SETULENGTH(state, v) (state)->c[3] = (v);
#define DSTATE_GETULENGTH(state) ((state)->c[3])
/* NOTE(review): this relies on u2[2]/u2[3] not overlapping c[0..3],
 * i.e. on u2 elements being two bytes wide -- confirm against the
 * MultibyteCodec_State definition. */
#define DSTATE_SETUPENDING1(state, v) (state)->u2[2] = (v);
#define DSTATE_GETUPENDING1(state) (state)->u2[2]
#define DSTATE_SETUPENDING2(state, v) (state)->u2[3] = (v);
#define DSTATE_GETUPENDING2(state) (state)->u2[3]
/* Append one decoded byte v to the pending units: bytes 0-1 go to u2[2],
 * bytes 2-3 to u2[3]; an even byte index is OR-ed in as the high byte
 * (v << 8), an odd one as the low byte.  Then bump the byte counter. */
#define DSTATE_UAPPEND(state, v) \
(state)->u2[(state)->c[3] > 1 ? 3 : 2] |= \
((state)->c[3] & 1) ? (v) : ((ucs2_t)(v)) << 8; \
(state)->c[3]++;
/* Clear all UTF-7 decoder state (unshifted, no pending bits or units).
 * Always returns 0. */
DECODER_INIT(utf_7)
{
DSTATE_INITIALIZE(state)
return 0;
}
/*
 * Emit the UTF-16 unit(s) collected in the decoder state, if complete.
 *
 * With two pending bytes the single unit is written (on wide builds a
 * high surrogate is held back instead, returning MBERR_TOOFEW so that the
 * low half can be collected).  With four pending bytes (wide builds only)
 * the surrogate pair is combined into one code point; 1 is returned when
 * the second unit is not a low surrogate.  Zero, one or three pending
 * bytes yield MBERR_TOOFEW; any other count yields MBERR_INTERNAL.
 *
 * On success one element is written at *outbuf, *outbuf/*outleft are
 * advanced by one, the pending data is cleared and 0 is returned.
 * The caller is responsible for making sure *outleft is at least 1.
 */
static int
utf_7_flush(MultibyteCodec_State *state,
	    Py_UNICODE **outbuf, size_t *outleft)
{
	const int nbytes = DSTATE_GETULENGTH(state);

	if (nbytes == 2) {
		ucs2_t unit;

		unit = DSTATE_GETUPENDING1(state);
#if Py_UNICODE_SIZE == 4
		if (unit >> 10 == 0xd800 >> 10)
			return MBERR_TOOFEW;
#endif
		OUT1(unit)
		(*outbuf)++;
		(*outleft)--;
		DSTATE_SETULENGTH(state, 0)
		DSTATE_SETUPENDING1(state, 0)
		return 0;
	}

#if Py_UNICODE_SIZE == 4
	if (nbytes == 4) {
		if (DSTATE_GETUPENDING2(state) >> 10 != 0xdc00 >> 10)
			return 1;
		OUT1(0x10000 + (((ucs4_t)DSTATE_GETUPENDING1(state) - 0xd800)
			<< 10) + (DSTATE_GETUPENDING2(state) - 0xdc00))
		(*outbuf)++;
		(*outleft)--;
		DSTATE_SETULENGTH(state, 0)
		DSTATE_SETUPENDING1(state, 0)
		DSTATE_SETUPENDING2(state, 0)
		return 0;
	}
#endif

	if (nbytes == 0 || nbytes == 1 || nbytes == 3)
		return MBERR_TOOFEW;

	return MBERR_INTERNAL;
}
/* Drop any partially decoded base64 data and return to unshifted mode.
 * Always returns 0. */
DECODER_RESET(utf_7)
{
DSTATE_INITIALIZE(state)
return 0;
}
/*
 * UTF-7 decoder.
 *
 * Outside a base64 run, "+-" decodes to '+', a lone '+' opens a run and
 * any 7-bit character with a non-zero class in utf7_sets[] is copied.
 * Inside a run, base64 digits are unpacked six bits at a time into bytes,
 * the bytes are collected into 16-bit units via DSTATE_UAPPEND() and
 * utf_7_flush() emits them as soon as they are complete.  '-' or any
 * classed 7-bit character ends the run.
 *
 * Returns 0 when the input is consumed, 1 for a byte that is not valid at
 * its position, or whatever error utf_7_flush() reports (MBERR_TOOFEW
 * from the flush only means "nothing to emit yet" and is ignored).
 */
DECODER(utf_7)
{
	while (inleft > 0) {
		unsigned char c = IN1;
		int r;

		if (!DSTATE_ISSHIFTED(state)) {
			if (c == '+') {
				REQUIRE_INBUF(2)
				if (inleft >= 2 && IN2 == '-') {
					/* "+-" is an escaped plus sign */
					WRITE1('+')
					NEXT(2, 1)
				}
				else {
					DSTATE_SETSHIFTED(state)
					NEXT_IN(1)
				}
			}
			else if (c < 128 && UTF7_DENCODABLE_COMPATIBLE(c)) {
				WRITE1(c)
				NEXT(1, 1)
			}
			else
				return 1;
		}
		else if (B64CHAR(c)) {
			unsigned char tb;

			/* utf_7_flush() below writes without checking space */
			REQUIRE_OUTBUF(1)
			c = UB64(c);
			assert(DSTATE_GETULENGTH(state) < 4);

			/* Four digits carry three bytes: stage 0 only stores
			 * bits, stages 1-3 each complete one byte. */
			switch (DSTATE_GETBSTAGE(state)) {
			case 0:
				DSTATE_SETBPENDING(state, c << 2)
				DSTATE_SETBSTAGE(state, 1)
				break;
			case 1:
				tb = DSTATE_GETBPENDING(state) | (c >> 4);
				DSTATE_SETBPENDING(state, c << 4)
				DSTATE_SETBSTAGE(state, 2)
				DSTATE_UAPPEND(state, tb)
				break;
			case 2:
				tb = DSTATE_GETBPENDING(state) | (c >> 2);
				DSTATE_SETBPENDING(state, c << 6)
				DSTATE_SETBSTAGE(state, 3)
				DSTATE_UAPPEND(state, tb)
				break;
			case 3:
				tb = DSTATE_GETBPENDING(state) | c;
				DSTATE_SETBSTAGE(state, 0)
				DSTATE_UAPPEND(state, tb)
				break;
			}

			r = utf_7_flush(state, outbuf, &outleft);
			if (r != 0 && r != MBERR_TOOFEW)
				return r;
			NEXT_IN(1)
		}
		/* utf7_sets[] has only 128 entries and c is an unsigned char
		 * that can be 0x80-0xff here, so the range check must come
		 * before the table lookup (as it does in unshifted mode). */
		else if (c == '-' ||
			 (c < 128 && UTF7_DENCODABLE_COMPATIBLE(c))) {
			/* NOTE(review): this appends the stage number, not the
			 * pending bits, and the flush below is not preceded by
			 * an output-space check -- both look unintended but
			 * are kept as they were; confirm before changing. */
			if (DSTATE_GETBSTAGE(state) != 0) {
				DSTATE_UAPPEND(state, DSTATE_GETBSTAGE(state))
				DSTATE_SETBSTAGE(state, 0)
			}
			r = utf_7_flush(state, outbuf, &outleft);
			if (r != 0 && r != MBERR_TOOFEW)
				return r;
			DSTATE_CLEARSHIFTED(state)

			/* '-' only terminates the run; anything else is also
			 * a literal character of its own */
			if (c != '-') {
				WRITE1(c)
				NEXT_OUT(1)
			}
			NEXT_IN(1)
		}
		else
			return 1;
	}

	return 0;
}
/*
* UTF-8 codec
*/
ENCODER(utf_8)
{
while (inleft > 0) {
ucs4_t c = **inbuf;
size_t outsize, insize = 1;
if (c < 0x80) outsize = 1;
else if (c < 0x800) outsize = 2;
else {
#if Py_UNICODE_SIZE == 2
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */
if (inleft < 2) {
if (!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
}
else if ((*inbuf)[1] >> 10 == 0xdc00 >> 10) {
/* low surrogate */
c = 0x10000 + ((c - 0xd800) << 10) +
((ucs4_t)((*inbuf)[1]) - 0xdc00);
insize = 2;
}
}
#endif
if (c < 0x10000) outsize = 3;
else if (c < 0x200000) outsize = 4;
else if (c < 0x4000000) outsize = 5;
else outsize = 6;
}
REQUIRE_OUTBUF(outsize)
switch (outsize) {
case 6:
(*outbuf)[5] = 0x80 | (c & 0x3f);
c = c >> 6;
c |= 0x4000000;
/* FALLTHROUGH */
case 5:
(*outbuf)[4] = 0x80 | (c & 0x3f);
c = c >> 6;
c |= 0x200000;
/* FALLTHROUGH */
case 4:
(*outbuf)[3] = 0x80 | (c & 0x3f);
c = c >> 6;
c |= 0x10000;
/* FALLTHROUGH */
case 3:
(*outbuf)[2] = 0x80 | (c & 0x3f);
c = c >> 6;
c |= 0x800;
/* FALLTHROUGH */
case 2:
(*outbuf)[1] = 0x80 | (c & 0x3f);
c = c >> 6;
c |= 0xc0;
/* FALLTHROUGH */
case 1:
(*outbuf)[0] = c;
}
NEXT(insize, outsize)
}
return 0;
}
/*
 * UTF-8 decoder (accepts the historical forms of up to six bytes).
 *
 * The lead byte selects the sequence length.  Every trailing byte must
 * be of the form 10xxxxxx, which is what "(cN ^ 0x80) < 0x40" tests, and
 * the extra "c >= ... || c2 >= ..." term rejects overlong encodings.
 * Lead bytes 0x80-0xc1 and 0xfe-0xff are never valid.
 *
 * Returns 0 when the input is consumed; otherwise 1 for an invalid lead
 * byte or N for an invalid N-byte sequence.  REQUIRE_INBUF/REQUIRE_OUTBUF
 * may return early when a buffer is too short.
 */
DECODER(utf_8)
{
	while (inleft > 0) {
		unsigned char c = **inbuf;

		REQUIRE_OUTBUF(1)
		if (c < 0x80) {
			(*outbuf)[0] = (unsigned char)c;
			NEXT(1, 1)
		}
		else if (c < 0xc2) {
			/* stray continuation byte, or overlong 2-byte lead */
			return 1;
		}
		else if (c < 0xe0) {
			unsigned char c2;

			REQUIRE_INBUF(2)
			c2 = (*inbuf)[1];
			if (!((c2 ^ 0x80) < 0x40))
				return 2;
			**outbuf = ((Py_UNICODE)(c & 0x1f) << 6) |
				   (Py_UNICODE)(c2 ^ 0x80);
			NEXT(2, 1)
		}
		else if (c < 0xf0) {
			unsigned char c2, c3;

			REQUIRE_INBUF(3)
			c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
			if (!((c2 ^ 0x80) < 0x40 &&
			      (c3 ^ 0x80) < 0x40 && (c >= 0xe1 || c2 >= 0xa0)))
				return 3;
			**outbuf = ((Py_UNICODE)(c & 0x0f) << 12)
				| ((Py_UNICODE)(c2 ^ 0x80) << 6)
				| (Py_UNICODE)(c3 ^ 0x80);
			NEXT(3, 1)
		}
		else if (c < 0xf8) {
			unsigned char c2, c3, c4;
			ucs4_t code;

			REQUIRE_INBUF(4)
			c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
			c4 = (*inbuf)[3];
			if (!((c2 ^ 0x80) < 0x40 &&
			      (c3 ^ 0x80) < 0x40 && (c4 ^ 0x80) < 0x40 &&
			      (c >= 0xf1 || c2 >= 0x90)))
				return 4;
			code = ((ucs4_t)(c & 0x07) << 18)
				| ((ucs4_t)(c2 ^ 0x80) << 12)
				| ((ucs4_t)(c3 ^ 0x80) << 6)
				| (ucs4_t)(c4 ^ 0x80);
			/* WRITEUCS4 advances the output itself */
			WRITEUCS4(code)
			NEXT_IN(4)
		}
		else if (c < 0xfc) {
			unsigned char c2, c3, c4, c5;
			ucs4_t code;

			REQUIRE_INBUF(5)
			c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
			c4 = (*inbuf)[3]; c5 = (*inbuf)[4];
			if (!((c2 ^ 0x80) < 0x40 &&
			      (c3 ^ 0x80) < 0x40 && (c4 ^ 0x80) < 0x40 &&
			      (c5 ^ 0x80) < 0x40 && (c >= 0xf9 || c2 >= 0x88)))
				return 5;
			code = ((ucs4_t)(c & 0x03) << 24)
				| ((ucs4_t)(c2 ^ 0x80) << 18)
				| ((ucs4_t)(c3 ^ 0x80) << 12)
				| ((ucs4_t)(c4 ^ 0x80) << 6)
				| (ucs4_t)(c5 ^ 0x80);
			WRITEUCS4(code)
			NEXT_IN(5)
		}
		/* Six-byte leads are 0xfc and 0xfd only (1111110x).  The
		 * bound used to be 0xff, which let 0xfe through as if it
		 * were 0xfc and, because of the "c >= 0xfd" term below,
		 * also skipped the overlong check for it. */
		else if (c < 0xfe) {
			unsigned char c2, c3, c4, c5, c6;
			ucs4_t code;

			REQUIRE_INBUF(6)
			c2 = (*inbuf)[1]; c3 = (*inbuf)[2];
			c4 = (*inbuf)[3]; c5 = (*inbuf)[4];
			c6 = (*inbuf)[5];
			if (!((c2 ^ 0x80) < 0x40 &&
			      (c3 ^ 0x80) < 0x40 && (c4 ^ 0x80) < 0x40 &&
			      (c5 ^ 0x80) < 0x40 && (c6 ^ 0x80) < 0x40 &&
			      (c >= 0xfd || c2 >= 0x84)))
				return 6;
			code = ((ucs4_t)(c & 0x01) << 30)
				| ((ucs4_t)(c2 ^ 0x80) << 24)
				| ((ucs4_t)(c3 ^ 0x80) << 18)
				| ((ucs4_t)(c4 ^ 0x80) << 12)
				| ((ucs4_t)(c5 ^ 0x80) << 6)
				| (ucs4_t)(c6 ^ 0x80);
			WRITEUCS4(code)
			NEXT_IN(6)
		}
		else
			return 1; /* 0xfe and 0xff never occur in UTF-8 */
	}

	return 0;
}
/* Module tables: this module exports no mapping tables (empty list) and
 * two codecs.  utf_7 is registered as stateful, matching the INIT/RESET
 * functions defined for it above; utf_8 is registered as stateless. */
BEGIN_MAPPINGS_LIST
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATEFUL(utf_7)
CODEC_STATELESS(utf_8)
END_CODECS_LIST
I_AM_A_MODULE_FOR(unicode)

View File

@ -1,134 +0,0 @@
/*
* _cp932.c: the CP932 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _cp932.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(jisxcommon)
ENCMAP(cp932ext)
DECMAP(jisx0208)
DECMAP(cp932ext)
/*
 * CP932 encoder.
 *
 * Single-byte output: U+0000-U+0080 as-is, U+FF61-U+FF9F as 0xa1-0xdf,
 * and the private-use characters U+F8F0-U+F8F3 as 0xa0, 0xfd, 0xfe, 0xff
 * (the exact inverse of the decoder).
 * Double-byte output: the cp932ext table first, then JIS X 0208 from the
 * jisxcommon table converted to Shift_JIS byte layout, then the
 * user-defined area U+E000-U+E757 (188 characters per lead byte, leads
 * starting at 0xf0).
 * Returns 0 when done and 1 for an unencodable character.
 */
ENCODER(cp932)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;
		unsigned char c1, c2;

		if (c <= 0x80) {
			WRITE1((unsigned char)c)
			NEXT(1, 1)
			continue;
		} else if (c >= 0xff61 && c <= 0xff9f) {
			/* half-width katakana */
			WRITE1(c - 0xfec0)
			NEXT(1, 1)
			continue;
		} else if (c >= 0xf8f0 && c <= 0xf8f3) {
			/* Windows compatibility */
			RESERVE_OUTBUF(1)
			if (c == 0xf8f0)
				OUT1(0xa0)
			else
				/* U+F8F1..U+F8F3 -> 0xfd..0xff.  The base was
				 * written 0xfef1, which only produced the right
				 * byte after truncation to eight bits. */
				OUT1(c - 0xf8f1 + 0xfd)
			NEXT(1, 1)
			continue;
		}

		UCS4INVALID(c)
		RESERVE_OUTBUF(2)

		TRYMAP_ENC(cp932ext, code, c) {
			OUT1(code >> 8)
			OUT2(code & 0xff)
		} else TRYMAP_ENC(jisxcommon, code, c) {
			if (code & 0x8000) /* MSB set: JIS X 0212 */
				return 1;

			/* JIS X 0208: two rows share one lead byte, so an odd
			 * row is placed 0x5e cells into the trail range. */
			c1 = code >> 8;
			c2 = code & 0xff;
			c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
			c1 = (c1 - 0x21) >> 1;
			OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
			/* trail bytes skip 0x7f */
			OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		} else if (c >= 0xe000 && c < 0xe758) {
			/* User-defined area */
			c1 = (Py_UNICODE)(c - 0xe000) / 188;
			c2 = (Py_UNICODE)(c - 0xe000) % 188;
			OUT1(c1 + 0xf0)
			OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		} else
			return 1;

		NEXT(1, 2)
	}

	return 0;
}
/*
 * CP932 decoder.
 *
 * Single bytes: 0x00-0x80 map to themselves, 0xa0 to U+F8F0, 0xa1-0xdf
 * to U+FF61-U+FF9F and 0xfd-0xff to U+F8F1-U+F8F3.  Everything else is a
 * two-byte sequence looked up in cp932ext, then (after converting the
 * Shift_JIS bytes back to a JIS X 0208 row/cell) in jisx0208, then in
 * the user-defined area (leads 0xf0-0xf9 -> U+E000 onwards).
 * Returns 0 when done and 2 for an invalid two-byte sequence.
 */
DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
RESERVE_OUTBUF(1)
if (c <= 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
} else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
OUT1(0xf8f0) /* Windows compatibility (private use) */
else
OUT1(0xfec0 + c) /* half-width katakana */
NEXT(1, 1)
continue;
} else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
OUT1(0xf8f1 - 0xfd + c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
c2 = IN2;
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
/* valid trail bytes are 0x40-0x7e and 0x80-0xfc */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* undo the Shift_JIS packing: each lead byte covers two rows of
 * 0x5e cells */
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
} else if (c >= 0xf0 && c <= 0xf9) {
/* user-defined area: 188 characters per lead byte */
if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc))
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
return 2;
} else
return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/* Registration of the cp932 codec together with the mapping tables it
 * uses: jisx0208 (decode), cp932ext (both directions) and jisxcommon
 * (encode).  The tables are presumably loaded from the ja_JP mapping
 * module named in MAPOPEN -- confirm in codecentry.h. */
BEGIN_CODEC_REGISTRY(cp932)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_ENCDEC(cp932ext)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(cp932)

View File

@ -1,73 +0,0 @@
/*
* _cp949.c: the CP949 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _cp949.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(cp949)
DECMAP(ksx1001)
DECMAP(cp949ext)
/*
 * CP949 encoder: ASCII is copied, everything else is looked up in the
 * cp949 table and written as two bytes.  Returns 1 for a character that
 * is not in the table, 0 when all input is consumed.
 */
ENCODER(cp949)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
/* The lead byte always gets its high bit set.  Bit 15 of the table
 * value tells whether the trail byte is stored verbatim (CP949
 * extension) or needs its high bit set as well (KS X 1001). */
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2)
}
return 0;
}
/*
 * CP949 decoder: ASCII is copied; a two-byte sequence is tried first as
 * KS X 1001 (both bytes with the high bit flipped) and then in the
 * cp949ext table (bytes as-is).  Returns 2 for an unmapped pair.
 */
DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/* Registration of the cp949 codec with its tables: ksx1001 and cp949ext
 * for decoding, cp949 for encoding (opened via MAPOPEN(ko_KR)). */
BEGIN_CODEC_REGISTRY(cp949)
MAPOPEN(ko_KR)
IMPORTMAP_DEC(ksx1001)
IMPORTMAP_DEC(cp949ext)
IMPORTMAP_ENC(cp949)
MAPCLOSE()
END_CODEC_REGISTRY(cp949)

View File

@ -1,72 +0,0 @@
/*
* _cp950.c: the CP950 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _cp950.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(big5)
ENCMAP(cp950ext)
DECMAP(big5)
DECMAP(cp950ext)
/*
 * CP950 encoder: ASCII is copied; other characters are looked up in
 * cp950ext first and big5 second, so the extension table takes
 * precedence.  The table value is written as-is, high byte first.
 * Returns 1 for an unmapped character.
 */
ENCODER(cp950)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * CP950 decoder: ASCII is copied; two-byte sequences are looked up in
 * cp950ext first and big5 second.  Returns 2 for an unmapped pair.
 */
DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/* Registration of the cp950 codec; big5 and cp950ext are each imported
 * in both directions (opened via MAPOPEN(zh_TW)). */
BEGIN_CODEC_REGISTRY(cp950)
MAPOPEN(zh_TW)
IMPORTMAP_ENCDEC(big5)
IMPORTMAP_ENCDEC(cp950ext)
MAPCLOSE()
END_CODEC_REGISTRY(cp950)

View File

@ -1,180 +0,0 @@
/*
* _euc_jisx0213.c: the EUC-JISX0213 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _euc_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#define USING_BINARY_PAIR_SEARCH
#include "codeccommon.h"
#include "map_jisx0213_pairs.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
ENCMAP(jisx0213_bmp)
DECMAP(jisx0213_1_bmp)
DECMAP(jisx0213_2_bmp)
ENCMAP(jisx0213_emp)
DECMAP(jisx0213_1_emp)
DECMAP(jisx0213_2_emp)
/* Base of the supplementary plane handled through the *_emp tables; the
 * tables store only the low 16 bits of such code points. */
#define EMPBASE 0x20000
/*
 * EUC-JISX0213 encoder.
 *
 * BMP characters are tried in jisx0213_bmp, then jisxcommon, then a few
 * special cases; characters in plane 2 (c >> 16 == 2) are tried in
 * jisx0213_emp.  A table value with bit 15 set is written as a
 * three-byte 0x8f sequence, any other value as two bytes with the high
 * bits set.  Returns 0 when done, 1 or insize for an unencodable
 * character, or MBERR_TOOFEW when a possible two-character sequence is
 * cut off and MBENC_FLUSH is not set.
 */
ENCODER(euc_jisx0213)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
int insize;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
if (c <= 0xFFFF) {
/* try 0213 first because it might have MULTIC */
TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC marks a character that may combine with the one that
 * follows it into a single code; the pair table is searched with
 * the following character first and with 0 (the character on
 * its own) as the fallback. */
if (code == MULTIC) {
if (inleft < 2) {
if (flags & MBENC_FLUSH) {
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
return MBERR_TOOFEW;
} else {
code = find_pairencmap((ucs2_t)c, (*inbuf)[1],
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
} else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
} else if (c == 0xff3c)
/* F/W REVERSE SOLIDUS (see NOTES.euc-jisx0213) */
code = 0x2140;
else if (c == 0xff5e)
/* F/W TILDE (see NOTES.euc-jisx0213) */
code = 0x2232;
else
return 1;
} else if (c >> 16 == EMPBASE >> 16) {
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
} else
return insize;
if (code & 0x8000) {
/* Codeset 2 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(insize, 3)
} else {
/* Codeset 1 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(insize, 2)
}
}
return 0;
}
/*
 * EUC-JISX0213 decoder.
 *
 * 0x8e introduces a half-width katakana byte, 0x8f a three-byte
 * sequence (JIS X 0213 plane 2 tables first, JIS X 0212 last) and any
 * other high byte a two-byte plane 1 sequence.  Results from the *_emp
 * tables are combined with EMPBASE and written with PUTUCS4; results
 * from the pair table are written as two output characters.
 * Returns 0 when done, or 2/3 for an invalid sequence of that length.
 */
DECODER(euc_jisx0213)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t code;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
RESERVE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
} else
return 2;
} else if (c == 0x8f) {
unsigned char c2, c3;
RESERVE_INBUF(3)
c2 = IN2 ^ 0x80;
c3 = IN3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES.euc-jisx0213) */
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
PUTUCS4(EMPBASE | code)
NEXT_IN(3)
continue;
} else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
else return 3;
NEXT(3, 1)
} else {
unsigned char c2;
RESERVE_INBUF(2)
c ^= 0x80;
c2 = IN2 ^ 0x80;
/* JIS X 0213 Plane 1 */
/* the two hard-coded cells mirror the encoder's special cases for
 * U+FF3C and U+FF5E and take precedence over the tables */
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
PUTUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
/* one code decodes to two characters packed as high/low 16 bits */
WRITE2(code >> 16, code & 0xffff)
NEXT(2, 2)
continue;
} else return 2;
NEXT(2, 1)
}
}
return 0;
}
#include "codecentry.h"
/* Registration of the euc_jisx0213 codec with the encode and decode
 * tables used above (opened via MAPOPEN(ja_JP)).  jisx0213_pair is not
 * imported here; presumably it comes from map_jisx0213_pairs.h, which
 * this file includes directly -- confirm. */
BEGIN_CODEC_REGISTRY(euc_jisx0213)
MAPOPEN(ja_JP)
IMPORTMAP_ENC(jisxcommon)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisx0213_bmp)
IMPORTMAP_DEC(jisx0213_1_bmp)
IMPORTMAP_DEC(jisx0213_2_bmp)
IMPORTMAP_ENC(jisx0213_emp)
IMPORTMAP_DEC(jisx0213_1_emp)
IMPORTMAP_DEC(jisx0213_2_emp)
MAPCLOSE()
END_CODEC_REGISTRY(euc_jisx0213)

View File

@ -1,127 +0,0 @@
/*
* _euc_jp.c: the EUC-JP codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _euc_jp.c,v 1.5 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
/*
 * EUC-JP encoder.
 *
 * ASCII is copied; other characters are looked up in jisxcommon, with
 * half-width katakana handled arithmetically (0x8e prefix).  Unless
 * STRICT_BUILD is defined, U+FF3C, U+00A5 and U+203E are additionally
 * accepted and written as 0xa1c0, 0x5c and 0x7e respectively.
 * A table value with bit 15 set is JIS X 0212 (three bytes, 0x8f
 * prefix); otherwise it is JIS X 0208 (two bytes).
 * Returns 1 for an unencodable character.
 */
ENCODER(euc_jp)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
WRITE1(0x5c);
NEXT(1, 1)
continue;
} else if (c == 0x203e) { /* OVERLINE */
WRITE1(0x7e);
NEXT(1, 1)
continue;
}
#endif
else
return 1;
if (code & 0x8000) {
/* JIS X 0212 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(1, 3)
} else {
/* JIS X 0208 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(1, 2)
}
}
return 0;
}
/*
 * EUC-JP decoder.
 *
 * 0x8e + byte is half-width katakana, 0x8f + two bytes is JIS X 0212,
 * any other high byte starts a two-byte JIS X 0208 sequence.  Table
 * lookups use the bytes with the high bit flipped.  Unless STRICT_BUILD
 * is defined, 0xa1 0xc0 decodes to U+FF3C without consulting the table.
 * Returns 2 or 3 for an invalid sequence of that length.
 */
DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
RESERVE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
} else
return 2;
} else if (c == 0x8f) {
unsigned char c2, c3;
RESERVE_INBUF(3)
c2 = IN2;
c3 = IN3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
NEXT(3, 1)
} else
return 3;
} else {
unsigned char c2;
RESERVE_INBUF(2)
c2 = IN2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else
#endif
TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ;
else return 2;
NEXT(2, 1)
}
}
return 0;
}
#include "codecentry.h"
/* Registration of the euc_jp codec with its tables: jisx0208 and
 * jisx0212 for decoding, jisxcommon for encoding (MAPOPEN(ja_JP)). */
BEGIN_CODEC_REGISTRY(euc_jp)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(euc_jp)

View File

@ -1,71 +0,0 @@
/*
* _euc_kr.c: the EUC-KR codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _euc_kr.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(cp949)
DECMAP(ksx1001)
/*
 * EUC-KR encoder: ASCII is copied; other characters are looked up in
 * the shared cp949 table, but entries flagged with bit 15 (CP949-only
 * extensions) are rejected.  Both output bytes get the high bit set.
 * Returns 1 for an unencodable character.
 */
ENCODER(euc_kr)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
if (code & 0x8000) /* MSB set: CP949 */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * EUC-KR decoder: ASCII is copied; any other byte starts a two-byte
 * sequence looked up in ksx1001 with the high bit of both bytes
 * flipped.  Returns 2 for an unmapped pair.
 */
DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
} else return 2;
}
return 0;
}
#include "codecentry.h"
/* Registration of the euc_kr codec: ksx1001 for decoding and the shared
 * cp949 table for encoding (MAPOPEN(ko_KR)). */
BEGIN_CODEC_REGISTRY(euc_kr)
MAPOPEN(ko_KR)
IMPORTMAP_DEC(ksx1001)
IMPORTMAP_ENC(cp949)
MAPCLOSE()
END_CODEC_REGISTRY(euc_kr)

View File

@ -1,181 +0,0 @@
/*
* _gb18030.c: the GB18030 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _gb18030.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
#include "tweak_gbk.h"
#include "map_gb18030uni.h"
ENCMAP(gbcommon)
ENCMAP(gb18030ext)
DECMAP(gb2312)
DECMAP(gbkext)
DECMAP(gb18030ext)
/*
 * GB18030 encoder.
 *
 * ASCII is one byte.  Code points U+10000-U+10FFFF are computed
 * arithmetically into four bytes.  BMP characters are tried in the GBK
 * tweak, gbcommon and gb18030ext (two bytes); whatever is left is
 * located in gb18030_to_unibmp_ranges and written as four bytes.
 * The four-byte form is a mixed-radix number: digits of 10, 126, 10
 * from the last byte backwards, with offsets 0x30, 0x81, 0x30 and a
 * lead byte offset of 0x81 (BMP) or 0x90 (supplementary).
 * Returns 0 when done, 1 (or 2 for a surrogate pair on narrow builds)
 * for an unencodable character.
 */
ENCODER(gb18030)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
return 2; /* surrogates pair */
#else
return 1;
#endif
else if (c >= 0x10000) {
ucs4_t tc = c - 0x10000;
RESERVE_OUTBUF(4)
/* bytes are produced last to first */
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2
NEXT(2, 4) /* surrogates pair */
#else
NEXT(1, 4)
#endif
continue;
}
RESERVE_OUTBUF(2)
GBK_PREENCODE(c, code)
else TRYMAP_ENC(gbcommon, code, c);
else TRYMAP_ENC(gb18030ext, code, c);
else {
const struct _gb18030_to_unibmp_ranges *utrrange;
RESERVE_OUTBUF(4)
/* linear search; the table ends with an entry whose first == 0 */
for (utrrange = gb18030_to_unibmp_ranges;
utrrange->first != 0;
utrrange++)
if (utrrange->first <= c && c <= utrrange->last) {
Py_UNICODE tc;
tc = c - utrrange->first + utrrange->base;
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)tc + 0x81)
NEXT(1, 4)
break;
}
/* reaching the sentinel means no range covered c */
if (utrrange->first == 0) {
PyErr_SetString(PyExc_RuntimeError,
"unicode mapping invalid");
return 1;
}
continue;
}
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GB18030 decoder.
 *
 * ASCII is one byte.  A second byte in 0x30-0x39 marks a four-byte
 * sequence, which is turned into a linear index
 *     ((b1 * 10 + b2) * 126 + b3) * 10 + b4      (1260 = 126 * 10)
 * after subtracting the byte offsets.  Indices with b1 < 4 address the
 * BMP through gb18030_to_unibmp_ranges; b1 >= 15 addresses
 * U+10000-U+10FFFF directly.  Everything else is a two-byte sequence
 * tried in the GBK tweak, gb2312, gbkext and gb18030ext.
 * Returns 0 when done, or 2/4 for an invalid sequence of that length.
 */
DECODER(gb18030)
{
	while (inleft > 0) {
		unsigned char c = IN1, c2;

		RESERVE_OUTBUF(1)

		if (c < 0x80) {
			OUT1(c)
			NEXT(1, 1)
			continue;
		}

		RESERVE_INBUF(2)

		c2 = IN2;
		if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
			const struct _gb18030_to_unibmp_ranges *utr;
			unsigned char c3, c4;
			ucs4_t lseq;

			RESERVE_INBUF(4)
			c3 = IN3;
			c4 = IN4;
			/* Bytes 1 and 3 must be 0x81-0xfe, bytes 2 and 4 must
			 * be 0x30-0x39.  The upper bounds on bytes 1 and 3 were
			 * not checked before; a third byte of 0xff then acted
			 * as digit 126 and aliased a different, valid
			 * sequence. */
			if (c < 0x81 || c > 0xfe || c3 < 0x81 || c3 > 0xfe ||
			    c4 < 0x30 || c4 > 0x39)
				return 4;
			c -= 0x81;  c2 -= 0x30;
			c3 -= 0x81; c4 -= 0x30;

			if (c < 4) { /* U+0080 - U+FFFF */
				lseq = ((ucs4_t)c * 10 + c2) * 1260 +
					(ucs4_t)c3 * 10 + c4;
				if (lseq < 39420) {
					/* find the last range whose base is
					 * not above lseq */
					for (utr = gb18030_to_unibmp_ranges;
					     lseq >= (utr + 1)->base;
					     utr++) ;
					OUT1(utr->first - utr->base + lseq)
					NEXT(4, 1)
					continue;
				}
			}
			else if (c >= 15) { /* U+10000 - U+10FFFF */
				lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) * 1260 +
					(ucs4_t)c3 * 10 + c4;
				if (lseq <= 0x10FFFF) {
					PUTUCS4(lseq);
					NEXT_IN(4)
					continue;
				}
			}
			return 4;
		}

		GBK_PREDECODE(c, c2, **outbuf)
		else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
		else TRYMAP_DEC(gbkext, **outbuf, c, c2);
		else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
		else return 2;

		NEXT(2, 1)
	}

	return 0;
}
#include "codecentry.h"
/* Registration of the gb18030 codec with its tables: gb2312 and gbkext
 * for decoding, gbcommon for encoding and gb18030ext in both directions
 * (MAPOPEN(zh_CN)). */
BEGIN_CODEC_REGISTRY(gb18030)
MAPOPEN(zh_CN)
IMPORTMAP_DEC(gb2312)
IMPORTMAP_DEC(gbkext)
IMPORTMAP_ENC(gbcommon)
IMPORTMAP_ENCDEC(gb18030ext)
MAPCLOSE()
END_CODEC_REGISTRY(gb18030)

View File

@ -1,69 +0,0 @@
/*
* _gb2312.c: the GB2312 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _gb2312.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(gbcommon)
DECMAP(gb2312)
/*
 * GB2312 encoder: ASCII is copied; other characters are looked up in
 * the shared gbcommon table, rejecting entries flagged with bit 15
 * (GBK-only).  Both output bytes get the high bit set.
 * Returns 1 for an unencodable character.
 */
ENCODER(gb2312)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * GB2312 decoder: ASCII is copied; any other byte starts a two-byte
 * sequence looked up in gb2312 with the high bit of both bytes flipped.
 * Returns 2 for an unmapped pair.
 */
DECODER(gb2312)
{
while (inleft > 0) {
unsigned char c = **inbuf;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
} else return 2;
}
return 0;
}
#include "codecentry.h"
/* Registration of the gb2312 codec: gb2312 for decoding, gbcommon for
 * encoding (MAPOPEN(zh_CN)). */
BEGIN_CODEC_REGISTRY(gb2312)
MAPOPEN(zh_CN)
IMPORTMAP_DEC(gb2312)
IMPORTMAP_ENC(gbcommon)
MAPCLOSE()
END_CODEC_REGISTRY(gb2312)

View File

@ -1,78 +0,0 @@
/*
* _gbk.c: the GBK codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _gbk.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
#include "tweak_gbk.h"
ENCMAP(gbcommon)
DECMAP(gb2312)
DECMAP(gbkext)
/*
 * GBK encoder: ASCII is copied; other characters go through the
 * GBK_PREENCODE tweak (from tweak_gbk.h) and then the gbcommon table.
 * The lead byte always gets its high bit set; bit 15 of the table value
 * decides whether the trail byte is written verbatim (GBK) or with its
 * high bit set (GB2312).  Returns 1 for an unencodable character.
 */
ENCODER(gbk)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
GBK_PREENCODE(c, code)
else TRYMAP_ENC(gbcommon, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GBK decoder: ASCII is copied; two-byte sequences go through the
 * GBK_PREDECODE tweak, then gb2312 (high bits flipped), then gbkext
 * (bytes as-is).  Returns 2 for an unmapped pair.
 */
DECODER(gbk)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
GBK_PREDECODE(c, IN2, **outbuf)
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/* Registration of the gbk codec: gb2312 and gbkext for decoding,
 * gbcommon for encoding (MAPOPEN(zh_CN)). */
BEGIN_CODEC_REGISTRY(gbk)
MAPOPEN(zh_CN)
IMPORTMAP_DEC(gb2312)
IMPORTMAP_DEC(gbkext)
IMPORTMAP_ENC(gbcommon)
MAPCLOSE()
END_CODEC_REGISTRY(gbk)

View File

@ -1,134 +0,0 @@
/*
* _hz.c: the HZ codec (RFC1843)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _hz.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(gbcommon)
DECMAP(gb2312)
/* HAVE_ENCODER_INIT presumably tells codecentry.h that this codec
 * supplies its own init function -- confirm there. */
#define HAVE_ENCODER_INIT
/* Start the HZ encoder in ASCII mode (state->i == 0; 1 means GB mode).
 * Always returns 0. */
ENCODER_INIT(hz)
{
state->i = 0;
return 0;
}
#define HAVE_ENCODER_RESET
ENCODER_RESET(hz)
{
if (state->i != 0) {
WRITE2('~', '}')
state->i = 0;
NEXT_OUT(2)
}
return 0;
}
/*
 * HZ encoder.  state->i is the current mode: 0 = ASCII, 1 = GB.
 *
 * ASCII characters are written directly, preceded by "~}" when the
 * encoder has to leave GB mode.  Other characters are looked up in
 * gbcommon (GBK-only entries, flagged by bit 15, are rejected) and
 * written as two 7-bit bytes, preceded by "~{" when entering GB mode.
 * Returns 1 for an unencodable character.
 */
ENCODER(hz)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
WRITE1((unsigned char)c)
NEXT(1, 1)
} else {
/* leave GB mode first */
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
state->i = 0;
}
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
if (state->i == 0) {
/* enter GB mode and write the character in one go */
WRITE4('~', '{', code >> 8, code & 0xff)
NEXT(1, 4)
state->i = 1;
} else {
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
}
}
return 0;
}
#define HAVE_DECODER_INIT
/* Start the HZ decoder in ASCII mode (state->i == 0).  Always returns 0. */
DECODER_INIT(hz)
{
state->i = 0;
return 0;
}
#define HAVE_DECODER_RESET
/* Put the HZ decoder back into ASCII mode.  Always returns 0. */
DECODER_RESET(hz)
{
state->i = 0;
return 0;
}
DECODER(hz)
{
while (inleft > 0) {
unsigned char c = IN1;
if (c == '~') {
unsigned char c2 = IN2;
RESERVE_INBUF(2)
if (c2 == '~') {
WRITE1('~')
NEXT(2, 1)
continue;
} else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 2;
NEXT(2, 0);
continue;
}
if (c & 0x80)
return 1;
if (state->i == 0) { /* ASCII mode */
WRITE1(c)
NEXT(1, 1)
} else { /* GB mode */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
NEXT(2, 1)
} else
return 2;
}
}
return 0;
}
#include "codecentry.h"
/* Registration of the hz codec: gb2312 for decoding, gbcommon for
 * encoding (MAPOPEN(zh_CN)). */
BEGIN_CODEC_REGISTRY(hz)
MAPOPEN(zh_CN)
IMPORTMAP_DEC(gb2312)
IMPORTMAP_ENC(gbcommon)
MAPCLOSE()
END_CODEC_REGISTRY(hz)

View File

@ -1,173 +0,0 @@
/*
* _iso2022_jp.c: the ISO-2022-JP codec (RFC1468)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_jp.c,v 1.7 2003/12/31 05:46:55 perky Exp $
*/
/* Configuration for iso2022common.h, which is included further down and
 * must therefore see these macros already defined.
 * ISO2022_DESIGNATIONS lists the character sets of ISO-2022-JP; the two
 * flag macros presumably disable shift handling and enable the extended
 * JIS X 0208 designation in the shared code -- confirm in that header. */
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, CHARSET_JISX0208_O
#define ISO2022_NO_SHIFT
#define ISO2022_USE_JISX0208EXT
#include "codeccommon.h"
#include "iso2022common.h"
#include "alg_jisx0201.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
#define HAVE_ENCODER_INIT
/* Initial encoder state: no flags set, ASCII designated to both G0 and
 * G1.  Always returns 0. */
ENCODER_INIT(iso2022_jp)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
/* Finish the stream: when G0 is not ASCII, write ESC ( B to designate
 * ASCII again and record that in the state.  Returns 0. */
ENCODER_RESET(iso2022_jp)
{
if (STATE_GETG0(state) != CHARSET_ASCII) {
RESERVE_OUTBUF(3)
WRITE3(ESC, '(', 'B')
STATE_SETG0(state, CHARSET_ASCII)
NEXT_OUT(3)
}
return 0;
}
/* ISO-2022-JP changes designations instead of shifting-out */
/*
 * ISO-2022-JP encoder.
 *
 * The character set currently designated to G0 is kept in the state and
 * an escape sequence is written whenever the next character needs a
 * different one: ESC ( B for ASCII, ESC ( J for JIS X 0201 Roman,
 * ESC $ B for JIS X 0208.  JIS X 0212 characters (table value with bit
 * 15 set) cannot be encoded in this variant.
 * Returns 0 when done, 1 for an unencodable character.
 */
ENCODER(iso2022_jp)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
switch (STATE_GETG0(state)) {
case CHARSET_ASCII:
WRITE1((unsigned char)c)
NEXT(1, 1)
break;
case CHARSET_JISX0201_R:
/* If c exists in JIS X 0201 Roman it is written without
 * switching.  Otherwise control continues in the else block,
 * which is also the target of the default label, and ASCII is
 * designated first. */
JISX0201_R_ENCODE(c, code)
else { /* FALLTHROUGH (yay!) */
default:
WRITE3(ESC, '(', 'B')
NEXT_OUT(3)
STATE_SETG0(state, CHARSET_ASCII)
code = c;
}
WRITE1((unsigned char)code)
NEXT(1, 1)
break;
}
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
}
else UCS4INVALID(c)
else {
unsigned char charset;
charset = STATE_GETG0(state);
if (charset == CHARSET_JISX0201_R) {
/* stay in JIS X 0201 Roman when the character is available
 * there; code is preset so a failed lookup can be detected */
code = DBCINV;
JISX0201_R_ENCODE(c, code)
if (code != DBCINV) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
}
TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
/* also entered by goto from the U+FF3C special case below */
jisx0208encode: if (charset != CHARSET_JISX0208) {
WRITE3(ESC, '$', 'B')
STATE_SETG0(state, CHARSET_JISX0208)
NEXT_OUT(3)
}
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
goto jisx0208encode;
} else {
JISX0201_R_ENCODE(c, code)
else
return 1;
/* if (charset == CHARSET_JISX0201_R) : already checked */
WRITE4(ESC, '(', 'J', (unsigned char)code)
STATE_SETG0(state, CHARSET_JISX0201_R)
NEXT(1, 4)
}
}
}
return 0;
}
#define HAVE_DECODER_INIT
/* Initial decoder state: no flags set, ASCII designated to both G0 and
 * G1.  Always returns 0. */
DECODER_INIT(iso2022_jp)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
/* Reset only clears the F_SHIFTED flag; the G0/G1 designations are left
 * as they are.  Always returns 0. */
DECODER_RESET(iso2022_jp)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
/*
 * ISO-2022-JP decoder.
 *
 * The loop itself, including escape-sequence handling and the
 * definition of c, comes from ISO2022_LOOP_BEGIN/END in
 * iso2022common.h (not shown here).  The body decodes one character
 * according to the active character set: double-byte sets through the
 * jisx0208 table, ASCII directly, JIS X 0201 Roman through
 * JISX0201_R_DECODE.
 * Returns 0 when done, 1 or 2 for invalid input of that length, or
 * MBERR_INTERNAL for a character set this codec does not know.
 */
DECODER(iso2022_jp)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
/* all double byte character sets are in JIS X 0208 here.
* this means that we don't distinguish :1978 from :1983. */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (c == 0x21 && c2 == 0x40) { /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
NEXT(2, 1)
} else TRYMAP_DEC(jisx0208, **outbuf, c, c2) {
NEXT(2, 1)
} else
return 2;
} else if (charset == CHARSET_ASCII) {
RESERVE_OUTBUF(1)
OUT1(c)
NEXT(1, 1)
} else if (charset == CHARSET_JISX0201_R) {
RESERVE_OUTBUF(1)
JISX0201_R_DECODE(c, **outbuf)
else
return 1;
NEXT(1, 1)
} else
return MBERR_INTERNAL;
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
/* Registration of the iso2022_jp codec: jisx0208 for decoding,
 * jisxcommon for encoding (MAPOPEN(ja_JP)). */
BEGIN_CODEC_REGISTRY(iso2022_jp)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_jp)

View File

@ -1,185 +0,0 @@
/*
* _iso2022_jp_1.c: the ISO-2022-JP-1 codec (RFC2237)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_jp_1.c,v 1.8 2003/12/31 05:46:55 perky Exp $
*/
/* Configuration for iso2022common.h, which is included further down and
 * must therefore see these macros already defined.
 * ISO-2022-JP-1 uses the same character sets as ISO-2022-JP plus
 * JIS X 0212.  The two flag macros presumably disable shift handling
 * and enable the extended JIS X 0208 designation in the shared code --
 * confirm in that header. */
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
CHARSET_JISX0208_O, CHARSET_JISX0212
#define ISO2022_NO_SHIFT
#define ISO2022_USE_JISX0208EXT
#include "codeccommon.h"
#include "iso2022common.h"
#include "alg_jisx0201.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
#define HAVE_ENCODER_INIT
/* Initial encoder state: no flags set, ASCII designated to both G0 and
 * G1.  Always returns 0. */
ENCODER_INIT(iso2022_jp_1)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
/* Finish the stream: when G0 is not ASCII, write ESC ( B to designate
 * ASCII again and record that in the state.  Returns 0. */
ENCODER_RESET(iso2022_jp_1)
{
if (STATE_GETG0(state) != CHARSET_ASCII) {
RESERVE_OUTBUF(3)
WRITE3(ESC, '(', 'B')
STATE_SETG0(state, CHARSET_ASCII)
NEXT_OUT(3)
}
return 0;
}
/* ISO-2022-JP-1 changes designations instead of shifting-out */
/*
 * ISO-2022-JP-1 encoder.
 *
 * Works like the ISO-2022-JP encoder but can also write JIS X 0212
 * characters: a jisxcommon value with bit 15 set is written after
 * ESC $ ( D with the high bit of both bytes masked off.  The other
 * designations are ESC ( B (ASCII), ESC ( J (JIS X 0201 Roman) and
 * ESC $ B (JIS X 0208).
 * Returns 0 when done, 1 for an unencodable character.
 */
ENCODER(iso2022_jp_1)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
if (c < 0x80) {
switch (STATE_GETG0(state)) {
case CHARSET_ASCII:
WRITE1((unsigned char)c)
NEXT(1, 1)
break;
case CHARSET_JISX0201_R:
/* If c exists in JIS X 0201 Roman it is written without
 * switching.  Otherwise control continues in the else block,
 * which is also the target of the default label, and ASCII is
 * designated first. */
JISX0201_R_ENCODE(c, code)
else { /* FALLTHROUGH (yay!) */
default:
WRITE3(ESC, '(', 'B')
NEXT_OUT(3)
STATE_SETG0(state, CHARSET_ASCII)
code = c;
}
WRITE1((unsigned char)code)
NEXT(1, 1)
break;
}
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
}
else UCS4INVALID(c)
else {
unsigned char charset;
charset = STATE_GETG0(state);
if (charset == CHARSET_JISX0201_R) {
/* stay in JIS X 0201 Roman when the character is available
 * there; code is preset so a failed lookup can be detected */
code = DBCINV;
JISX0201_R_ENCODE(c, code)
if (code != DBCINV) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
}
TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) { /* MSB set: JIS X 0212 */
if (charset != CHARSET_JISX0212) {
WRITE4(ESC, '$', '(', 'D')
STATE_SETG0(state, CHARSET_JISX0212)
NEXT_OUT(4)
}
/* strip the flag bit (and the trail byte's high bit) */
WRITE2((code >> 8) & 0x7f, code & 0x7f)
} else { /* MSB unset: JIS X 0208 */
/* also entered by goto from the U+FF3C special case below */
jisx0208encode: if (charset != CHARSET_JISX0208) {
WRITE3(ESC, '$', 'B')
STATE_SETG0(state, CHARSET_JISX0208)
NEXT_OUT(3)
}
WRITE2(code >> 8, code & 0xff)
}
NEXT(1, 2)
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
goto jisx0208encode;
} else {
JISX0201_R_ENCODE(c, code)
else
return 1;
/* if (charset == CHARSET_JISX0201_R) : already checked */
WRITE4(ESC, '(', 'J', (unsigned char)code)
STATE_SETG0(state, CHARSET_JISX0201_R)
NEXT(1, 4)
}
}
}
return 0;
}
#define HAVE_DECODER_INIT
/*
 * Put the decoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
DECODER_INIT(iso2022_jp_1)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
/*
 * Reset the decoder: only the F_SHIFTED flag is cleared; the G0/G1
 * designations are left as they are.  Always returns 0.
 */
DECODER_RESET(iso2022_jp_1)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
/*
 * Decode ISO-2022-JP-1 input to Unicode.
 *
 * The loop framing and the current charset come from ISO2022_LOOP_BEGIN /
 * ISO2022_GETCHARSET (defined in iso2022common.h, not visible here).
 * Double-byte charsets handled: JIS X 0208 (both designations) and
 * JIS X 0212; single-byte: ASCII and JIS X 0201 Roman.
 *
 * Returns 0 at end of input, 2 for an unmapped double-byte sequence,
 * 1 for an invalid JIS X 0201 Roman byte, and MBERR_INTERNAL when the
 * designated charset is not one this codec handles.
 */
DECODER(iso2022_jp_1)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
/* Both bytes must be present before the trail byte is read. */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_JISX0212) {
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
else return 2;
} else
return MBERR_INTERNAL;
NEXT(2, 1)
} else if (charset == CHARSET_ASCII) {
RESERVE_OUTBUF(1)
OUT1(c)
NEXT(1, 1)
} else if (charset == CHARSET_JISX0201_R) {
RESERVE_OUTBUF(1)
JISX0201_R_DECODE(c, **outbuf)
else
return 1;
NEXT(1, 1)
} else
return MBERR_INTERNAL;
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
/*
 * Codec registry for iso2022_jp_1: imports the jisx0208 and jisx0212
 * decode maps and the jisxcommon encode map inside a ja_JP
 * MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(iso2022_jp_1)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_jp_1)

View File

@ -1,230 +0,0 @@
/*
* _iso2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_jp_2.c,v 1.8 2003/12/31 05:46:55 perky Exp $
*/
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \
CHARSET_KSX1001, CHARSET_JISX0212, \
CHARSET_ISO8859_1, CHARSET_ISO8859_7
#define ISO2022_USE_G2_DESIGNATION yo!
#define ISO2022_USE_JISX0208EXT
#include "codeccommon.h"
#include "iso2022common.h"
#include "alg_jisx0201.h"
#include "alg_iso8859_1.h"
#include "alg_iso8859_7.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
ENCMAP(cp949)
DECMAP(ksx1001)
ENCMAP(gbcommon)
DECMAP(gb2312)
#define HAVE_ENCODER_INIT
/*
 * Put the encoder into its initial state: all flags cleared and G0, G1
 * and G2 designated as ASCII.  Always returns 0.
 */
ENCODER_INIT(iso2022_jp_2)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
STATE_SETG2(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
/*
 * Finish an encoded stream: if G0 is not ASCII, emit ESC ( B and record
 * ASCII as the G0 designation.  Returns 0 on the visible paths; WRITE3
 * may return early (its definition is not visible here).
 */
ENCODER_RESET(iso2022_jp_2)
{
if (STATE_GETG0(state) != CHARSET_ASCII) {
WRITE3(ESC, '(', 'B')
STATE_SETG0(state, CHARSET_ASCII)
NEXT_OUT(3)
}
return 0;
}
/*
 * Encode Unicode input as ISO-2022-JP-2.
 *
 * Processes one input unit per iteration and switches the G0 designation
 * (tracked in `state`) whenever the character needs a different charset.
 * Maps are tried in this order: jisxcommon (JIS X 0208 / JIS X 0212),
 * cp949 (KS X 1001 subset), gbcommon (GB 2312 subset), then JIS X 0201
 * Roman.  Escape sequences written:
 *   ESC ( B    ASCII            ESC ( J    JIS X 0201 Roman
 *   ESC $ B    JIS X 0208       ESC $ ( D  JIS X 0212
 *   ESC $ ( C  KS X 1001        ESC $ ( A  GB 2312
 *
 * Returns 0 when all input has been consumed, 1 when no map has the
 * character, and 2 when cp949/gbcommon only offers a code with the MSB
 * set.  The WRITEn/NEXT/UCS4INVALID macros may also return early.
 */
ENCODER(iso2022_jp_2)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
/* 7-bit input: how it is written depends on the current G0 set. */
switch (STATE_GETG0(state)) {
case CHARSET_ASCII:
WRITE1((unsigned char)c)
NEXT(1, 1)
break;
case CHARSET_JISX0201_R:
/* Stay in JIS X 0201 Roman if it can represent c; otherwise the
 * else-block below (shared with `default:`) redesignates ASCII. */
JISX0201_R_ENCODE(c, code)
else { /* FALLTHROUGH (yay!) */
default:
WRITE3(ESC, '(', 'B')
NEXT_OUT(3)
STATE_SETG0(state, CHARSET_ASCII)
code = c;
}
WRITE1((unsigned char)code)
NEXT(1, 1)
break;
}
/* A newline also clears the shifted flag. */
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
}
else UCS4INVALID(c)
else {
unsigned char charset;
charset = STATE_GETG0(state);
/* While JIS X 0201 Roman is designated, prefer a single-byte Roman
 * encoding before trying the double-byte maps. */
if (charset == CHARSET_JISX0201_R) {
code = DBCINV;
JISX0201_R_ENCODE(c, code)
if (code != DBCINV) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
}
TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) { /* MSB set: JIS X 0212 */
if (charset != CHARSET_JISX0212) {
WRITE4(ESC, '$', '(', 'D')
STATE_SETG0(state, CHARSET_JISX0212)
NEXT_OUT(4)
}
WRITE2((code >> 8) & 0x7f, code & 0x7f)
} else { /* MSB unset: JIS X 0208 */
/* Also entered by goto from the U+FF3C special case below. */
jisx0208encode: if (charset != CHARSET_JISX0208) {
WRITE3(ESC, '$', 'B')
STATE_SETG0(state, CHARSET_JISX0208)
NEXT_OUT(3)
}
WRITE2(code >> 8, code & 0xff)
}
NEXT(1, 2)
} else TRYMAP_ENC(cp949, code, c) {
/* Only the 7-bit KS X 1001 part of the cp949 map can be emitted. */
if (code & 0x8000) /* MSB set: CP949 */
return 2;
if (charset != CHARSET_KSX1001) {
WRITE4(ESC, '$', '(', 'C')
STATE_SETG0(state, CHARSET_KSX1001)
NEXT_OUT(4)
}
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
} else TRYMAP_ENC(gbcommon, code, c) {
/* Only the 7-bit GB 2312 part of the gbcommon map can be emitted. */
if (code & 0x8000) /* MSB set: GBK */
return 2;
if (charset != CHARSET_GB2312) {
WRITE4(ESC, '$', '(', 'A')
STATE_SETG0(state, CHARSET_GB2312)
NEXT_OUT(4)
}
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
goto jisx0208encode;
} else {
JISX0201_R_ENCODE(c, code)
else {
/* There's no need to try to encode as ISO-8859-1 or
* ISO-8859-7 because JIS X 0212 includes them already.
*/
return 1;
}
/* if (charset == CHARSET_JISX0201_R) : already checked */
WRITE4(ESC, '(', 'J', (unsigned char)code)
STATE_SETG0(state, CHARSET_JISX0201_R)
NEXT(1, 4)
}
}
}
return 0;
}
#define HAVE_DECODER_INIT
/*
 * Put the decoder into its initial state: all flags cleared and G0, G1
 * and G2 designated as ASCII.  Always returns 0.
 */
DECODER_INIT(iso2022_jp_2)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
STATE_SETG2(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
/*
 * Reset the decoder: only the F_SHIFTED flag is cleared; the
 * designations are left as they are.  Always returns 0.
 */
DECODER_RESET(iso2022_jp_2)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
/*
 * Decode ISO-2022-JP-2 input to Unicode.
 *
 * The loop framing and the current charset come from ISO2022_LOOP_BEGIN /
 * ISO2022_GETCHARSET (defined in iso2022common.h, not visible here).
 * Double-byte charsets handled: JIS X 0208 (both designations),
 * JIS X 0212, KS X 1001 and GB 2312; single-byte: ASCII and JIS X 0201
 * Roman.
 *
 * Returns 0 at end of input, 2 for an unmapped double-byte sequence,
 * 1 for an invalid JIS X 0201 Roman byte, and MBERR_INTERNAL when the
 * designated charset is not handled by any branch below.
 */
DECODER(iso2022_jp_2)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
/* Both bytes must be present before the trail byte is read. */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_JISX0212) {
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_KSX1001) {
TRYMAP_DEC(ksx1001, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_GB2312) {
TRYMAP_DEC(gb2312, **outbuf, c, c2);
else return 2;
} else
return MBERR_INTERNAL;
NEXT(2, 1)
} else if (charset == CHARSET_ASCII) {
RESERVE_OUTBUF(1)
OUT1(c)
NEXT(1, 1)
} else if (charset == CHARSET_JISX0201_R) {
RESERVE_OUTBUF(1)
JISX0201_R_DECODE(c, **outbuf)
else
return 1;
NEXT(1, 1)
} else
return MBERR_INTERNAL;
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
/*
 * Codec registry for iso2022_jp_2.  Maps are imported from three mapping
 * sets: ja_JP (jisx0208, jisx0212, jisxcommon), ko_KR (cp949, ksx1001)
 * and zh_CN (gbcommon, gb2312).
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(iso2022_jp_2)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
MAPOPEN(ko_KR)
IMPORTMAP_ENC(cp949)
IMPORTMAP_DEC(ksx1001)
MAPCLOSE()
MAPOPEN(zh_CN)
IMPORTMAP_ENC(gbcommon)
IMPORTMAP_DEC(gb2312)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_jp_2)

View File

@ -1,211 +0,0 @@
/*
* _iso2022_jp_3.c: the ISO-2022-JP-3 codec (JIS X 0213)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_jp_3.c,v 1.7 2003/12/31 05:46:55 perky Exp $
*/
#define USING_BINARY_PAIR_SEARCH
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_JISX0208, CHARSET_JISX0213_1, CHARSET_JISX0213_2
#define ISO2022_NO_SHIFT
#define ISO2022_USE_JISX0208EXT
#include "codeccommon.h"
#include "iso2022common.h"
#include "map_jisx0213_pairs.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
ENCMAP(jisx0213_bmp)
DECMAP(jisx0213_1_bmp)
DECMAP(jisx0213_2_bmp)
ENCMAP(jisx0213_emp)
DECMAP(jisx0213_1_emp)
DECMAP(jisx0213_2_emp)
#define EMPBASE 0x20000
#define HAVE_ENCODER_INIT
/*
 * Put the encoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
ENCODER_INIT(iso2022_jp_3)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
/*
 * Finish an encoded stream: if G0 is not ASCII, emit ESC ( B and record
 * ASCII as the G0 designation.  Returns 0 on the visible paths; WRITE3
 * may return early (its definition is not visible here).
 */
ENCODER_RESET(iso2022_jp_3)
{
if (STATE_GETG0(state) != CHARSET_ASCII) {
WRITE3(ESC, '(', 'B')
STATE_SETG0(state, CHARSET_ASCII)
NEXT_OUT(3)
}
return 0;
}
/*
 * Encode Unicode input as ISO-2022-JP-3 (JIS X 0213).
 *
 * ASCII is written under the ESC ( B designation.  Everything else is
 * looked up in the JIS X 0213 BMP/EMP encode maps (falling back to the
 * JIS X 0208 part of jisxcommon) and written as two bytes under
 *   ESC $ ( O   JIS X 0213 plane 1   (code MSB unset)
 *   ESC $ ( P   JIS X 0213 plane 2   (code MSB set)
 *
 * Returns 0 when all input is consumed, 1 or `insize` when a character
 * cannot be encoded, and MBERR_TOOFEW when a possible two-code-point
 * sequence cannot be resolved yet because more input is needed and
 * MBENC_FLUSH is not set.  The WRITEn/NEXT/DECODE_SURROGATE macros may
 * also return early (definitions not visible here).
 */
ENCODER(iso2022_jp_3)
{
while (inleft > 0) {
unsigned char charset;
ucs4_t c = IN1;
DBCHAR code;
size_t insize;
if (c < 0x80) {
switch (STATE_GETG0(state)) {
case CHARSET_ASCII:
WRITE1(c)
NEXT(1, 1)
break;
default:
/* Redesignate ASCII and write the character in one go. */
WRITE4(ESC, '(', 'B', c)
STATE_SETG0(state, CHARSET_ASCII)
NEXT(1, 4)
break;
}
/* A newline also clears the shifted flag. */
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
if (c <= 0xffff) {
TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC: the result depends on the following code point, so
 * the (c, next) pair table has to be consulted. */
if (code == MULTIC) {
if (inleft < 2) {
/* No following unit available: with MBENC_FLUSH encode c on
 * its own (pair with 0), otherwise ask for more input. */
if (flags & MBENC_FLUSH) {
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
return MBERR_TOOFEW;
} else {
code = find_pairencmap((ucs2_t)c, IN2,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV) {
/* (c, next) is not a known pair: encode c alone. */
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
} else TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000)
return 1; /* avoid JIS X 0212 codes */
} else if (c == 0xff3c) /* F/W REVERSE SOLIDUS */
code = 0x2140;
else
return 1;
} else if (c >> 16 == EMPBASE >> 16) {
/* Plane 2 code point: the EMP map is keyed on the low 16 bits. */
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
} else
return insize;
charset = STATE_GETG0(state);
if (code & 0x8000) { /* MSB set: Plane 2 */
if (charset != CHARSET_JISX0213_2) {
WRITE4(ESC, '$', '(', 'P')
STATE_SETG0(state, CHARSET_JISX0213_2)
NEXT_OUT(4)
}
WRITE2((code >> 8) & 0x7f, code & 0x7f)
} else { /* MSB unset: Plane 1 */
if (charset != CHARSET_JISX0213_1) {
WRITE4(ESC, '$', '(', 'O')
STATE_SETG0(state, CHARSET_JISX0213_1)
NEXT_OUT(4)
}
WRITE2(code >> 8, code & 0xff)
}
NEXT(insize, 2)
}
return 0;
}
#define HAVE_DECODER_INIT
/*
 * Put the decoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
DECODER_INIT(iso2022_jp_3)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
/*
 * Reset the decoder: only the F_SHIFTED flag is cleared; the
 * designations are left as they are.  Always returns 0.
 */
DECODER_RESET(iso2022_jp_3)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
/*
 * Decode ISO-2022-JP-3 input to Unicode.
 *
 * The loop framing and the current charset come from ISO2022_LOOP_BEGIN /
 * ISO2022_GETCHARSET (defined in iso2022common.h, not visible here).
 * Plane 1 sequences are tried against jisx0208, jisx0213_1_bmp,
 * jisx0213_1_emp and finally the pair table (which yields two output
 * units); plane 2 against jisx0213_2_bmp and jisx0213_2_emp.
 *
 * Returns 0 at end of input, 2 for an unmapped double-byte sequence and
 * MBERR_INTERNAL when the designated charset is not handled by any branch
 * below (only JIS X 0213 plane 1/2 and ASCII are).
 */
DECODER(iso2022_jp_3)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ucs4_t code;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
/* Both bytes must be present before the trail byte is read. */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (charset == CHARSET_JISX0213_1) {
/* 0x2140 is special-cased to FULL-WIDTH REVERSE SOLIDUS. */
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
/* Result lies above the BMP; PUTUCS4 handles the output side,
 * so only the input is advanced here. */
PUTUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
/* One sequence decodes to two code units packed into `code`. */
WRITE2(code >> 16, code & 0xffff)
NEXT(2, 2)
continue;
} else return 2;
} else if (charset == CHARSET_JISX0213_2) {
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) {
PUTUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
} else return 2;
} else
return MBERR_INTERNAL;
NEXT(2, 1)
} else if (charset == CHARSET_ASCII) {
RESERVE_OUTBUF(1)
OUT1(c)
NEXT(1, 1)
} else
return MBERR_INTERNAL;
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
/*
 * Codec registry for iso2022_jp_3: imports the JIS X 0208 / JIS X 0212 /
 * jisxcommon maps plus the JIS X 0213 BMP and EMP encode/decode maps
 * inside a ja_JP MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(iso2022_jp_3)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
IMPORTMAP_ENC(jisx0213_bmp)
IMPORTMAP_DEC(jisx0213_1_bmp)
IMPORTMAP_DEC(jisx0213_2_bmp)
IMPORTMAP_ENC(jisx0213_emp)
IMPORTMAP_DEC(jisx0213_1_emp)
IMPORTMAP_DEC(jisx0213_2_emp)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_jp_3)

View File

@ -1,200 +0,0 @@
/*
* _iso2022_jp_ext.c: the ISO-2022-JP-EXT codec (RFC2237 + alpha)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_jp_ext.c,v 1.3 2003/12/31 05:46:55 perky Exp $
*/
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0201_K, \
CHARSET_JISX0208, CHARSET_JISX0208_O, CHARSET_JISX0212
#define ISO2022_NO_SHIFT
#define ISO2022_USE_JISX0208EXT
#include "codeccommon.h"
#include "iso2022common.h"
#include "alg_jisx0201.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
#define HAVE_ENCODER_INIT
/*
 * Put the encoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
ENCODER_INIT(iso2022_jp_ext)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
/*
 * Finish an encoded stream: if G0 is not ASCII, emit ESC ( B and record
 * ASCII as the G0 designation.  Returns 0 on the visible paths; the
 * RESERVE_OUTBUF/WRITE3 macros may return early (definitions not
 * visible here).
 */
ENCODER_RESET(iso2022_jp_ext)
{
if (STATE_GETG0(state) != CHARSET_ASCII) {
RESERVE_OUTBUF(3)
WRITE3(ESC, '(', 'B')
STATE_SETG0(state, CHARSET_ASCII)
NEXT_OUT(3)
}
return 0;
}
/*
 * Encode Unicode input as ISO-2022-JP-EXT.
 *
 * Same designation-switching scheme as the other ISO-2022-JP encoders in
 * this code base, with JIS X 0201 Katakana added as a G0 set:
 *   ESC ( B    ASCII            ESC ( J    JIS X 0201 Roman
 *   ESC ( I    JIS X 0201 Katakana
 *   ESC $ B    JIS X 0208       ESC $ ( D  JIS X 0212
 *
 * Returns 0 when all input has been consumed and 1 when a character has
 * no mapping.  The WRITEn/NEXT/UCS4INVALID macros may also return early;
 * their definitions are outside this file.
 */
ENCODER(iso2022_jp_ext)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
if (c < 0x80) {
/* 7-bit input: how it is written depends on the current G0 set. */
switch (STATE_GETG0(state)) {
case CHARSET_ASCII:
WRITE1((unsigned char)c)
NEXT(1, 1)
break;
case CHARSET_JISX0201_R:
/* Stay in JIS X 0201 Roman if it can represent c; otherwise the
 * else-block below (shared with `default:`) redesignates ASCII. */
JISX0201_R_ENCODE(c, code)
else { /* FALLTHROUGH (yay!) */
default:
WRITE3(ESC, '(', 'B')
NEXT_OUT(3)
STATE_SETG0(state, CHARSET_ASCII)
code = c;
}
WRITE1((unsigned char)code)
NEXT(1, 1)
break;
}
/* A newline also clears the shifted flag. */
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
}
else UCS4INVALID(c)
else {
unsigned char charset;
charset = STATE_GETG0(state);
/* While JIS X 0201 Roman is designated, prefer a single-byte Roman
 * encoding before trying the double-byte map. */
if (charset == CHARSET_JISX0201_R) {
code = DBCINV;
JISX0201_R_ENCODE(c, code)
if (code != DBCINV) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
}
TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000) { /* MSB set: JIS X 0212 */
if (charset != CHARSET_JISX0212) {
WRITE4(ESC, '$', '(', 'D')
STATE_SETG0(state, CHARSET_JISX0212)
NEXT_OUT(4)
}
WRITE2((code >> 8) & 0x7f, code & 0x7f)
} else { /* MSB unset: JIS X 0208 */
/* Also entered by goto from the U+FF3C special case below. */
jisx0208encode: if (charset != CHARSET_JISX0208) {
WRITE3(ESC, '$', 'B')
STATE_SETG0(state, CHARSET_JISX0208)
NEXT_OUT(3)
}
WRITE2(code >> 8, code & 0xff)
}
NEXT(1, 2)
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
goto jisx0208encode;
} else {
/* Last resort: JIS X 0201, Roman or Katakana half. */
JISX0201_ENCODE(c, code)
else
return 1;
if (code < 0x80) { /* JIS X 0201 Roman */
/* if (charset == CHARSET_JISX0201_R) : already checked */
WRITE4(ESC, '(', 'J', (unsigned char)code)
STATE_SETG0(state, CHARSET_JISX0201_R)
NEXT(1, 4)
} else { /* JIS X 0201 Katakana */
if (charset != CHARSET_JISX0201_K) {
WRITE3(ESC, '(', 'I')
STATE_SETG0(state, CHARSET_JISX0201_K)
NEXT_OUT(3)
}
/* Katakana codes are 0xa1-0xdf; emit them in 7-bit form. */
WRITE1(code - 0x80)
NEXT(1, 1)
}
}
}
}
return 0;
}
#define HAVE_DECODER_INIT
/*
 * Put the decoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
DECODER_INIT(iso2022_jp_ext)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
/*
 * Reset the decoder: only the F_SHIFTED flag is cleared; the
 * designations are left as they are.  Always returns 0.
 */
DECODER_RESET(iso2022_jp_ext)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
/*
 * Decode ISO-2022-JP-EXT input to Unicode.
 *
 * The loop framing and the current charset come from ISO2022_LOOP_BEGIN /
 * ISO2022_GETCHARSET (defined in iso2022common.h, not visible here).
 * Double-byte charsets handled: JIS X 0208 (both designations) and
 * JIS X 0212; single-byte: ASCII, JIS X 0201 Roman and JIS X 0201
 * Katakana.
 *
 * Returns 0 at end of input, 2 for an unmapped double-byte sequence,
 * 1 for an invalid JIS X 0201 byte, and MBERR_INTERNAL when the
 * designated charset is not handled by any branch below.
 */
DECODER(iso2022_jp_ext)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
/* Both bytes must be present before the trail byte is read. */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
} else if (charset == CHARSET_JISX0212) {
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
else return 2;
} else
return MBERR_INTERNAL;
NEXT(2, 1)
} else if (charset == CHARSET_ASCII) {
RESERVE_OUTBUF(1)
OUT1(c)
NEXT(1, 1)
} else if (charset == CHARSET_JISX0201_R) {
RESERVE_OUTBUF(1)
JISX0201_R_DECODE(c, **outbuf)
else
return 1;
NEXT(1, 1)
} else if (charset == CHARSET_JISX0201_K) {
RESERVE_OUTBUF(1)
/* Flip bit 7 so the byte matches the 0xa1-0xdf range the
 * Katakana decode macro tests for. */
JISX0201_K_DECODE(c ^ 0x80, **outbuf)
else
return 1;
NEXT(1, 1)
} else
return MBERR_INTERNAL;
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
/*
 * Codec registry for iso2022_jp_ext: imports the jisx0208 and jisx0212
 * decode maps and the jisxcommon encode map inside a ja_JP
 * MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(iso2022_jp_ext)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_jp_ext)

View File

@ -1,131 +0,0 @@
/*
* _iso2022_kr.c: the ISO-2022-KR codec (RFC1557)
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _iso2022_kr.c,v 1.3 2003/12/31 05:46:55 perky Exp $
*/
#define ISO2022_DESIGNATIONS \
CHARSET_ASCII, CHARSET_KSX1001
#include "codeccommon.h"
#include "iso2022common.h"
ENCMAP(cp949)
DECMAP(ksx1001)
#define HAVE_ENCODER_INIT
/*
 * Put the encoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
ENCODER_INIT(iso2022_kr)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_ENCODER_RESET
/*
 * Finish an encoded stream: if the encoder is in the shifted state,
 * write SI and clear F_SHIFTED.  Returns 0 on the visible paths;
 * RESERVE_OUTBUF may return early (its definition is not visible here).
 */
ENCODER_RESET(iso2022_kr)
{
if (STATE_GETFLAG(state, F_SHIFTED)) {
RESERVE_OUTBUF(1)
OUT1(SI)
NEXT_OUT(1)
STATE_CLEARFLAG(state, F_SHIFTED)
}
return 0;
}
/*
 * Encode Unicode input as ISO-2022-KR.
 *
 * ASCII is written unshifted (preceded by SI if currently shifted).
 * Any other character first designates KS X 1001 to G1 (ESC $ ) C, once),
 * shifts out with SO if needed, and is then written as two bytes from the
 * cp949 encode map.
 *
 * Returns 0 when all input has been consumed and 1 when the character is
 * missing from the map or only has a code with the MSB set.  Note that
 * the designation/SO bytes are written before the map lookup.  The
 * WRITEn/NEXT/UCS4INVALID macros may also return early.
 */
ENCODER(iso2022_kr)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
if (c < 0x80) {
if (STATE_GETFLAG(state, F_SHIFTED)) {
/* Shift back in before emitting the ASCII byte. */
WRITE2(SI, (unsigned char)c)
STATE_CLEARFLAG(state, F_SHIFTED)
NEXT(1, 2)
} else {
WRITE1((unsigned char)c)
NEXT(1, 1)
}
if (c == '\n')
STATE_CLEARFLAG(state, F_SHIFTED)
} else UCS4INVALID(c)
else {
if (STATE_GETG1(state) != CHARSET_KSX1001) {
WRITE4(ESC, '$', ')', 'C')
STATE_SETG1(state, CHARSET_KSX1001)
NEXT_OUT(4)
}
if (!STATE_GETFLAG(state, F_SHIFTED)) {
WRITE1(SO)
STATE_SETFLAG(state, F_SHIFTED)
NEXT_OUT(1)
}
TRYMAP_ENC(cp949, code, c) {
if (code & 0x8000) /* MSB set: CP949 */
return 1;
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
} else
return 1;
}
}
return 0;
}
#define HAVE_DECODER_INIT
/*
 * Put the decoder into its initial state: all flags cleared and both
 * G0 and G1 designated as ASCII.  Always returns 0.
 */
DECODER_INIT(iso2022_kr)
{
STATE_CLEARFLAGS(state)
STATE_SETG0(state, CHARSET_ASCII)
STATE_SETG1(state, CHARSET_ASCII)
return 0;
}
#define HAVE_DECODER_RESET
/*
 * Reset the decoder: only the F_SHIFTED flag is cleared; the
 * designations are left as they are.  Always returns 0.
 */
DECODER_RESET(iso2022_kr)
{
STATE_CLEARFLAG(state, F_SHIFTED)
return 0;
}
/*
 * Decode ISO-2022-KR input to Unicode.
 *
 * The loop framing and the current charset come from ISO2022_LOOP_BEGIN /
 * ISO2022_GETCHARSET (defined in iso2022common.h, not visible here).
 * Double-byte input is looked up in the ksx1001 decode map; anything else
 * is copied through as a single unit.
 *
 * Returns 0 at end of input, 1 when the trail byte has bit 7 set and 2
 * when the pair is not in the map.
 */
DECODER(iso2022_kr)
{
ISO2022_LOOP_BEGIN
unsigned char charset, c2;
ISO2022_GETCHARSET(charset, c)
if (charset & CHARSET_DOUBLEBYTE) {
/* all double byte character sets are in KS X 1001 here */
RESERVE_INBUF(2)
RESERVE_OUTBUF(1)
c2 = IN2;
if (c2 >= 0x80)
return 1;
TRYMAP_DEC(ksx1001, **outbuf, c, c2) {
NEXT(2, 1)
} else
return 2;
} else {
RESERVE_OUTBUF(1)
OUT1(c);
NEXT(1, 1)
}
ISO2022_LOOP_END
return 0;
}
#include "codecentry.h"
/*
 * Codec registry for iso2022_kr: imports the ksx1001 decode map and the
 * cp949 encode map inside a ko_KR MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(iso2022_kr)
MAPOPEN(ko_KR)
IMPORTMAP_DEC(ksx1001)
IMPORTMAP_ENC(cp949)
MAPCLOSE()
END_CODEC_REGISTRY(iso2022_kr)

View File

@ -1,223 +0,0 @@
/*
* _johab.c: the Johab codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _johab.c,v 1.3 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(cp949)
DECMAP(ksx1001)
/*
 * Encoder-side tables.  The three u2johabidx_* arrays translate the
 * initial / medial / final component index of a precomposed Hangul
 * syllable (derived from U+AC00..U+D7A3 in the encoder) into the 5-bit
 * field value used in the Johab code.  Only the leading entries are
 * initialised; the remainder of each 32-element array is zero.
 */
static const unsigned char u2johabidx_choseong[32] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14,
};
static const unsigned char u2johabidx_jungseong[32] = {
0x03, 0x04, 0x05, 0x06, 0x07,
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x1a, 0x1b, 0x1c, 0x1d,
};
static const unsigned char u2johabidx_jongseong[32] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
/* Johab codes for U+3131..U+3163, indexed by (c - 0x3131) in the encoder. */
static const DBCHAR u2johabjamo[] = {
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
0x8741, 0x8761, 0x8781, 0x87a1,
};
/*
 * Encode Unicode input as Johab.
 *
 *  - c < 0x80 is written as a single byte.
 *  - U+AC00..U+D7A3 (Hangul syllables) are composed arithmetically from
 *    the three u2johabidx_* tables.
 *  - U+3131..U+3163 come from the u2johabjamo table.
 *  - Everything else goes through the cp949 encode map and, if the
 *    resulting row/column are in the accepted ranges, is transformed
 *    into the Johab two-byte form.
 *
 * Returns 0 when all input is consumed and 1 when a character cannot be
 * encoded.  WRITE1/RESERVE_OUTBUF/UCS4INVALID may also return early.
 */
ENCODER(johab)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
if (c >= 0xac00 && c <= 0xd7a3) {
/* Syllable index = (initial * 21 + medial) * 28 + final, so
 * 588 = 21 * 28 separates the initial consonant. */
c -= 0xac00;
code = 0x8000 |
(u2johabidx_choseong[c / 588] << 10) |
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
u2johabidx_jongseong[c % 28];
} else if (c >= 0x3131 && c <= 0x3163)
code = u2johabjamo[c - 0x3131];
else TRYMAP_ENC(cp949, code, c) {
unsigned char c1, c2, t2;
unsigned short t1;
assert((code & 0x8000) == 0);
c1 = code >> 8;
c2 = code & 0xff;
/* Only rows 0x21-0x2c and 0x4a-0x7d with a column in 0x21-0x7e
 * are converted; everything else is rejected. */
if (((c1 >= 0x21 && c1 <= 0x2c) || (c1 >= 0x4a && c1 <= 0x7d))
&& (c2 >= 0x21 && c2 <= 0x7e)) {
/* Two source rows share one Johab lead byte; the low bit of t1
 * selects the upper half (offset 0x5e) of the trail range. */
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUT1(t1 >> 1)
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
NEXT(1, 2)
continue;
} else
return 1;
} else
return 1;
/* Hangul syllable / jamo path: write the 16-bit code big-endian. */
OUT1(code >> 8)
OUT2(code & 0xff)
NEXT(1, 2)
}
return 0;
}
/*
 * Decoder-side tables, indexed by the 5-bit initial / medial / final
 * fields extracted from a Johab Hangul code.
 *   NONE  marks a field value the decoder rejects.
 *   FILL  marks the field value the decoder treats as "component absent".
 */
#define FILL 0xfd
#define NONE 0xff
/* johabidx_*: field value -> component index used to build U+AC00+n. */
static const unsigned char johabidx_choseong[32] = {
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabidx_jungseong[32] = {
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
};
static const unsigned char johabidx_jongseong[32] = {
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
};
/* johabjamo_*: field value -> low byte that the decoder ORs with 0x3100
 * when only a single component is present. */
static const unsigned char johabjamo_choseong[32] = {
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
static const unsigned char johabjamo_jungseong[32] = {
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
};
static const unsigned char johabjamo_jongseong[32] = {
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
};
/*
 * Decode Johab input to Unicode.
 *
 *  - Bytes below 0x80 are copied through.
 *  - Lead bytes below 0xd8 are Hangul: the 16-bit code is split into
 *    three 5-bit fields that are translated through the johabidx_* /
 *    johabjamo_* tables.
 *  - Other lead bytes are converted back to a KS X 1001 row/column pair
 *    and looked up in the ksx1001 decode map.
 *
 * Returns 0 at end of input and 2 for an invalid or unmapped two-byte
 * sequence.  RESERVE_INBUF/RESERVE_OUTBUF may also return early
 * (definitions not visible here).
 */
DECODER(johab)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
c2 = IN2;
if (c < 0xd8) {
/* johab hangul */
unsigned char c_cho, c_jung, c_jong;
unsigned char i_cho, i_jung, i_jong;
/* Bit layout of the 16-bit code: 1 | initial(5) | medial(5) | final(5). */
c_cho = (c >> 2) & 0x1f;
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
c_jong = c2 & 0x1f;
i_cho = johabidx_choseong[c_cho];
i_jung = johabidx_jungseong[c_jung];
i_jong = johabidx_jongseong[c_jong];
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
return 2;
/* we don't use U+1100 hangul jamo yet. */
/* All fill -> U+3000; exactly one component -> 0x3100 | table
 * value; a lone pair without the initial, or an initial plus
 * final without a medial, is rejected. */
if (i_cho == FILL) {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3000)
else
OUT1(0x3100 | johabjamo_jongseong[c_jong])
} else {
if (i_jong == FILL)
OUT1(0x3100 | johabjamo_jungseong[c_jung])
else
return 2;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3100 | johabjamo_choseong[c_cho])
else
return 2;
} else
/* Full syllable: inverse of the encoder's arithmetic. */
OUT1(0xac00 +
i_cho * 588 +
i_jung * 28 +
(i_jong == FILL ? 0 : i_jong))
}
NEXT(2, 1)
} else {
/* KS X 1001 except hangul jamos and syllables */
if (c == 0xdf || c > 0xf9 ||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
(c2 & 0x7f) == 0x7f ||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
return 2;
else {
unsigned char t1, t2;
/* Undo the encoder's row pairing: each lead byte covers two
 * rows, the trail byte's half selects which one. */
t1 = (c < 0xe0 ? 2 * (c - 0xd9) : 2 * c - 0x197);
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
else return 2;
NEXT(2, 1)
}
}
}
return 0;
}
#undef NONE
#undef FILL
#include "codecentry.h"
/*
 * Codec registry for johab: imports the ksx1001 decode map and the cp949
 * encode map inside a ko_KR MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(johab)
MAPOPEN(ko_KR)
IMPORTMAP_DEC(ksx1001)
IMPORTMAP_ENC(cp949)
MAPCLOSE()
END_CODEC_REGISTRY(johab)

View File

@ -1,121 +0,0 @@
/*
* _shift_jis.c: the SHIFT-JIS codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _shift_jis.c,v 1.4 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
#include "alg_jisx0201.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
/*
 * Encode Unicode input as Shift_JIS.
 *
 * Single-byte output covers the JIS X 0201 Roman range (strictly, under
 * STRICT_BUILD; otherwise plain ASCII plus U+00A5 -> 0x5c and
 * U+203E -> 0x7e) and JIS X 0201 Katakana.  Everything else is looked up
 * in jisxcommon and the JIS X 0208 row/cell is folded into the two-byte
 * Shift_JIS form.
 *
 * Returns 0 when all input is consumed and 1 when a character cannot be
 * encoded (including characters that only exist in JIS X 0212).
 * RESERVE_OUTBUF/UCS4INVALID may also return early.
 */
ENCODER(shift_jis)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
unsigned char c1, c2;
#ifdef STRICT_BUILD
JISX0201_R_ENCODE(c, code)
#else
if (c < 0x80) code = c;
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
#endif
else JISX0201_K_ENCODE(c, code)
else UCS4INVALID(c)
else code = NOCHAR;
/* Single-byte result: 7-bit range or half-width katakana. */
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
RESERVE_OUTBUF(1)
OUT1((unsigned char)code)
NEXT(1, 1)
continue;
}
RESERVE_OUTBUF(2)
if (code == NOCHAR) {
TRYMAP_ENC(jisxcommon, code, c);
#ifndef STRICT_BUILD
else if (c == 0xff3c)
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
#endif
else
return 1;
if (code & 0x8000) /* MSB set: JIS X 0212 */
return 1;
}
/* Fold JIS row/cell (0x21-based) into Shift_JIS: two rows share one
 * lead byte, odd rows use the upper half of the trail range, and both
 * bytes skip the gaps in their ranges. */
c1 = code >> 8;
c2 = code & 0xff;
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
c1 = (c1 - 0x21) >> 1;
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
NEXT(1, 2)
}
return 0;
}
/*
 * Decode Shift_JIS input to Unicode.
 *
 * Single bytes are decoded as JIS X 0201 Roman (STRICT_BUILD) or plain
 * ASCII, then as JIS X 0201 Katakana.  Lead bytes 0x81-0x9f and 0xe0-0xea
 * start a two-byte sequence that is unfolded to a JIS X 0208 row/cell and
 * looked up in the jisx0208 decode map.
 *
 * Returns 0 at end of input and 2 for an invalid lead byte, an invalid
 * trail byte or an unmapped pair.  RESERVE_INBUF/RESERVE_OUTBUF may also
 * return early.
 */
DECODER(shift_jis)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
#ifdef STRICT_BUILD
JISX0201_R_DECODE(c, **outbuf)
#else
if (c < 0x80) **outbuf = c;
#endif
else JISX0201_K_DECODE(c, **outbuf)
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
unsigned char c1, c2;
RESERVE_INBUF(2)
c2 = IN2;
/* Valid trail bytes are 0x40-0x7e and 0x80-0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Inverse of the encoder's folding: recover the 0x21-based
 * JIS X 0208 row (c1) and cell (c2). */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
#ifndef STRICT_BUILD
if (c1 == 0x21 && c2 == 0x40) {
/* FULL-WIDTH REVERSE SOLIDUS */
OUT1(0xff3c)
NEXT(2, 1)
continue;
}
#endif
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
NEXT(2, 1)
continue;
} else
return 2;
} else
return 2;
NEXT(1, 1) /* JIS X 0201 */
}
return 0;
}
#include "codecentry.h"
/*
 * Codec registry for shift_jis: imports the jisx0208 decode map and the
 * jisxcommon encode map inside a ja_JP MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(shift_jis)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(shift_jis)

View File

@ -1,169 +0,0 @@
/*
* _shift_jisx0213.c: the SHIFT-JISX0213 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _shift_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#define USING_BINARY_PAIR_SEARCH
#include "codeccommon.h"
#include "alg_jisx0201.h"
#include "map_jisx0213_pairs.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
ENCMAP(jisx0213_bmp)
DECMAP(jisx0213_1_bmp)
DECMAP(jisx0213_2_bmp)
ENCMAP(jisx0213_emp)
DECMAP(jisx0213_1_emp)
DECMAP(jisx0213_2_emp)
#define EMPBASE 0x20000
/*
 * Encode Unicode input as Shift_JISX0213.
 *
 * JIS X 0201 (Roman and Katakana) characters are written as single
 * bytes.  Everything else is looked up in the JIS X 0213 BMP/EMP encode
 * maps (falling back to the JIS X 0208 part of jisxcommon) and the
 * resulting plane/row/cell is folded into a two-byte sequence.
 *
 * Returns 0 when all input is consumed, 1 or `insize` when a character
 * cannot be encoded, and MBERR_TOOFEW when a possible two-code-point
 * sequence cannot be resolved yet because more input is needed and
 * MBENC_FLUSH is not set.  The WRITE1/RESERVE_OUTBUF/DECODE_SURROGATE
 * macros may also return early (definitions not visible here).
 */
ENCODER(shift_jisx0213)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code = NOCHAR;
int c1, c2;
size_t insize;
JISX0201_ENCODE(c, code)
else DECODE_SURROGATE(c)
/* Single-byte result: 7-bit range or half-width katakana. */
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
WRITE1((unsigned char)code)
NEXT(1, 1)
continue;
}
RESERVE_OUTBUF(2)
insize = GET_INSIZE(c);
if (code == NOCHAR) {
if (c <= 0xffff) {
TRYMAP_ENC(jisx0213_bmp, code, c) {
/* MULTIC: the result depends on the following code point, so
 * the (c, next) pair table has to be consulted. */
if (code == MULTIC) {
if (inleft < 2) {
/* No following unit available: with MBENC_FLUSH encode c on
 * its own (pair with 0), otherwise ask for more input. */
if (flags & MBENC_FLUSH) {
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
return MBERR_TOOFEW;
} else {
code = find_pairencmap((ucs2_t)c, IN2,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV) {
/* (c, next) is not a known pair: encode c alone. */
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
} else TRYMAP_ENC(jisxcommon, code, c) {
if (code & 0x8000)
return 1; /* abandon JIS X 0212 codes */
} else return 1;
} else if (c >> 16 == EMPBASE >> 16) {
/* Plane 2 code point: the EMP map is keyed on the low 16 bits. */
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
} else
return insize;
}
c1 = code >> 8;
c2 = (code & 0xff) - 0x21;
/* Map the row to a linear index; plane 2 rows are remapped into the
 * lead-byte space above plane 1 in three ranges. */
if (c1 & 0x80) { /* Plane 2 */
if (c1 >= 0xee) c1 -= 0x87;
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
else c1 -= 0x43;
} else /* Plane 1 */
c1 -= 0x21;
/* Two rows share one lead byte; odd rows use the upper trail half. */
if (c1 & 1) c2 += 0x5e;
c1 >>= 1;
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
NEXT(insize, 2)
}
return 0;
}
/*
 * Decode Shift_JISX0213 input to Unicode.
 *
 * Single bytes are tried against JIS X 0201 first.  Lead bytes in
 * 0x81-0x9f / 0xe0-0xfc start a two-byte sequence that is unfolded into
 * a row/cell pair: a row index below 0x5e is plane 1 (looked up in
 * jisx0208, jisx0213_1_bmp, jisx0213_1_emp and the pair table, in that
 * order), anything else is plane 2 (jisx0213_2_bmp, jisx0213_2_emp).
 *
 * Returns 0 at end of input and 2 for an invalid lead byte, an invalid
 * trail byte or an unmapped pair.  The RESERVE_* / WRITE2 / PUTUCS4
 * macros may also return early (definitions not visible here).
 */
DECODER(shift_jisx0213)
{
    while (inleft > 0) {
        unsigned char c = IN1;

        RESERVE_OUTBUF(1)
        JISX0201_DECODE(c, **outbuf)
        else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
            unsigned char c1, c2;
            ucs4_t code;

            /* The trail byte must only be read after its presence has
             * been checked; the original fetched IN2 in the declaration,
             * i.e. before RESERVE_INBUF(2), unlike every other decoder. */
            RESERVE_INBUF(2)
            c2 = IN2;
            /* Valid trail bytes are 0x40-0x7e and 0x80-0xfc. */
            if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
                return 2;

            /* Unfold: c1 becomes the linear row index, c2 the 0x21-based
             * cell within that row. */
            c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
            c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
            c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
            c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;

            if (c1 < 0x5e) { /* Plane 1 */
                c1 += 0x21;
                TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
                    NEXT_OUT(1)
                } else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c1, c2) {
                    NEXT_OUT(1)
                } else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
                    PUTUCS4(EMPBASE | code)
                } else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
                    /* One sequence decodes to two code units. */
                    WRITE2(code >> 16, code & 0xffff)
                    NEXT_OUT(2)
                } else
                    return 2;
                NEXT_IN(2)
            } else { /* Plane 2 */
                /* Inverse of the encoder's three-range row remapping. */
                if (c1 >= 0x67) c1 += 0x07;
                else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
                else c1 -= 0x3d;

                TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c1, c2) {
                    NEXT_OUT(1)
                } else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
                    PUTUCS4(EMPBASE | code)
                } else
                    return 2;
                NEXT_IN(2)
            }
            continue;
        } else
            return 2;

        NEXT(1, 1) /* JIS X 0201 */
    }

    return 0;
}
#include "codecentry.h"
/*
 * Codec registry for shift_jisx0213: imports the jisx0208 / jisxcommon
 * maps plus the JIS X 0213 BMP and EMP encode/decode maps inside a ja_JP
 * MAPOPEN/MAPCLOSE pair.
 * NOTE(review): the registry macros come from codecentry.h (not visible).
 */
BEGIN_CODEC_REGISTRY(shift_jisx0213)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_ENC(jisxcommon)
IMPORTMAP_ENC(jisx0213_bmp)
IMPORTMAP_DEC(jisx0213_1_bmp)
IMPORTMAP_DEC(jisx0213_2_bmp)
IMPORTMAP_ENC(jisx0213_emp)
IMPORTMAP_DEC(jisx0213_1_emp)
IMPORTMAP_DEC(jisx0213_2_emp)
MAPCLOSE()
END_CODEC_REGISTRY(shift_jisx0213)

View File

@ -1,12 +0,0 @@
/*
* alg_iso8859_1.h: Encoder/Decoder macros for ISO8859-1
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: alg_iso8859_1.h,v 1.3 2003/12/31 05:46:55 perky Exp $
*/
/*
 * Both macros expand to an `if` statement without a trailing `else`, so
 * the caller can continue the chain with its own `else ...` to handle
 * the "not representable" case.
 *
 * ISO8859_1_ENCODE: assigns c to assi when c <= 0xff.
 */
#define ISO8859_1_ENCODE(c, assi) \
if ((c) <= 0xff) (assi) = (c);
/* ISO8859_1_DECODE: always assigns c to assi (the range test is
 * commented out and replaced by a constant-true condition). */
#define ISO8859_1_DECODE(c, assi) \
if (1/*(c) <= 0xff*/) (assi) = (c);

View File

@ -1,32 +0,0 @@
/*
* alg_iso8859_7.h: Encoder/Decoder macros for ISO8859-7
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: alg_iso8859_7.h,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
/*
 * 0x288f3bc9 and 0xbffffd77 are bit masks that indicate, per offset from
 * the start of a range, whether a mapping is available for each of the
 * two code point differences (0 and 0x2d0):
 *   - bit n of 0x288f3bc9 is set when byte 0xa0+n equals code point
 *     U+00A0+n (difference 0);
 *   - bit n of 0xbffffd77 is set when byte 0xb4+n maps to code point
 *     U+0384+n (difference 0x2d0); offsets of 0x20 and above are all
 *     mapped and are accepted without consulting the mask.
 *
 * Both macros expand to an if/else-if chain without a final `else`, so
 * the caller can append `else ...` to handle unmappable input.
 *
 * The single-bit operand is unsigned (1UL) because the shift count can
 * reach 31, and shifting a signed 1L that far is undefined where long is
 * 32 bits wide.
 */
#define ISO8859_7_ENCODE(c, assi) \
    if ((c) <= 0xa0) (assi) = (c); \
    else if ((c) < 0xc0 && (0x288f3bc9L & (1UL << ((c)-0xa0)))) \
        (assi) = (c); \
    else if ((c) >= 0x0384 && (c) <= 0x03ce && ((c) >= 0x03a4 ||\
             (0xbffffd77L & (1UL << ((c)-0x0384))))) \
        (assi) = (c) - 0x02d0; \
    else if ((c)>>1 == 0x2018>>1) (assi) = (c) - 0x1f77; \
    else if ((c) == 0x2015) (assi) = 0xaf;
#define ISO8859_7_DECODE(c, assi) \
    if ((c) < 0xa0) (assi) = (c); \
    else if ((c) < 0xc0 && (0x288f3bc9L & (1UL << ((c)-0xa0)))) \
        (assi) = (c); \
    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
             (0xbffffd77L & (1UL << ((c)-0xb4))))) \
        (assi) = 0x02d0 + (c); \
    else if ((c) == 0xa1) (assi) = 0x2018; \
    else if ((c) == 0xa2) (assi) = 0x2019; \
    else if ((c) == 0xaf) (assi) = 0x2015;

View File

@ -1,27 +1,26 @@
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2003/11/27 16:42:20 perky Exp $ */
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2004/06/29 05:42:08 perky Exp $ */
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
(assi) = (c); \
else if ((c) == 0x00a5) (assi) = 0x5c; \
else if ((c) == 0x203e) (assi) = 0x7e;
#define JISX0201_K_ENCODE(c, assi) \
if ((c) >= 0xff61 && (c) <= 0xff9f) \
(assi) = (c) - 0xfec0;
#define JISX0201_ENCODE(c, assi) \
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
#define JISX0201_R_DECODE(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
#define JISX0201_K_DECODE(c, assi) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
(assi) = 0xfec0 + (c);
#define JISX0201_DECODE(c, assi) \
JISX0201_R_DECODE(c, assi) \
else JISX0201_K_DECODE(c, assi)
/*
 * JIS X 0201 conversion macros.  Each expands to an if/else-if chain
 * without a final `else`, so the caller can append `else ...` to handle
 * input the macro does not cover.
 */
/* Roman half, Unicode -> byte: ASCII range except 0x5c and 0x7e, which
 * are produced from U+00A5 and U+203E instead. */
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
(assi) = (c); \
else if ((c) == 0x00a5) (assi) = 0x5c; \
else if ((c) == 0x203e) (assi) = 0x7e;
/* Katakana half, Unicode -> byte: U+FF61..U+FF9F -> 0xa1..0xdf. */
#define JISX0201_K_ENCODE(c, assi) \
if ((c) >= 0xff61 && (c) <= 0xff9f) \
(assi) = (c) - 0xfec0;
/* Roman first, then Katakana. */
#define JISX0201_ENCODE(c, assi) \
JISX0201_R_ENCODE(c, assi) \
else JISX0201_K_ENCODE(c, assi)
/* Roman half, byte -> Unicode: identity except 0x5c -> U+00A5 and
 * 0x7e -> U+203E; bytes above 0x7f are not handled. */
#define JISX0201_R_DECODE(c, assi) \
if ((c) < 0x5c) (assi) = (c); \
else if ((c) == 0x5c) (assi) = 0x00a5; \
else if ((c) < 0x7e) (assi) = (c); \
else if ((c) == 0x7e) (assi) = 0x203e; \
else if ((c) == 0x7f) (assi) = 0x7f;
/* Katakana half, byte -> Unicode: 0xa1..0xdf -> U+FF61..U+FF9F. */
#define JISX0201_K_DECODE(c, assi) \
if ((c) >= 0xa1 && (c) <= 0xdf) \
(assi) = 0xfec0 + (c);
/* Roman first, then Katakana. */
#define JISX0201_DECODE(c, assi) \
JISX0201_R_DECODE(c, assi) \
else JISX0201_K_DECODE(c, assi)

View File

@ -0,0 +1,399 @@
/*
* cjkcodecs.h: common header for cjkcodecs
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: cjkcodecs.h,v 1.5 2004/07/06 17:05:24 perky Exp $
*/
#ifndef _CJKCODECS_H_
#define _CJKCODECS_H_
#include "Python.h"
#include "multibytecodec.h"
/* Sentinel values used by the mapping tables and lookup helpers.
 *   UNIINV - decode-table entry with no mapping (tested by _TRYMAP_DEC).
 *   NOCHAR - encode-table entry with no mapping (tested by _TRYMAP_ENC).
 *   MULTIC - not used in this header; presumably marks entries that need
 *            multi-character handling -- TODO confirm against the codecs.
 *   DBCINV - returned by find_pairencmap() when no pair matches. */
#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER
#define NOCHAR 0xFFFF
#define MULTIC 0xFFFE
#define DBCINV 0xFFFD
/* shorter macros to save source size of mapping tables */
#define U UNIINV
#define N NOCHAR
#define M MULTIC
#define D DBCINV
/* One row of a decode table.  `map` is indexed with (value - bottom) and is
 * only consulted for values in [bottom, top]; a NULL `map` means the row is
 * empty (see _TRYMAP_DEC). */
struct dbcs_index {
const ucs2_t *map;
unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
/* Same layout as dbcs_index, but the entries are ucs4_t. */
struct widedbcs_index {
const ucs4_t *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
/* One row of an encode table; entries are DBCHAR codes (see _TRYMAP_ENC). */
struct unim_index {
const DBCHAR *map;
unsigned char bottom, top;
};
typedef struct unim_index encode_map;
/* Encode-table row whose entries are raw bytes.  Presumably the layout read
 * by _TRYMAP_ENC_MPLANE (three bytes per entry) -- TODO confirm. */
struct unim_index_bytebased {
const unsigned char *map;
unsigned char bottom, top;
};
/* A named charset with its encode and decode tables; either table pointer
 * may be NULL (see the MAPPING_ENCONLY / MAPPING_DECONLY macros). */
struct dbcs_map {
const char *charset;
const struct unim_index *encmap;
const struct dbcs_index *decmap;
};
/* Entry of a pair-encoding table searched by find_pairencmap(): `uniseq`
 * packs two 16-bit values as (body << 16 | modifier). */
struct pair_encodemap {
ucs4_t uniseq;
DBCHAR code;
};
/* Forward declarations of the per-module tables.  The including file is
 * expected to define them with BEGIN_CODECS_LIST / BEGIN_MAPPINGS_LIST;
 * getcodec() and register_maps() below walk them up to an entry whose name
 * is the empty string. */
static const MultibyteCodec codec_list[];
static const struct dbcs_map mapping_list[];
/* Function-header macros.  Each one expands to the `static int` signature
 * of a codec callback named <encoding>_<role>; the function body follows the
 * macro invocation.  The parameter names (state, config, inbuf, inleft,
 * outbuf, outleft, flags) are relied upon by the helper macros in this
 * header. */
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)
#define ENCODER_INIT(encoding) \
static int encoding##_encode_init( \
MultibyteCodec_State *state, const void *config)
#define ENCODER(encoding) \
static int encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
const Py_UNICODE **inbuf, size_t inleft, \
unsigned char **outbuf, size_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static int encoding##_encode_reset( \
MultibyteCodec_State *state, const void *config, \
unsigned char **outbuf, size_t outleft)
#define DECODER_INIT(encoding) \
static int encoding##_decode_init( \
MultibyteCodec_State *state, const void *config)
#define DECODER(encoding) \
static int encoding##_decode( \
MultibyteCodec_State *state, const void *config, \
const unsigned char **inbuf, size_t inleft, \
Py_UNICODE **outbuf, size_t outleft)
#define DECODER_RESET(encoding) \
static int encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
/* UCS4INVALID(code): when Py_UNICODE is 4 bytes wide, return 1 from the
 * enclosing function if `code` is above 0xFFFF.  On narrow builds it is a
 * no-op.  Both variants expand to an `if`, so an `else` may follow. */
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
/* Advance the input / output cursor by the given number of units and shrink
 * the matching remaining-length counter.  These expand to two statements,
 * so they must not be used as the unbraced body of an `if`. */
#define NEXT_IN(i) \
(*inbuf) += (i); \
(inleft) -= (i);
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
#define NEXT(i, o) \
NEXT_IN(i) NEXT_OUT(o)
/* Return MBERR_TOOFEW / MBERR_TOOSMALL from the enclosing function when
 * fewer than `n` input / output units remain. */
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
return MBERR_TOOFEW;
#define REQUIRE_OUTBUF(n) \
if (outleft < (n)) \
return MBERR_TOOSMALL;
/* IN1..IN4 read the next input units without consuming them. */
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
/* OUT1..OUT4 store into one output slot WITHOUT checking the remaining
 * space; the caller must have reserved it (e.g. with REQUIRE_OUTBUF). */
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
/* WRITE1..WRITE4 first check that enough output space is left (returning
 * MBERR_TOOSMALL otherwise) and then store 1-4 units.  They do not advance
 * the output cursor; follow up with NEXT_OUT / NEXT. */
#define WRITE1(c1) \
REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
/* WRITEUCS4(c): emit code point `c` and advance the output cursor.
 * With a 2-byte Py_UNICODE the value is split into a surrogate pair
 * (0xd800 + high 10 bits, 0xdc00 + low 10 bits of c - 0x10000), so `c` is
 * expected to be >= 0x10000 there; otherwise it is stored as one unit.
 * Returns MBERR_TOOSMALL from the enclosing function if space is short. */
#if Py_UNICODE_SIZE == 2
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
NEXT_OUT(2)
#else
# define WRITEUCS4(c) \
REQUIRE_OUTBUF(1) \
**outbuf = (Py_UNICODE)(c); \
NEXT_OUT(1)
#endif
/* Table lookup helpers.  Each macro expands to an `if (...)` header with no
 * body: the statement written after the invocation runs only when the
 * lookup succeeded, and `assi` has then been assigned the table entry.
 *
 * _TRYMAP_ENC / _TRYMAP_DEC succeed when row `m` has a map, `val` lies in
 * [bottom, top] and the entry is not the NOCHAR / UNIINV sentinel. */
#define _TRYMAP_ENC(m, assi, val) \
if ((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != NOCHAR)
/* Encode lookup: the row is selected by uni >> 8, the entry by uni & 0xff. */
#define TRYMAP_ENC(charset, assi, uni) \
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
#define _TRYMAP_DEC(m, assi, val) \
if ((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != UNIINV)
/* Decode lookup: the row is selected by c1, the entry by c2. */
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
/* Multi-plane encode lookup: every entry occupies three consecutive map
 * elements (plane, high, low); a plane value of 0 means "no mapping". */
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
if ((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && \
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
_TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
assplane, asshi, asslo, (uni) & 0xff)
/* Multi-plane decode lookup: the decode table is indexed by plane first. */
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
/* DECODE_SURROGATE(c): on builds with a 2-byte Py_UNICODE, if `c` is a high
 * surrogate, require a second input unit (returning MBERR_TOOFEW otherwise)
 * and, when that unit is a low surrogate, replace `c` with the combined
 * code point.  A high surrogate that is not followed by a low surrogate
 * leaves `c` unchanged.  `c` must be a plain lvalue: it is assigned to and
 * is not parenthesised inside the macro.
 * GET_INSIZE(c): number of input units `c` occupied (2 if above 0xffff).
 * On wide builds both are trivial. */
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
REQUIRE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
((ucs4_t)(IN2) - 0xdc00); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
/* Builders for the mapping_list[] table.  Each MAPPING_* macro adds one
 * struct dbcs_map entry named after `enc`, pointing at enc##_encmap and/or
 * enc##_decmap; END_MAPPINGS_LIST appends the empty-name terminator that
 * register_maps() stops at. */
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
#define END_MAPPINGS_LIST {"", NULL, NULL} };
/* Builders for the codec_list[] table.  Each CODEC_* macro adds one
 * MultibyteCodec initializer: the encoding name, then two slots (NULL and
 * either NULL or enc##_codec_init), then six callbacks in the order
 * encode, encode_init, encode_reset, decode, decode_init, decode_reset.
 * Stateless codecs leave the init/reset callbacks NULL.  The exact field
 * names live in multibytecodec.h.  END_CODECS_LIST appends the empty-name
 * terminator that getcodec() stops at. */
#define BEGIN_CODECS_LIST static const MultibyteCodec codec_list[] = {
#define _STATEFUL_METHODS(enc) \
enc##_encode, \
enc##_encode_init, \
enc##_encode_reset, \
enc##_decode, \
enc##_decode_init, \
enc##_decode_reset,
#define _STATELESS_METHODS(enc) \
enc##_encode, NULL, NULL, \
enc##_decode, NULL, NULL,
#define CODEC_STATEFUL(enc) { \
#enc, NULL, NULL, \
_STATEFUL_METHODS(enc) \
},
#define CODEC_STATELESS(enc) { \
#enc, NULL, NULL, \
_STATELESS_METHODS(enc) \
},
#define CODEC_STATELESS_WINIT(enc) { \
#enc, NULL, \
enc##_codec_init, \
_STATELESS_METHODS(enc) \
},
#define END_CODECS_LIST {"", NULL,} };
/*
 * Return the `__create_codec` callable of the _multibytecodec module.
 *
 * The attribute is looked up on first use and remembered in a function-level
 * static, so later calls return the cached object directly.  Returns NULL
 * (with the exception set by the failing import or attribute lookup) when it
 * cannot be obtained; nothing is cached in that case and the next call
 * retries.
 */
static PyObject *
getmultibytecodec(void)
{
    static PyObject *cached_factory = NULL;
    PyObject *mbc_module;

    if (cached_factory != NULL)
        return cached_factory;

    mbc_module = PyImport_ImportModule("_multibytecodec");
    if (mbc_module != NULL) {
        cached_factory = PyObject_GetAttrString(mbc_module,
                                                "__create_codec");
        Py_DECREF(mbc_module);
    }
    return cached_factory;
}
/*
 * getcodec(encoding) -> codec object
 *
 * Module-level function: find the entry of codec_list[] whose name equals
 * `encoding`, wrap a pointer to it in a CObject and pass that to
 * _multibytecodec.__create_codec, returning whatever it returns.
 *
 * Raises TypeError if `encoding` is not a string and LookupError if no
 * entry of codec_list[] carries that name.
 */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
    const MultibyteCodec *entry;
    PyObject *factory, *handle, *result;
    const char *wanted;
#ifdef NO_METH_O
    /* Without METH_O the single argument arrives packed in a tuple. */
    PyObject *args = encoding;

    if (!PyArg_ParseTuple(args, "O:getcodec", &encoding))
        return NULL;
#endif

    if (!PyString_Check(encoding)) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding name must be a string.");
        return NULL;
    }

    factory = getmultibytecodec();
    if (factory == NULL)
        return NULL;

    /* Linear scan; the table ends with an entry whose name is "". */
    wanted = PyString_AS_STRING(encoding);
    entry = codec_list;
    while (entry->encoding[0] != '\0' &&
           strcmp(entry->encoding, wanted) != 0)
        entry++;

    if (entry->encoding[0] == '\0') {
        PyErr_SetString(PyExc_LookupError,
                        "no such codec is supported.");
        return NULL;
    }

    handle = PyCObject_FromVoidPtr((void *)entry, NULL);
    if (handle == NULL)
        return NULL;

#if PY_VERSION_HEX >= 0x02020000
    result = PyObject_CallFunctionObjArgs(factory, handle, NULL);
#else
    result = PyObject_CallFunction(factory, "O", handle);
#endif
    Py_DECREF(handle);

    return result;
}
/* Method table handed to Py_InitModule() by I_AM_A_MODULE_FOR.  It exposes
 * only getcodec(); the calling convention is METH_O unless NO_METH_O is
 * defined, in which case getcodec() unpacks its argument tuple itself. */
static struct PyMethodDef __methods[] = {
#ifndef NO_METH_O
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
#else
{"getcodec", (PyCFunction)getcodec, METH_VARARGS, ""},
#endif
{NULL, NULL},
};
/*
 * Publish every entry of mapping_list[] on `module` as an attribute named
 * "__map_<charset>" whose value is a CObject wrapping the entry, so that
 * other codec modules can pick the tables up with importmap().
 *
 * Returns 0 on success and -1 (with an exception set) on failure.
 */
static int
register_maps(PyObject *module)
{
    const struct dbcs_map *h;

    for (h = mapping_list; h->charset[0] != '\0'; h++) {
        char mhname[256] = "__map_";
        const size_t prefixlen = sizeof("__map_") - 1;
        PyObject *cobj;

        /* Never let an over-long charset name overrun mhname. */
        if (strlen(h->charset) >= sizeof(mhname) - prefixlen) {
            PyErr_SetString(PyExc_ValueError,
                            "charset name is too long.");
            return -1;
        }
        strcpy(mhname + prefixlen, h->charset);

        cobj = PyCObject_FromVoidPtr((void *)h, NULL);
        if (cobj == NULL)
            return -1;

        /* PyModule_AddObject() takes over the reference only when it
         * succeeds; on failure it is still ours to release. */
        if (PyModule_AddObject(module, mhname, cobj) == -1) {
            Py_DECREF(cobj);
            return -1;
        }
    }
    return 0;
}
#ifdef USING_BINARY_PAIR_SEARCH
/*
 * Binary-search `haystack` (haystacksize entries; the search assumes they
 * are sorted by ascending `uniseq`) for the pair (body, modifier), packed
 * as body << 16 | modifier.
 *
 * Returns the matching entry's code, or DBCINV when the pair is absent or
 * the table is empty.
 */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
                const struct pair_encodemap *haystack, int haystacksize)
{
    int pos, min, max;
    /* Widen before shifting: a 16-bit `body` would otherwise be promoted to
     * a signed int, and shifting a value >= 0x8000 left by 16 overflows. */
    ucs4_t value = ((ucs4_t)body << 16) | modifier;

    /* Nothing to search; also keeps haystack[0] from being read below. */
    if (haystacksize <= 0)
        return DBCINV;

    min = 0;
    max = haystacksize;

    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
        if (value < haystack[pos].uniseq) {
            if (max == pos) break;
            else max = pos;
        }
        else if (value > haystack[pos].uniseq) {
            if (min == pos) break;
            else min = pos;
        }
        else
            break;

    if (value == haystack[pos].uniseq)
        return haystack[pos].code;
    else
        return DBCINV;
}
#endif
#ifdef USING_IMPORTED_MAPS
/* IMPORT_MAP(locale, charset, encmap, decmap): fetch the tables published
 * as "__map_<charset>" by module "_codecs_<locale>".  `encmap` / `decmap`
 * are addresses of table pointers to fill in; either may be NULL to skip
 * that table.  Evaluates to importmap()'s result (0 or -1). */
#define IMPORT_MAP(locale, charset, encmap, decmap) \
importmap("_codecs_" #locale, "__map_" #charset, \
(const void**)encmap, (const void**)decmap)
/*
 * Import module `modname`, read its attribute `symbol` (a CObject wrapping
 * a struct dbcs_map) and copy the table pointers it holds into *encmap and
 * *decmap.  Either output pointer may be NULL to skip that table.
 *
 * Returns 0 on success and -1 with an exception set on failure (import
 * error, missing attribute, or an attribute that is not a CObject).
 */
static int
importmap(const char *modname, const char *symbol,
          const void **encmap, const void **decmap)
{
    PyObject *o = NULL, *mod;
    struct dbcs_map *map;

    mod = PyImport_ImportModule((char *)modname);
    if (mod == NULL)
        return -1;

    o = PyObject_GetAttrString(mod, (char*)symbol);
    if (o == NULL)
        goto errorexit;
    if (!PyCObject_Check(o)) {
        PyErr_SetString(PyExc_ValueError,
                        "map data must be a CObject.");
        goto errorexit;
    }

    map = PyCObject_AsVoidPtr(o);
    if (encmap != NULL)
        *encmap = map->encmap;
    if (decmap != NULL)
        *decmap = map->decmap;

    Py_DECREF(o);
    Py_DECREF(mod);
    return 0;

errorexit:
    /* `o` is a new reference whenever the attribute lookup succeeded, so it
     * must be released on the "not a CObject" path as well. */
    Py_XDECREF(o);
    Py_DECREF(mod);
    return -1;
}
#endif
/* I_AM_A_MODULE_FOR(loc): define the module init function
 * init_codecs_<loc>(), which creates the "_codecs_<loc>" module with the
 * getcodec() method table and publishes the mapping tables on it.
 * Py_InitModule() returns NULL on failure, so the tables are registered
 * only when a module object was actually created. */
#define I_AM_A_MODULE_FOR(loc) \
    void \
    init_codecs_##loc(void) \
    { \
        PyObject *m = Py_InitModule("_codecs_" #loc, __methods); \
        if (m != NULL) \
            (void)register_maps(m); \
    }
#endif

View File

@ -1,57 +0,0 @@
/*
* cjkcommon.h: Common Constants and Macros for CJK Character Sets
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: cjkcommon.h,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#ifndef _CJKCOMMON_H_
#define _CJKCOMMON_H_
/* Fixed-width helper types: ucs4_t holds a full code point, ucs2_t a 16-bit
 * unit and DBCHAR a double-byte character code.
 * NOTE(review): `#ifdef` only sees preprocessor macros.  Where uint32_t /
 * uint16_t are plain typedefs, the fallback branches are the ones compiled,
 * which assumes a 32-bit unsigned int and a 16-bit unsigned short. */
#ifdef uint32_t
typedef uint32_t ucs4_t;
#else
typedef unsigned int ucs4_t;
#endif
#ifdef uint16_t
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif
/* Sentinel values for the mapping tables.  UNIINV is the "no mapping" value
 * on the Unicode side; NOCHAR, MULTIC and DBCINV are reserved values on the
 * DBCHAR side.  Their precise roles are defined by the codecs that use
 * them -- confirm there before relying on a particular meaning. */
#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER
#define NOCHAR 0xFFFF
#define MULTIC 0xFFFE
#define DBCINV 0xFFFD
/* One row of a decode table: `map` holds 16-bit entries and `bottom` /
 * `top` bound the byte values the row covers. */
struct dbcs_index {
const ucs2_t *map;
unsigned char bottom, top;
};
typedef struct dbcs_index decode_map;
/* Same layout as dbcs_index with ucs4_t entries. */
struct widedbcs_index {
const ucs4_t *map;
unsigned char bottom, top;
};
typedef struct widedbcs_index widedecode_map;
/* One row of an encode table: `map` holds DBCHAR codes. */
struct unim_index {
const DBCHAR *map;
unsigned char bottom, top;
};
typedef struct unim_index encode_map;
/* A named charset together with its encode and decode tables. */
struct dbcs_map {
const char *charset;
const struct unim_index *encmap;
const struct dbcs_index *decmap;
};
/* Entry of a pair-encoding table: a packed 32-bit key and its code. */
struct pair_encodemap {
ucs4_t uniseq;
DBCHAR code;
};
#endif

View File

@ -1,144 +0,0 @@
/*
* codeccommon.h: Common Codec Routines
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: codeccommon.h,v 1.4 2003/12/31 05:46:55 perky Exp $
*/
#include "Python.h"
#include "multibytecodec.h"
/* STRICT_SUFX is a string-literal fragment appended to codec and module
 * names: "_strict" when STRICT_BUILD is defined, nothing otherwise. */
#ifdef STRICT_BUILD
#define STRICT_SUFX "_strict"
#else
#define STRICT_SUFX
#endif
/* Declare the file-local table pointers <encoding>encmap / <encoding>decmap
 * that the TRYMAP_ENC / TRYMAP_DEC macros index. */
#define ENCMAP(encoding) \
static const encode_map *encoding##encmap;
#define DECMAP(encoding) \
static const decode_map *encoding##decmap;
/* Function-header macros.  Each one expands to the `static int` signature
 * of a codec callback named <encoding>_<role>; the function body follows the
 * macro invocation.  The parameter names (state, inbuf, inleft, outbuf,
 * outleft, flags) are relied upon by the helper macros in this header. */
#define ENCODER_INIT(encoding) \
static int encoding##_encode_init( \
MultibyteCodec_State *state)
#define ENCODER(encoding) \
static int encoding##_encode( \
MultibyteCodec_State *state, \
const Py_UNICODE **inbuf, size_t inleft, \
unsigned char **outbuf, size_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static int encoding##_encode_reset( \
MultibyteCodec_State *state, \
unsigned char **outbuf, size_t outleft)
#define DECODER_INIT(encoding) \
static int encoding##_decode_init( \
MultibyteCodec_State *state)
#define DECODER(encoding) \
static int encoding##_decode( \
MultibyteCodec_State *state, \
const unsigned char **inbuf, size_t inleft, \
Py_UNICODE **outbuf, size_t outleft)
#define DECODER_RESET(encoding) \
static int encoding##_decode_reset( \
MultibyteCodec_State *state)
/* UCS4INVALID(code): when Py_UNICODE is 4 bytes wide, return 1 from the
 * enclosing function if `code` is above 0xFFFF.  On narrow builds it is a
 * no-op.  Both variants expand to an `if`, so an `else` may follow. */
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
/* Advance the input / output cursor by the given number of units and shrink
 * the matching remaining-length counter.  These expand to two statements,
 * so they must not be used as the unbraced body of an `if`. */
#define NEXT_IN(i) \
(*inbuf) += (i); \
(inleft) -= (i);
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o)
/* Return MBERR_TOOFEW / MBERR_TOOSMALL from the enclosing function when
 * fewer than `n` input / output units remain. */
#define RESERVE_INBUF(n) \
if (inleft < (n)) \
return MBERR_TOOFEW;
#define RESERVE_OUTBUF(n) \
if (outleft < (n)) \
return MBERR_TOOSMALL;
/* IN1..IN4 read the next input units without consuming them. */
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
/* OUT1..OUT4 store into one output slot WITHOUT checking the remaining
 * space; the caller must have reserved it (e.g. with RESERVE_OUTBUF). */
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
/* WRITE1..WRITE4 first check that enough output space is left (returning
 * MBERR_TOOSMALL otherwise) and then store 1-4 units.  They do not advance
 * the output cursor; follow up with NEXT_OUT / NEXT. */
#define WRITE1(c1) \
RESERVE_OUTBUF(1) \
(*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \
RESERVE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \
RESERVE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \
RESERVE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); \
(*outbuf)[3] = (c4);
/* PUTUCS4(c): emit code point `c` and advance the output cursor.
 * With a 2-byte Py_UNICODE the value is split into a surrogate pair
 * (0xd800 + high 10 bits, 0xdc00 + low 10 bits of c - 0x10000), so `c` is
 * expected to be >= 0x10000 there; otherwise it is stored as one unit.
 * Returns MBERR_TOOSMALL from the enclosing function if space is short. */
#if Py_UNICODE_SIZE == 2
# define PUTUCS4(c) \
RESERVE_OUTBUF(2) \
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
NEXT_OUT(2)
#else
# define PUTUCS4(c) \
RESERVE_OUTBUF(1) \
**outbuf = (Py_UNICODE)(c); \
NEXT_OUT(1)
#endif
/* Table lookup helpers.  Each macro expands to an `if (...)` header with no
 * body: the statement written after the invocation runs only when the
 * lookup succeeded, and `assi` has then been assigned the table entry.
 * A lookup succeeds when row `m` has a map, `val` lies in [bottom, top] and
 * the entry is not the NOCHAR (encode) / UNIINV (decode) sentinel. */
#define _TRYMAP_ENC(m, assi, val) \
if ((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != NOCHAR)
/* Encode lookup: the row is selected by uni >> 8, the entry by uni & 0xff. */
#define TRYMAP_ENC(charset, assi, uni) \
_TRYMAP_ENC(&charset##encmap[(uni) >> 8], assi, (uni) & 0xff)
#define _TRYMAP_DEC(m, assi, val) \
if ((m)->map != NULL && (val) >= (m)->bottom && \
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
(m)->bottom]) != UNIINV)
/* Decode lookup: the row is selected by c1, the entry by c2. */
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##decmap[c1], assi, c2)
/* DECODE_SURROGATE(c): on builds with a 2-byte Py_UNICODE, if `c` is a high
 * surrogate, require a second input unit (returning MBERR_TOOFEW otherwise)
 * and, when that unit is a low surrogate, replace `c` with the combined
 * code point.  A high surrogate that is not followed by a low surrogate
 * leaves `c` unchanged.  `c` must be a plain lvalue: it is assigned to and
 * is not parenthesised inside the macro.
 * GET_INSIZE(c): number of input units `c` occupied (2 if above 0xffff).
 * On wide builds both are trivial. */
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
RESERVE_INBUF(2) \
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
((ucs4_t)(IN2) - 0xdc00); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
/* Prototype of the pair-table search helper, declared only for codecs that
 * define USING_BINARY_PAIR_SEARCH.  Arguments: the two 16-bit values forming
 * the key, the table to search, and the number of entries in it. */
#ifdef USING_BINARY_PAIR_SEARCH
static DBCHAR find_pairencmap(ucs2_t , ucs2_t, struct pair_encodemap *, int);
#endif

View File

@ -1,177 +0,0 @@
/*
* codecentry.h: Common Codec Entry Routines
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: codecentry.h,v 1.5 2004/01/17 11:26:10 perky Exp $
*/
/* Optional-callback selectors.  Each *_FUNC(encoding) macro names the
 * codec's init/reset callback when the matching HAVE_* macro is defined by
 * the codec source, and expands to NULL otherwise.  They fill the optional
 * slots of the MultibyteCodec initializer in __BEGIN_CODEC_REGISTRY. */
#ifdef HAVE_ENCODER_INIT
#define ENCODER_INIT_FUNC(encoding) encoding##_encode_init
#else
#define ENCODER_INIT_FUNC(encoding) NULL
#endif
#ifdef HAVE_ENCODER_RESET
#define ENCODER_RESET_FUNC(encoding) encoding##_encode_reset
#else
#define ENCODER_RESET_FUNC(encoding) NULL
#endif
#ifdef HAVE_DECODER_INIT
#define DECODER_INIT_FUNC(encoding) encoding##_decode_init
#else
#define DECODER_INIT_FUNC(encoding) NULL
#endif
#ifdef HAVE_DECODER_RESET
#define DECODER_RESET_FUNC(encoding) encoding##_decode_reset
#else
#define DECODER_RESET_FUNC(encoding) NULL
#endif
/* BEGIN_CODEC_REGISTRY(encoding) opens the module init function of a codec
 * module.  It defines the static MultibyteCodec descriptor `__codec`, an
 * empty method table, and the first lines of init_codecs_<encoding>[_strict]
 * up to and including the Py_InitModule() call.  The function body is left
 * open: MAPOPEN/IMPORTMAP_x/MAPCLOSE groups may follow, and
 * END_CODEC_REGISTRY must close it.
 * NOTE(review): the result of Py_InitModule() is not checked here. */
#ifdef STRICT_BUILD
#define BEGIN_CODEC_REGISTRY(encoding) \
__BEGIN_CODEC_REGISTRY(encoding, init_codecs_##encoding##_strict)
#else
#define BEGIN_CODEC_REGISTRY(encoding) \
__BEGIN_CODEC_REGISTRY(encoding, init_codecs_##encoding)
#endif
#define __BEGIN_CODEC_REGISTRY(encoding, initname) \
static MultibyteCodec __codec = { \
#encoding STRICT_SUFX, \
encoding##_encode, \
ENCODER_INIT_FUNC(encoding), \
ENCODER_RESET_FUNC(encoding), \
encoding##_decode, \
DECODER_INIT_FUNC(encoding), \
DECODER_RESET_FUNC(encoding), \
}; \
\
static struct PyMethodDef __methods[] = { \
{NULL, NULL}, \
}; \
\
void \
initname(void) \
{ \
PyObject *codec; \
PyObject *m = NULL, *mod = NULL, *o = NULL; \
\
m = Py_InitModule("_codecs_" #encoding STRICT_SUFX, __methods);
/* Map import group, used between BEGIN_ and END_CODEC_REGISTRY:
 *
 *     MAPOPEN(locale)
 *         IMPORTMAP_ENCDEC(charset) / IMPORTMAP_ENC(...) / IMPORTMAP_DEC(...)
 *     MAPCLOSE()
 *
 * MAPOPEN imports "_codecs_mapdata_<locale>" into `mod` and opens an
 * `if (`; each IMPORTMAP_x contributes one `importmap(...) ||` term, and
 * MAPCLOSE finishes the condition with `0) goto errorexit;` before
 * releasing `mod`.  Any failing import therefore jumps to the errorexit
 * label supplied by END_CODEC_REGISTRY. */
#define MAPOPEN(locale) \
mod = PyImport_ImportModule("_codecs_mapdata_" #locale);\
if (mod == NULL) goto errorexit; \
if (
#define IMPORTMAP_ENCDEC(charset) \
importmap(mod, "__map_" #charset, &charset##encmap, \
&charset##decmap) ||
#define IMPORTMAP_ENC(charset) \
importmap(mod, "__map_" #charset, &charset##encmap, \
NULL) ||
#define IMPORTMAP_DEC(charset) \
importmap(mod, "__map_" #charset, NULL, \
&charset##decmap) ||
#define MAPCLOSE() \
0) goto errorexit; \
Py_DECREF(mod);
/* END_CODEC_REGISTRY(encoding) closes the init function opened by
 * BEGIN_CODEC_REGISTRY: it fetches _multibytecodec.__create_codec, builds
 * the codec object from `__codec` and stores it on the module as "codec".
 *
 * The errorexit path releases only `mod` and `o`.  `m` comes from
 * Py_InitModule(), which returns a borrowed reference, so it must not be
 * decref'ed here. */
#define END_CODEC_REGISTRY(encoding) \
    mod = PyImport_ImportModule("_multibytecodec"); \
    if (mod == NULL) goto errorexit; \
    o = PyObject_GetAttrString(mod, "__create_codec"); \
    if (o == NULL || !PyCallable_Check(o)) \
        goto errorexit; \
    \
    codec = createcodec(o, &__codec); \
    if (codec == NULL) \
        goto errorexit; \
    PyModule_AddObject(m, "codec", codec); \
    Py_DECREF(o); Py_DECREF(mod); \
    \
    if (PyErr_Occurred()) \
        Py_FatalError("can't initialize the _" #encoding \
                      STRICT_SUFX " module"); \
    \
    return; \
    \
errorexit: \
    Py_XDECREF(mod); \
    Py_XDECREF(o); \
}
/* Complete module init function for a codec that imports no mapping
 * tables: BEGIN_ immediately followed by END_. */
#define CODEC_REGISTRY(encoding) \
BEGIN_CODEC_REGISTRY(encoding) \
END_CODEC_REGISTRY(encoding)
#ifdef USING_BINARY_PAIR_SEARCH
/*
 * Binary-search `haystack` (haystacksize entries; the search assumes they
 * are sorted by ascending `uniseq`) for the pair (body, modifier), packed
 * as body << 16 | modifier.
 *
 * Returns the matching entry's code, or DBCINV when the pair is absent or
 * the table is empty.
 */
static DBCHAR
find_pairencmap(ucs2_t body, ucs2_t modifier,
                struct pair_encodemap *haystack, int haystacksize)
{
    int pos, min, max;
    /* Widen before shifting: a 16-bit `body` would otherwise be promoted to
     * a signed int, and shifting a value >= 0x8000 left by 16 overflows. */
    ucs4_t value = ((ucs4_t)body << 16) | modifier;

    /* Nothing to search; also keeps haystack[0] from being read below. */
    if (haystacksize <= 0)
        return DBCINV;

    min = 0;
    max = haystacksize;

    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
        if (value < haystack[pos].uniseq) {
            if (max == pos) break;
            else max = pos;
        } else if (value > haystack[pos].uniseq) {
            if (min == pos) break;
            else min = pos;
        } else
            break;

    if (value == haystack[pos].uniseq)
        return haystack[pos].code;
    else
        return DBCINV;
}
#endif
#ifndef CODEC_WITHOUT_MAPS
/*
 * Read attribute `symbol` of the already-imported module `mod` (a CObject
 * wrapping a struct dbcs_map) and copy the table pointers it holds into
 * *encmap and *decmap.  Either output pointer may be NULL to skip that
 * table.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
importmap(PyObject *mod, const char *symbol,
          const struct unim_index **encmap, const struct dbcs_index **decmap)
{
    PyObject *o;
    struct dbcs_map *map;

    o = PyObject_GetAttrString(mod, (char*)symbol);
    if (o == NULL)
        return -1;

    if (!PyCObject_Check(o)) {
        PyErr_SetString(PyExc_ValueError, "map data must be a CObject.");
        /* `o` is a new reference; release it on this error path too. */
        Py_DECREF(o);
        return -1;
    }

    map = PyCObject_AsVoidPtr(o);
    if (encmap != NULL)
        *encmap = map->encmap;
    if (decmap != NULL)
        *decmap = map->decmap;
    Py_DECREF(o);

    return 0;
}
#endif
/*
 * Call `cofunc` with a single argument: a CObject wrapping `codec`.
 *
 * Returns the call's result (a new reference), or NULL with an exception
 * set if the CObject or the argument tuple cannot be created or the call
 * itself fails.
 */
static PyObject *
createcodec(PyObject *cofunc, MultibyteCodec *codec)
{
    PyObject *args, *codecobj, *r;

    /* Create the wrapper first so that a failure is reported instead of a
     * NULL item being stored in the argument tuple. */
    codecobj = PyCObject_FromVoidPtr(codec, NULL);
    if (codecobj == NULL)
        return NULL;

    args = PyTuple_New(1);
    if (args == NULL) {
        Py_DECREF(codecobj);
        return NULL;
    }
    PyTuple_SET_ITEM(args, 0, codecobj);    /* steals the reference */

    r = PyObject_CallObject(cofunc, args);
    Py_DECREF(args);
    return r;
}

View File

@ -0,0 +1,45 @@
/* $CJKCodecs: emu_jisx0213_2000.h,v 1.3 2004/07/08 02:53:37 perky Exp $ */
/* These routines may be quite inefficient, but they are used only to
* emulate old standards. */
/* Value the EMULATE_JISX0213_2000_ENCODE_* macros return from the enclosing
 * function for a rejected character.  The including file may define it
 * before including this header to override the default of 1. */
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
#endif
/* EMULATE_JISX0213_2000_ENCODE_BMP(assi, c): encoder-side emulation, active
 * only when the enclosing function's `config` equals (void *)2000.
 * Ten listed code points are rejected by returning
 * EMULATE_JISX0213_2000_ENCODE_INVALID from the enclosing function, and
 * 0x9B1D is assigned the code 0x8000 | 0x7d3b.
 * Expands to an if / else-if chain, so the caller may append an `else`.
 * The last line deliberately has no trailing backslash: a continuation
 * there would splice whatever line follows into this macro. */
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \
    if (config == (void *)2000 && ( \
            (c) == 0x9B1C || (c) == 0x4FF1 || \
            (c) == 0x525D || (c) == 0x541E || \
            (c) == 0x5653 || (c) == 0x59F8 || \
            (c) == 0x5C5B || (c) == 0x5E77 || \
            (c) == 0x7626 || (c) == 0x7E6B)) \
        return EMULATE_JISX0213_2000_ENCODE_INVALID; \
    else if (config == (void *)2000 && (c) == 0x9B1D) \
        (assi) = 0x8000 | 0x7d3b;
/* Encoder-side emulation for code points above the BMP: when `config` is
 * (void *)2000, code point 0x20B9F is rejected by returning
 * EMULATE_JISX0213_2000_ENCODE_INVALID.  `assi` is not used. */
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \
if (config == (void *)2000 && (c) == 0x20B9F) \
return EMULATE_JISX0213_2000_ENCODE_INVALID;
/* Value the decoder-side macros return for a rejected byte pair.  The
 * including file may define it first to override the default of 2. */
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
#endif
/* Decoder-side emulation for plane 1, active only when `config` is
 * (void *)2000: the ten listed (c1, c2) pairs are rejected by returning
 * EMULATE_JISX0213_2000_DECODE_INVALID.  `assi` is not used.  Expands to a
 * bare `if`, so the caller may append an `else`. */
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
if (config == (void *)2000 && \
(((c1) == 0x2E && (c2) == 0x21) || \
((c1) == 0x2F && (c2) == 0x7E) || \
((c1) == 0x4F && (c2) == 0x54) || \
((c1) == 0x4F && (c2) == 0x7E) || \
((c1) == 0x74 && (c2) == 0x27) || \
((c1) == 0x7E && (c2) == 0x7A) || \
((c1) == 0x7E && (c2) == 0x7B) || \
((c1) == 0x7E && (c2) == 0x7C) || \
((c1) == 0x7E && (c2) == 0x7D) || \
((c1) == 0x7E && (c2) == 0x7E))) \
return EMULATE_JISX0213_2000_DECODE_INVALID;
/* Decoder-side emulation for plane 2: when `config` is (void *)2000, the
 * pair (0x7D, 0x3B) decodes to 0x9B1D, mirroring the encoder-side mapping
 * of 0x9B1D to 0x8000 | 0x7d3b. */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
(assi) = 0x9B1D;

View File

@ -1,256 +0,0 @@
/*
* iso2022common.h: Common Codec Routines for ISO-2022 codecs.
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: iso2022common.h,v 1.8 2003/12/31 05:46:55 perky Exp $
*/
/* This ISO-2022 implementation is intended to comply with ECMA-43 Level 1
* rather than with the RFCs themselves. */
/* Control bytes: escape, shift-out and shift-in. */
#define ESC 0x1b
#define SO 0x0e
#define SI 0x0f
/* Upper bound on the number of bytes iso2022processesc() scans when it
 * looks for the end of an escape sequence. */
#define MAX_ESCSEQLEN 16
/* True for a byte that terminates an escape sequence ('@' or 'A'..'Z'). */
#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
/* True when the byte following ESC starts a sequence that is handed to
 * iso2022processesc(). */
#define IS_ISO2022ESC(c2) ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
(c2) == '.' || (c2) == '&')
/* this is not a full list of ISO-2022 escape sequence headers.
* but, it's enough to implement CJK instances of iso-2022. */
/* STATE
state->c[0-3]
00000000
||^^^^^|
|+-----+---- G0-3 Character Set
+----------- Is G0-3 double byte?
state->c[4]
00000000
||
|+---- Locked-Shift?
+----- ESC Throughout
*/
/* Charset identifiers stored in state->c[0..3].  The low seven bits are the
 * final byte of the designating escape sequence; bit 0x80
 * (CHARSET_DOUBLEBYTE) marks a double-byte set.  This is how
 * iso2022processesc() builds them, so e.g. 'B' is ASCII while
 * 'B'|0x80 is JIS X 0208. */
#define CHARSET_DOUBLEBYTE 0x80
#define CHARSET_ASCII 'B'
#define CHARSET_ISO8859_1 'A'
#define CHARSET_ISO8859_7 'F'
#define CHARSET_KSX1001 ('C'|CHARSET_DOUBLEBYTE)
#define CHARSET_JISX0201_R 'J'
#define CHARSET_JISX0201_K 'I'
#define CHARSET_JISX0208 ('B'|CHARSET_DOUBLEBYTE)
#define CHARSET_JISX0208_O ('@'|CHARSET_DOUBLEBYTE)
#define CHARSET_JISX0212 ('D'|CHARSET_DOUBLEBYTE)
#define CHARSET_JISX0213_1 ('O'|CHARSET_DOUBLEBYTE)
#define CHARSET_JISX0213_2 ('P'|CHARSET_DOUBLEBYTE)
#define CHARSET_GB2312 ('A'|CHARSET_DOUBLEBYTE)
#define CHARSET_GB2312_8565 ('E'|CHARSET_DOUBLEBYTE)
/* Split an identifier into its designation byte and double-byte flag. */
#define CHARSET_DESIGN(c) ((c) & 0x7f)
#define CHARSET_ISDBCS(c) ((c) & 0x80)
/* Flag bits kept in state->c[4]: F_SHIFTED is set by SO and cleared by SI
 * or a newline; F_ESCTHROUGHOUT is set while a non-ISO-2022 escape sequence
 * is being copied through. */
#define F_SHIFTED 0x01
#define F_ESCTHROUGHOUT 0x02
/* Accessors for the designated charsets G0..G3 (state->c[0..3]).  The
 * setters end in ';' and are therefore used as complete statements. */
#define STATE_SETG(dn, s, v) ((s)->c[dn]) = (v);
#define STATE_GETG(dn, s) ((s)->c[dn])
#define STATE_SETG0(s, v) STATE_SETG(0, s, v)
#define STATE_GETG0(s) STATE_GETG(0, s)
#define STATE_SETG1(s, v) STATE_SETG(1, s, v)
#define STATE_GETG1(s) STATE_GETG(1, s)
#define STATE_SETG2(s, v) STATE_SETG(2, s, v)
#define STATE_GETG2(s) STATE_GETG(2, s)
#define STATE_SETG3(s, v) STATE_SETG(3, s, v)
#define STATE_GETG3(s) STATE_GETG(3, s)
/* Accessors for the flag byte (state->c[4]). */
#define STATE_SETFLAG(s, f) ((s)->c[4]) |= (f);
#define STATE_GETFLAG(s, f) ((s)->c[4] & (f))
#define STATE_CLEARFLAG(s, f) ((s)->c[4]) &= ~(f);
#define STATE_CLEARFLAGS(s) ((s)->c[4]) = 0;
/* ISO2022_GETCHARSET(charset, c1): reject an 8-bit byte `c1` by returning 1
 * from the enclosing function, then store the currently invoked charset in
 * `charset`: G1 while the shift flag is set, G0 otherwise.
 * The macro tests its own argument `c1` (not an unrelated local named `c`)
 * and its last line carries no continuation backslash, so the directive
 * that follows is not spliced into the definition. */
#define ISO2022_GETCHARSET(charset, c1) \
    if ((c1) >= 0x80) \
        return 1; \
    if (STATE_GETFLAG(state, F_SHIFTED)) /* G1 */ \
        (charset) = STATE_GETG1(state); \
    else /* G0 */ \
        (charset) = STATE_GETG0(state);
/* SS2_ROUTINE: single-shift-2 handling, spliced between `else` and the
 * final `{ ... }` block of the ESC case in ISO2022_BASECASES.  When
 * ISO2022_USE_G2_DESIGNATION is defined it expands to
 * `if (IN2 == 'N') { ... } else`, decoding the one byte after "ESC N" with
 * the charset designated to G2; otherwise it expands to nothing.
 * Both the three input bytes and the one output unit are reserved before
 * anything is stored through *outbuf. */
#ifdef ISO2022_USE_G2_DESIGNATION
/* hardcoded for iso-2022-jp-2 for now. we'll need to generalize it
   when we have more G2 designating encodings */
#define SS2_ROUTINE \
    if (IN2 == 'N') { /* SS2 */ \
        RESERVE_INBUF(3) \
        RESERVE_OUTBUF(1) \
        if (STATE_GETG2(state) == CHARSET_ISO8859_1) { \
            ISO8859_1_DECODE(IN3 ^ 0x80, **outbuf) \
            else return 3; \
        } else if (STATE_GETG2(state) == CHARSET_ISO8859_7) { \
            ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) \
            else return 3; \
        } else if (STATE_GETG2(state) == CHARSET_ASCII) { \
            if (IN3 & 0x80) return 3; \
            else **outbuf = IN3; \
        } else \
            return MBERR_INTERNAL; \
        NEXT(3, 1) \
    } else
#else
#define SS2_ROUTINE
#endif
/* SHIFT_CASES: `case` labels for the SI / SO control bytes, spliced into
 * the decoder's switch by ISO2022_BASECASES.  SI clears and SO sets the
 * F_SHIFTED flag; each consumes one input byte and produces no output.
 * Expands to nothing when ISO2022_NO_SHIFT is defined. */
#ifndef ISO2022_NO_SHIFT
#define SHIFT_CASES \
case SI: \
STATE_CLEARFLAG(state, F_SHIFTED) \
NEXT_IN(1) \
break; \
case SO: \
STATE_SETFLAG(state, F_SHIFTED) \
NEXT_IN(1) \
break;
#else
/* for compatibility with JapaneseCodecs */
#define SHIFT_CASES
#endif
/* ISO2022_BASECASES(c1): `case` labels shared by the ISO-2022 decoders,
 * spliced into the switch opened by ISO2022_LOOP_BEGIN.
 *   ESC  - a recognised designation is handled by iso2022processesc();
 *          "ESC N" goes to SS2_ROUTINE when enabled; any other escape is
 *          copied through and F_ESCTHROUGHOUT is set so that
 *          ISO2022_ESCTHROUGHOUT copies the rest of the sequence.
 *   SI/SO - see SHIFT_CASES.
 *   '\n' - clears the shift flag and is copied through.
 * The pass-through ESC is stored with WRITE1, i.e. only after the output
 * space has been checked, and before the flag is set so that a
 * MBERR_TOOSMALL return leaves the decoder state untouched.
 * `c1` is accepted for symmetry with the caller and is not used. */
#define ISO2022_BASECASES(c1) \
    case ESC: \
        RESERVE_INBUF(2) \
        if (IS_ISO2022ESC(IN2)) { \
            int err; \
            err = iso2022processesc(state, inbuf, &inleft); \
            if (err != 0) \
                return err; \
        } else SS2_ROUTINE { \
            WRITE1(ESC) \
            STATE_SETFLAG(state, F_ESCTHROUGHOUT) \
            NEXT(1, 1) \
        } \
        break; \
    SHIFT_CASES \
    case '\n': \
        STATE_CLEARFLAG(state, F_SHIFTED) \
        WRITE1('\n') \
        NEXT(1, 1) \
        break;
/* ISO2022_ESCTHROUGHOUT(c): while F_ESCTHROUGHOUT is set, copy byte `c` to
 * the output unchanged and `continue` the enclosing loop; the flag is
 * cleared once a byte satisfying IS_ESCEND has been copied.  Returns
 * MBERR_TOOSMALL from the enclosing function if no output space is left. */
#define ISO2022_ESCTHROUGHOUT(c) \
if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { \
/* ESC throughout mode: for non-iso2022 escape sequences */ \
RESERVE_OUTBUF(1) \
OUT1(c) /* assume as ISO-8859-1 */ \
NEXT(1, 1) \
if (IS_ESCEND(c)) { \
STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) \
} \
continue; \
}
/* ISO2022_LOOP_BEGIN / ISO2022_LOOP_END bracket the charset-specific part
 * of a decoder.  LOOP_BEGIN opens `while (inleft > 0)`, reads the next byte
 * into `c`, handles escape pass-through and the shared control cases, then
 * in `default:` copies C0 bytes (< 0x20) through, returns 1 for bytes
 * >= 0x80 and leaves an `else {` open for the codec's own code.  LOOP_END
 * supplies the three closing braces (else, switch, while). */
#define ISO2022_LOOP_BEGIN \
while (inleft > 0) { \
unsigned char c = IN1; \
ISO2022_ESCTHROUGHOUT(c) \
switch(c) { \
ISO2022_BASECASES(c) \
default: \
if (c < 0x20) { /* C0 */ \
RESERVE_OUTBUF(1) \
OUT1(c) \
NEXT(1, 1) \
} else if (c >= 0x80) \
return 1; \
else {
#define ISO2022_LOOP_END \
} \
} \
}
/*
 * Parse the escape sequence at the start of *inbuf and, if it designates a
 * charset this encoding supports, record it in `state` and consume it.
 *
 * state  - decoder state; the designated charset is stored in state->c[n].
 * inbuf  - in/out cursor into the input, pointing at the ESC byte.
 * inleft - in/out count of remaining input bytes.
 *
 * Returns 0 on success (cursor and count advanced past the sequence),
 * MBERR_TOOFEW when the input ends before the sequence is complete, 1 when
 * no terminator is found within MAX_ESCSEQLEN bytes, and otherwise the
 * length of the unsupported sequence.  Presumably callers report the
 * positive values as a count of undecodable bytes -- TODO confirm.
 */
static int
iso2022processesc(MultibyteCodec_State *state,
const unsigned char **inbuf, size_t *inleft)
{
unsigned char charset, designation;
size_t i, esclen;
/* Find the terminating byte; esclen counts the ESC itself as well. */
for (i = 1;i < MAX_ESCSEQLEN;i++) {
if (i >= *inleft)
return MBERR_TOOFEW;
if (IS_ESCEND((*inbuf)[i])) {
esclen = i + 1;
break;
}
#ifdef ISO2022_USE_JISX0208EXT
/* "&@" is a prefix, not a terminator: step over it ('@' would
 * otherwise match IS_ESCEND) and keep scanning. */
else if (i+1 < *inleft && (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
i += 2;
#endif
}
if (i >= MAX_ESCSEQLEN)
return 1; /* unterminated escape sequence */
switch (esclen) {
case 3:
/* ESC $ F designates double-byte set F to G0; ESC ( F, ESC ) F and
 * (optionally) ESC . F designate single-byte set F to G0, G1, G2. */
if (IN2 == '$') {
charset = IN3 | CHARSET_DOUBLEBYTE;
designation = 0;
} else {
charset = IN3;
if (IN2 == '(') designation = 0;
else if (IN2 == ')') designation = 1;
#ifdef ISO2022_USE_G2_DESIGNATION
else if (IN2 == '.') designation = 2;
#endif
else return 3;
}
break;
case 4:
/* ESC $ ( F and ESC $ ) F designate double-byte set F to G0 / G1. */
if (IN2 != '$')
return 4;
charset = IN4 | CHARSET_DOUBLEBYTE;
if (IN3 == '(') designation = 0;
else if (IN3 == ')') designation = 1;
else return 4;
break;
#ifdef ISO2022_USE_JISX0208EXT
case 6: /* designation with prefix */
if ((*inbuf)[3] == ESC && (*inbuf)[4] == '$' && (*inbuf)[5] == 'B') {
charset = 'B' | CHARSET_DOUBLEBYTE;
designation = 0;
} else
return 6;
break;
#endif
default:
return esclen;
}
{ /* raise error when the charset is not designated for this encoding */
/* ISO2022_DESIGNATIONS must be defined by the including codec as a
 * comma-separated list of the charset identifiers it accepts. */
const unsigned char dsgs[] = {ISO2022_DESIGNATIONS, '\x00'};
for (i = 0; dsgs[i] != '\x00'; i++)
if (dsgs[i] == charset)
break;
if (dsgs[i] == '\x00')
return esclen;
}
STATE_SETG(designation, state, charset)
*inleft -= esclen;
(*inbuf) += esclen;
return 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,630 +0,0 @@
/*
* $CJKCodecs: map_cp950ext.h,v 1.1.1.1 2003/09/24 17:45:47 perky Exp $
*/
/* Flat storage for the cp950ext decode table: 224 16-bit entries, with
 * UNIINV marking byte pairs that have no mapping.  The rows of
 * cp950ext_decmap[] point into this array at offsets 0, 175, 182 and 183
 * (lead bytes 0xA1, 0xA2, 0xA3 and 0xF9). */
static const ucs2_t __cp950ext_decmap[224] = {
0x2027, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, 0xfe51, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x00af, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0xff5e, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x2295, 0x2299, 0x2215,
0xfe68, UNIINV, 0xffe5, UNIINV, 0xffe0, 0xffe1, 0x20ac, 0x7881,
0x92b9, 0x88cf, 0x58bb, 0x6052, 0x7ca7, 0x5afa, 0x2554, 0x2566,
0x2557, 0x2560, 0x256c, 0x2563, 0x255a, 0x2569, 0x255d, 0x2552,
0x2564, 0x2555, 0x255e, 0x256a, 0x2561, 0x2558, 0x2567, 0x255b,
0x2553, 0x2565, 0x2556, 0x255f, 0x256b, 0x2562, 0x2559, 0x2568,
0x255c, 0x2551, 0x2550, 0x256d, 0x256e, 0x2570, 0x256f, 0x2593,
};
/* Decode index for cp950ext, one row per lead byte (0x00..0xFF).  A row is
 * {pointer into __cp950ext_decmap, lowest trail byte, highest trail byte};
 * {0, 0, 0} means the lead byte has no mappings.  Only lead bytes 0xA1,
 * 0xA2, 0xA3 and 0xF9 are populated. */
static const struct dbcs_index cp950ext_decmap[256] = {
/* 0x00 */ {0, 0, 0},
/* 0x01 */ {0, 0, 0},
/* 0x02 */ {0, 0, 0},
/* 0x03 */ {0, 0, 0},
/* 0x04 */ {0, 0, 0},
/* 0x05 */ {0, 0, 0},
/* 0x06 */ {0, 0, 0},
/* 0x07 */ {0, 0, 0},
/* 0x08 */ {0, 0, 0},
/* 0x09 */ {0, 0, 0},
/* 0x0A */ {0, 0, 0},
/* 0x0B */ {0, 0, 0},
/* 0x0C */ {0, 0, 0},
/* 0x0D */ {0, 0, 0},
/* 0x0E */ {0, 0, 0},
/* 0x0F */ {0, 0, 0},
/* 0x10 */ {0, 0, 0},
/* 0x11 */ {0, 0, 0},
/* 0x12 */ {0, 0, 0},
/* 0x13 */ {0, 0, 0},
/* 0x14 */ {0, 0, 0},
/* 0x15 */ {0, 0, 0},
/* 0x16 */ {0, 0, 0},
/* 0x17 */ {0, 0, 0},
/* 0x18 */ {0, 0, 0},
/* 0x19 */ {0, 0, 0},
/* 0x1A */ {0, 0, 0},
/* 0x1B */ {0, 0, 0},
/* 0x1C */ {0, 0, 0},
/* 0x1D */ {0, 0, 0},
/* 0x1E */ {0, 0, 0},
/* 0x1F */ {0, 0, 0},
/* 0x20 */ {0, 0, 0},
/* 0x21 */ {0, 0, 0},
/* 0x22 */ {0, 0, 0},
/* 0x23 */ {0, 0, 0},
/* 0x24 */ {0, 0, 0},
/* 0x25 */ {0, 0, 0},
/* 0x26 */ {0, 0, 0},
/* 0x27 */ {0, 0, 0},
/* 0x28 */ {0, 0, 0},
/* 0x29 */ {0, 0, 0},
/* 0x2A */ {0, 0, 0},
/* 0x2B */ {0, 0, 0},
/* 0x2C */ {0, 0, 0},
/* 0x2D */ {0, 0, 0},
/* 0x2E */ {0, 0, 0},
/* 0x2F */ {0, 0, 0},
/* 0x30 */ {0, 0, 0},
/* 0x31 */ {0, 0, 0},
/* 0x32 */ {0, 0, 0},
/* 0x33 */ {0, 0, 0},
/* 0x34 */ {0, 0, 0},
/* 0x35 */ {0, 0, 0},
/* 0x36 */ {0, 0, 0},
/* 0x37 */ {0, 0, 0},
/* 0x38 */ {0, 0, 0},
/* 0x39 */ {0, 0, 0},
/* 0x3A */ {0, 0, 0},
/* 0x3B */ {0, 0, 0},
/* 0x3C */ {0, 0, 0},
/* 0x3D */ {0, 0, 0},
/* 0x3E */ {0, 0, 0},
/* 0x3F */ {0, 0, 0},
/* 0x40 */ {0, 0, 0},
/* 0x41 */ {0, 0, 0},
/* 0x42 */ {0, 0, 0},
/* 0x43 */ {0, 0, 0},
/* 0x44 */ {0, 0, 0},
/* 0x45 */ {0, 0, 0},
/* 0x46 */ {0, 0, 0},
/* 0x47 */ {0, 0, 0},
/* 0x48 */ {0, 0, 0},
/* 0x49 */ {0, 0, 0},
/* 0x4A */ {0, 0, 0},
/* 0x4B */ {0, 0, 0},
/* 0x4C */ {0, 0, 0},
/* 0x4D */ {0, 0, 0},
/* 0x4E */ {0, 0, 0},
/* 0x4F */ {0, 0, 0},
/* 0x50 */ {0, 0, 0},
/* 0x51 */ {0, 0, 0},
/* 0x52 */ {0, 0, 0},
/* 0x53 */ {0, 0, 0},
/* 0x54 */ {0, 0, 0},
/* 0x55 */ {0, 0, 0},
/* 0x56 */ {0, 0, 0},
/* 0x57 */ {0, 0, 0},
/* 0x58 */ {0, 0, 0},
/* 0x59 */ {0, 0, 0},
/* 0x5A */ {0, 0, 0},
/* 0x5B */ {0, 0, 0},
/* 0x5C */ {0, 0, 0},
/* 0x5D */ {0, 0, 0},
/* 0x5E */ {0, 0, 0},
/* 0x5F */ {0, 0, 0},
/* 0x60 */ {0, 0, 0},
/* 0x61 */ {0, 0, 0},
/* 0x62 */ {0, 0, 0},
/* 0x63 */ {0, 0, 0},
/* 0x64 */ {0, 0, 0},
/* 0x65 */ {0, 0, 0},
/* 0x66 */ {0, 0, 0},
/* 0x67 */ {0, 0, 0},
/* 0x68 */ {0, 0, 0},
/* 0x69 */ {0, 0, 0},
/* 0x6A */ {0, 0, 0},
/* 0x6B */ {0, 0, 0},
/* 0x6C */ {0, 0, 0},
/* 0x6D */ {0, 0, 0},
/* 0x6E */ {0, 0, 0},
/* 0x6F */ {0, 0, 0},
/* 0x70 */ {0, 0, 0},
/* 0x71 */ {0, 0, 0},
/* 0x72 */ {0, 0, 0},
/* 0x73 */ {0, 0, 0},
/* 0x74 */ {0, 0, 0},
/* 0x75 */ {0, 0, 0},
/* 0x76 */ {0, 0, 0},
/* 0x77 */ {0, 0, 0},
/* 0x78 */ {0, 0, 0},
/* 0x79 */ {0, 0, 0},
/* 0x7A */ {0, 0, 0},
/* 0x7B */ {0, 0, 0},
/* 0x7C */ {0, 0, 0},
/* 0x7D */ {0, 0, 0},
/* 0x7E */ {0, 0, 0},
/* 0x7F */ {0, 0, 0},
/* 0x80 */ {0, 0, 0},
/* 0x81 */ {0, 0, 0},
/* 0x82 */ {0, 0, 0},
/* 0x83 */ {0, 0, 0},
/* 0x84 */ {0, 0, 0},
/* 0x85 */ {0, 0, 0},
/* 0x86 */ {0, 0, 0},
/* 0x87 */ {0, 0, 0},
/* 0x88 */ {0, 0, 0},
/* 0x89 */ {0, 0, 0},
/* 0x8A */ {0, 0, 0},
/* 0x8B */ {0, 0, 0},
/* 0x8C */ {0, 0, 0},
/* 0x8D */ {0, 0, 0},
/* 0x8E */ {0, 0, 0},
/* 0x8F */ {0, 0, 0},
/* 0x90 */ {0, 0, 0},
/* 0x91 */ {0, 0, 0},
/* 0x92 */ {0, 0, 0},
/* 0x93 */ {0, 0, 0},
/* 0x94 */ {0, 0, 0},
/* 0x95 */ {0, 0, 0},
/* 0x96 */ {0, 0, 0},
/* 0x97 */ {0, 0, 0},
/* 0x98 */ {0, 0, 0},
/* 0x99 */ {0, 0, 0},
/* 0x9A */ {0, 0, 0},
/* 0x9B */ {0, 0, 0},
/* 0x9C */ {0, 0, 0},
/* 0x9D */ {0, 0, 0},
/* 0x9E */ {0, 0, 0},
/* 0x9F */ {0, 0, 0},
/* 0xA0 */ {0, 0, 0},
/* 0xA1 */ {__cp950ext_decmap+0, 0x45, 0xf3},
/* 0xA2 */ {__cp950ext_decmap+175, 0x41, 0x47},
/* 0xA3 */ {__cp950ext_decmap+182, 0xe1, 0xe1},
/* 0xA4 */ {0, 0, 0},
/* 0xA5 */ {0, 0, 0},
/* 0xA6 */ {0, 0, 0},
/* 0xA7 */ {0, 0, 0},
/* 0xA8 */ {0, 0, 0},
/* 0xA9 */ {0, 0, 0},
/* 0xAA */ {0, 0, 0},
/* 0xAB */ {0, 0, 0},
/* 0xAC */ {0, 0, 0},
/* 0xAD */ {0, 0, 0},
/* 0xAE */ {0, 0, 0},
/* 0xAF */ {0, 0, 0},
/* 0xB0 */ {0, 0, 0},
/* 0xB1 */ {0, 0, 0},
/* 0xB2 */ {0, 0, 0},
/* 0xB3 */ {0, 0, 0},
/* 0xB4 */ {0, 0, 0},
/* 0xB5 */ {0, 0, 0},
/* 0xB6 */ {0, 0, 0},
/* 0xB7 */ {0, 0, 0},
/* 0xB8 */ {0, 0, 0},
/* 0xB9 */ {0, 0, 0},
/* 0xBA */ {0, 0, 0},
/* 0xBB */ {0, 0, 0},
/* 0xBC */ {0, 0, 0},
/* 0xBD */ {0, 0, 0},
/* 0xBE */ {0, 0, 0},
/* 0xBF */ {0, 0, 0},
/* 0xC0 */ {0, 0, 0},
/* 0xC1 */ {0, 0, 0},
/* 0xC2 */ {0, 0, 0},
/* 0xC3 */ {0, 0, 0},
/* 0xC4 */ {0, 0, 0},
/* 0xC5 */ {0, 0, 0},
/* 0xC6 */ {0, 0, 0},
/* 0xC7 */ {0, 0, 0},
/* 0xC8 */ {0, 0, 0},
/* 0xC9 */ {0, 0, 0},
/* 0xCA */ {0, 0, 0},
/* 0xCB */ {0, 0, 0},
/* 0xCC */ {0, 0, 0},
/* 0xCD */ {0, 0, 0},
/* 0xCE */ {0, 0, 0},
/* 0xCF */ {0, 0, 0},
/* 0xD0 */ {0, 0, 0},
/* 0xD1 */ {0, 0, 0},
/* 0xD2 */ {0, 0, 0},
/* 0xD3 */ {0, 0, 0},
/* 0xD4 */ {0, 0, 0},
/* 0xD5 */ {0, 0, 0},
/* 0xD6 */ {0, 0, 0},
/* 0xD7 */ {0, 0, 0},
/* 0xD8 */ {0, 0, 0},
/* 0xD9 */ {0, 0, 0},
/* 0xDA */ {0, 0, 0},
/* 0xDB */ {0, 0, 0},
/* 0xDC */ {0, 0, 0},
/* 0xDD */ {0, 0, 0},
/* 0xDE */ {0, 0, 0},
/* 0xDF */ {0, 0, 0},
/* 0xE0 */ {0, 0, 0},
/* 0xE1 */ {0, 0, 0},
/* 0xE2 */ {0, 0, 0},
/* 0xE3 */ {0, 0, 0},
/* 0xE4 */ {0, 0, 0},
/* 0xE5 */ {0, 0, 0},
/* 0xE6 */ {0, 0, 0},
/* 0xE7 */ {0, 0, 0},
/* 0xE8 */ {0, 0, 0},
/* 0xE9 */ {0, 0, 0},
/* 0xEA */ {0, 0, 0},
/* 0xEB */ {0, 0, 0},
/* 0xEC */ {0, 0, 0},
/* 0xED */ {0, 0, 0},
/* 0xEE */ {0, 0, 0},
/* 0xEF */ {0, 0, 0},
/* 0xF0 */ {0, 0, 0},
/* 0xF1 */ {0, 0, 0},
/* 0xF2 */ {0, 0, 0},
/* 0xF3 */ {0, 0, 0},
/* 0xF4 */ {0, 0, 0},
/* 0xF5 */ {0, 0, 0},
/* 0xF6 */ {0, 0, 0},
/* 0xF7 */ {0, 0, 0},
/* 0xF8 */ {0, 0, 0},
/* 0xF9 */ {__cp950ext_decmap+183, 0xd6, 0xfe},
/* 0xFA */ {0, 0, 0},
/* 0xFB */ {0, 0, 0},
/* 0xFC */ {0, 0, 0},
/* 0xFD */ {0, 0, 0},
/* 0xFE */ {0, 0, 0},
/* 0xFF */ {0, 0, 0},
};
/*
 * Flat pool of DBCHAR values that the rows of the cp950ext_encmap index
 * (defined right after this array) point into.  Every non-empty index row
 * owns one contiguous run of this pool; slots holding NOCHAR carry no
 * value.  The pool has 581 slots, which equals the sum of the run lengths
 * of all non-empty index rows.
 * NOTE(review): presumably the values are CP950 double-byte codes and the
 * runs are addressed by the low byte of a Unicode code point -- confirm
 * against the unim_index lookup code in cjkcommon.h.
 */
static const DBCHAR __cp950ext_encmap[581] = {
0xa1c2, 0xa145, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa3e1, 0xa241,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1f2,
NOCHAR, NOCHAR, NOCHAR, 0xa1f3, 0xf9f8, 0xf9e6, 0xf9ef, 0xf9dd,
0xf9e8, 0xf9f1, 0xf9df, 0xf9ec, 0xf9f5, 0xf9e3, 0xf9ee, 0xf9f7,
0xf9e5, NOCHAR, 0xf9f2, 0xf9e0, NOCHAR, 0xf9f4, 0xf9e2, 0xf9e7,
0xf9f0, 0xf9de, 0xf9ed, 0xf9f6, 0xf9e4, NOCHAR, 0xf9f3, 0xf9e1,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xf9fe, 0xf9d9,
0xf9dc, 0xf9da, 0xf9d6, 0xf9db, 0xf9d8, 0xf9d7, 0xa14e, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa242, 0xa1fe, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, 0xa240, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1e3, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa246,
0xa247, NOCHAR, NOCHAR, NOCHAR, 0xa244,
};
/*
 * 256-row index over the __cp950ext_encmap pool, one row per byte value
 * 0x00..0xFF (see the row labels).  A row is either {0, 0, 0} (no data)
 * or {pointer into the pool, low bound, high bound}; consecutive
 * non-empty rows start exactly (high - low + 1) slots after the previous
 * one, e.g. row 0x20 starts at +1 and spans 0x27..0xac (134 slots), so
 * row 0x22 starts at +135.
 * NOTE(review): presumably the row number is the high byte of the
 * Unicode code point and the bounds limit its low byte -- confirm against
 * the struct unim_index definition and its lookup macro.
 */
static const struct unim_index cp950ext_encmap[256] = {
/* 0x00 */ {__cp950ext_encmap+0, 0xaf, 0xaf},
/* 0x01 */ {0, 0, 0},
/* 0x02 */ {0, 0, 0},
/* 0x03 */ {0, 0, 0},
/* 0x04 */ {0, 0, 0},
/* 0x05 */ {0, 0, 0},
/* 0x06 */ {0, 0, 0},
/* 0x07 */ {0, 0, 0},
/* 0x08 */ {0, 0, 0},
/* 0x09 */ {0, 0, 0},
/* 0x0A */ {0, 0, 0},
/* 0x0B */ {0, 0, 0},
/* 0x0C */ {0, 0, 0},
/* 0x0D */ {0, 0, 0},
/* 0x0E */ {0, 0, 0},
/* 0x0F */ {0, 0, 0},
/* 0x10 */ {0, 0, 0},
/* 0x11 */ {0, 0, 0},
/* 0x12 */ {0, 0, 0},
/* 0x13 */ {0, 0, 0},
/* 0x14 */ {0, 0, 0},
/* 0x15 */ {0, 0, 0},
/* 0x16 */ {0, 0, 0},
/* 0x17 */ {0, 0, 0},
/* 0x18 */ {0, 0, 0},
/* 0x19 */ {0, 0, 0},
/* 0x1A */ {0, 0, 0},
/* 0x1B */ {0, 0, 0},
/* 0x1C */ {0, 0, 0},
/* 0x1D */ {0, 0, 0},
/* 0x1E */ {0, 0, 0},
/* 0x1F */ {0, 0, 0},
/* 0x20 */ {__cp950ext_encmap+1, 0x27, 0xac},
/* 0x21 */ {0, 0, 0},
/* 0x22 */ {__cp950ext_encmap+135, 0x15, 0x99},
/* 0x23 */ {0, 0, 0},
/* 0x24 */ {0, 0, 0},
/* 0x25 */ {__cp950ext_encmap+268, 0x51, 0x93},
/* 0x26 */ {0, 0, 0},
/* 0x27 */ {0, 0, 0},
/* 0x28 */ {0, 0, 0},
/* 0x29 */ {0, 0, 0},
/* 0x2A */ {0, 0, 0},
/* 0x2B */ {0, 0, 0},
/* 0x2C */ {0, 0, 0},
/* 0x2D */ {0, 0, 0},
/* 0x2E */ {0, 0, 0},
/* 0x2F */ {0, 0, 0},
/* 0x30 */ {0, 0, 0},
/* 0x31 */ {0, 0, 0},
/* 0x32 */ {0, 0, 0},
/* 0x33 */ {0, 0, 0},
/* 0x34 */ {0, 0, 0},
/* 0x35 */ {0, 0, 0},
/* 0x36 */ {0, 0, 0},
/* 0x37 */ {0, 0, 0},
/* 0x38 */ {0, 0, 0},
/* 0x39 */ {0, 0, 0},
/* 0x3A */ {0, 0, 0},
/* 0x3B */ {0, 0, 0},
/* 0x3C */ {0, 0, 0},
/* 0x3D */ {0, 0, 0},
/* 0x3E */ {0, 0, 0},
/* 0x3F */ {0, 0, 0},
/* 0x40 */ {0, 0, 0},
/* 0x41 */ {0, 0, 0},
/* 0x42 */ {0, 0, 0},
/* 0x43 */ {0, 0, 0},
/* 0x44 */ {0, 0, 0},
/* 0x45 */ {0, 0, 0},
/* 0x46 */ {0, 0, 0},
/* 0x47 */ {0, 0, 0},
/* 0x48 */ {0, 0, 0},
/* 0x49 */ {0, 0, 0},
/* 0x4A */ {0, 0, 0},
/* 0x4B */ {0, 0, 0},
/* 0x4C */ {0, 0, 0},
/* 0x4D */ {0, 0, 0},
/* 0x4E */ {0, 0, 0},
/* 0x4F */ {0, 0, 0},
/* 0x50 */ {0, 0, 0},
/* 0x51 */ {0, 0, 0},
/* 0x52 */ {0, 0, 0},
/* 0x53 */ {0, 0, 0},
/* 0x54 */ {0, 0, 0},
/* 0x55 */ {0, 0, 0},
/* 0x56 */ {0, 0, 0},
/* 0x57 */ {0, 0, 0},
/* 0x58 */ {__cp950ext_encmap+335, 0xbb, 0xbb},
/* 0x59 */ {0, 0, 0},
/* 0x5A */ {__cp950ext_encmap+336, 0xfa, 0xfa},
/* 0x5B */ {0, 0, 0},
/* 0x5C */ {0, 0, 0},
/* 0x5D */ {0, 0, 0},
/* 0x5E */ {0, 0, 0},
/* 0x5F */ {0, 0, 0},
/* 0x60 */ {__cp950ext_encmap+337, 0x52, 0x52},
/* 0x61 */ {0, 0, 0},
/* 0x62 */ {0, 0, 0},
/* 0x63 */ {0, 0, 0},
/* 0x64 */ {0, 0, 0},
/* 0x65 */ {0, 0, 0},
/* 0x66 */ {0, 0, 0},
/* 0x67 */ {0, 0, 0},
/* 0x68 */ {0, 0, 0},
/* 0x69 */ {0, 0, 0},
/* 0x6A */ {0, 0, 0},
/* 0x6B */ {0, 0, 0},
/* 0x6C */ {0, 0, 0},
/* 0x6D */ {0, 0, 0},
/* 0x6E */ {0, 0, 0},
/* 0x6F */ {0, 0, 0},
/* 0x70 */ {0, 0, 0},
/* 0x71 */ {0, 0, 0},
/* 0x72 */ {0, 0, 0},
/* 0x73 */ {0, 0, 0},
/* 0x74 */ {0, 0, 0},
/* 0x75 */ {0, 0, 0},
/* 0x76 */ {0, 0, 0},
/* 0x77 */ {0, 0, 0},
/* 0x78 */ {__cp950ext_encmap+338, 0x81, 0x81},
/* 0x79 */ {0, 0, 0},
/* 0x7A */ {0, 0, 0},
/* 0x7B */ {0, 0, 0},
/* 0x7C */ {__cp950ext_encmap+339, 0xa7, 0xa7},
/* 0x7D */ {0, 0, 0},
/* 0x7E */ {0, 0, 0},
/* 0x7F */ {0, 0, 0},
/* 0x80 */ {0, 0, 0},
/* 0x81 */ {0, 0, 0},
/* 0x82 */ {0, 0, 0},
/* 0x83 */ {0, 0, 0},
/* 0x84 */ {0, 0, 0},
/* 0x85 */ {0, 0, 0},
/* 0x86 */ {0, 0, 0},
/* 0x87 */ {0, 0, 0},
/* 0x88 */ {__cp950ext_encmap+340, 0xcf, 0xcf},
/* 0x89 */ {0, 0, 0},
/* 0x8A */ {0, 0, 0},
/* 0x8B */ {0, 0, 0},
/* 0x8C */ {0, 0, 0},
/* 0x8D */ {0, 0, 0},
/* 0x8E */ {0, 0, 0},
/* 0x8F */ {0, 0, 0},
/* 0x90 */ {0, 0, 0},
/* 0x91 */ {0, 0, 0},
/* 0x92 */ {__cp950ext_encmap+341, 0xb9, 0xb9},
/* 0x93 */ {0, 0, 0},
/* 0x94 */ {0, 0, 0},
/* 0x95 */ {0, 0, 0},
/* 0x96 */ {0, 0, 0},
/* 0x97 */ {0, 0, 0},
/* 0x98 */ {0, 0, 0},
/* 0x99 */ {0, 0, 0},
/* 0x9A */ {0, 0, 0},
/* 0x9B */ {0, 0, 0},
/* 0x9C */ {0, 0, 0},
/* 0x9D */ {0, 0, 0},
/* 0x9E */ {0, 0, 0},
/* 0x9F */ {0, 0, 0},
/* 0xA0 */ {0, 0, 0},
/* 0xA1 */ {0, 0, 0},
/* 0xA2 */ {0, 0, 0},
/* 0xA3 */ {0, 0, 0},
/* 0xA4 */ {0, 0, 0},
/* 0xA5 */ {0, 0, 0},
/* 0xA6 */ {0, 0, 0},
/* 0xA7 */ {0, 0, 0},
/* 0xA8 */ {0, 0, 0},
/* 0xA9 */ {0, 0, 0},
/* 0xAA */ {0, 0, 0},
/* 0xAB */ {0, 0, 0},
/* 0xAC */ {0, 0, 0},
/* 0xAD */ {0, 0, 0},
/* 0xAE */ {0, 0, 0},
/* 0xAF */ {0, 0, 0},
/* 0xB0 */ {0, 0, 0},
/* 0xB1 */ {0, 0, 0},
/* 0xB2 */ {0, 0, 0},
/* 0xB3 */ {0, 0, 0},
/* 0xB4 */ {0, 0, 0},
/* 0xB5 */ {0, 0, 0},
/* 0xB6 */ {0, 0, 0},
/* 0xB7 */ {0, 0, 0},
/* 0xB8 */ {0, 0, 0},
/* 0xB9 */ {0, 0, 0},
/* 0xBA */ {0, 0, 0},
/* 0xBB */ {0, 0, 0},
/* 0xBC */ {0, 0, 0},
/* 0xBD */ {0, 0, 0},
/* 0xBE */ {0, 0, 0},
/* 0xBF */ {0, 0, 0},
/* 0xC0 */ {0, 0, 0},
/* 0xC1 */ {0, 0, 0},
/* 0xC2 */ {0, 0, 0},
/* 0xC3 */ {0, 0, 0},
/* 0xC4 */ {0, 0, 0},
/* 0xC5 */ {0, 0, 0},
/* 0xC6 */ {0, 0, 0},
/* 0xC7 */ {0, 0, 0},
/* 0xC8 */ {0, 0, 0},
/* 0xC9 */ {0, 0, 0},
/* 0xCA */ {0, 0, 0},
/* 0xCB */ {0, 0, 0},
/* 0xCC */ {0, 0, 0},
/* 0xCD */ {0, 0, 0},
/* 0xCE */ {0, 0, 0},
/* 0xCF */ {0, 0, 0},
/* 0xD0 */ {0, 0, 0},
/* 0xD1 */ {0, 0, 0},
/* 0xD2 */ {0, 0, 0},
/* 0xD3 */ {0, 0, 0},
/* 0xD4 */ {0, 0, 0},
/* 0xD5 */ {0, 0, 0},
/* 0xD6 */ {0, 0, 0},
/* 0xD7 */ {0, 0, 0},
/* 0xD8 */ {0, 0, 0},
/* 0xD9 */ {0, 0, 0},
/* 0xDA */ {0, 0, 0},
/* 0xDB */ {0, 0, 0},
/* 0xDC */ {0, 0, 0},
/* 0xDD */ {0, 0, 0},
/* 0xDE */ {0, 0, 0},
/* 0xDF */ {0, 0, 0},
/* 0xE0 */ {0, 0, 0},
/* 0xE1 */ {0, 0, 0},
/* 0xE2 */ {0, 0, 0},
/* 0xE3 */ {0, 0, 0},
/* 0xE4 */ {0, 0, 0},
/* 0xE5 */ {0, 0, 0},
/* 0xE6 */ {0, 0, 0},
/* 0xE7 */ {0, 0, 0},
/* 0xE8 */ {0, 0, 0},
/* 0xE9 */ {0, 0, 0},
/* 0xEA */ {0, 0, 0},
/* 0xEB */ {0, 0, 0},
/* 0xEC */ {0, 0, 0},
/* 0xED */ {0, 0, 0},
/* 0xEE */ {0, 0, 0},
/* 0xEF */ {0, 0, 0},
/* 0xF0 */ {0, 0, 0},
/* 0xF1 */ {0, 0, 0},
/* 0xF2 */ {0, 0, 0},
/* 0xF3 */ {0, 0, 0},
/* 0xF4 */ {0, 0, 0},
/* 0xF5 */ {0, 0, 0},
/* 0xF6 */ {0, 0, 0},
/* 0xF7 */ {0, 0, 0},
/* 0xF8 */ {0, 0, 0},
/* 0xF9 */ {0, 0, 0},
/* 0xFA */ {0, 0, 0},
/* 0xFB */ {0, 0, 0},
/* 0xFC */ {0, 0, 0},
/* 0xFD */ {0, 0, 0},
/* 0xFE */ {__cp950ext_encmap+342, 0x51, 0x68},
/* 0xFF */ {__cp950ext_encmap+366, 0x0f, 0xe5},
};

File diff suppressed because it is too large Load Diff

View File

@ -1,218 +0,0 @@
/*
* $CJKCodecs: map_gb18030uni.h,v 1.1.1.1 2003/09/24 17:45:51 perky Exp $
*/
/*
 * Range table: each row is an inclusive span [first, last] of Py_UNICODE
 * values together with a running `base`.  Rows are sorted by `first`, and
 * `base` advances by the size of the preceding span (for example
 * 0x0080..0x00a3 is 0x24 values, so the second row has base 0x0024).
 * The final row has first == last == 0 and carries the grand total
 * (0x99fc) as its base.
 * NOTE(review): presumably `base` is the linear index of `first` within
 * the GB18030 four-byte area and the all-zero row terminates the search
 * loop -- confirm against the gb18030 codec that walks this table.
 */
static const struct _gb18030_to_unibmp_ranges {
Py_UNICODE first, last; /* inclusive bounds of the span */
DBCHAR base; /* running value assigned to `first` */
} gb18030_to_unibmp_ranges[] = {
{ 0x0080, 0x00a3, 0x0000 },
{ 0x00a5, 0x00a6, 0x0024 },
{ 0x00a9, 0x00af, 0x0026 },
{ 0x00b2, 0x00b6, 0x002d },
{ 0x00b8, 0x00d6, 0x0032 },
{ 0x00d8, 0x00df, 0x0051 },
{ 0x00e2, 0x00e7, 0x0059 },
{ 0x00eb, 0x00eb, 0x005f },
{ 0x00ee, 0x00f1, 0x0060 },
{ 0x00f4, 0x00f6, 0x0064 },
{ 0x00f8, 0x00f8, 0x0067 },
{ 0x00fb, 0x00fb, 0x0068 },
{ 0x00fd, 0x0100, 0x0069 },
{ 0x0102, 0x0112, 0x006d },
{ 0x0114, 0x011a, 0x007e },
{ 0x011c, 0x012a, 0x0085 },
{ 0x012c, 0x0143, 0x0094 },
{ 0x0145, 0x0147, 0x00ac },
{ 0x0149, 0x014c, 0x00af },
{ 0x014e, 0x016a, 0x00b3 },
{ 0x016c, 0x01cd, 0x00d0 },
{ 0x01cf, 0x01cf, 0x0132 },
{ 0x01d1, 0x01d1, 0x0133 },
{ 0x01d3, 0x01d3, 0x0134 },
{ 0x01d5, 0x01d5, 0x0135 },
{ 0x01d7, 0x01d7, 0x0136 },
{ 0x01d9, 0x01d9, 0x0137 },
{ 0x01db, 0x01db, 0x0138 },
{ 0x01dd, 0x01f8, 0x0139 },
{ 0x01fa, 0x0250, 0x0155 },
{ 0x0252, 0x0260, 0x01ac },
{ 0x0262, 0x02c6, 0x01bb },
{ 0x02c8, 0x02c8, 0x0220 },
{ 0x02cc, 0x02d8, 0x0221 },
{ 0x02da, 0x0390, 0x022e },
{ 0x03a2, 0x03a2, 0x02e5 },
{ 0x03aa, 0x03b0, 0x02e6 },
{ 0x03c2, 0x03c2, 0x02ed },
{ 0x03ca, 0x0400, 0x02ee },
{ 0x0402, 0x040f, 0x0325 },
{ 0x0450, 0x0450, 0x0333 },
{ 0x0452, 0x200f, 0x0334 },
{ 0x2011, 0x2012, 0x1ef2 },
{ 0x2017, 0x2017, 0x1ef4 },
{ 0x201a, 0x201b, 0x1ef5 },
{ 0x201e, 0x2024, 0x1ef7 },
{ 0x2027, 0x202f, 0x1efe },
{ 0x2031, 0x2031, 0x1f07 },
{ 0x2034, 0x2034, 0x1f08 },
{ 0x2036, 0x203a, 0x1f09 },
{ 0x203c, 0x20ab, 0x1f0e },
{ 0x20ad, 0x2102, 0x1f7e },
{ 0x2104, 0x2104, 0x1fd4 },
{ 0x2106, 0x2108, 0x1fd5 },
{ 0x210a, 0x2115, 0x1fd8 },
{ 0x2117, 0x2120, 0x1fe4 },
{ 0x2122, 0x215f, 0x1fee },
{ 0x216c, 0x216f, 0x202c },
{ 0x217a, 0x218f, 0x2030 },
{ 0x2194, 0x2195, 0x2046 },
{ 0x219a, 0x2207, 0x2048 },
{ 0x2209, 0x220e, 0x20b6 },
{ 0x2210, 0x2210, 0x20bc },
{ 0x2212, 0x2214, 0x20bd },
{ 0x2216, 0x2219, 0x20c0 },
{ 0x221b, 0x221c, 0x20c4 },
{ 0x2221, 0x2222, 0x20c6 },
{ 0x2224, 0x2224, 0x20c8 },
{ 0x2226, 0x2226, 0x20c9 },
{ 0x222c, 0x222d, 0x20ca },
{ 0x222f, 0x2233, 0x20cc },
{ 0x2238, 0x223c, 0x20d1 },
{ 0x223e, 0x2247, 0x20d6 },
{ 0x2249, 0x224b, 0x20e0 },
{ 0x224d, 0x2251, 0x20e3 },
{ 0x2253, 0x225f, 0x20e8 },
{ 0x2262, 0x2263, 0x20f5 },
{ 0x2268, 0x226d, 0x20f7 },
{ 0x2270, 0x2294, 0x20fd },
{ 0x2296, 0x2298, 0x2122 },
{ 0x229a, 0x22a4, 0x2125 },
{ 0x22a6, 0x22be, 0x2130 },
{ 0x22c0, 0x2311, 0x2149 },
{ 0x2313, 0x245f, 0x219b },
{ 0x246a, 0x2473, 0x22e8 },
{ 0x249c, 0x24ff, 0x22f2 },
{ 0x254c, 0x254f, 0x2356 },
{ 0x2574, 0x2580, 0x235a },
{ 0x2590, 0x2592, 0x2367 },
{ 0x2596, 0x259f, 0x236a },
{ 0x25a2, 0x25b1, 0x2374 },
{ 0x25b4, 0x25bb, 0x2384 },
{ 0x25be, 0x25c5, 0x238c },
{ 0x25c8, 0x25ca, 0x2394 },
{ 0x25cc, 0x25cd, 0x2397 },
{ 0x25d0, 0x25e1, 0x2399 },
{ 0x25e6, 0x2604, 0x23ab },
{ 0x2607, 0x2608, 0x23ca },
{ 0x260a, 0x263f, 0x23cc },
{ 0x2641, 0x2641, 0x2402 },
{ 0x2643, 0x2e80, 0x2403 },
{ 0x2e82, 0x2e83, 0x2c41 },
{ 0x2e85, 0x2e87, 0x2c43 },
{ 0x2e89, 0x2e8a, 0x2c46 },
{ 0x2e8d, 0x2e96, 0x2c48 },
{ 0x2e98, 0x2ea6, 0x2c52 },
{ 0x2ea8, 0x2ea9, 0x2c61 },
{ 0x2eab, 0x2ead, 0x2c63 },
{ 0x2eaf, 0x2eb2, 0x2c66 },
{ 0x2eb4, 0x2eb5, 0x2c6a },
{ 0x2eb8, 0x2eba, 0x2c6c },
{ 0x2ebc, 0x2ec9, 0x2c6f },
{ 0x2ecb, 0x2fef, 0x2c7d },
{ 0x2ffc, 0x2fff, 0x2da2 },
{ 0x3004, 0x3004, 0x2da6 },
{ 0x3018, 0x301c, 0x2da7 },
{ 0x301f, 0x3020, 0x2dac },
{ 0x302a, 0x303d, 0x2dae },
{ 0x303f, 0x3040, 0x2dc2 },
{ 0x3094, 0x309a, 0x2dc4 },
{ 0x309f, 0x30a0, 0x2dcb },
{ 0x30f7, 0x30fb, 0x2dcd },
{ 0x30ff, 0x3104, 0x2dd2 },
{ 0x312a, 0x321f, 0x2dd8 },
{ 0x322a, 0x3230, 0x2ece },
{ 0x3232, 0x32a2, 0x2ed5 },
{ 0x32a4, 0x338d, 0x2f46 },
{ 0x3390, 0x339b, 0x3030 },
{ 0x339f, 0x33a0, 0x303c },
{ 0x33a2, 0x33c3, 0x303e },
{ 0x33c5, 0x33cd, 0x3060 },
{ 0x33cf, 0x33d0, 0x3069 },
{ 0x33d3, 0x33d4, 0x306b },
{ 0x33d6, 0x3446, 0x306d },
{ 0x3448, 0x3472, 0x30de },
{ 0x3474, 0x359d, 0x3109 },
{ 0x359f, 0x360d, 0x3233 },
{ 0x360f, 0x3619, 0x32a2 },
{ 0x361b, 0x3917, 0x32ad },
{ 0x3919, 0x396d, 0x35aa },
{ 0x396f, 0x39ce, 0x35ff },
{ 0x39d1, 0x39de, 0x365f },
{ 0x39e0, 0x3a72, 0x366d },
{ 0x3a74, 0x3b4d, 0x3700 },
{ 0x3b4f, 0x3c6d, 0x37da },
{ 0x3c6f, 0x3cdf, 0x38f9 },
{ 0x3ce1, 0x4055, 0x396a },
{ 0x4057, 0x415e, 0x3cdf },
{ 0x4160, 0x4336, 0x3de7 },
{ 0x4338, 0x43ab, 0x3fbe },
{ 0x43ad, 0x43b0, 0x4032 },
{ 0x43b2, 0x43dc, 0x4036 },
{ 0x43de, 0x44d5, 0x4061 },
{ 0x44d7, 0x464b, 0x4159 },
{ 0x464d, 0x4660, 0x42ce },
{ 0x4662, 0x4722, 0x42e2 },
{ 0x4724, 0x4728, 0x43a3 },
{ 0x472a, 0x477b, 0x43a8 },
{ 0x477d, 0x478c, 0x43fa },
{ 0x478e, 0x4946, 0x440a },
{ 0x4948, 0x4979, 0x45c3 },
{ 0x497b, 0x497c, 0x45f5 },
{ 0x497e, 0x4981, 0x45f7 },
{ 0x4984, 0x4984, 0x45fb },
{ 0x4987, 0x499a, 0x45fc },
{ 0x499c, 0x499e, 0x4610 },
{ 0x49a0, 0x49b5, 0x4613 },
{ 0x49b8, 0x4c76, 0x4629 },
{ 0x4c78, 0x4c9e, 0x48e8 },
{ 0x4ca4, 0x4d12, 0x490f },
{ 0x4d1a, 0x4dad, 0x497e },
{ 0x4daf, 0x4dff, 0x4a12 },
{ 0x9fa6, 0xd7ff, 0x4a63 },
{ 0xe76c, 0xe76c, 0x82bd },
{ 0xe7c8, 0xe7c8, 0x82be },
{ 0xe7e7, 0xe7f3, 0x82bf },
{ 0xe815, 0xe815, 0x82cc },
{ 0xe819, 0xe81d, 0x82cd },
{ 0xe81f, 0xe825, 0x82d2 },
{ 0xe827, 0xe82a, 0x82d9 },
{ 0xe82d, 0xe830, 0x82dd },
{ 0xe833, 0xe83a, 0x82e1 },
{ 0xe83c, 0xe842, 0x82e9 },
{ 0xe844, 0xe853, 0x82f0 },
{ 0xe856, 0xe863, 0x8300 },
{ 0xe865, 0xf92b, 0x830e },
{ 0xf92d, 0xf978, 0x93d5 },
{ 0xf97a, 0xf994, 0x9421 },
{ 0xf996, 0xf9e6, 0x943c },
{ 0xf9e8, 0xf9f0, 0x948d },
{ 0xf9f2, 0xfa0b, 0x9496 },
{ 0xfa10, 0xfa10, 0x94b0 },
{ 0xfa12, 0xfa12, 0x94b1 },
{ 0xfa15, 0xfa17, 0x94b2 },
{ 0xfa19, 0xfa1e, 0x94b5 },
{ 0xfa22, 0xfa22, 0x94bb },
{ 0xfa25, 0xfa26, 0x94bc },
{ 0xfa2a, 0xfe2f, 0x94be },
{ 0xfe32, 0xfe32, 0x98c4 },
{ 0xfe45, 0xfe48, 0x98c5 },
{ 0xfe53, 0xfe53, 0x98c9 },
{ 0xfe58, 0xfe58, 0x98ca },
{ 0xfe67, 0xfe67, 0x98cb },
{ 0xfe6c, 0xff00, 0x98cc },
{ 0xff5f, 0xffdf, 0x9961 },
{ 0xffe6, 0xffff, 0x99e2 },
{ 0x0000, 0x0000, 0x99fc },
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,329 +0,0 @@
/*
* $CJKCodecs: map_jisx0213_pairs.h,v 1.2 2003/11/27 13:29:01 perky Exp $
*/
/*
 * Flat pool of 49 ucs4_t values referenced by the jisx0213_pairdecmap
 * index that follows.  Each real entry packs two 16-bit values into one
 * 32-bit word (e.g. 0x304b309a = 0x304b followed by 0x309a); the same
 * packed values appear as keys in jisx0213_pairencmap.  UNIINV marks
 * slots with no mapping.
 * NOTE(review): the two halves look like a base character plus a
 * combining mark (or a second character) -- confirm with the JIS X 0213
 * decoder that consumes this table.
 */
static const ucs4_t __jisx0213_pairdecmap[49] = {
0x304b309a, 0x304d309a, 0x304f309a, 0x3051309a,
0x3053309a, 0x30ab309a, 0x30ad309a, 0x30af309a,
0x30b1309a, 0x30b3309a, 0x30bb309a, 0x30c4309a,
0x30c8309a, 0x31f7309a, 0x00e60300, UNIINV,
UNIINV, UNIINV, 0x02540300, 0x02540301,
0x028c0300, 0x028c0301, 0x02590300, 0x02590301,
0x025a0300, 0x025a0301, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, UNIINV,
UNIINV, UNIINV, UNIINV, 0x02e902e5,
0x02e502e9,
};
/*
 * 256-row index over the __jisx0213_pairdecmap pool, one row per byte
 * value 0x00..0xFF (see the row labels).  Only rows 0x24, 0x25, 0x26 and
 * 0x2B carry data; each is {pointer into the pool, low bound, high bound}
 * and starts (high - low + 1) slots after the previous non-empty row
 * (0x24: +0 for 0x77..0x7b, 0x25: +5, 0x26: +13, 0x2B: +14).
 * NOTE(review): presumably the row number is the first byte of the
 * JIS X 0213 code and the bounds limit the second byte -- confirm against
 * the struct widedbcs_index definition and its lookup macro.
 */
static const struct widedbcs_index jisx0213_pairdecmap[256] = {
/* 0x00 */ {0, 0, 0},
/* 0x01 */ {0, 0, 0},
/* 0x02 */ {0, 0, 0},
/* 0x03 */ {0, 0, 0},
/* 0x04 */ {0, 0, 0},
/* 0x05 */ {0, 0, 0},
/* 0x06 */ {0, 0, 0},
/* 0x07 */ {0, 0, 0},
/* 0x08 */ {0, 0, 0},
/* 0x09 */ {0, 0, 0},
/* 0x0A */ {0, 0, 0},
/* 0x0B */ {0, 0, 0},
/* 0x0C */ {0, 0, 0},
/* 0x0D */ {0, 0, 0},
/* 0x0E */ {0, 0, 0},
/* 0x0F */ {0, 0, 0},
/* 0x10 */ {0, 0, 0},
/* 0x11 */ {0, 0, 0},
/* 0x12 */ {0, 0, 0},
/* 0x13 */ {0, 0, 0},
/* 0x14 */ {0, 0, 0},
/* 0x15 */ {0, 0, 0},
/* 0x16 */ {0, 0, 0},
/* 0x17 */ {0, 0, 0},
/* 0x18 */ {0, 0, 0},
/* 0x19 */ {0, 0, 0},
/* 0x1A */ {0, 0, 0},
/* 0x1B */ {0, 0, 0},
/* 0x1C */ {0, 0, 0},
/* 0x1D */ {0, 0, 0},
/* 0x1E */ {0, 0, 0},
/* 0x1F */ {0, 0, 0},
/* 0x20 */ {0, 0, 0},
/* 0x21 */ {0, 0, 0},
/* 0x22 */ {0, 0, 0},
/* 0x23 */ {0, 0, 0},
/* 0x24 */ {__jisx0213_pairdecmap+0, 0x77, 0x7b},
/* 0x25 */ {__jisx0213_pairdecmap+5, 0x77, 0x7e},
/* 0x26 */ {__jisx0213_pairdecmap+13, 0x78, 0x78},
/* 0x27 */ {0, 0, 0},
/* 0x28 */ {0, 0, 0},
/* 0x29 */ {0, 0, 0},
/* 0x2A */ {0, 0, 0},
/* 0x2B */ {__jisx0213_pairdecmap+14, 0x44, 0x66},
/* 0x2C */ {0, 0, 0},
/* 0x2D */ {0, 0, 0},
/* 0x2E */ {0, 0, 0},
/* 0x2F */ {0, 0, 0},
/* 0x30 */ {0, 0, 0},
/* 0x31 */ {0, 0, 0},
/* 0x32 */ {0, 0, 0},
/* 0x33 */ {0, 0, 0},
/* 0x34 */ {0, 0, 0},
/* 0x35 */ {0, 0, 0},
/* 0x36 */ {0, 0, 0},
/* 0x37 */ {0, 0, 0},
/* 0x38 */ {0, 0, 0},
/* 0x39 */ {0, 0, 0},
/* 0x3A */ {0, 0, 0},
/* 0x3B */ {0, 0, 0},
/* 0x3C */ {0, 0, 0},
/* 0x3D */ {0, 0, 0},
/* 0x3E */ {0, 0, 0},
/* 0x3F */ {0, 0, 0},
/* 0x40 */ {0, 0, 0},
/* 0x41 */ {0, 0, 0},
/* 0x42 */ {0, 0, 0},
/* 0x43 */ {0, 0, 0},
/* 0x44 */ {0, 0, 0},
/* 0x45 */ {0, 0, 0},
/* 0x46 */ {0, 0, 0},
/* 0x47 */ {0, 0, 0},
/* 0x48 */ {0, 0, 0},
/* 0x49 */ {0, 0, 0},
/* 0x4A */ {0, 0, 0},
/* 0x4B */ {0, 0, 0},
/* 0x4C */ {0, 0, 0},
/* 0x4D */ {0, 0, 0},
/* 0x4E */ {0, 0, 0},
/* 0x4F */ {0, 0, 0},
/* 0x50 */ {0, 0, 0},
/* 0x51 */ {0, 0, 0},
/* 0x52 */ {0, 0, 0},
/* 0x53 */ {0, 0, 0},
/* 0x54 */ {0, 0, 0},
/* 0x55 */ {0, 0, 0},
/* 0x56 */ {0, 0, 0},
/* 0x57 */ {0, 0, 0},
/* 0x58 */ {0, 0, 0},
/* 0x59 */ {0, 0, 0},
/* 0x5A */ {0, 0, 0},
/* 0x5B */ {0, 0, 0},
/* 0x5C */ {0, 0, 0},
/* 0x5D */ {0, 0, 0},
/* 0x5E */ {0, 0, 0},
/* 0x5F */ {0, 0, 0},
/* 0x60 */ {0, 0, 0},
/* 0x61 */ {0, 0, 0},
/* 0x62 */ {0, 0, 0},
/* 0x63 */ {0, 0, 0},
/* 0x64 */ {0, 0, 0},
/* 0x65 */ {0, 0, 0},
/* 0x66 */ {0, 0, 0},
/* 0x67 */ {0, 0, 0},
/* 0x68 */ {0, 0, 0},
/* 0x69 */ {0, 0, 0},
/* 0x6A */ {0, 0, 0},
/* 0x6B */ {0, 0, 0},
/* 0x6C */ {0, 0, 0},
/* 0x6D */ {0, 0, 0},
/* 0x6E */ {0, 0, 0},
/* 0x6F */ {0, 0, 0},
/* 0x70 */ {0, 0, 0},
/* 0x71 */ {0, 0, 0},
/* 0x72 */ {0, 0, 0},
/* 0x73 */ {0, 0, 0},
/* 0x74 */ {0, 0, 0},
/* 0x75 */ {0, 0, 0},
/* 0x76 */ {0, 0, 0},
/* 0x77 */ {0, 0, 0},
/* 0x78 */ {0, 0, 0},
/* 0x79 */ {0, 0, 0},
/* 0x7A */ {0, 0, 0},
/* 0x7B */ {0, 0, 0},
/* 0x7C */ {0, 0, 0},
/* 0x7D */ {0, 0, 0},
/* 0x7E */ {0, 0, 0},
/* 0x7F */ {0, 0, 0},
/* 0x80 */ {0, 0, 0},
/* 0x81 */ {0, 0, 0},
/* 0x82 */ {0, 0, 0},
/* 0x83 */ {0, 0, 0},
/* 0x84 */ {0, 0, 0},
/* 0x85 */ {0, 0, 0},
/* 0x86 */ {0, 0, 0},
/* 0x87 */ {0, 0, 0},
/* 0x88 */ {0, 0, 0},
/* 0x89 */ {0, 0, 0},
/* 0x8A */ {0, 0, 0},
/* 0x8B */ {0, 0, 0},
/* 0x8C */ {0, 0, 0},
/* 0x8D */ {0, 0, 0},
/* 0x8E */ {0, 0, 0},
/* 0x8F */ {0, 0, 0},
/* 0x90 */ {0, 0, 0},
/* 0x91 */ {0, 0, 0},
/* 0x92 */ {0, 0, 0},
/* 0x93 */ {0, 0, 0},
/* 0x94 */ {0, 0, 0},
/* 0x95 */ {0, 0, 0},
/* 0x96 */ {0, 0, 0},
/* 0x97 */ {0, 0, 0},
/* 0x98 */ {0, 0, 0},
/* 0x99 */ {0, 0, 0},
/* 0x9A */ {0, 0, 0},
/* 0x9B */ {0, 0, 0},
/* 0x9C */ {0, 0, 0},
/* 0x9D */ {0, 0, 0},
/* 0x9E */ {0, 0, 0},
/* 0x9F */ {0, 0, 0},
/* 0xA0 */ {0, 0, 0},
/* 0xA1 */ {0, 0, 0},
/* 0xA2 */ {0, 0, 0},
/* 0xA3 */ {0, 0, 0},
/* 0xA4 */ {0, 0, 0},
/* 0xA5 */ {0, 0, 0},
/* 0xA6 */ {0, 0, 0},
/* 0xA7 */ {0, 0, 0},
/* 0xA8 */ {0, 0, 0},
/* 0xA9 */ {0, 0, 0},
/* 0xAA */ {0, 0, 0},
/* 0xAB */ {0, 0, 0},
/* 0xAC */ {0, 0, 0},
/* 0xAD */ {0, 0, 0},
/* 0xAE */ {0, 0, 0},
/* 0xAF */ {0, 0, 0},
/* 0xB0 */ {0, 0, 0},
/* 0xB1 */ {0, 0, 0},
/* 0xB2 */ {0, 0, 0},
/* 0xB3 */ {0, 0, 0},
/* 0xB4 */ {0, 0, 0},
/* 0xB5 */ {0, 0, 0},
/* 0xB6 */ {0, 0, 0},
/* 0xB7 */ {0, 0, 0},
/* 0xB8 */ {0, 0, 0},
/* 0xB9 */ {0, 0, 0},
/* 0xBA */ {0, 0, 0},
/* 0xBB */ {0, 0, 0},
/* 0xBC */ {0, 0, 0},
/* 0xBD */ {0, 0, 0},
/* 0xBE */ {0, 0, 0},
/* 0xBF */ {0, 0, 0},
/* 0xC0 */ {0, 0, 0},
/* 0xC1 */ {0, 0, 0},
/* 0xC2 */ {0, 0, 0},
/* 0xC3 */ {0, 0, 0},
/* 0xC4 */ {0, 0, 0},
/* 0xC5 */ {0, 0, 0},
/* 0xC6 */ {0, 0, 0},
/* 0xC7 */ {0, 0, 0},
/* 0xC8 */ {0, 0, 0},
/* 0xC9 */ {0, 0, 0},
/* 0xCA */ {0, 0, 0},
/* 0xCB */ {0, 0, 0},
/* 0xCC */ {0, 0, 0},
/* 0xCD */ {0, 0, 0},
/* 0xCE */ {0, 0, 0},
/* 0xCF */ {0, 0, 0},
/* 0xD0 */ {0, 0, 0},
/* 0xD1 */ {0, 0, 0},
/* 0xD2 */ {0, 0, 0},
/* 0xD3 */ {0, 0, 0},
/* 0xD4 */ {0, 0, 0},
/* 0xD5 */ {0, 0, 0},
/* 0xD6 */ {0, 0, 0},
/* 0xD7 */ {0, 0, 0},
/* 0xD8 */ {0, 0, 0},
/* 0xD9 */ {0, 0, 0},
/* 0xDA */ {0, 0, 0},
/* 0xDB */ {0, 0, 0},
/* 0xDC */ {0, 0, 0},
/* 0xDD */ {0, 0, 0},
/* 0xDE */ {0, 0, 0},
/* 0xDF */ {0, 0, 0},
/* 0xE0 */ {0, 0, 0},
/* 0xE1 */ {0, 0, 0},
/* 0xE2 */ {0, 0, 0},
/* 0xE3 */ {0, 0, 0},
/* 0xE4 */ {0, 0, 0},
/* 0xE5 */ {0, 0, 0},
/* 0xE6 */ {0, 0, 0},
/* 0xE7 */ {0, 0, 0},
/* 0xE8 */ {0, 0, 0},
/* 0xE9 */ {0, 0, 0},
/* 0xEA */ {0, 0, 0},
/* 0xEB */ {0, 0, 0},
/* 0xEC */ {0, 0, 0},
/* 0xED */ {0, 0, 0},
/* 0xEE */ {0, 0, 0},
/* 0xEF */ {0, 0, 0},
/* 0xF0 */ {0, 0, 0},
/* 0xF1 */ {0, 0, 0},
/* 0xF2 */ {0, 0, 0},
/* 0xF3 */ {0, 0, 0},
/* 0xF4 */ {0, 0, 0},
/* 0xF5 */ {0, 0, 0},
/* 0xF6 */ {0, 0, 0},
/* 0xF7 */ {0, 0, 0},
/* 0xF8 */ {0, 0, 0},
/* 0xF9 */ {0, 0, 0},
/* 0xFA */ {0, 0, 0},
/* 0xFB */ {0, 0, 0},
/* 0xFC */ {0, 0, 0},
/* 0xFD */ {0, 0, 0},
/* 0xFE */ {0, 0, 0},
/* 0xFF */ {0, 0, 0},
};
/*
 * Pair encode table with JISX0213_ENCPAIRS (46) rows of
 * {packed 32-bit key, 16-bit code}.  The key holds two 16-bit values,
 * the same packing used by __jisx0213_pairdecmap; a key whose low half
 * is 0x0000 (e.g. 0x304b0000 -> 0x242b) gives the code for the first
 * value alone, and is followed by the keys for that value combined with
 * a second one (e.g. 0x304b309a -> 0x2477).  Rows are in ascending key
 * order.
 * NOTE(review): the ordering suggests the encoder binary-searches this
 * table -- confirm before reordering or appending rows.  Unlike the
 * other tables here, this array is not declared const.
 */
#define JISX0213_ENCPAIRS 46
static struct pair_encodemap jisx0213_pairencmap[JISX0213_ENCPAIRS] = {
{ 0x00e60000, 0x295c },
{ 0x00e60300, 0x2b44 },
{ 0x02540000, 0x2b38 },
{ 0x02540300, 0x2b48 },
{ 0x02540301, 0x2b49 },
{ 0x02590000, 0x2b30 },
{ 0x02590300, 0x2b4c },
{ 0x02590301, 0x2b4d },
{ 0x025a0000, 0x2b43 },
{ 0x025a0300, 0x2b4e },
{ 0x025a0301, 0x2b4f },
{ 0x028c0000, 0x2b37 },
{ 0x028c0300, 0x2b4a },
{ 0x028c0301, 0x2b4b },
{ 0x02e50000, 0x2b60 },
{ 0x02e502e9, 0x2b66 },
{ 0x02e90000, 0x2b64 },
{ 0x02e902e5, 0x2b65 },
{ 0x304b0000, 0x242b },
{ 0x304b309a, 0x2477 },
{ 0x304d0000, 0x242d },
{ 0x304d309a, 0x2478 },
{ 0x304f0000, 0x242f },
{ 0x304f309a, 0x2479 },
{ 0x30510000, 0x2431 },
{ 0x3051309a, 0x247a },
{ 0x30530000, 0x2433 },
{ 0x3053309a, 0x247b },
{ 0x30ab0000, 0x252b },
{ 0x30ab309a, 0x2577 },
{ 0x30ad0000, 0x252d },
{ 0x30ad309a, 0x2578 },
{ 0x30af0000, 0x252f },
{ 0x30af309a, 0x2579 },
{ 0x30b10000, 0x2531 },
{ 0x30b1309a, 0x257a },
{ 0x30b30000, 0x2533 },
{ 0x30b3309a, 0x257b },
{ 0x30bb0000, 0x253b },
{ 0x30bb309a, 0x257c },
{ 0x30c40000, 0x2544 },
{ 0x30c4309a, 0x257d },
{ 0x30c80000, 0x2548 },
{ 0x30c8309a, 0x257e },
{ 0x31f70000, 0x2675 },
{ 0x31f7309a, 0x2678 },
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,51 +0,0 @@
/*
* mapdata_ja_JP.c: Map Provider for Japanese Encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: mapdata_ja_JP.c,v 1.3 2004/01/17 11:26:10 perky Exp $
*/
#include "Python.h"
#include "cjkcommon.h"
#include "map_jisx0208.h"
#include "map_jisx0212.h"
#include "map_jisx0213.h"
#include "map_jisxcommon.h"
#include "map_cp932ext.h"
/*
 * Registry of the Japanese mapping tables exported by this module.
 * Each row is {charset name, encode map, decode map}; NULL means the
 * table for that direction is not provided under this name.  The row
 * with an empty name terminates the list (the init function stops at
 * charset[0] == '\0').
 */
static struct dbcs_map mapholders[] = {
{"jisx0208", NULL, jisx0208_decmap},
{"jisx0212", NULL, jisx0212_decmap},
{"jisxcommon", jisxcommon_encmap, NULL},
{"jisx0213_1_bmp", NULL, jisx0213_1_bmp_decmap},
{"jisx0213_2_bmp", NULL, jisx0213_2_bmp_decmap},
{"jisx0213_bmp", jisx0213_bmp_encmap, NULL},
{"jisx0213_1_emp", NULL, jisx0213_1_emp_decmap},
{"jisx0213_2_emp", NULL, jisx0213_2_emp_decmap},
{"jisx0213_emp", jisx0213_emp_encmap, NULL},
{"cp932ext", cp932ext_encmap, cp932ext_decmap},
{"", NULL, NULL}, /* terminator */
};
/*
 * Method table passed to Py_InitModule().  It contains only the
 * terminating entry: the module exposes no callable functions.
 */
static struct PyMethodDef __methods[] = {
{NULL, NULL},
};
/*
 * Module initializer for _codecs_mapdata_ja_JP.
 *
 * Creates the module and publishes every row of mapholders as a module
 * attribute named "__map_<charset>", whose value is a PyCObject wrapping
 * a pointer to that struct dbcs_map row.  Any failure is fatal, as in the
 * original code; the difference is that a failed Py_InitModule() is now
 * detected before the NULL module is used, and the PyCObject is released
 * when it cannot be added to the module.
 */
void
init_codecs_mapdata_ja_JP(void)
{
    struct dbcs_map *h;
    PyObject *m;
    int failed = 0;

    m = Py_InitModule("_codecs_mapdata_ja_JP", __methods);
    if (m == NULL)
        /* PyModule_AddObject() must never be handed a NULL module. */
        Py_FatalError("can't initialize the _codecs_mapdata_ja_JP module");

    for (h = mapholders; h->charset[0] != '\0'; h++) {
        /* Charset names are short literals from mapholders, so the
         * fixed-size buffer is large enough. */
        char mhname[256] = "__map_";
        PyObject *cobj;

        /* Append the charset name right after the "__map_" prefix. */
        strcpy(mhname + sizeof("__map_") - 1, h->charset);

        cobj = PyCObject_FromVoidPtr(h, NULL);
        if (cobj == NULL) {
            failed = 1;
            break;
        }
        /* The reference to cobj is stolen only on success. */
        if (PyModule_AddObject(m, mhname, cobj) < 0) {
            Py_DECREF(cobj);
            failed = 1;
            break;
        }
    }

    if (failed || PyErr_Occurred())
        Py_FatalError("can't initialize the _codecs_mapdata_ja_JP module");
}

View File

@ -1,42 +0,0 @@
/*
* mapdata_ko_KR.c: Map Provider for Korean Encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: mapdata_ko_KR.c,v 1.3 2004/01/17 11:26:10 perky Exp $
*/
#include "Python.h"
#include "cjkcommon.h"
#include "map_ksx1001.h"
#include "map_cp949.h"
#include "map_cp949ext.h"
/*
 * Registry of the Korean mapping tables exported by this module.
 * Each row is {charset name, encode map, decode map}; NULL means the
 * table for that direction is not provided under this name.  The row
 * with an empty name terminates the list (the init function stops at
 * charset[0] == '\0').
 */
static struct dbcs_map mapholders[] = {
{"ksx1001", NULL, ksx1001_decmap},
{"cp949", cp949_encmap, NULL},
{"cp949ext", NULL, cp949ext_decmap},
{"", NULL, NULL}, /* terminator */
};
/*
 * Method table passed to Py_InitModule().  It contains only the
 * terminating entry: the module exposes no callable functions.
 */
static struct PyMethodDef __methods[] = {
{NULL, NULL},
};
/*
 * Module initializer for _codecs_mapdata_ko_KR.
 *
 * Creates the module and publishes every row of mapholders as a module
 * attribute named "__map_<charset>", whose value is a PyCObject wrapping
 * a pointer to that struct dbcs_map row.  Any failure is fatal, as in the
 * original code; the difference is that a failed Py_InitModule() is now
 * detected before the NULL module is used, and the PyCObject is released
 * when it cannot be added to the module.
 */
void
init_codecs_mapdata_ko_KR(void)
{
    struct dbcs_map *h;
    PyObject *m;
    int failed = 0;

    m = Py_InitModule("_codecs_mapdata_ko_KR", __methods);
    if (m == NULL)
        /* PyModule_AddObject() must never be handed a NULL module. */
        Py_FatalError("can't initialize the _codecs_mapdata_ko_KR module");

    for (h = mapholders; h->charset[0] != '\0'; h++) {
        /* Charset names are short literals from mapholders, so the
         * fixed-size buffer is large enough. */
        char mhname[256] = "__map_";
        PyObject *cobj;

        /* Append the charset name right after the "__map_" prefix. */
        strcpy(mhname + sizeof("__map_") - 1, h->charset);

        cobj = PyCObject_FromVoidPtr(h, NULL);
        if (cobj == NULL) {
            failed = 1;
            break;
        }
        /* The reference to cobj is stolen only on success. */
        if (PyModule_AddObject(m, mhname, cobj) < 0) {
            Py_DECREF(cobj);
            failed = 1;
            break;
        }
    }

    if (failed || PyErr_Occurred())
        Py_FatalError("can't initialize the _codecs_mapdata_ko_KR module");
}

View File

@ -1,44 +0,0 @@
/*
* mapdata_zh_CN.c: Map Provider for Simplified Chinese Encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: mapdata_zh_CN.c,v 1.3 2004/01/17 11:26:10 perky Exp $
*/
#include "Python.h"
#include "cjkcommon.h"
#include "map_gb2312.h"
#include "map_gbkext.h"
#include "map_gbcommon.h"
#include "map_gb18030ext.h"
/*
 * Registry of the Simplified Chinese mapping tables exported by this
 * module.  Each row is {charset name, encode map, decode map}; NULL
 * means the table for that direction is not provided under this name.
 * The row with an empty name terminates the list (the init function
 * stops at charset[0] == '\0').
 */
static struct dbcs_map mapholders[] = {
{"gb2312", NULL, gb2312_decmap},
{"gbkext", NULL, gbkext_decmap},
{"gbcommon", gbcommon_encmap, NULL},
{"gb18030ext", gb18030ext_encmap, gb18030ext_decmap},
{"", NULL, NULL}, /* terminator */
};
/*
 * Method table passed to Py_InitModule().  It contains only the
 * terminating entry: the module exposes no callable functions.
 */
static struct PyMethodDef __methods[] = {
{NULL, NULL},
};
/*
 * Module initializer for _codecs_mapdata_zh_CN.
 *
 * Creates the module and publishes every row of mapholders as a module
 * attribute named "__map_<charset>", whose value is a PyCObject wrapping
 * a pointer to that struct dbcs_map row.  Any failure is fatal, as in the
 * original code; the difference is that a failed Py_InitModule() is now
 * detected before the NULL module is used, and the PyCObject is released
 * when it cannot be added to the module.
 */
void
init_codecs_mapdata_zh_CN(void)
{
    struct dbcs_map *h;
    PyObject *m;
    int failed = 0;

    m = Py_InitModule("_codecs_mapdata_zh_CN", __methods);
    if (m == NULL)
        /* PyModule_AddObject() must never be handed a NULL module. */
        Py_FatalError("can't initialize the _codecs_mapdata_zh_CN module");

    for (h = mapholders; h->charset[0] != '\0'; h++) {
        /* Charset names are short literals from mapholders, so the
         * fixed-size buffer is large enough. */
        char mhname[256] = "__map_";
        PyObject *cobj;

        /* Append the charset name right after the "__map_" prefix. */
        strcpy(mhname + sizeof("__map_") - 1, h->charset);

        cobj = PyCObject_FromVoidPtr(h, NULL);
        if (cobj == NULL) {
            failed = 1;
            break;
        }
        /* The reference to cobj is stolen only on success. */
        if (PyModule_AddObject(m, mhname, cobj) < 0) {
            Py_DECREF(cobj);
            failed = 1;
            break;
        }
    }

    if (failed || PyErr_Occurred())
        Py_FatalError("can't initialize the _codecs_mapdata_zh_CN module");
}

View File

@ -1,40 +0,0 @@
/*
* mapdata_zh_TW.c: Map Provider for Traditional Chinese Encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: mapdata_zh_TW.c,v 1.3 2004/01/17 11:26:10 perky Exp $
*/
#include "Python.h"
#include "cjkcommon.h"
#include "map_big5.h"
#include "map_cp950ext.h"
/*
 * Registry of the Traditional Chinese mapping tables exported by this
 * module.  Each row is {charset name, encode map, decode map}.  The row
 * with an empty name terminates the list (the init function stops at
 * charset[0] == '\0').
 */
static struct dbcs_map mapholders[] = {
{"big5", big5_encmap, big5_decmap},
{"cp950ext", cp950ext_encmap, cp950ext_decmap},
{"", NULL, NULL}, /* terminator */
};
/*
 * Method table passed to Py_InitModule().  It contains only the
 * terminating entry: the module exposes no callable functions.
 */
static struct PyMethodDef __methods[] = {
{NULL, NULL},
};
/*
 * Module initializer for _codecs_mapdata_zh_TW.
 *
 * Creates the module and publishes every row of mapholders as a module
 * attribute named "__map_<charset>", whose value is a PyCObject wrapping
 * a pointer to that struct dbcs_map row.  Any failure is fatal, as in the
 * original code; the difference is that a failed Py_InitModule() is now
 * detected before the NULL module is used, and the PyCObject is released
 * when it cannot be added to the module.
 */
void
init_codecs_mapdata_zh_TW(void)
{
    struct dbcs_map *h;
    PyObject *m;
    int failed = 0;

    m = Py_InitModule("_codecs_mapdata_zh_TW", __methods);
    if (m == NULL)
        /* PyModule_AddObject() must never be handed a NULL module. */
        Py_FatalError("can't initialize the _codecs_mapdata_zh_TW module");

    for (h = mapholders; h->charset[0] != '\0'; h++) {
        /* Charset names are short literals from mapholders, so the
         * fixed-size buffer is large enough. */
        char mhname[256] = "__map_";
        PyObject *cobj;

        /* Append the charset name right after the "__map_" prefix. */
        strcpy(mhname + sizeof("__map_") - 1, h->charset);

        cobj = PyCObject_FromVoidPtr(h, NULL);
        if (cobj == NULL) {
            failed = 1;
            break;
        }
        /* The reference to cobj is stolen only on success. */
        if (PyModule_AddObject(m, mhname, cobj) < 0) {
            Py_DECREF(cobj);
            failed = 1;
            break;
        }
    }

    if (failed || PyErr_Occurred())
        Py_FatalError("can't initialize the _codecs_mapdata_zh_TW module");
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More