Issue #12016: Multibyte CJK decoders now resynchronize faster
They only ignore the first byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'.
This commit is contained in:
parent
081fe46ff9
commit
2cded9c3f3
|
@ -68,6 +68,29 @@ New, Improved, and Deprecated Modules
|
|||
|
||||
* Stub
|
||||
|
||||
codecs
|
||||
------
|
||||
|
||||
Multibyte CJK decoders now resynchronize faster. They only ignore the first
|
||||
byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312',
|
||||
'replace') gives '\ufffd\n' instead of '\ufffd'.
|
||||
|
||||
(http://bugs.python.org/issue12016)
|
||||
|
||||
Don't reset incremental encoders of CJK codecs at each call to their encode()
|
||||
method anymore. For example: ::
|
||||
|
||||
$ ./python -q
|
||||
>>> import codecs
|
||||
>>> encoder = codecs.getincrementalencoder('hz')('strict')
|
||||
>>> b''.join(encoder.encode(x) for x in '\u52ff\u65bd\u65bc\u4eba\u3002 Bye.')
|
||||
b'~{NpJ)l6HK!#~} Bye.'
|
||||
|
||||
This example gives b'~{Np~}~{J)~}~{l6~}~{HK~}~{!#~} Bye.' with older Python
|
||||
versions.
|
||||
|
||||
(http://bugs.python.org/issue12100)
|
||||
|
||||
faulthandler
|
||||
------------
|
||||
|
||||
|
|
|
@ -15,8 +15,8 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x81\x81\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
(b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
|
||||
(b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
|
||||
(b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
)
|
||||
|
@ -28,8 +28,8 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||
(b"\x83\x34\x83\x31", "strict", None),
|
||||
("\u30fb", "strict", None),
|
||||
|
@ -42,11 +42,14 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
|
||||
(b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
|
||||
(b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
|
||||
("\u30fb", "strict", b"\x819\xa79"),
|
||||
(b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
|
||||
(b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
|
||||
(b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
|
||||
)
|
||||
has_iso10646 = True
|
||||
|
||||
|
@ -74,9 +77,11 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
|
||||
'Bye.\n'),
|
||||
# invalid bytes
|
||||
(b'ab~cd', 'replace', 'ab\uFFFDd'),
|
||||
(b'ab~cd', 'replace', 'ab\uFFFDcd'),
|
||||
(b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
|
||||
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
|
||||
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
|
||||
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
|
|
|
@ -15,8 +15,8 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||
)
|
||||
|
||||
|
|
|
@ -15,50 +15,57 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
|
||||
(b"abc\xf8", "strict", None),
|
||||
(b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
(b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
(b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
|
||||
(b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
|
||||
(b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
|
||||
(b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
|
||||
(b"ab\xEBxy", "replace", "ab\uFFFDxy"),
|
||||
(b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
|
||||
(b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
|
||||
# sjis vs cp932
|
||||
(b"\\\x7e", "replace", "\\\x7e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
|
||||
)
|
||||
|
||||
euc_commontests = (
|
||||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
|
||||
(b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
(b"\xa1\xc0", "strict", "\uff3c"),
|
||||
(b"\xa1\xc0\\", "strict", "\uff3c\\"),
|
||||
(b"\x8eXY", "replace", "\ufffdXY"),
|
||||
)
|
||||
|
||||
class Test_EUC_JIS_2004(test_multibytecodec_support.TestBase,
|
||||
unittest.TestCase):
|
||||
encoding = 'euc_jis_2004'
|
||||
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
|
||||
codectests = euc_commontests
|
||||
xmlcharnametest = (
|
||||
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
||||
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
|
||||
)
|
||||
|
||||
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
|
||||
unittest.TestCase):
|
||||
encoding = 'euc_jisx0213'
|
||||
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
(b"\xa1\xc0", "strict", "\uff3c"),
|
||||
)
|
||||
codectests = euc_commontests
|
||||
xmlcharnametest = (
|
||||
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
||||
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
|
||||
)
|
||||
|
||||
eucjp_commontests = (
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
|
||||
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
|
||||
(b"\xc1\x64", "strict", None),
|
||||
)
|
||||
|
||||
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
||||
unittest.TestCase):
|
||||
encoding = 'euc_jp'
|
||||
tstring = test_multibytecodec_support.load_teststring('euc_jp')
|
||||
codectests = eucjp_commontests + (
|
||||
(b"\xa1\xc0\\", "strict", "\uff3c\\"),
|
||||
codectests = euc_commontests + (
|
||||
("\xa5", "strict", b"\x5c"),
|
||||
("\u203e", "strict", b"\x7e"),
|
||||
)
|
||||
|
@ -66,8 +73,6 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
|
|||
shiftjis_commonenctests = (
|
||||
(b"abc\x80\x80\x82\x84", "strict", None),
|
||||
(b"abc\xf8", "strict", None),
|
||||
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||
)
|
||||
|
||||
|
@ -75,20 +80,41 @@ class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
encoding = 'shift_jis'
|
||||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||
codectests = shiftjis_commonenctests + (
|
||||
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
|
||||
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
|
||||
|
||||
(b"\\\x7e", "strict", "\\\x7e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
|
||||
(b"abc\x81\x39", "replace", "abc\ufffd9"),
|
||||
(b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
|
||||
(b"abc\xFF\x58", "replace", "abc\ufffdX"),
|
||||
)
|
||||
|
||||
class Test_SJIS_2004(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
encoding = 'shift_jis_2004'
|
||||
tstring = test_multibytecodec_support.load_teststring('shift_jis')
|
||||
codectests = shiftjis_commonenctests + (
|
||||
(b"\\\x7e", "strict", "\xa5\u203e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
|
||||
(b"abc\xEA\xFC", "strict", "abc\u64bf"),
|
||||
(b"\x81\x39xy", "replace", "\ufffd9xy"),
|
||||
(b"\xFF\x58xy", "replace", "\ufffdXxy"),
|
||||
(b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
|
||||
(b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
|
||||
(b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
|
||||
)
|
||||
xmlcharnametest = (
|
||||
"\xab\u211c\xbb = \u2329\u1234\u232a",
|
||||
b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
|
||||
)
|
||||
|
||||
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
|
||||
encoding = 'shift_jisx0213'
|
||||
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
|
||||
codectests = (
|
||||
# invalid bytes
|
||||
(b"abc\x80\x80\x82\x84", "strict", None),
|
||||
(b"abc\xf8", "strict", None),
|
||||
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
|
||||
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
|
||||
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
|
||||
codectests = shiftjis_commonenctests + (
|
||||
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
|
||||
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
|
||||
|
||||
# sjis vs cp932
|
||||
(b"\\\x7e", "replace", "\xa5\u203e"),
|
||||
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
|
||||
|
|
|
@ -15,8 +15,8 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||
)
|
||||
|
||||
|
@ -27,8 +27,8 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
|
||||
|
||||
# composed make-up sequence errors
|
||||
|
@ -40,13 +40,14 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
|
||||
(b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"),
|
||||
(b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'),
|
||||
(b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
|
||||
(b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
|
||||
(b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", "\ufffd"),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", "\ufffd"),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", "\ufffd"),
|
||||
(b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'),
|
||||
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'),
|
||||
(b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'),
|
||||
(b"\xc1\xc4", "strict", "\uc894"),
|
||||
)
|
||||
|
||||
|
@ -57,9 +58,13 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
|
||||
(b"\xD8abc", "replace", "\uFFFDabc"),
|
||||
(b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"),
|
||||
(b"\x84bxy", "replace", "\uFFFDbxy"),
|
||||
(b"\x8CBxy", "replace", "\uFFFDBxy"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
|
|
|
@ -15,8 +15,8 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
|
|||
# invalid bytes
|
||||
(b"abc\x80\x80\xc1\xc4", "strict", None),
|
||||
(b"abc\xc8", "strict", None),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
|
||||
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
|
||||
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
|
||||
)
|
||||
|
||||
|
|
|
@ -23,6 +23,9 @@ class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
|
|||
(b'\xa2\xcc', '\u5341'),
|
||||
(b'\xa2\xce', '\u5345'),
|
||||
]
|
||||
codectests = (
|
||||
(b"\xFFxy", "replace", "\ufffdxy"),
|
||||
)
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(__name__)
|
||||
|
|
|
@ -219,6 +219,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #12016: Multibyte CJK decoders now resynchronize faster. They only
|
||||
ignore the first byte of an invalid byte sequence. For example,
|
||||
b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'.
|
||||
|
||||
- Issue #12459: time.sleep() now raises a ValueError if the sleep length is
|
||||
negative, instead of an infinite sleep on Windows or raising an IOError on
|
||||
Linux for example, to have the same behaviour on all platforms.
|
||||
|
|
|
@ -85,7 +85,7 @@ DECODER(gb2312)
|
|||
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -141,7 +141,7 @@ DECODER(gbk)
|
|||
REQUIRE_INBUF(2)
|
||||
|
||||
GBK_DECODE(c, IN2, **outbuf)
|
||||
else return 2;
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
@ -267,7 +267,7 @@ DECODER(gb18030)
|
|||
c3 = IN3;
|
||||
c4 = IN4;
|
||||
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
|
||||
return 4;
|
||||
return 1;
|
||||
c -= 0x81; c2 -= 0x30;
|
||||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
|
@ -292,12 +292,12 @@ DECODER(gb18030)
|
|||
continue;
|
||||
}
|
||||
}
|
||||
return 4;
|
||||
return 1;
|
||||
}
|
||||
|
||||
GBK_DECODE(c, c2, **outbuf)
|
||||
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
|
||||
else return 2;
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
@ -400,7 +400,7 @@ DECODER(hz)
|
|||
else if (c2 == '\n')
|
||||
; /* line-continuation */
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
NEXT(2, 0);
|
||||
continue;
|
||||
}
|
||||
|
@ -419,7 +419,7 @@ DECODER(hz)
|
|||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -161,7 +161,7 @@ DECODER(big5hkscs)
|
|||
case 0x8864: WRITE2(0x00ca, 0x030c); break;
|
||||
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
|
||||
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
|
||||
default: return 2;
|
||||
default: return 1;
|
||||
}
|
||||
|
||||
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
|
||||
|
|
|
@ -112,7 +112,7 @@ DECODER(cp932)
|
|||
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
|
@ -120,7 +120,7 @@ DECODER(cp932)
|
|||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
else return 1;
|
||||
}
|
||||
else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) ||
|
||||
|
@ -128,10 +128,10 @@ DECODER(cp932)
|
|||
OUT1(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
@ -256,7 +256,7 @@ DECODER(euc_jis_2004)
|
|||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
@ -274,7 +274,7 @@ DECODER(euc_jis_2004)
|
|||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
|
||||
else return 3;
|
||||
else return 1;
|
||||
NEXT(3, 1)
|
||||
}
|
||||
else {
|
||||
|
@ -300,7 +300,7 @@ DECODER(euc_jis_2004)
|
|||
NEXT(2, 2)
|
||||
continue;
|
||||
}
|
||||
else return 2;
|
||||
else return 1;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
@ -388,7 +388,7 @@ DECODER(euc_jp)
|
|||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
@ -401,7 +401,7 @@ DECODER(euc_jp)
|
|||
NEXT(3, 1)
|
||||
}
|
||||
else
|
||||
return 3;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
@ -417,7 +417,7 @@ DECODER(euc_jp)
|
|||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf,
|
||||
c ^ 0x80, c2 ^ 0x80) ;
|
||||
else return 2;
|
||||
else return 1;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
@ -502,7 +502,7 @@ DECODER(shift_jis)
|
|||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
|
@ -522,10 +522,10 @@ DECODER(shift_jis)
|
|||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
@ -645,7 +645,7 @@ DECODER(shift_jis_2004)
|
|||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
|
@ -671,7 +671,7 @@ DECODER(shift_jis_2004)
|
|||
NEXT_OUT(2)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
NEXT_IN(2)
|
||||
}
|
||||
else { /* Plane 2 */
|
||||
|
@ -689,13 +689,13 @@ DECODER(shift_jis_2004)
|
|||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
|
|
@ -123,7 +123,7 @@ DECODER(euc_kr)
|
|||
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
|
||||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
|
||||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
|
||||
return 8;
|
||||
return 1;
|
||||
|
||||
c = (*inbuf)[3];
|
||||
if (0xa1 <= c && c <= 0xbe)
|
||||
|
@ -143,7 +143,7 @@ DECODER(euc_kr)
|
|||
jong = NONE;
|
||||
|
||||
if (cho == NONE || jung == NONE || jong == NONE)
|
||||
return 8;
|
||||
return 1;
|
||||
|
||||
OUT1(0xac00 + cho*588 + jung*28 + jong);
|
||||
NEXT(8, 1)
|
||||
|
@ -152,7 +152,7 @@ DECODER(euc_kr)
|
|||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -208,7 +208,7 @@ DECODER(cp949)
|
|||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
@ -375,7 +375,7 @@ DECODER(johab)
|
|||
i_jong = johabidx_jongseong[c_jong];
|
||||
|
||||
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
|
||||
return 2;
|
||||
return 1;
|
||||
|
||||
/* we don't use U+1100 hangul jamo yet. */
|
||||
if (i_cho == FILL) {
|
||||
|
@ -391,7 +391,7 @@ DECODER(johab)
|
|||
OUT1(0x3100 |
|
||||
johabjamo_jungseong[c_jung])
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
|
@ -399,7 +399,7 @@ DECODER(johab)
|
|||
OUT1(0x3100 |
|
||||
johabjamo_choseong[c_cho])
|
||||
else
|
||||
return 2;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
OUT1(0xac00 +
|
||||
|
@ -414,7 +414,7 @@ DECODER(johab)
|
|||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
|
||||
(c2 & 0x7f) == 0x7f ||
|
||||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
|
||||
return 2;
|
||||
return 1;
|
||||
else {
|
||||
unsigned char t1, t2;
|
||||
|
||||
|
@ -425,7 +425,7 @@ DECODER(johab)
|
|||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
|
||||
else return 2;
|
||||
else return 1;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ DECODER(big5)
|
|||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -109,7 +109,7 @@ DECODER(cp950)
|
|||
|
||||
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
|
||||
else TRYMAP_DEC(big5, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
else return 1;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue