cpython/Modules/cjkcodecs/_codecs_hk.c

192 lines
5.0 KiB
C
Raw Permalink Normal View History

/*
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
*/
#define USING_IMPORTED_MAPS
#include "cjkcodecs.h"
#include "mappings_hk.h"
/*
* BIG5HKSCS codec
*/
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;
CODEC_INIT(big5hkscs)
{
static int initialized = 0;
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
return -1;
initialized = 1;
return 0;
}
Merged revisions 60481,60485,60489-60492,60494-60496,60498-60499,60501-60503,60505-60506,60508-60509,60523-60524,60532,60543,60545,60547-60548,60552,60554,60556-60559,60561-60562,60569,60571-60572,60574,60576-60583,60585-60586,60589,60591,60594-60595,60597-60598,60600-60601,60606-60612,60615,60617-60678 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r60618 | walter.doerwald | 2008-02-06 15:31:55 +0100 (Wed, 06 Feb 2008) | 6 lines Remove month parameter from Calendar.yeardatescalendar(), Calendar.yeardays2calendar() and Calendar.yeardayscalendar() as the methods don't have such a parameter. Fixes issue #2017. Rewrap content to 80 chars. ........ r60622 | facundo.batista | 2008-02-06 20:28:49 +0100 (Wed, 06 Feb 2008) | 4 lines Fixes issue 1959. Converted tests to unittest. Thanks Giampaolo Rodola. ........ r60626 | thomas.heller | 2008-02-06 21:29:17 +0100 (Wed, 06 Feb 2008) | 3 lines Fixed refcounts and error handling. Should not be merged to py3k branch. ........ r60630 | mark.dickinson | 2008-02-06 23:10:50 +0100 (Wed, 06 Feb 2008) | 4 lines Issue 1979: Make Decimal comparisons (other than !=, ==) involving NaN raise InvalidOperation (and return False if InvalidOperation is trapped). ........ r60632 | mark.dickinson | 2008-02-06 23:25:16 +0100 (Wed, 06 Feb 2008) | 2 lines Remove incorrect usage of :const: in documentation. ........ r60634 | georg.brandl | 2008-02-07 00:45:51 +0100 (Thu, 07 Feb 2008) | 2 lines Revert accidental changes to test_queue in r60605. ........ r60636 | raymond.hettinger | 2008-02-07 01:54:20 +0100 (Thu, 07 Feb 2008) | 1 line Issue 2025: Add tuple.count() and tuple.index() to follow the ABC in collections.Sequence. ........ r60637 | mark.dickinson | 2008-02-07 02:14:23 +0100 (Thu, 07 Feb 2008) | 2 lines Fix broken link in decimal documentation. ........ r60638 | mark.dickinson | 2008-02-07 02:42:06 +0100 (Thu, 07 Feb 2008) | 3 lines IEEE 754 should be IEEE 854; give precise reference for comparisons involving NaNs. ........ r60639 | raymond.hettinger | 2008-02-07 03:12:52 +0100 (Thu, 07 Feb 2008) | 1 line Return ints instead of longs for tuple.count() and tuple.index(). ........ r60640 | raymond.hettinger | 2008-02-07 04:10:33 +0100 (Thu, 07 Feb 2008) | 1 line Merge 60627. ........ r60641 | raymond.hettinger | 2008-02-07 04:25:46 +0100 (Thu, 07 Feb 2008) | 1 line Merge r60628, r60631, and r60633. Register UserList and UserString will the appropriate ABCs. ........ r60642 | brett.cannon | 2008-02-07 08:47:31 +0100 (Thu, 07 Feb 2008) | 3 lines Cast a struct to a void pointer so as to do a type-safe pointer comparison (mistmatch found by clang). ........ r60643 | brett.cannon | 2008-02-07 09:04:07 +0100 (Thu, 07 Feb 2008) | 2 lines Remove unnecessary curly braces around an int literal. ........ r60644 | andrew.kuchling | 2008-02-07 12:43:47 +0100 (Thu, 07 Feb 2008) | 1 line Update URL ........ r60645 | facundo.batista | 2008-02-07 17:16:29 +0100 (Thu, 07 Feb 2008) | 4 lines Fixes issue 2026. Tests converted to unittest. Thanks Giampaolo Rodola. ........ r60646 | christian.heimes | 2008-02-07 18:15:30 +0100 (Thu, 07 Feb 2008) | 1 line Added some statistics code to dict and list object code. I wanted to test how a larger freelist affects the reusage of freed objects. Contrary to my gut feelings 80 objects is more than fine for small apps. I haven't profiled a large app yet. ........ r60648 | facundo.batista | 2008-02-07 20:06:52 +0100 (Thu, 07 Feb 2008) | 6 lines Fixes Issue 1401. When redirected, a possible POST get converted to GET, so it loses its payload. So, it also must lose the headers related to the payload (if it has no content any more, it shouldn't indicate content length and type). ........ r60649 | walter.doerwald | 2008-02-07 20:30:22 +0100 (Thu, 07 Feb 2008) | 3 lines Clarify that the output of TextCalendar.formatmonth() and TextCalendar.formatyear() for custom instances won't be influenced by calls to the module global setfirstweekday() function. Fixes #2018. ........ r60651 | walter.doerwald | 2008-02-07 20:48:34 +0100 (Thu, 07 Feb 2008) | 3 lines Fix documentation for Calendar.iterweekdays(): firstweekday is a property. Fixes second part of #2018. ........ r60653 | walter.doerwald | 2008-02-07 20:57:32 +0100 (Thu, 07 Feb 2008) | 2 lines Fix typo in docstring for Calendar.itermonthdays(). ........ r60655 | raymond.hettinger | 2008-02-07 21:04:37 +0100 (Thu, 07 Feb 2008) | 1 line The float conversion recipe is simpler in Py2.6 ........ r60657 | raymond.hettinger | 2008-02-07 21:10:49 +0100 (Thu, 07 Feb 2008) | 1 line Fix typo ........ r60660 | brett.cannon | 2008-02-07 23:27:10 +0100 (Thu, 07 Feb 2008) | 3 lines Make sure a switch statement does not have repetitive case statements. Error found through LLVM post-2.1 svn. ........ r60661 | christian.heimes | 2008-02-08 01:11:31 +0100 (Fri, 08 Feb 2008) | 1 line Deallocate content of the dict free list on interpreter shutdown ........ r60662 | christian.heimes | 2008-02-08 01:14:34 +0100 (Fri, 08 Feb 2008) | 1 line Use prefix decrement ........ r60663 | amaury.forgeotdarc | 2008-02-08 01:56:02 +0100 (Fri, 08 Feb 2008) | 5 lines issue 2045: Infinite recursion when printing a subclass of defaultdict, if default_factory is set to a bound method. Will backport. ........ r60667 | jeffrey.yasskin | 2008-02-08 07:45:40 +0100 (Fri, 08 Feb 2008) | 2 lines Oops! 2.6's Rational.__ne__ didn't work. ........ r60671 | hyeshik.chang | 2008-02-08 18:10:20 +0100 (Fri, 08 Feb 2008) | 2 lines Update big5hkscs codec to conform to the HKSCS:2004 revision. ........ r60673 | raymond.hettinger | 2008-02-08 23:30:04 +0100 (Fri, 08 Feb 2008) | 4 lines Remove unnecessary modulo division. The preceding test guarantees that 0 <= i < len. ........ r60674 | raymond.hettinger | 2008-02-09 00:02:27 +0100 (Sat, 09 Feb 2008) | 1 line Speed-up __iter__() mixin method. ........ r60675 | raymond.hettinger | 2008-02-09 00:34:21 +0100 (Sat, 09 Feb 2008) | 1 line Fill-in missing Set comparisons ........ r60677 | raymond.hettinger | 2008-02-09 00:57:06 +0100 (Sat, 09 Feb 2008) | 1 line Add advice on choosing between DictMixin and MutableMapping ........
2008-02-08 22:18:51 -04:00
/*
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
* U+00CA U+030C -> 8864
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
* U+00EA U+030C -> 88a5
* These are handled by not mapping tables but a hand-written code.
*/
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
ENCODER(big5hkscs)
{
while (*inpos < inlen) {
Py_UCS4 c = INCHAR1;
DBCHAR code;
Py_ssize_t insize;
if (c < 0x80) {
REQUIRE_OUTBUF(1);
**outbuf = (unsigned char)c;
NEXT(1, 1);
continue;
}
insize = 1;
REQUIRE_OUTBUF(2);
if (c < 0x10000) {
if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
if (code == MULTIC) {
Py_UCS4 c2;
if (inlen - *inpos >= 2)
c2 = INCHAR2;
else
c2 = 0;
if (inlen - *inpos >= 2 &&
((c & 0xffdf) == 0x00ca) &&
((c2 & 0xfff7) == 0x0304)) {
code = big5hkscs_pairenc_table[
((c >> 4) |
(c2 >> 3)) & 3];
insize = 2;
}
else if (inlen - *inpos < 2 &&
!(flags & MBENC_FLUSH))
return MBERR_TOOFEW;
else {
if (c == 0xca)
code = 0x8866;
else /* c == 0xea */
code = 0x88a7;
}
}
}
else if (TRYMAP_ENC(big5, code, c))
;
else
return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
;
else
return insize;
}
else
return insize;
OUTBYTE1(code >> 8);
OUTBYTE2(code & 0xFF);
NEXT(insize, 2);
}
return 0;
}
Merged revisions 60481,60485,60489-60492,60494-60496,60498-60499,60501-60503,60505-60506,60508-60509,60523-60524,60532,60543,60545,60547-60548,60552,60554,60556-60559,60561-60562,60569,60571-60572,60574,60576-60583,60585-60586,60589,60591,60594-60595,60597-60598,60600-60601,60606-60612,60615,60617-60678 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r60618 | walter.doerwald | 2008-02-06 15:31:55 +0100 (Wed, 06 Feb 2008) | 6 lines Remove month parameter from Calendar.yeardatescalendar(), Calendar.yeardays2calendar() and Calendar.yeardayscalendar() as the methods don't have such a parameter. Fixes issue #2017. Rewrap content to 80 chars. ........ r60622 | facundo.batista | 2008-02-06 20:28:49 +0100 (Wed, 06 Feb 2008) | 4 lines Fixes issue 1959. Converted tests to unittest. Thanks Giampaolo Rodola. ........ r60626 | thomas.heller | 2008-02-06 21:29:17 +0100 (Wed, 06 Feb 2008) | 3 lines Fixed refcounts and error handling. Should not be merged to py3k branch. ........ r60630 | mark.dickinson | 2008-02-06 23:10:50 +0100 (Wed, 06 Feb 2008) | 4 lines Issue 1979: Make Decimal comparisons (other than !=, ==) involving NaN raise InvalidOperation (and return False if InvalidOperation is trapped). ........ r60632 | mark.dickinson | 2008-02-06 23:25:16 +0100 (Wed, 06 Feb 2008) | 2 lines Remove incorrect usage of :const: in documentation. ........ r60634 | georg.brandl | 2008-02-07 00:45:51 +0100 (Thu, 07 Feb 2008) | 2 lines Revert accidental changes to test_queue in r60605. ........ r60636 | raymond.hettinger | 2008-02-07 01:54:20 +0100 (Thu, 07 Feb 2008) | 1 line Issue 2025: Add tuple.count() and tuple.index() to follow the ABC in collections.Sequence. ........ r60637 | mark.dickinson | 2008-02-07 02:14:23 +0100 (Thu, 07 Feb 2008) | 2 lines Fix broken link in decimal documentation. ........ r60638 | mark.dickinson | 2008-02-07 02:42:06 +0100 (Thu, 07 Feb 2008) | 3 lines IEEE 754 should be IEEE 854; give precise reference for comparisons involving NaNs. ........ r60639 | raymond.hettinger | 2008-02-07 03:12:52 +0100 (Thu, 07 Feb 2008) | 1 line Return ints instead of longs for tuple.count() and tuple.index(). ........ r60640 | raymond.hettinger | 2008-02-07 04:10:33 +0100 (Thu, 07 Feb 2008) | 1 line Merge 60627. ........ r60641 | raymond.hettinger | 2008-02-07 04:25:46 +0100 (Thu, 07 Feb 2008) | 1 line Merge r60628, r60631, and r60633. Register UserList and UserString will the appropriate ABCs. ........ r60642 | brett.cannon | 2008-02-07 08:47:31 +0100 (Thu, 07 Feb 2008) | 3 lines Cast a struct to a void pointer so as to do a type-safe pointer comparison (mistmatch found by clang). ........ r60643 | brett.cannon | 2008-02-07 09:04:07 +0100 (Thu, 07 Feb 2008) | 2 lines Remove unnecessary curly braces around an int literal. ........ r60644 | andrew.kuchling | 2008-02-07 12:43:47 +0100 (Thu, 07 Feb 2008) | 1 line Update URL ........ r60645 | facundo.batista | 2008-02-07 17:16:29 +0100 (Thu, 07 Feb 2008) | 4 lines Fixes issue 2026. Tests converted to unittest. Thanks Giampaolo Rodola. ........ r60646 | christian.heimes | 2008-02-07 18:15:30 +0100 (Thu, 07 Feb 2008) | 1 line Added some statistics code to dict and list object code. I wanted to test how a larger freelist affects the reusage of freed objects. Contrary to my gut feelings 80 objects is more than fine for small apps. I haven't profiled a large app yet. ........ r60648 | facundo.batista | 2008-02-07 20:06:52 +0100 (Thu, 07 Feb 2008) | 6 lines Fixes Issue 1401. When redirected, a possible POST get converted to GET, so it loses its payload. So, it also must lose the headers related to the payload (if it has no content any more, it shouldn't indicate content length and type). ........ r60649 | walter.doerwald | 2008-02-07 20:30:22 +0100 (Thu, 07 Feb 2008) | 3 lines Clarify that the output of TextCalendar.formatmonth() and TextCalendar.formatyear() for custom instances won't be influenced by calls to the module global setfirstweekday() function. Fixes #2018. ........ r60651 | walter.doerwald | 2008-02-07 20:48:34 +0100 (Thu, 07 Feb 2008) | 3 lines Fix documentation for Calendar.iterweekdays(): firstweekday is a property. Fixes second part of #2018. ........ r60653 | walter.doerwald | 2008-02-07 20:57:32 +0100 (Thu, 07 Feb 2008) | 2 lines Fix typo in docstring for Calendar.itermonthdays(). ........ r60655 | raymond.hettinger | 2008-02-07 21:04:37 +0100 (Thu, 07 Feb 2008) | 1 line The float conversion recipe is simpler in Py2.6 ........ r60657 | raymond.hettinger | 2008-02-07 21:10:49 +0100 (Thu, 07 Feb 2008) | 1 line Fix typo ........ r60660 | brett.cannon | 2008-02-07 23:27:10 +0100 (Thu, 07 Feb 2008) | 3 lines Make sure a switch statement does not have repetitive case statements. Error found through LLVM post-2.1 svn. ........ r60661 | christian.heimes | 2008-02-08 01:11:31 +0100 (Fri, 08 Feb 2008) | 1 line Deallocate content of the dict free list on interpreter shutdown ........ r60662 | christian.heimes | 2008-02-08 01:14:34 +0100 (Fri, 08 Feb 2008) | 1 line Use prefix decrement ........ r60663 | amaury.forgeotdarc | 2008-02-08 01:56:02 +0100 (Fri, 08 Feb 2008) | 5 lines issue 2045: Infinite recursion when printing a subclass of defaultdict, if default_factory is set to a bound method. Will backport. ........ r60667 | jeffrey.yasskin | 2008-02-08 07:45:40 +0100 (Fri, 08 Feb 2008) | 2 lines Oops! 2.6's Rational.__ne__ didn't work. ........ r60671 | hyeshik.chang | 2008-02-08 18:10:20 +0100 (Fri, 08 Feb 2008) | 2 lines Update big5hkscs codec to conform to the HKSCS:2004 revision. ........ r60673 | raymond.hettinger | 2008-02-08 23:30:04 +0100 (Fri, 08 Feb 2008) | 4 lines Remove unnecessary modulo division. The preceding test guarantees that 0 <= i < len. ........ r60674 | raymond.hettinger | 2008-02-09 00:02:27 +0100 (Sat, 09 Feb 2008) | 1 line Speed-up __iter__() mixin method. ........ r60675 | raymond.hettinger | 2008-02-09 00:34:21 +0100 (Sat, 09 Feb 2008) | 1 line Fill-in missing Set comparisons ........ r60677 | raymond.hettinger | 2008-02-09 00:57:06 +0100 (Sat, 09 Feb 2008) | 1 line Add advice on choosing between DictMixin and MutableMapping ........
2008-02-08 22:18:51 -04:00
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = INBYTE1;
Py_UCS4 decoded;
if (c < 0x80) {
OUTCHAR(c);
NEXT_IN(1);
continue;
}
REQUIRE_INBUF(2);
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
OUTCHAR(decoded);
NEXT_IN(2);
continue;
}
}
if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
{
int s = BH2S(c, INBYTE2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x87, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_12130;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21924;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
if (hintbase[s >> 3] & (1 << (s & 7))) {
OUTCHAR(decoded | 0x20000);
NEXT_IN(2);
}
else {
OUTCHAR(decoded);
NEXT_IN(2);
}
continue;
}
switch ((c << 8) | INBYTE2) {
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
default: return 1;
}
NEXT_IN(2); /* all decoded code points are pairs, above. */
}
return 0;
}
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
I_AM_A_MODULE_FOR(hk)