Verify that the code point in a "CJK UNIFIED IDEOGRAPH-XXXX" name actually denotes an ideograph.

This commit is contained in:
Martin v. Löwis 2002-11-23 22:10:29 +00:00
parent 677bde2dd1
commit 8d93ca1383
1 changed files with 12 additions and 3 deletions

View File

@ -580,6 +580,15 @@ static char *hangul_syllables[][3] = {
{ 0, 0, "H" }
};
/* Return 1 when `code` falls inside one of the CJK unified ideograph
   ranges checked below, and 0 otherwise. */
static int
is_unified_ideograph(Py_UCS4 code)
{
    /* CJK Ideograph Extension A */
    if (code >= 0x3400 && code <= 0x4DB5) {
        return 1;
    }
    /* CJK Ideograph */
    if (code >= 0x4E00 && code <= 0x9FA5) {
        return 1;
    }
    /* CJK Ideograph Extension B */
    if (code >= 0x20000 && code <= 0x2A6D6) {
        return 1;
    }
    return 0;
}
static int
_getucname(Py_UCS4 code, char* buffer, int buflen)
{
@ -610,9 +619,7 @@ _getucname(Py_UCS4 code, char* buffer, int buflen)
return 1;
}
if ((0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
(0x4E00 <= code && code <= 0x9FA5) || /* CJK Ideograph */
(0x20000 <= code && code <= 0x2A6D6)) {/* CJK Ideograph Extension B */
if (is_unified_ideograph(code)) {
if (buflen < 28)
/* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
return 0;
@ -743,6 +750,8 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
return 0;
name++;
}
if (!is_unified_ideograph(v))
return 0;
*code = v;
return 1;
}