Make lower/upper/title work for non-BMP characters.

This commit is contained in:
Martin v. Löwis 2002-10-18 16:40:36 +00:00
parent ddc369a7d2
commit edf368c351
1 changed file with 15 additions and 24 deletions

View File

@ -62,18 +62,17 @@ int _PyUnicode_IsLinebreak(Py_UNICODE ch)
Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch) Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
int delta;
if (ctype->title) if (ctype->title)
ch += ctype->title; delta = ctype->title;
else else
ch += ctype->upper; delta = ctype->upper;
#ifdef Py_UNICODE_WIDE if (delta >= 32768)
/* The database assumes that the values wrap around at 0x10000. */ delta -= 65536;
if (ch > 0x10000)
ch -= 0x10000; return ch + delta;
#endif
return ch;
} }
/* Returns 1 for Unicode characters having the category 'Lt', 0 /* Returns 1 for Unicode characters having the category 'Lt', 0
@ -358,14 +357,10 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
int delta = ctype->upper;
ch += ctype->upper; if (delta >= 32768)
#ifdef Py_UNICODE_WIDE delta -= 65536;
/* The database assumes that the values wrap around at 0x10000. */ return ch + delta;
if (ch > 0x10000)
ch -= 0x10000;
#endif
return ch;
} }
/* Returns the lowercase Unicode characters corresponding to ch or just /* Returns the lowercase Unicode characters corresponding to ch or just
@ -374,14 +369,10 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
int delta = ctype->lower;
ch += ctype->lower; if (delta >= 32768)
#ifdef Py_UNICODE_WIDE delta -= 65536;
/* The database assumes that the values wrap around at 0x10000. */ return ch + delta;
if (ch > 0x10000)
ch -= 0x10000;
#endif
return ch;
} }
/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt', /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',