Issue #5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept
and return characters from the full Unicode range (Py_UCS4).

The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit()
  now return the correct value for large code points
- repr() may consider more characters as printable.
This commit is contained in:
Amaury Forgeot d'Arc 2010-08-18 20:44:58 +00:00
parent 36e778ef02
commit 324ac65ceb
7 changed files with 69 additions and 232 deletions

View File

@ -221,24 +221,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
# define _PyUnicode_Init _PyUnicodeUCS2_Init # define _PyUnicode_Init _PyUnicodeUCS2_Init
# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable
# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
#else #else
@ -322,24 +304,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
# define _PyUnicode_Init _PyUnicodeUCS4_Init # define _PyUnicode_Init _PyUnicodeUCS4_Init
# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
# define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable
# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
#endif #endif
@ -351,7 +315,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
configure Python using --with-wctype-functions. This reduces the configure Python using --with-wctype-functions. This reduces the
interpreter's code size. */ interpreter's code size. */
#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) #if defined(Py_UNICODE_WIDE) && defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
#include <wctype.h> #include <wctype.h>
@ -1542,75 +1506,75 @@ PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
*/ */
PyAPI_FUNC(int) _PyUnicode_IsLowercase( PyAPI_FUNC(int) _PyUnicode_IsLowercase(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsUppercase( PyAPI_FUNC(int) _PyUnicode_IsUppercase(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsTitlecase( PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsXidStart( PyAPI_FUNC(int) _PyUnicode_IsXidStart(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsXidContinue( PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsWhitespace( PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
const Py_UNICODE ch /* Unicode character */ const Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsLinebreak( PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
const Py_UNICODE ch /* Unicode character */ const Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_ToDigit( PyAPI_FUNC(int) _PyUnicode_ToDigit(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(double) _PyUnicode_ToNumeric( PyAPI_FUNC(double) _PyUnicode_ToNumeric(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsDigit( PyAPI_FUNC(int) _PyUnicode_IsDigit(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsNumeric( PyAPI_FUNC(int) _PyUnicode_IsNumeric(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsPrintable( PyAPI_FUNC(int) _PyUnicode_IsPrintable(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsAlpha( PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Py_UNICODE ch /* Unicode character */ Py_UCS4 ch /* Unicode character */
); );
PyAPI_FUNC(size_t) Py_UNICODE_strlen( PyAPI_FUNC(size_t) Py_UNICODE_strlen(

View File

@ -1353,6 +1353,10 @@ class UnicodeTest(string_tests.CommonTest,
self.assertEqual(repr(s1()), '\\n') self.assertEqual(repr(s1()), '\\n')
self.assertEqual(repr(s2()), '\\n') self.assertEqual(repr(s2()), '\\n')
def test_printable_repr(self):
self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
self.assertEqual(repr('\U00011000'), "'\\U00011000'") # nonprintable
def test_expandtabs_overflows_gracefully(self): def test_expandtabs_overflows_gracefully(self):
# This test only affects 32-bit platforms because expandtabs can only take # This test only affects 32-bit platforms because expandtabs can only take
# an int as the max value, not a 64-bit C long. If expandtabs is changed # an int as the max value, not a 64-bit C long. If expandtabs is changed

View File

@ -294,6 +294,12 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
self.assertEqual(len(lines), 1, self.assertEqual(len(lines), 1,
r"\u%.4x should not be a linebreak" % i) r"\u%.4x should not be a linebreak" % i)
def test_UCS4(self):
# unicodedata should work with code points outside the BMP
# even on a narrow Unicode build
self.assertEqual(self.db.category(u"\U0001012A"), "No")
self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
def test_main(): def test_main():
test.support.run_unittest( test.support.run_unittest(
UnicodeMiscTest, UnicodeMiscTest,

View File

@ -12,6 +12,12 @@ What's New in Python 3.2 Alpha 2?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #5127: The C functions that access the Unicode Database now accept and
return characters from the full Unicode range, even on narrow unicode builds
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference
in Python is that unicodedata.numeric() now returns the correct value for
large code points, and repr() may consider more characters as printable.
- Issue #9425: Create PyModule_GetFilenameObject() function to get the filename - Issue #9425: Create PyModule_GetFilenameObject() function to get the filename
as a unicode object, instead of a byte string. Function needed to support as a unicode object, instead of a byte string. Function needed to support
unencodable filenames. Deprecate PyModule_GetFilename() in favor of the new unencodable filenames. Deprecate PyModule_GetFilename() in favor of the new

View File

@ -26,9 +26,9 @@
#define NUMERIC_MASK 0x1000 #define NUMERIC_MASK 0x1000
typedef struct { typedef struct {
const Py_UNICODE upper; const Py_UCS4 upper;
const Py_UNICODE lower; const Py_UCS4 lower;
const Py_UNICODE title; const Py_UCS4 title;
const unsigned char decimal; const unsigned char decimal;
const unsigned char digit; const unsigned char digit;
const unsigned short flags; const unsigned short flags;
@ -37,15 +37,13 @@ typedef struct {
#include "unicodetype_db.h" #include "unicodetype_db.h"
static const _PyUnicode_TypeRecord * static const _PyUnicode_TypeRecord *
gettyperecord(Py_UNICODE code) gettyperecord(Py_UCS4 code)
{ {
int index; int index;
#ifdef Py_UNICODE_WIDE
if (code >= 0x110000) if (code >= 0x110000)
index = 0; index = 0;
else else
#endif
{ {
index = index1[(code>>SHIFT)]; index = index1[(code>>SHIFT)];
index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
@ -57,7 +55,7 @@ gettyperecord(Py_UNICODE code)
/* Returns the titlecase Unicode characters corresponding to ch or just /* Returns the titlecase Unicode characters corresponding to ch or just
ch if no titlecase mapping is known. */ ch if no titlecase mapping is known. */
Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch) Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
int delta = ctype->title; int delta = ctype->title;
@ -74,7 +72,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
/* Returns 1 for Unicode characters having the category 'Lt', 0 /* Returns 1 for Unicode characters having the category 'Lt', 0
otherwise. */ otherwise. */
int _PyUnicode_IsTitlecase(Py_UNICODE ch) int _PyUnicode_IsTitlecase(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -84,7 +82,7 @@ int _PyUnicode_IsTitlecase(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the XID_Start property, 0 /* Returns 1 for Unicode characters having the XID_Start property, 0
otherwise. */ otherwise. */
int _PyUnicode_IsXidStart(Py_UNICODE ch) int _PyUnicode_IsXidStart(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -94,7 +92,7 @@ int _PyUnicode_IsXidStart(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the XID_Continue property, /* Returns 1 for Unicode characters having the XID_Continue property,
0 otherwise. */ 0 otherwise. */
int _PyUnicode_IsXidContinue(Py_UNICODE ch) int _PyUnicode_IsXidContinue(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -104,14 +102,14 @@ int _PyUnicode_IsXidContinue(Py_UNICODE ch)
/* Returns the integer decimal (0-9) for Unicode characters having /* Returns the integer decimal (0-9) for Unicode characters having
this property, -1 otherwise. */ this property, -1 otherwise. */
int _PyUnicode_ToDecimalDigit(Py_UNICODE ch) int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1; return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
} }
int _PyUnicode_IsDecimalDigit(Py_UNICODE ch) int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)
{ {
if (_PyUnicode_ToDecimalDigit(ch) < 0) if (_PyUnicode_ToDecimalDigit(ch) < 0)
return 0; return 0;
@ -121,14 +119,14 @@ int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
/* Returns the integer digit (0-9) for Unicode characters having /* Returns the integer digit (0-9) for Unicode characters having
this property, -1 otherwise. */ this property, -1 otherwise. */
int _PyUnicode_ToDigit(Py_UNICODE ch) int _PyUnicode_ToDigit(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1; return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
} }
int _PyUnicode_IsDigit(Py_UNICODE ch) int _PyUnicode_IsDigit(Py_UCS4 ch)
{ {
if (_PyUnicode_ToDigit(ch) < 0) if (_PyUnicode_ToDigit(ch) < 0)
return 0; return 0;
@ -138,7 +136,7 @@ int _PyUnicode_IsDigit(Py_UNICODE ch)
/* Returns 1 for Unicode characters having a numeric value, /* Returns 1 for Unicode characters having a numeric value,
0 otherwise. */ 0 otherwise. */
int _PyUnicode_IsNumeric(Py_UNICODE ch) int _PyUnicode_IsNumeric(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -158,7 +156,7 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
* Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
* Zs (Separator, Space) other than ASCII space('\x20'). * Zs (Separator, Space) other than ASCII space('\x20').
*/ */
int _PyUnicode_IsPrintable(Py_UNICODE ch) int _PyUnicode_IsPrintable(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -170,7 +168,7 @@ int _PyUnicode_IsPrintable(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the category 'Ll', 0 /* Returns 1 for Unicode characters having the category 'Ll', 0
otherwise. */ otherwise. */
int _PyUnicode_IsLowercase(Py_UNICODE ch) int _PyUnicode_IsLowercase(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -180,7 +178,7 @@ int _PyUnicode_IsLowercase(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the category 'Lu', 0 /* Returns 1 for Unicode characters having the category 'Lu', 0
otherwise. */ otherwise. */
int _PyUnicode_IsUppercase(Py_UNICODE ch) int _PyUnicode_IsUppercase(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -190,7 +188,7 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
/* Returns the uppercase Unicode characters corresponding to ch or just /* Returns the uppercase Unicode characters corresponding to ch or just
ch if no uppercase mapping is known. */ ch if no uppercase mapping is known. */
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
int delta = ctype->upper; int delta = ctype->upper;
@ -204,7 +202,7 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
/* Returns the lowercase Unicode characters corresponding to ch or just /* Returns the lowercase Unicode characters corresponding to ch or just
ch if no lowercase mapping is known. */ ch if no lowercase mapping is known. */
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
int delta = ctype->lower; int delta = ctype->lower;
@ -218,7 +216,7 @@ Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt', /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
'Lo' or 'Lm', 0 otherwise. */ 'Lo' or 'Lm', 0 otherwise. */
int _PyUnicode_IsAlpha(Py_UNICODE ch) int _PyUnicode_IsAlpha(Py_UCS4 ch)
{ {
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@ -230,27 +228,27 @@ int _PyUnicode_IsAlpha(Py_UNICODE ch)
/* Export the interfaces using the wchar_t type for portability /* Export the interfaces using the wchar_t type for portability
reasons: */ reasons: */
int _PyUnicode_IsLowercase(Py_UNICODE ch) int _PyUnicode_IsLowercase(Py_UCS4 ch)
{ {
return iswlower(ch); return iswlower(ch);
} }
int _PyUnicode_IsUppercase(Py_UNICODE ch) int _PyUnicode_IsUppercase(Py_UCS4 ch)
{ {
return iswupper(ch); return iswupper(ch);
} }
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
{ {
return towlower(ch); return towlower(ch);
} }
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
{ {
return towupper(ch); return towupper(ch);
} }
int _PyUnicode_IsAlpha(Py_UNICODE ch) int _PyUnicode_IsAlpha(Py_UCS4 ch)
{ {
return iswalpha(ch); return iswalpha(ch);
} }

View File

@ -1980,7 +1980,7 @@ static unsigned char index2[] = {
/* Returns the numeric value as double for Unicode characters /* Returns the numeric value as double for Unicode characters
* having this property, -1.0 otherwise. * having this property, -1.0 otherwise.
*/ */
double _PyUnicode_ToNumeric(Py_UNICODE ch) double _PyUnicode_ToNumeric(Py_UCS4 ch)
{ {
switch (ch) { switch (ch) {
case 0x0F33: case 0x0F33:
@ -2031,7 +2031,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xABF0: case 0xABF0:
case 0xF9B2: case 0xF9B2:
case 0xFF10: case 0xFF10:
#ifdef Py_UNICODE_WIDE
case 0x1018A: case 0x1018A:
case 0x104A0: case 0x104A0:
case 0x1D7CE: case 0x1D7CE:
@ -2041,7 +2040,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7F6: case 0x1D7F6:
case 0x1F100: case 0x1F100:
case 0x1F101: case 0x1F101:
#endif
return (double) 0.0; return (double) 0.0;
case 0x0031: case 0x0031:
case 0x00B9: case 0x00B9:
@ -2105,7 +2103,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xAA51: case 0xAA51:
case 0xABF1: case 0xABF1:
case 0xFF11: case 0xFF11:
#ifdef Py_UNICODE_WIDE
case 0x10107: case 0x10107:
case 0x10142: case 0x10142:
case 0x10158: case 0x10158:
@ -2135,7 +2132,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7F7: case 0x1D7F7:
case 0x1F102: case 0x1F102:
case 0x2092A: case 0x2092A:
#endif
return (double) 1.0; return (double) 1.0;
case 0x2152: case 0x2152:
return (double) 1.0/10.0; return (double) 1.0/10.0;
@ -2147,46 +2143,36 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x0F2A: case 0x0F2A:
case 0x2CFD: case 0x2CFD:
case 0xA831: case 0xA831:
#ifdef Py_UNICODE_WIDE
case 0x10141: case 0x10141:
case 0x10175: case 0x10175:
case 0x10176: case 0x10176:
case 0x10E7B: case 0x10E7B:
#endif
return (double) 1.0/2.0; return (double) 1.0/2.0;
case 0x2153: case 0x2153:
#ifdef Py_UNICODE_WIDE
case 0x10E7D: case 0x10E7D:
case 0x1245A: case 0x1245A:
case 0x1245D: case 0x1245D:
#endif
return (double) 1.0/3.0; return (double) 1.0/3.0;
case 0x00BC: case 0x00BC:
case 0x09F7: case 0x09F7:
case 0x0D73: case 0x0D73:
case 0xA830: case 0xA830:
#ifdef Py_UNICODE_WIDE
case 0x10140: case 0x10140:
case 0x10E7C: case 0x10E7C:
case 0x12460: case 0x12460:
case 0x12462: case 0x12462:
#endif
return (double) 1.0/4.0; return (double) 1.0/4.0;
case 0x2155: case 0x2155:
return (double) 1.0/5.0; return (double) 1.0/5.0;
case 0x2159: case 0x2159:
#ifdef Py_UNICODE_WIDE
case 0x12461: case 0x12461:
#endif
return (double) 1.0/6.0; return (double) 1.0/6.0;
case 0x2150: case 0x2150:
return (double) 1.0/7.0; return (double) 1.0/7.0;
case 0x09F5: case 0x09F5:
case 0x215B: case 0x215B:
case 0xA834: case 0xA834:
#ifdef Py_UNICODE_WIDE
case 0x1245F: case 0x1245F:
#endif
return (double) 1.0/8.0; return (double) 1.0/8.0;
case 0x2151: case 0x2151:
return (double) 1.0/9.0; return (double) 1.0/9.0;
@ -2210,7 +2196,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x62FE: case 0x62FE:
case 0xF973: case 0xF973:
case 0xF9FD: case 0xF9FD:
#ifdef Py_UNICODE_WIDE
case 0x10110: case 0x10110:
case 0x10149: case 0x10149:
case 0x10150: case 0x10150:
@ -2229,7 +2214,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x10B7C: case 0x10B7C:
case 0x10E69: case 0x10E69:
case 0x1D369: case 0x1D369:
#endif
return (double) 10.0; return (double) 10.0;
case 0x0BF1: case 0x0BF1:
case 0x0D71: case 0x0D71:
@ -2239,7 +2223,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x4F70: case 0x4F70:
case 0x767E: case 0x767E:
case 0x964C: case 0x964C:
#ifdef Py_UNICODE_WIDE
case 0x10119: case 0x10119:
case 0x1014B: case 0x1014B:
case 0x10152: case 0x10152:
@ -2251,7 +2234,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x10B5E: case 0x10B5E:
case 0x10B7E: case 0x10B7E:
case 0x10E72: case 0x10E72:
#endif
return (double) 100.0; return (double) 100.0;
case 0x0BF2: case 0x0BF2:
case 0x0D72: case 0x0D72:
@ -2261,7 +2243,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x4EDF: case 0x4EDF:
case 0x5343: case 0x5343:
case 0x9621: case 0x9621:
#ifdef Py_UNICODE_WIDE
case 0x10122: case 0x10122:
case 0x1014D: case 0x1014D:
case 0x10154: case 0x10154:
@ -2270,17 +2251,14 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x10A47: case 0x10A47:
case 0x10B5F: case 0x10B5F:
case 0x10B7F: case 0x10B7F:
#endif
return (double) 1000.0; return (double) 1000.0;
case 0x137C: case 0x137C:
case 0x2182: case 0x2182:
case 0x4E07: case 0x4E07:
case 0x842C: case 0x842C:
#ifdef Py_UNICODE_WIDE
case 0x1012B: case 0x1012B:
case 0x10155: case 0x10155:
case 0x1085F: case 0x1085F:
#endif
return (double) 10000.0; return (double) 10000.0;
case 0x2188: case 0x2188:
return (double) 100000.0; return (double) 100000.0;
@ -2414,7 +2392,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xABF2: case 0xABF2:
case 0xF978: case 0xF978:
case 0xFF12: case 0xFF12:
#ifdef Py_UNICODE_WIDE
case 0x10108: case 0x10108:
case 0x1015B: case 0x1015B:
case 0x1015C: case 0x1015C:
@ -2445,15 +2422,12 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7F8: case 0x1D7F8:
case 0x1F103: case 0x1F103:
case 0x22390: case 0x22390:
#endif
return (double) 2.0; return (double) 2.0;
case 0x2154: case 0x2154:
#ifdef Py_UNICODE_WIDE
case 0x10177: case 0x10177:
case 0x10E7E: case 0x10E7E:
case 0x1245B: case 0x1245B:
case 0x1245E: case 0x1245E:
#endif
return (double) 2.0/3.0; return (double) 2.0/3.0;
case 0x2156: case 0x2156:
return (double) 2.0/5.0; return (double) 2.0/5.0;
@ -2465,7 +2439,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x3039: case 0x3039:
case 0x5344: case 0x5344:
case 0x5EFF: case 0x5EFF:
#ifdef Py_UNICODE_WIDE
case 0x10111: case 0x10111:
case 0x103D4: case 0x103D4:
case 0x1085C: case 0x1085C:
@ -2475,21 +2448,14 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x10B7D: case 0x10B7D:
case 0x10E6A: case 0x10E6A:
case 0x1D36A: case 0x1D36A:
#endif
return (double) 20.0; return (double) 20.0;
#ifdef Py_UNICODE_WIDE
case 0x1011A: case 0x1011A:
case 0x10E73: case 0x10E73:
return (double) 200.0; return (double) 200.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10123: case 0x10123:
return (double) 2000.0; return (double) 2000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x1012C: case 0x1012C:
return (double) 20000.0; return (double) 20000.0;
#endif
case 0x3251: case 0x3251:
return (double) 21.0; return (double) 21.0;
case 0x3252: case 0x3252:
@ -2571,7 +2537,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xABF3: case 0xABF3:
case 0xF96B: case 0xF96B:
case 0xFF13: case 0xFF13:
#ifdef Py_UNICODE_WIDE
case 0x10109: case 0x10109:
case 0x104A3: case 0x104A3:
case 0x1085A: case 0x1085A:
@ -2605,7 +2570,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x20B19: case 0x20B19:
case 0x22998: case 0x22998:
case 0x23B1B: case 0x23B1B:
#endif
return (double) 3.0; return (double) 3.0;
case 0x09F6: case 0x09F6:
case 0xA835: case 0xA835:
@ -2616,9 +2580,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x09F8: case 0x09F8:
case 0x0D75: case 0x0D75:
case 0xA832: case 0xA832:
#ifdef Py_UNICODE_WIDE
case 0x10178: case 0x10178:
#endif
return (double) 3.0/4.0; return (double) 3.0/4.0;
case 0x2157: case 0x2157:
return (double) 3.0/5.0; return (double) 3.0/5.0;
@ -2628,28 +2590,20 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x303A: case 0x303A:
case 0x325A: case 0x325A:
case 0x5345: case 0x5345:
#ifdef Py_UNICODE_WIDE
case 0x10112: case 0x10112:
case 0x10165: case 0x10165:
case 0x10E6B: case 0x10E6B:
case 0x1D36B: case 0x1D36B:
case 0x20983: case 0x20983:
#endif
return (double) 30.0; return (double) 30.0;
#ifdef Py_UNICODE_WIDE
case 0x1011B: case 0x1011B:
case 0x1016B: case 0x1016B:
case 0x10E74: case 0x10E74:
return (double) 300.0; return (double) 300.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10124: case 0x10124:
return (double) 3000.0; return (double) 3000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x1012D: case 0x1012D:
return (double) 30000.0; return (double) 30000.0;
#endif
case 0x325B: case 0x325B:
return (double) 31.0; return (double) 31.0;
case 0x325C: case 0x325C:
@ -2724,7 +2678,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xAA54: case 0xAA54:
case 0xABF4: case 0xABF4:
case 0xFF14: case 0xFF14:
#ifdef Py_UNICODE_WIDE
case 0x1010A: case 0x1010A:
case 0x104A4: case 0x104A4:
case 0x10A43: case 0x10A43:
@ -2756,34 +2709,25 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x20064: case 0x20064:
case 0x200E2: case 0x200E2:
case 0x2626D: case 0x2626D:
#endif
return (double) 4.0; return (double) 4.0;
case 0x2158: case 0x2158:
return (double) 4.0/5.0; return (double) 4.0/5.0;
case 0x1375: case 0x1375:
case 0x32B5: case 0x32B5:
case 0x534C: case 0x534C:
#ifdef Py_UNICODE_WIDE
case 0x10113: case 0x10113:
case 0x10E6C: case 0x10E6C:
case 0x1D36C: case 0x1D36C:
case 0x2098C: case 0x2098C:
case 0x2099C: case 0x2099C:
#endif
return (double) 40.0; return (double) 40.0;
#ifdef Py_UNICODE_WIDE
case 0x1011C: case 0x1011C:
case 0x10E75: case 0x10E75:
return (double) 400.0; return (double) 400.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10125: case 0x10125:
return (double) 4000.0; return (double) 4000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x1012E: case 0x1012E:
return (double) 40000.0; return (double) 40000.0;
#endif
case 0x32B6: case 0x32B6:
return (double) 41.0; return (double) 41.0;
case 0x32B7: case 0x32B7:
@ -2858,7 +2802,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xAA55: case 0xAA55:
case 0xABF5: case 0xABF5:
case 0xFF15: case 0xFF15:
#ifdef Py_UNICODE_WIDE
case 0x1010B: case 0x1010B:
case 0x10143: case 0x10143:
case 0x10148: case 0x10148:
@ -2887,14 +2830,11 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7FB: case 0x1D7FB:
case 0x1F106: case 0x1F106:
case 0x20121: case 0x20121:
#endif
return (double) 5.0; return (double) 5.0;
case 0x0F2C: case 0x0F2C:
return (double) 5.0/2.0; return (double) 5.0/2.0;
case 0x215A: case 0x215A:
#ifdef Py_UNICODE_WIDE
case 0x1245C: case 0x1245C:
#endif
return (double) 5.0/6.0; return (double) 5.0/6.0;
case 0x215D: case 0x215D:
return (double) 5.0/8.0; return (double) 5.0/8.0;
@ -2903,7 +2843,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x217C: case 0x217C:
case 0x2186: case 0x2186:
case 0x32BF: case 0x32BF:
#ifdef Py_UNICODE_WIDE
case 0x10114: case 0x10114:
case 0x10144: case 0x10144:
case 0x1014A: case 0x1014A:
@ -2917,11 +2856,9 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x10A7E: case 0x10A7E:
case 0x10E6D: case 0x10E6D:
case 0x1D36D: case 0x1D36D:
#endif
return (double) 50.0; return (double) 50.0;
case 0x216E: case 0x216E:
case 0x217E: case 0x217E:
#ifdef Py_UNICODE_WIDE
case 0x1011D: case 0x1011D:
case 0x10145: case 0x10145:
case 0x1014C: case 0x1014C:
@ -2932,22 +2869,17 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1016F: case 0x1016F:
case 0x10170: case 0x10170:
case 0x10E76: case 0x10E76:
#endif
return (double) 500.0; return (double) 500.0;
case 0x2181: case 0x2181:
#ifdef Py_UNICODE_WIDE
case 0x10126: case 0x10126:
case 0x10146: case 0x10146:
case 0x1014E: case 0x1014E:
case 0x10172: case 0x10172:
#endif
return (double) 5000.0; return (double) 5000.0;
case 0x2187: case 0x2187:
#ifdef Py_UNICODE_WIDE
case 0x1012F: case 0x1012F:
case 0x10147: case 0x10147:
case 0x10156: case 0x10156:
#endif
return (double) 50000.0; return (double) 50000.0;
case 0x0036: case 0x0036:
case 0x0666: case 0x0666:
@ -3007,7 +2939,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xF9D1: case 0xF9D1:
case 0xF9D3: case 0xF9D3:
case 0xFF16: case 0xFF16:
#ifdef Py_UNICODE_WIDE
case 0x1010C: case 0x1010C:
case 0x104A6: case 0x104A6:
case 0x10E65: case 0x10E65:
@ -3026,28 +2957,19 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7FC: case 0x1D7FC:
case 0x1F107: case 0x1F107:
case 0x20AEA: case 0x20AEA:
#endif
return (double) 6.0; return (double) 6.0;
case 0x1377: case 0x1377:
#ifdef Py_UNICODE_WIDE
case 0x10115: case 0x10115:
case 0x10E6E: case 0x10E6E:
case 0x1D36E: case 0x1D36E:
#endif
return (double) 60.0; return (double) 60.0;
#ifdef Py_UNICODE_WIDE
case 0x1011E: case 0x1011E:
case 0x10E77: case 0x10E77:
return (double) 600.0; return (double) 600.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10127: case 0x10127:
return (double) 6000.0; return (double) 6000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10130: case 0x10130:
return (double) 60000.0; return (double) 60000.0;
#endif
case 0x0037: case 0x0037:
case 0x0667: case 0x0667:
case 0x06F7: case 0x06F7:
@ -3104,7 +3026,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xAA57: case 0xAA57:
case 0xABF7: case 0xABF7:
case 0xFF17: case 0xFF17:
#ifdef Py_UNICODE_WIDE
case 0x1010D: case 0x1010D:
case 0x104A7: case 0x104A7:
case 0x10E66: case 0x10E66:
@ -3124,32 +3045,23 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7FD: case 0x1D7FD:
case 0x1F108: case 0x1F108:
case 0x20001: case 0x20001:
#endif
return (double) 7.0; return (double) 7.0;
case 0x0F2D: case 0x0F2D:
return (double) 7.0/2.0; return (double) 7.0/2.0;
case 0x215E: case 0x215E:
return (double) 7.0/8.0; return (double) 7.0/8.0;
case 0x1378: case 0x1378:
#ifdef Py_UNICODE_WIDE
case 0x10116: case 0x10116:
case 0x10E6F: case 0x10E6F:
case 0x1D36F: case 0x1D36F:
#endif
return (double) 70.0; return (double) 70.0;
#ifdef Py_UNICODE_WIDE
case 0x1011F: case 0x1011F:
case 0x10E78: case 0x10E78:
return (double) 700.0; return (double) 700.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10128: case 0x10128:
return (double) 7000.0; return (double) 7000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10131: case 0x10131:
return (double) 70000.0; return (double) 70000.0;
#endif
case 0x0038: case 0x0038:
case 0x0668: case 0x0668:
case 0x06F8: case 0x06F8:
@ -3204,7 +3116,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xAA58: case 0xAA58:
case 0xABF8: case 0xABF8:
case 0xFF18: case 0xFF18:
#ifdef Py_UNICODE_WIDE
case 0x1010E: case 0x1010E:
case 0x104A8: case 0x104A8:
case 0x10E67: case 0x10E67:
@ -3222,28 +3133,19 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7F4: case 0x1D7F4:
case 0x1D7FE: case 0x1D7FE:
case 0x1F109: case 0x1F109:
#endif
return (double) 8.0; return (double) 8.0;
case 0x1379: case 0x1379:
#ifdef Py_UNICODE_WIDE
case 0x10117: case 0x10117:
case 0x10E70: case 0x10E70:
case 0x1D370: case 0x1D370:
#endif
return (double) 80.0; return (double) 80.0;
#ifdef Py_UNICODE_WIDE
case 0x10120: case 0x10120:
case 0x10E79: case 0x10E79:
return (double) 800.0; return (double) 800.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10129: case 0x10129:
return (double) 8000.0; return (double) 8000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10132: case 0x10132:
return (double) 80000.0; return (double) 80000.0;
#endif
case 0x0039: case 0x0039:
case 0x0669: case 0x0669:
case 0x06F9: case 0x06F9:
@ -3299,7 +3201,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0xAA59: case 0xAA59:
case 0xABF9: case 0xABF9:
case 0xFF19: case 0xFF19:
#ifdef Py_UNICODE_WIDE
case 0x1010F: case 0x1010F:
case 0x104A9: case 0x104A9:
case 0x10E68: case 0x10E68:
@ -3320,32 +3221,23 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
case 0x1D7FF: case 0x1D7FF:
case 0x1F10A: case 0x1F10A:
case 0x2F890: case 0x2F890:
#endif
return (double) 9.0; return (double) 9.0;
case 0x0F2E: case 0x0F2E:
return (double) 9.0/2.0; return (double) 9.0/2.0;
case 0x137A: case 0x137A:
#ifdef Py_UNICODE_WIDE
case 0x10118: case 0x10118:
case 0x10341: case 0x10341:
case 0x10E71: case 0x10E71:
case 0x1D371: case 0x1D371:
#endif
return (double) 90.0; return (double) 90.0;
#ifdef Py_UNICODE_WIDE
case 0x10121: case 0x10121:
case 0x1034A: case 0x1034A:
case 0x10E7A: case 0x10E7A:
return (double) 900.0; return (double) 900.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x1012A: case 0x1012A:
return (double) 9000.0; return (double) 9000.0;
#endif
#ifdef Py_UNICODE_WIDE
case 0x10133: case 0x10133:
return (double) 90000.0; return (double) 90000.0;
#endif
} }
return -1.0; return -1.0;
} }
@ -3353,7 +3245,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
/* Returns 1 for Unicode characters having the bidirectional /* Returns 1 for Unicode characters having the bidirectional
* type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.
*/ */
int _PyUnicode_IsWhitespace(register const Py_UNICODE ch) int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)
{ {
#ifdef WANT_WCTYPE_FUNCTIONS #ifdef WANT_WCTYPE_FUNCTIONS
return iswspace(ch); return iswspace(ch);
@ -3399,7 +3291,7 @@ int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
* property 'BK', 'CR', 'LF' or 'NL' or having bidirectional * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
* type 'B', 0 otherwise. * type 'B', 0 otherwise.
*/ */
int _PyUnicode_IsLinebreak(register const Py_UNICODE ch) int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)
{ {
switch (ch) { switch (ch) {
case 0x000A: case 0x000A:

View File

@ -28,7 +28,7 @@
import sys import sys
SCRIPT = sys.argv[0] SCRIPT = sys.argv[0]
VERSION = "2.6" VERSION = "3.2"
# The Unicode Database # The Unicode Database
UNIDATA_VERSION = "5.2.0" UNIDATA_VERSION = "5.2.0"
@ -479,7 +479,7 @@ def makeunicodetype(unicode, trace):
print('/* Returns the numeric value as double for Unicode characters', file=fp) print('/* Returns the numeric value as double for Unicode characters', file=fp)
print(' * having this property, -1.0 otherwise.', file=fp) print(' * having this property, -1.0 otherwise.', file=fp)
print(' */', file=fp) print(' */', file=fp)
print('double _PyUnicode_ToNumeric(Py_UNICODE ch)', file=fp) print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
print('{', file=fp) print('{', file=fp)
print(' switch (ch) {', file=fp) print(' switch (ch) {', file=fp)
for value, codepoints in numeric_items: for value, codepoints in numeric_items:
@ -488,21 +488,10 @@ def makeunicodetype(unicode, trace):
parts = [repr(float(part)) for part in parts] parts = [repr(float(part)) for part in parts]
value = '/'.join(parts) value = '/'.join(parts)
haswide = False
hasnonewide = False
codepoints.sort() codepoints.sort()
for codepoint in codepoints: for codepoint in codepoints:
if codepoint < 0x10000:
hasnonewide = True
if codepoint >= 0x10000 and not haswide:
print('#ifdef Py_UNICODE_WIDE', file=fp)
haswide = True
print(' case 0x%04X:' % (codepoint,), file=fp) print(' case 0x%04X:' % (codepoint,), file=fp)
if haswide and hasnonewide:
print('#endif', file=fp)
print(' return (double) %s;' % (value,), file=fp) print(' return (double) %s;' % (value,), file=fp)
if haswide and not hasnonewide:
print('#endif', file=fp)
print(' }', file=fp) print(' }', file=fp)
print(' return -1.0;', file=fp) print(' return -1.0;', file=fp)
print('}', file=fp) print('}', file=fp)
@ -512,27 +501,16 @@ def makeunicodetype(unicode, trace):
print("/* Returns 1 for Unicode characters having the bidirectional", file=fp) print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp) print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
print(" */", file=fp) print(" */", file=fp)
print('int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)', file=fp) print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp)
print('{', file=fp) print('{', file=fp)
print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp) print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp)
print(' return iswspace(ch);', file=fp) print(' return iswspace(ch);', file=fp)
print('#else', file=fp) print('#else', file=fp)
print(' switch (ch) {', file=fp) print(' switch (ch) {', file=fp)
haswide = False
hasnonewide = False
for codepoint in sorted(spaces): for codepoint in sorted(spaces):
if codepoint < 0x10000:
hasnonewide = True
if codepoint >= 0x10000 and not haswide:
print('#ifdef Py_UNICODE_WIDE', file=fp)
haswide = True
print(' case 0x%04X:' % (codepoint,), file=fp) print(' case 0x%04X:' % (codepoint,), file=fp)
if haswide and hasnonewide:
print('#endif', file=fp)
print(' return 1;', file=fp) print(' return 1;', file=fp)
if haswide and not hasnonewide:
print('#endif', file=fp)
print(' }', file=fp) print(' }', file=fp)
print(' return 0;', file=fp) print(' return 0;', file=fp)
@ -545,23 +523,12 @@ def makeunicodetype(unicode, trace):
print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp) print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
print(" * type 'B', 0 otherwise.", file=fp) print(" * type 'B', 0 otherwise.", file=fp)
print(" */", file=fp) print(" */", file=fp)
print('int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)', file=fp) print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp)
print('{', file=fp) print('{', file=fp)
print(' switch (ch) {', file=fp) print(' switch (ch) {', file=fp)
haswide = False
hasnonewide = False
for codepoint in sorted(linebreaks): for codepoint in sorted(linebreaks):
if codepoint < 0x10000:
hasnonewide = True
if codepoint >= 0x10000 and not haswide:
print('#ifdef Py_UNICODE_WIDE', file=fp)
haswide = True
print(' case 0x%04X:' % (codepoint,), file=fp) print(' case 0x%04X:' % (codepoint,), file=fp)
if haswide and hasnonewide:
print('#endif', file=fp)
print(' return 1;', file=fp) print(' return 1;', file=fp)
if haswide and not hasnonewide:
print('#endif', file=fp)
print(' }', file=fp) print(' }', file=fp)
print(' return 0;', file=fp) print(' return 0;', file=fp)