mirror of https://github.com/python/cpython
#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept and return characters from the full Unicode range (Py_UCS4). The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit() now return the correct value for large code points
- repr() may consider more characters as printable.
This commit is contained in:
parent
36e778ef02
commit
324ac65ceb
|
@ -221,24 +221,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
|
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
|
||||||
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
|
# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
|
||||||
# define _PyUnicode_Init _PyUnicodeUCS2_Init
|
# define _PyUnicode_Init _PyUnicodeUCS2_Init
|
||||||
# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
|
|
||||||
# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
|
|
||||||
# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
|
|
||||||
# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
|
|
||||||
# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
|
|
||||||
# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
|
|
||||||
# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable
|
|
||||||
# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
|
|
||||||
# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
|
|
||||||
# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
|
|
||||||
# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
|
|
||||||
# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
|
|
||||||
# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
|
|
||||||
# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
|
|
||||||
# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
|
|
||||||
# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
|
|
||||||
# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
|
|
||||||
# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
@ -322,24 +304,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
|
# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
|
||||||
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
|
# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
|
||||||
# define _PyUnicode_Init _PyUnicodeUCS4_Init
|
# define _PyUnicode_Init _PyUnicodeUCS4_Init
|
||||||
# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
|
|
||||||
# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
|
|
||||||
# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
|
|
||||||
# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
|
|
||||||
# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
|
|
||||||
# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
|
|
||||||
# define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable
|
|
||||||
# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
|
|
||||||
# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
|
|
||||||
# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
|
|
||||||
# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
|
|
||||||
# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
|
|
||||||
# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
|
|
||||||
# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
|
|
||||||
# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
|
|
||||||
# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
|
|
||||||
# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
|
|
||||||
# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -351,7 +315,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||||
configure Python using --with-wctype-functions. This reduces the
|
configure Python using --with-wctype-functions. This reduces the
|
||||||
interpreter's code size. */
|
interpreter's code size. */
|
||||||
|
|
||||||
#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
|
#if defined(Py_UNICODE_WIDE) && defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
|
||||||
|
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
|
|
||||||
|
@ -1542,75 +1506,75 @@ PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsLowercase(
|
PyAPI_FUNC(int) _PyUnicode_IsLowercase(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsUppercase(
|
PyAPI_FUNC(int) _PyUnicode_IsUppercase(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
|
PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsXidStart(
|
PyAPI_FUNC(int) _PyUnicode_IsXidStart(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
|
PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
|
PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
|
||||||
const Py_UNICODE ch /* Unicode character */
|
const Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
|
PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
|
||||||
const Py_UNICODE ch /* Unicode character */
|
const Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
|
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_ToDigit(
|
PyAPI_FUNC(int) _PyUnicode_ToDigit(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(double) _PyUnicode_ToNumeric(
|
PyAPI_FUNC(double) _PyUnicode_ToNumeric(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
|
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsDigit(
|
PyAPI_FUNC(int) _PyUnicode_IsDigit(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsNumeric(
|
PyAPI_FUNC(int) _PyUnicode_IsNumeric(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsPrintable(
|
PyAPI_FUNC(int) _PyUnicode_IsPrintable(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(int) _PyUnicode_IsAlpha(
|
PyAPI_FUNC(int) _PyUnicode_IsAlpha(
|
||||||
Py_UNICODE ch /* Unicode character */
|
Py_UCS4 ch /* Unicode character */
|
||||||
);
|
);
|
||||||
|
|
||||||
PyAPI_FUNC(size_t) Py_UNICODE_strlen(
|
PyAPI_FUNC(size_t) Py_UNICODE_strlen(
|
||||||
|
|
|
@ -1353,6 +1353,10 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual(repr(s1()), '\\n')
|
self.assertEqual(repr(s1()), '\\n')
|
||||||
self.assertEqual(repr(s2()), '\\n')
|
self.assertEqual(repr(s2()), '\\n')
|
||||||
|
|
||||||
|
def test_printable_repr(self):
|
||||||
|
self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
|
||||||
|
self.assertEqual(repr('\U00011000'), "'\\U00011000'") # nonprintable
|
||||||
|
|
||||||
def test_expandtabs_overflows_gracefully(self):
|
def test_expandtabs_overflows_gracefully(self):
|
||||||
# This test only affects 32-bit platforms because expandtabs can only take
|
# This test only affects 32-bit platforms because expandtabs can only take
|
||||||
# an int as the max value, not a 64-bit C long. If expandtabs is changed
|
# an int as the max value, not a 64-bit C long. If expandtabs is changed
|
||||||
|
|
|
@ -294,6 +294,12 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
|
||||||
self.assertEqual(len(lines), 1,
|
self.assertEqual(len(lines), 1,
|
||||||
r"\u%.4x should not be a linebreak" % i)
|
r"\u%.4x should not be a linebreak" % i)
|
||||||
|
|
||||||
|
def test_UCS4(self):
|
||||||
|
# unicodedata should work with code points outside the BMP
|
||||||
|
# even on a narrow Unicode build
|
||||||
|
self.assertEqual(self.db.category(u"\U0001012A"), "No")
|
||||||
|
self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test.support.run_unittest(
|
test.support.run_unittest(
|
||||||
UnicodeMiscTest,
|
UnicodeMiscTest,
|
||||||
|
|
|
@ -12,6 +12,12 @@ What's New in Python 3.2 Alpha 2?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #5127: The C functions that access the Unicode Database now accept and
|
||||||
|
return characters from the full Unicode range, even on narrow unicode builds
|
||||||
|
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference
|
||||||
|
in Python is that unicodedata.numeric() now returns the correct value for
|
||||||
|
large code points, and repr() may consider more characters as printable.
|
||||||
|
|
||||||
- Issue #9425: Create PyModule_GetFilenameObject() function to get the filename
|
- Issue #9425: Create PyModule_GetFilenameObject() function to get the filename
|
||||||
as a unicode object, instead of a byte string. Function needed to support
|
as a unicode object, instead of a byte string. Function needed to support
|
||||||
unencodable filenames. Deprecate PyModule_GetFilename() in favor of the new
|
unencodable filenames. Deprecate PyModule_GetFilename() in favor of the new
|
||||||
|
|
|
@ -26,9 +26,9 @@
|
||||||
#define NUMERIC_MASK 0x1000
|
#define NUMERIC_MASK 0x1000
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const Py_UNICODE upper;
|
const Py_UCS4 upper;
|
||||||
const Py_UNICODE lower;
|
const Py_UCS4 lower;
|
||||||
const Py_UNICODE title;
|
const Py_UCS4 title;
|
||||||
const unsigned char decimal;
|
const unsigned char decimal;
|
||||||
const unsigned char digit;
|
const unsigned char digit;
|
||||||
const unsigned short flags;
|
const unsigned short flags;
|
||||||
|
@ -37,15 +37,13 @@ typedef struct {
|
||||||
#include "unicodetype_db.h"
|
#include "unicodetype_db.h"
|
||||||
|
|
||||||
static const _PyUnicode_TypeRecord *
|
static const _PyUnicode_TypeRecord *
|
||||||
gettyperecord(Py_UNICODE code)
|
gettyperecord(Py_UCS4 code)
|
||||||
{
|
{
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
if (code >= 0x110000)
|
if (code >= 0x110000)
|
||||||
index = 0;
|
index = 0;
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
index = index1[(code>>SHIFT)];
|
index = index1[(code>>SHIFT)];
|
||||||
index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
|
index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
|
||||||
|
@ -57,7 +55,7 @@ gettyperecord(Py_UNICODE code)
|
||||||
/* Returns the titlecase Unicode characters corresponding to ch or just
|
/* Returns the titlecase Unicode characters corresponding to ch or just
|
||||||
ch if no titlecase mapping is known. */
|
ch if no titlecase mapping is known. */
|
||||||
|
|
||||||
Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
|
Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
int delta = ctype->title;
|
int delta = ctype->title;
|
||||||
|
@ -74,7 +72,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the category 'Lt', 0
|
/* Returns 1 for Unicode characters having the category 'Lt', 0
|
||||||
otherwise. */
|
otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsTitlecase(Py_UNICODE ch)
|
int _PyUnicode_IsTitlecase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -84,7 +82,7 @@ int _PyUnicode_IsTitlecase(Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the XID_Start property, 0
|
/* Returns 1 for Unicode characters having the XID_Start property, 0
|
||||||
otherwise. */
|
otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsXidStart(Py_UNICODE ch)
|
int _PyUnicode_IsXidStart(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -94,7 +92,7 @@ int _PyUnicode_IsXidStart(Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the XID_Continue property,
|
/* Returns 1 for Unicode characters having the XID_Continue property,
|
||||||
0 otherwise. */
|
0 otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsXidContinue(Py_UNICODE ch)
|
int _PyUnicode_IsXidContinue(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -104,14 +102,14 @@ int _PyUnicode_IsXidContinue(Py_UNICODE ch)
|
||||||
/* Returns the integer decimal (0-9) for Unicode characters having
|
/* Returns the integer decimal (0-9) for Unicode characters having
|
||||||
this property, -1 otherwise. */
|
this property, -1 otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
|
int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
|
return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
|
int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
if (_PyUnicode_ToDecimalDigit(ch) < 0)
|
if (_PyUnicode_ToDecimalDigit(ch) < 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -121,14 +119,14 @@ int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
|
||||||
/* Returns the integer digit (0-9) for Unicode characters having
|
/* Returns the integer digit (0-9) for Unicode characters having
|
||||||
this property, -1 otherwise. */
|
this property, -1 otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_ToDigit(Py_UNICODE ch)
|
int _PyUnicode_ToDigit(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
|
return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int _PyUnicode_IsDigit(Py_UNICODE ch)
|
int _PyUnicode_IsDigit(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
if (_PyUnicode_ToDigit(ch) < 0)
|
if (_PyUnicode_ToDigit(ch) < 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -138,7 +136,7 @@ int _PyUnicode_IsDigit(Py_UNICODE ch)
|
||||||
/* Returns the numeric value as double for Unicode characters having
|
/* Returns the numeric value as double for Unicode characters having
|
||||||
this property, -1.0 otherwise. */
|
this property, -1.0 otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsNumeric(Py_UNICODE ch)
|
int _PyUnicode_IsNumeric(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -158,7 +156,7 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
|
||||||
* Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
|
* Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
|
||||||
* Zs (Separator, Space) other than ASCII space('\x20').
|
* Zs (Separator, Space) other than ASCII space('\x20').
|
||||||
*/
|
*/
|
||||||
int _PyUnicode_IsPrintable(Py_UNICODE ch)
|
int _PyUnicode_IsPrintable(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -170,7 +168,7 @@ int _PyUnicode_IsPrintable(Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the category 'Ll', 0
|
/* Returns 1 for Unicode characters having the category 'Ll', 0
|
||||||
otherwise. */
|
otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsLowercase(Py_UNICODE ch)
|
int _PyUnicode_IsLowercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -180,7 +178,7 @@ int _PyUnicode_IsLowercase(Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the category 'Lu', 0
|
/* Returns 1 for Unicode characters having the category 'Lu', 0
|
||||||
otherwise. */
|
otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsUppercase(Py_UNICODE ch)
|
int _PyUnicode_IsUppercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -190,7 +188,7 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
|
||||||
/* Returns the uppercase Unicode characters corresponding to ch or just
|
/* Returns the uppercase Unicode characters corresponding to ch or just
|
||||||
ch if no uppercase mapping is known. */
|
ch if no uppercase mapping is known. */
|
||||||
|
|
||||||
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
|
Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
int delta = ctype->upper;
|
int delta = ctype->upper;
|
||||||
|
@ -204,7 +202,7 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
|
||||||
/* Returns the lowercase Unicode characters corresponding to ch or just
|
/* Returns the lowercase Unicode characters corresponding to ch or just
|
||||||
ch if no lowercase mapping is known. */
|
ch if no lowercase mapping is known. */
|
||||||
|
|
||||||
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
|
Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
int delta = ctype->lower;
|
int delta = ctype->lower;
|
||||||
|
@ -218,7 +216,7 @@ Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
|
/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
|
||||||
'Lo' or 'Lm', 0 otherwise. */
|
'Lo' or 'Lm', 0 otherwise. */
|
||||||
|
|
||||||
int _PyUnicode_IsAlpha(Py_UNICODE ch)
|
int _PyUnicode_IsAlpha(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||||
|
|
||||||
|
@ -230,27 +228,27 @@ int _PyUnicode_IsAlpha(Py_UNICODE ch)
|
||||||
/* Export the interfaces using the wchar_t type for portability
|
/* Export the interfaces using the wchar_t type for portability
|
||||||
reasons: */
|
reasons: */
|
||||||
|
|
||||||
int _PyUnicode_IsLowercase(Py_UNICODE ch)
|
int _PyUnicode_IsLowercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
return iswlower(ch);
|
return iswlower(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
int _PyUnicode_IsUppercase(Py_UNICODE ch)
|
int _PyUnicode_IsUppercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
return iswupper(ch);
|
return iswupper(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
|
Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
return towlower(ch);
|
return towlower(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
|
Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
return towupper(ch);
|
return towupper(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
int _PyUnicode_IsAlpha(Py_UNICODE ch)
|
int _PyUnicode_IsAlpha(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
return iswalpha(ch);
|
return iswalpha(ch);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1980,7 +1980,7 @@ static unsigned char index2[] = {
|
||||||
/* Returns the numeric value as double for Unicode characters
|
/* Returns the numeric value as double for Unicode characters
|
||||||
* having this property, -1.0 otherwise.
|
* having this property, -1.0 otherwise.
|
||||||
*/
|
*/
|
||||||
double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
double _PyUnicode_ToNumeric(Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case 0x0F33:
|
case 0x0F33:
|
||||||
|
@ -2031,7 +2031,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xABF0:
|
case 0xABF0:
|
||||||
case 0xF9B2:
|
case 0xF9B2:
|
||||||
case 0xFF10:
|
case 0xFF10:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1018A:
|
case 0x1018A:
|
||||||
case 0x104A0:
|
case 0x104A0:
|
||||||
case 0x1D7CE:
|
case 0x1D7CE:
|
||||||
|
@ -2041,7 +2040,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7F6:
|
case 0x1D7F6:
|
||||||
case 0x1F100:
|
case 0x1F100:
|
||||||
case 0x1F101:
|
case 0x1F101:
|
||||||
#endif
|
|
||||||
return (double) 0.0;
|
return (double) 0.0;
|
||||||
case 0x0031:
|
case 0x0031:
|
||||||
case 0x00B9:
|
case 0x00B9:
|
||||||
|
@ -2105,7 +2103,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xAA51:
|
case 0xAA51:
|
||||||
case 0xABF1:
|
case 0xABF1:
|
||||||
case 0xFF11:
|
case 0xFF11:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10107:
|
case 0x10107:
|
||||||
case 0x10142:
|
case 0x10142:
|
||||||
case 0x10158:
|
case 0x10158:
|
||||||
|
@ -2135,7 +2132,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7F7:
|
case 0x1D7F7:
|
||||||
case 0x1F102:
|
case 0x1F102:
|
||||||
case 0x2092A:
|
case 0x2092A:
|
||||||
#endif
|
|
||||||
return (double) 1.0;
|
return (double) 1.0;
|
||||||
case 0x2152:
|
case 0x2152:
|
||||||
return (double) 1.0/10.0;
|
return (double) 1.0/10.0;
|
||||||
|
@ -2147,46 +2143,36 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x0F2A:
|
case 0x0F2A:
|
||||||
case 0x2CFD:
|
case 0x2CFD:
|
||||||
case 0xA831:
|
case 0xA831:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10141:
|
case 0x10141:
|
||||||
case 0x10175:
|
case 0x10175:
|
||||||
case 0x10176:
|
case 0x10176:
|
||||||
case 0x10E7B:
|
case 0x10E7B:
|
||||||
#endif
|
|
||||||
return (double) 1.0/2.0;
|
return (double) 1.0/2.0;
|
||||||
case 0x2153:
|
case 0x2153:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10E7D:
|
case 0x10E7D:
|
||||||
case 0x1245A:
|
case 0x1245A:
|
||||||
case 0x1245D:
|
case 0x1245D:
|
||||||
#endif
|
|
||||||
return (double) 1.0/3.0;
|
return (double) 1.0/3.0;
|
||||||
case 0x00BC:
|
case 0x00BC:
|
||||||
case 0x09F7:
|
case 0x09F7:
|
||||||
case 0x0D73:
|
case 0x0D73:
|
||||||
case 0xA830:
|
case 0xA830:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10140:
|
case 0x10140:
|
||||||
case 0x10E7C:
|
case 0x10E7C:
|
||||||
case 0x12460:
|
case 0x12460:
|
||||||
case 0x12462:
|
case 0x12462:
|
||||||
#endif
|
|
||||||
return (double) 1.0/4.0;
|
return (double) 1.0/4.0;
|
||||||
case 0x2155:
|
case 0x2155:
|
||||||
return (double) 1.0/5.0;
|
return (double) 1.0/5.0;
|
||||||
case 0x2159:
|
case 0x2159:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x12461:
|
case 0x12461:
|
||||||
#endif
|
|
||||||
return (double) 1.0/6.0;
|
return (double) 1.0/6.0;
|
||||||
case 0x2150:
|
case 0x2150:
|
||||||
return (double) 1.0/7.0;
|
return (double) 1.0/7.0;
|
||||||
case 0x09F5:
|
case 0x09F5:
|
||||||
case 0x215B:
|
case 0x215B:
|
||||||
case 0xA834:
|
case 0xA834:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1245F:
|
case 0x1245F:
|
||||||
#endif
|
|
||||||
return (double) 1.0/8.0;
|
return (double) 1.0/8.0;
|
||||||
case 0x2151:
|
case 0x2151:
|
||||||
return (double) 1.0/9.0;
|
return (double) 1.0/9.0;
|
||||||
|
@ -2210,7 +2196,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x62FE:
|
case 0x62FE:
|
||||||
case 0xF973:
|
case 0xF973:
|
||||||
case 0xF9FD:
|
case 0xF9FD:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10110:
|
case 0x10110:
|
||||||
case 0x10149:
|
case 0x10149:
|
||||||
case 0x10150:
|
case 0x10150:
|
||||||
|
@ -2229,7 +2214,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x10B7C:
|
case 0x10B7C:
|
||||||
case 0x10E69:
|
case 0x10E69:
|
||||||
case 0x1D369:
|
case 0x1D369:
|
||||||
#endif
|
|
||||||
return (double) 10.0;
|
return (double) 10.0;
|
||||||
case 0x0BF1:
|
case 0x0BF1:
|
||||||
case 0x0D71:
|
case 0x0D71:
|
||||||
|
@ -2239,7 +2223,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x4F70:
|
case 0x4F70:
|
||||||
case 0x767E:
|
case 0x767E:
|
||||||
case 0x964C:
|
case 0x964C:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10119:
|
case 0x10119:
|
||||||
case 0x1014B:
|
case 0x1014B:
|
||||||
case 0x10152:
|
case 0x10152:
|
||||||
|
@ -2251,7 +2234,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x10B5E:
|
case 0x10B5E:
|
||||||
case 0x10B7E:
|
case 0x10B7E:
|
||||||
case 0x10E72:
|
case 0x10E72:
|
||||||
#endif
|
|
||||||
return (double) 100.0;
|
return (double) 100.0;
|
||||||
case 0x0BF2:
|
case 0x0BF2:
|
||||||
case 0x0D72:
|
case 0x0D72:
|
||||||
|
@ -2261,7 +2243,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x4EDF:
|
case 0x4EDF:
|
||||||
case 0x5343:
|
case 0x5343:
|
||||||
case 0x9621:
|
case 0x9621:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10122:
|
case 0x10122:
|
||||||
case 0x1014D:
|
case 0x1014D:
|
||||||
case 0x10154:
|
case 0x10154:
|
||||||
|
@ -2270,17 +2251,14 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x10A47:
|
case 0x10A47:
|
||||||
case 0x10B5F:
|
case 0x10B5F:
|
||||||
case 0x10B7F:
|
case 0x10B7F:
|
||||||
#endif
|
|
||||||
return (double) 1000.0;
|
return (double) 1000.0;
|
||||||
case 0x137C:
|
case 0x137C:
|
||||||
case 0x2182:
|
case 0x2182:
|
||||||
case 0x4E07:
|
case 0x4E07:
|
||||||
case 0x842C:
|
case 0x842C:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1012B:
|
case 0x1012B:
|
||||||
case 0x10155:
|
case 0x10155:
|
||||||
case 0x1085F:
|
case 0x1085F:
|
||||||
#endif
|
|
||||||
return (double) 10000.0;
|
return (double) 10000.0;
|
||||||
case 0x2188:
|
case 0x2188:
|
||||||
return (double) 100000.0;
|
return (double) 100000.0;
|
||||||
|
@ -2414,7 +2392,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xABF2:
|
case 0xABF2:
|
||||||
case 0xF978:
|
case 0xF978:
|
||||||
case 0xFF12:
|
case 0xFF12:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10108:
|
case 0x10108:
|
||||||
case 0x1015B:
|
case 0x1015B:
|
||||||
case 0x1015C:
|
case 0x1015C:
|
||||||
|
@ -2445,15 +2422,12 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7F8:
|
case 0x1D7F8:
|
||||||
case 0x1F103:
|
case 0x1F103:
|
||||||
case 0x22390:
|
case 0x22390:
|
||||||
#endif
|
|
||||||
return (double) 2.0;
|
return (double) 2.0;
|
||||||
case 0x2154:
|
case 0x2154:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10177:
|
case 0x10177:
|
||||||
case 0x10E7E:
|
case 0x10E7E:
|
||||||
case 0x1245B:
|
case 0x1245B:
|
||||||
case 0x1245E:
|
case 0x1245E:
|
||||||
#endif
|
|
||||||
return (double) 2.0/3.0;
|
return (double) 2.0/3.0;
|
||||||
case 0x2156:
|
case 0x2156:
|
||||||
return (double) 2.0/5.0;
|
return (double) 2.0/5.0;
|
||||||
|
@ -2465,7 +2439,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x3039:
|
case 0x3039:
|
||||||
case 0x5344:
|
case 0x5344:
|
||||||
case 0x5EFF:
|
case 0x5EFF:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10111:
|
case 0x10111:
|
||||||
case 0x103D4:
|
case 0x103D4:
|
||||||
case 0x1085C:
|
case 0x1085C:
|
||||||
|
@ -2475,21 +2448,14 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x10B7D:
|
case 0x10B7D:
|
||||||
case 0x10E6A:
|
case 0x10E6A:
|
||||||
case 0x1D36A:
|
case 0x1D36A:
|
||||||
#endif
|
|
||||||
return (double) 20.0;
|
return (double) 20.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1011A:
|
case 0x1011A:
|
||||||
case 0x10E73:
|
case 0x10E73:
|
||||||
return (double) 200.0;
|
return (double) 200.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10123:
|
case 0x10123:
|
||||||
return (double) 2000.0;
|
return (double) 2000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1012C:
|
case 0x1012C:
|
||||||
return (double) 20000.0;
|
return (double) 20000.0;
|
||||||
#endif
|
|
||||||
case 0x3251:
|
case 0x3251:
|
||||||
return (double) 21.0;
|
return (double) 21.0;
|
||||||
case 0x3252:
|
case 0x3252:
|
||||||
|
@ -2571,7 +2537,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xABF3:
|
case 0xABF3:
|
||||||
case 0xF96B:
|
case 0xF96B:
|
||||||
case 0xFF13:
|
case 0xFF13:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10109:
|
case 0x10109:
|
||||||
case 0x104A3:
|
case 0x104A3:
|
||||||
case 0x1085A:
|
case 0x1085A:
|
||||||
|
@ -2605,7 +2570,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x20B19:
|
case 0x20B19:
|
||||||
case 0x22998:
|
case 0x22998:
|
||||||
case 0x23B1B:
|
case 0x23B1B:
|
||||||
#endif
|
|
||||||
return (double) 3.0;
|
return (double) 3.0;
|
||||||
case 0x09F6:
|
case 0x09F6:
|
||||||
case 0xA835:
|
case 0xA835:
|
||||||
|
@ -2616,9 +2580,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x09F8:
|
case 0x09F8:
|
||||||
case 0x0D75:
|
case 0x0D75:
|
||||||
case 0xA832:
|
case 0xA832:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10178:
|
case 0x10178:
|
||||||
#endif
|
|
||||||
return (double) 3.0/4.0;
|
return (double) 3.0/4.0;
|
||||||
case 0x2157:
|
case 0x2157:
|
||||||
return (double) 3.0/5.0;
|
return (double) 3.0/5.0;
|
||||||
|
@ -2628,28 +2590,20 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x303A:
|
case 0x303A:
|
||||||
case 0x325A:
|
case 0x325A:
|
||||||
case 0x5345:
|
case 0x5345:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10112:
|
case 0x10112:
|
||||||
case 0x10165:
|
case 0x10165:
|
||||||
case 0x10E6B:
|
case 0x10E6B:
|
||||||
case 0x1D36B:
|
case 0x1D36B:
|
||||||
case 0x20983:
|
case 0x20983:
|
||||||
#endif
|
|
||||||
return (double) 30.0;
|
return (double) 30.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1011B:
|
case 0x1011B:
|
||||||
case 0x1016B:
|
case 0x1016B:
|
||||||
case 0x10E74:
|
case 0x10E74:
|
||||||
return (double) 300.0;
|
return (double) 300.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10124:
|
case 0x10124:
|
||||||
return (double) 3000.0;
|
return (double) 3000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1012D:
|
case 0x1012D:
|
||||||
return (double) 30000.0;
|
return (double) 30000.0;
|
||||||
#endif
|
|
||||||
case 0x325B:
|
case 0x325B:
|
||||||
return (double) 31.0;
|
return (double) 31.0;
|
||||||
case 0x325C:
|
case 0x325C:
|
||||||
|
@ -2724,7 +2678,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xAA54:
|
case 0xAA54:
|
||||||
case 0xABF4:
|
case 0xABF4:
|
||||||
case 0xFF14:
|
case 0xFF14:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1010A:
|
case 0x1010A:
|
||||||
case 0x104A4:
|
case 0x104A4:
|
||||||
case 0x10A43:
|
case 0x10A43:
|
||||||
|
@ -2756,34 +2709,25 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x20064:
|
case 0x20064:
|
||||||
case 0x200E2:
|
case 0x200E2:
|
||||||
case 0x2626D:
|
case 0x2626D:
|
||||||
#endif
|
|
||||||
return (double) 4.0;
|
return (double) 4.0;
|
||||||
case 0x2158:
|
case 0x2158:
|
||||||
return (double) 4.0/5.0;
|
return (double) 4.0/5.0;
|
||||||
case 0x1375:
|
case 0x1375:
|
||||||
case 0x32B5:
|
case 0x32B5:
|
||||||
case 0x534C:
|
case 0x534C:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10113:
|
case 0x10113:
|
||||||
case 0x10E6C:
|
case 0x10E6C:
|
||||||
case 0x1D36C:
|
case 0x1D36C:
|
||||||
case 0x2098C:
|
case 0x2098C:
|
||||||
case 0x2099C:
|
case 0x2099C:
|
||||||
#endif
|
|
||||||
return (double) 40.0;
|
return (double) 40.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1011C:
|
case 0x1011C:
|
||||||
case 0x10E75:
|
case 0x10E75:
|
||||||
return (double) 400.0;
|
return (double) 400.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10125:
|
case 0x10125:
|
||||||
return (double) 4000.0;
|
return (double) 4000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1012E:
|
case 0x1012E:
|
||||||
return (double) 40000.0;
|
return (double) 40000.0;
|
||||||
#endif
|
|
||||||
case 0x32B6:
|
case 0x32B6:
|
||||||
return (double) 41.0;
|
return (double) 41.0;
|
||||||
case 0x32B7:
|
case 0x32B7:
|
||||||
|
@ -2858,7 +2802,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xAA55:
|
case 0xAA55:
|
||||||
case 0xABF5:
|
case 0xABF5:
|
||||||
case 0xFF15:
|
case 0xFF15:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1010B:
|
case 0x1010B:
|
||||||
case 0x10143:
|
case 0x10143:
|
||||||
case 0x10148:
|
case 0x10148:
|
||||||
|
@ -2887,14 +2830,11 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7FB:
|
case 0x1D7FB:
|
||||||
case 0x1F106:
|
case 0x1F106:
|
||||||
case 0x20121:
|
case 0x20121:
|
||||||
#endif
|
|
||||||
return (double) 5.0;
|
return (double) 5.0;
|
||||||
case 0x0F2C:
|
case 0x0F2C:
|
||||||
return (double) 5.0/2.0;
|
return (double) 5.0/2.0;
|
||||||
case 0x215A:
|
case 0x215A:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1245C:
|
case 0x1245C:
|
||||||
#endif
|
|
||||||
return (double) 5.0/6.0;
|
return (double) 5.0/6.0;
|
||||||
case 0x215D:
|
case 0x215D:
|
||||||
return (double) 5.0/8.0;
|
return (double) 5.0/8.0;
|
||||||
|
@ -2903,7 +2843,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x217C:
|
case 0x217C:
|
||||||
case 0x2186:
|
case 0x2186:
|
||||||
case 0x32BF:
|
case 0x32BF:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10114:
|
case 0x10114:
|
||||||
case 0x10144:
|
case 0x10144:
|
||||||
case 0x1014A:
|
case 0x1014A:
|
||||||
|
@ -2917,11 +2856,9 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x10A7E:
|
case 0x10A7E:
|
||||||
case 0x10E6D:
|
case 0x10E6D:
|
||||||
case 0x1D36D:
|
case 0x1D36D:
|
||||||
#endif
|
|
||||||
return (double) 50.0;
|
return (double) 50.0;
|
||||||
case 0x216E:
|
case 0x216E:
|
||||||
case 0x217E:
|
case 0x217E:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1011D:
|
case 0x1011D:
|
||||||
case 0x10145:
|
case 0x10145:
|
||||||
case 0x1014C:
|
case 0x1014C:
|
||||||
|
@ -2932,22 +2869,17 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1016F:
|
case 0x1016F:
|
||||||
case 0x10170:
|
case 0x10170:
|
||||||
case 0x10E76:
|
case 0x10E76:
|
||||||
#endif
|
|
||||||
return (double) 500.0;
|
return (double) 500.0;
|
||||||
case 0x2181:
|
case 0x2181:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10126:
|
case 0x10126:
|
||||||
case 0x10146:
|
case 0x10146:
|
||||||
case 0x1014E:
|
case 0x1014E:
|
||||||
case 0x10172:
|
case 0x10172:
|
||||||
#endif
|
|
||||||
return (double) 5000.0;
|
return (double) 5000.0;
|
||||||
case 0x2187:
|
case 0x2187:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1012F:
|
case 0x1012F:
|
||||||
case 0x10147:
|
case 0x10147:
|
||||||
case 0x10156:
|
case 0x10156:
|
||||||
#endif
|
|
||||||
return (double) 50000.0;
|
return (double) 50000.0;
|
||||||
case 0x0036:
|
case 0x0036:
|
||||||
case 0x0666:
|
case 0x0666:
|
||||||
|
@ -3007,7 +2939,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xF9D1:
|
case 0xF9D1:
|
||||||
case 0xF9D3:
|
case 0xF9D3:
|
||||||
case 0xFF16:
|
case 0xFF16:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1010C:
|
case 0x1010C:
|
||||||
case 0x104A6:
|
case 0x104A6:
|
||||||
case 0x10E65:
|
case 0x10E65:
|
||||||
|
@ -3026,28 +2957,19 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7FC:
|
case 0x1D7FC:
|
||||||
case 0x1F107:
|
case 0x1F107:
|
||||||
case 0x20AEA:
|
case 0x20AEA:
|
||||||
#endif
|
|
||||||
return (double) 6.0;
|
return (double) 6.0;
|
||||||
case 0x1377:
|
case 0x1377:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10115:
|
case 0x10115:
|
||||||
case 0x10E6E:
|
case 0x10E6E:
|
||||||
case 0x1D36E:
|
case 0x1D36E:
|
||||||
#endif
|
|
||||||
return (double) 60.0;
|
return (double) 60.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1011E:
|
case 0x1011E:
|
||||||
case 0x10E77:
|
case 0x10E77:
|
||||||
return (double) 600.0;
|
return (double) 600.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10127:
|
case 0x10127:
|
||||||
return (double) 6000.0;
|
return (double) 6000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10130:
|
case 0x10130:
|
||||||
return (double) 60000.0;
|
return (double) 60000.0;
|
||||||
#endif
|
|
||||||
case 0x0037:
|
case 0x0037:
|
||||||
case 0x0667:
|
case 0x0667:
|
||||||
case 0x06F7:
|
case 0x06F7:
|
||||||
|
@ -3104,7 +3026,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xAA57:
|
case 0xAA57:
|
||||||
case 0xABF7:
|
case 0xABF7:
|
||||||
case 0xFF17:
|
case 0xFF17:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1010D:
|
case 0x1010D:
|
||||||
case 0x104A7:
|
case 0x104A7:
|
||||||
case 0x10E66:
|
case 0x10E66:
|
||||||
|
@ -3124,32 +3045,23 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7FD:
|
case 0x1D7FD:
|
||||||
case 0x1F108:
|
case 0x1F108:
|
||||||
case 0x20001:
|
case 0x20001:
|
||||||
#endif
|
|
||||||
return (double) 7.0;
|
return (double) 7.0;
|
||||||
case 0x0F2D:
|
case 0x0F2D:
|
||||||
return (double) 7.0/2.0;
|
return (double) 7.0/2.0;
|
||||||
case 0x215E:
|
case 0x215E:
|
||||||
return (double) 7.0/8.0;
|
return (double) 7.0/8.0;
|
||||||
case 0x1378:
|
case 0x1378:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10116:
|
case 0x10116:
|
||||||
case 0x10E6F:
|
case 0x10E6F:
|
||||||
case 0x1D36F:
|
case 0x1D36F:
|
||||||
#endif
|
|
||||||
return (double) 70.0;
|
return (double) 70.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1011F:
|
case 0x1011F:
|
||||||
case 0x10E78:
|
case 0x10E78:
|
||||||
return (double) 700.0;
|
return (double) 700.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10128:
|
case 0x10128:
|
||||||
return (double) 7000.0;
|
return (double) 7000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10131:
|
case 0x10131:
|
||||||
return (double) 70000.0;
|
return (double) 70000.0;
|
||||||
#endif
|
|
||||||
case 0x0038:
|
case 0x0038:
|
||||||
case 0x0668:
|
case 0x0668:
|
||||||
case 0x06F8:
|
case 0x06F8:
|
||||||
|
@ -3204,7 +3116,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xAA58:
|
case 0xAA58:
|
||||||
case 0xABF8:
|
case 0xABF8:
|
||||||
case 0xFF18:
|
case 0xFF18:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1010E:
|
case 0x1010E:
|
||||||
case 0x104A8:
|
case 0x104A8:
|
||||||
case 0x10E67:
|
case 0x10E67:
|
||||||
|
@ -3222,28 +3133,19 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7F4:
|
case 0x1D7F4:
|
||||||
case 0x1D7FE:
|
case 0x1D7FE:
|
||||||
case 0x1F109:
|
case 0x1F109:
|
||||||
#endif
|
|
||||||
return (double) 8.0;
|
return (double) 8.0;
|
||||||
case 0x1379:
|
case 0x1379:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10117:
|
case 0x10117:
|
||||||
case 0x10E70:
|
case 0x10E70:
|
||||||
case 0x1D370:
|
case 0x1D370:
|
||||||
#endif
|
|
||||||
return (double) 80.0;
|
return (double) 80.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10120:
|
case 0x10120:
|
||||||
case 0x10E79:
|
case 0x10E79:
|
||||||
return (double) 800.0;
|
return (double) 800.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10129:
|
case 0x10129:
|
||||||
return (double) 8000.0;
|
return (double) 8000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10132:
|
case 0x10132:
|
||||||
return (double) 80000.0;
|
return (double) 80000.0;
|
||||||
#endif
|
|
||||||
case 0x0039:
|
case 0x0039:
|
||||||
case 0x0669:
|
case 0x0669:
|
||||||
case 0x06F9:
|
case 0x06F9:
|
||||||
|
@ -3299,7 +3201,6 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0xAA59:
|
case 0xAA59:
|
||||||
case 0xABF9:
|
case 0xABF9:
|
||||||
case 0xFF19:
|
case 0xFF19:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1010F:
|
case 0x1010F:
|
||||||
case 0x104A9:
|
case 0x104A9:
|
||||||
case 0x10E68:
|
case 0x10E68:
|
||||||
|
@ -3320,32 +3221,23 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
case 0x1D7FF:
|
case 0x1D7FF:
|
||||||
case 0x1F10A:
|
case 0x1F10A:
|
||||||
case 0x2F890:
|
case 0x2F890:
|
||||||
#endif
|
|
||||||
return (double) 9.0;
|
return (double) 9.0;
|
||||||
case 0x0F2E:
|
case 0x0F2E:
|
||||||
return (double) 9.0/2.0;
|
return (double) 9.0/2.0;
|
||||||
case 0x137A:
|
case 0x137A:
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10118:
|
case 0x10118:
|
||||||
case 0x10341:
|
case 0x10341:
|
||||||
case 0x10E71:
|
case 0x10E71:
|
||||||
case 0x1D371:
|
case 0x1D371:
|
||||||
#endif
|
|
||||||
return (double) 90.0;
|
return (double) 90.0;
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10121:
|
case 0x10121:
|
||||||
case 0x1034A:
|
case 0x1034A:
|
||||||
case 0x10E7A:
|
case 0x10E7A:
|
||||||
return (double) 900.0;
|
return (double) 900.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x1012A:
|
case 0x1012A:
|
||||||
return (double) 9000.0;
|
return (double) 9000.0;
|
||||||
#endif
|
|
||||||
#ifdef Py_UNICODE_WIDE
|
|
||||||
case 0x10133:
|
case 0x10133:
|
||||||
return (double) 90000.0;
|
return (double) 90000.0;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
return -1.0;
|
return -1.0;
|
||||||
}
|
}
|
||||||
|
@ -3353,7 +3245,7 @@ double _PyUnicode_ToNumeric(Py_UNICODE ch)
|
||||||
/* Returns 1 for Unicode characters having the bidirectional
|
/* Returns 1 for Unicode characters having the bidirectional
|
||||||
* type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.
|
* type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.
|
||||||
*/
|
*/
|
||||||
int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
|
int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
#ifdef WANT_WCTYPE_FUNCTIONS
|
#ifdef WANT_WCTYPE_FUNCTIONS
|
||||||
return iswspace(ch);
|
return iswspace(ch);
|
||||||
|
@ -3399,7 +3291,7 @@ int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
|
||||||
* property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
|
* property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
|
||||||
* type 'B', 0 otherwise.
|
* type 'B', 0 otherwise.
|
||||||
*/
|
*/
|
||||||
int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
|
int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)
|
||||||
{
|
{
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case 0x000A:
|
case 0x000A:
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
SCRIPT = sys.argv[0]
|
SCRIPT = sys.argv[0]
|
||||||
VERSION = "2.6"
|
VERSION = "3.2"
|
||||||
|
|
||||||
# The Unicode Database
|
# The Unicode Database
|
||||||
UNIDATA_VERSION = "5.2.0"
|
UNIDATA_VERSION = "5.2.0"
|
||||||
|
@ -479,7 +479,7 @@ def makeunicodetype(unicode, trace):
|
||||||
print('/* Returns the numeric value as double for Unicode characters', file=fp)
|
print('/* Returns the numeric value as double for Unicode characters', file=fp)
|
||||||
print(' * having this property, -1.0 otherwise.', file=fp)
|
print(' * having this property, -1.0 otherwise.', file=fp)
|
||||||
print(' */', file=fp)
|
print(' */', file=fp)
|
||||||
print('double _PyUnicode_ToNumeric(Py_UNICODE ch)', file=fp)
|
print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp)
|
||||||
print('{', file=fp)
|
print('{', file=fp)
|
||||||
print(' switch (ch) {', file=fp)
|
print(' switch (ch) {', file=fp)
|
||||||
for value, codepoints in numeric_items:
|
for value, codepoints in numeric_items:
|
||||||
|
@ -488,21 +488,10 @@ def makeunicodetype(unicode, trace):
|
||||||
parts = [repr(float(part)) for part in parts]
|
parts = [repr(float(part)) for part in parts]
|
||||||
value = '/'.join(parts)
|
value = '/'.join(parts)
|
||||||
|
|
||||||
haswide = False
|
|
||||||
hasnonewide = False
|
|
||||||
codepoints.sort()
|
codepoints.sort()
|
||||||
for codepoint in codepoints:
|
for codepoint in codepoints:
|
||||||
if codepoint < 0x10000:
|
|
||||||
hasnonewide = True
|
|
||||||
if codepoint >= 0x10000 and not haswide:
|
|
||||||
print('#ifdef Py_UNICODE_WIDE', file=fp)
|
|
||||||
haswide = True
|
|
||||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||||
if haswide and hasnonewide:
|
|
||||||
print('#endif', file=fp)
|
|
||||||
print(' return (double) %s;' % (value,), file=fp)
|
print(' return (double) %s;' % (value,), file=fp)
|
||||||
if haswide and not hasnonewide:
|
|
||||||
print('#endif', file=fp)
|
|
||||||
print(' }', file=fp)
|
print(' }', file=fp)
|
||||||
print(' return -1.0;', file=fp)
|
print(' return -1.0;', file=fp)
|
||||||
print('}', file=fp)
|
print('}', file=fp)
|
||||||
|
@ -512,27 +501,16 @@ def makeunicodetype(unicode, trace):
|
||||||
print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
|
print("/* Returns 1 for Unicode characters having the bidirectional", file=fp)
|
||||||
print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
|
print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp)
|
||||||
print(" */", file=fp)
|
print(" */", file=fp)
|
||||||
print('int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)', file=fp)
|
print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp)
|
||||||
print('{', file=fp)
|
print('{', file=fp)
|
||||||
print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp)
|
print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp)
|
||||||
print(' return iswspace(ch);', file=fp)
|
print(' return iswspace(ch);', file=fp)
|
||||||
print('#else', file=fp)
|
print('#else', file=fp)
|
||||||
print(' switch (ch) {', file=fp)
|
print(' switch (ch) {', file=fp)
|
||||||
|
|
||||||
haswide = False
|
|
||||||
hasnonewide = False
|
|
||||||
for codepoint in sorted(spaces):
|
for codepoint in sorted(spaces):
|
||||||
if codepoint < 0x10000:
|
|
||||||
hasnonewide = True
|
|
||||||
if codepoint >= 0x10000 and not haswide:
|
|
||||||
print('#ifdef Py_UNICODE_WIDE', file=fp)
|
|
||||||
haswide = True
|
|
||||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||||
if haswide and hasnonewide:
|
|
||||||
print('#endif', file=fp)
|
|
||||||
print(' return 1;', file=fp)
|
print(' return 1;', file=fp)
|
||||||
if haswide and not hasnonewide:
|
|
||||||
print('#endif', file=fp)
|
|
||||||
|
|
||||||
print(' }', file=fp)
|
print(' }', file=fp)
|
||||||
print(' return 0;', file=fp)
|
print(' return 0;', file=fp)
|
||||||
|
@ -545,23 +523,12 @@ def makeunicodetype(unicode, trace):
|
||||||
print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
|
print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp)
|
||||||
print(" * type 'B', 0 otherwise.", file=fp)
|
print(" * type 'B', 0 otherwise.", file=fp)
|
||||||
print(" */", file=fp)
|
print(" */", file=fp)
|
||||||
print('int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)', file=fp)
|
print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp)
|
||||||
print('{', file=fp)
|
print('{', file=fp)
|
||||||
print(' switch (ch) {', file=fp)
|
print(' switch (ch) {', file=fp)
|
||||||
haswide = False
|
|
||||||
hasnonewide = False
|
|
||||||
for codepoint in sorted(linebreaks):
|
for codepoint in sorted(linebreaks):
|
||||||
if codepoint < 0x10000:
|
|
||||||
hasnonewide = True
|
|
||||||
if codepoint >= 0x10000 and not haswide:
|
|
||||||
print('#ifdef Py_UNICODE_WIDE', file=fp)
|
|
||||||
haswide = True
|
|
||||||
print(' case 0x%04X:' % (codepoint,), file=fp)
|
print(' case 0x%04X:' % (codepoint,), file=fp)
|
||||||
if haswide and hasnonewide:
|
|
||||||
print('#endif', file=fp)
|
|
||||||
print(' return 1;', file=fp)
|
print(' return 1;', file=fp)
|
||||||
if haswide and not hasnonewide:
|
|
||||||
print('#endif', file=fp)
|
|
||||||
|
|
||||||
print(' }', file=fp)
|
print(' }', file=fp)
|
||||||
print(' return 0;', file=fp)
|
print(' return 0;', file=fp)
|
||||||
|
|
Loading…
Reference in New Issue