Update to Unicode 3.2 database.

This commit is contained in:
Martin v. Löwis 2002-10-18 16:11:54 +00:00
parent ecbb0eaa43
commit 9def6a3a77
8 changed files with 14036 additions and 9469 deletions

View File

@@ -2,6 +2,6 @@ test_ucn
 Testing General Unicode Character Name, and case insensitivity... done.
 Testing name to code mapping.... done.
 Testing code to name mapping for all characters.... done.
-Found 10538 characters in the unicode name database
+Found 11556 characters in the unicode name database
 Testing misc. symbols for unicode character name expansion.... done.
 Testing unicode character name expansion strict error handling.... done.

View File

@@ -1,5 +1,5 @@
 test_unicodedata
 Testing Unicode Database...
-Methods: 84b72943b1d4320bc1e64a4888f7cdf62eea219a
-Functions: 41e1d4792185d6474a43c83ce4f593b1bdb01f8a
+Methods: a37276dc2c158bef6dfd908ad34525c97180fad9
+Functions: 79b4425f140f5f31179fde6db05772d21e75c228
 API: ok

View File

@@ -36,7 +36,7 @@ _getrecord(PyUnicodeObject* v)
 code = (int) *PyUnicode_AS_UNICODE(v);
-if (code < 0 || code >= 65536)
+if (code < 0 || code >= 0x110000)
 index = 0;
 else {
 index = index1[(code>>SHIFT)];
@@ -219,7 +219,7 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
 code = (int) *PyUnicode_AS_UNICODE(v);
-if (code < 0 || code >= 65536)
+if (code < 0 || code >= 0x110000)
 index = 0;
 else {
 index = decomp_index1[(code>>DECOMP_SHIFT)];
@@ -284,7 +284,7 @@ _getucname(Py_UCS4 code, char* buffer, int buflen)
 int word;
 unsigned char* w;
-if (code >= 65536)
+if (code >= 0x110000)
 return 0;
 /* get offset into phrasebook */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -36,7 +36,7 @@ gettyperecord(Py_UNICODE code)
 {
 int index;
-if (code >= 65536)
+if (code >= 0x110000)
 index = 0;
 else {
 index = index1[(code>>SHIFT)];

File diff suppressed because it is too large Load Diff

View File

@@ -448,7 +448,7 @@ class UnicodeData:
 def __init__(self, filename, expand=1):
 file = open(filename)
-table = [None] * 65536
+table = [None] * 0x110000
 while 1:
 s = file.readline()
 if not s:
@@ -476,7 +476,7 @@ class UnicodeData:
 # public attributes
 self.filename = filename
 self.table = table
-self.chars = range(65536) # unicode
+self.chars = range(0x110000) # unicode 3.2
 def uselatin1(self):
 # restrict character range to ISO Latin 1