Patch #626485: Support Unicode normalization.

This commit is contained in:
Martin v. Löwis 2002-11-23 22:08:15 +00:00
parent 74a530d42d
commit 677bde2dd1
6 changed files with 1053 additions and 23 deletions

View File

@ -5,7 +5,7 @@
\modulesynopsis{Access the Unicode Database.}
\moduleauthor{Marc-Andre Lemburg}{mal@lemburg.com}
\sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
\index{Unicode}
\index{character}
@ -14,10 +14,10 @@
This module provides access to the Unicode Character Database which
defines character properties for all Unicode characters. The data in
this database is based on the \file{UnicodeData.txt} file version
3.0.0 which is publically available from \url{ftp://ftp.unicode.org/}.
3.2.0 which is publically available from \url{ftp://ftp.unicode.org/}.
The module uses the same names and symbols as defined by the
UnicodeData File Format 3.0.0 (see
UnicodeData File Format 3.2.0 (see
\url{http://www.unicode.org/Public/UNIDATA/UnicodeData.html}). It
defines the following functions:
@ -83,3 +83,37 @@ defines the following functions:
character \var{unichr} as string. An empty string is returned in case
no such mapping is defined.
\end{funcdesc}
\begin{funcdesc}{normalize}{form, unistr}
Return the normal form \var{form} for the Unicode string \var{unistr}.
Valid values for \var{form} are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
The Unicode standard defines various normalization forms of a Unicode
string, based on the definition of canonical equivalence and
compatibility equivalence. In Unicode, several characters can be
expressed in various way. For example, the character U+00C7 (LATIN
CAPITAL LETTER C WITH CEDILLA) can also be expressed as the sequence
U+0043 (LATIN CAPITAL LETTER C) U+0327 (COMBINING CEDILLA).
For each character, there are two normal forms: normal form C and
normal form D. Normal form D (NFD) is also known as canonical
decomposition, and translates each character into its decomposed form.
Normal form C (NFC) first applies a canonical decomposition, then
composes pre-combined characters again.
In addition to these two forms, there two additional normal forms
based on compatibility equivalence. In Unicode, certain characters are
supported which normally would be unified with other characters. For
example, U+2160 (ROMAN NUMERAL ONE) is really the same thing as U+0049
(LATIN CAPITAL LETTER I). However, it is supported in Unicode for
compatibility with existing character sets (e.g. gb2312).
The normal form KD (NFKD) will apply the compatibility decomposition,
i.e. replace all compatibility characters with their equivalents. The
normal form KC (NFKC) first applies the compatibility decomposition,
followed by the canonical composition.
\versionadded{2.3}
\end{funcdesc}

View File

@ -0,0 +1,68 @@
from test.test_support import verbose, TestFailed, TestSkipped, verify
import sys
from unicodedata import normalize
try:
data = open("NormalizationTest.txt","r").readlines()
except IOError:
raise TestSkipped("NormalizationTest.txt not found, download from http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt")
class RangeError:
pass
def NFC(str):
return normalize("NFC", str)
def NFKC(str):
return normalize("NFKC", str)
def NFD(str):
return normalize("NFD", str)
def NFKD(str):
return normalize("NFKD", str)
def unistr(data):
data = [int(x, 16) for x in data.split(" ")]
for x in data:
if x > sys.maxunicode:
raise RangeError
return u"".join([unichr(x) for x in data])
part1_data = {}
for line in data:
if '#' in line:
line = line.split('#')[0]
line = line.strip()
if not line:
continue
if line.startswith("@Part"):
part = line
continue
try:
c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
except RangeError:
# Skip unsupported characters
continue
if verbose:
print line
# Perform tests
verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
verify(c4 == NFC(c4) == NFC(c5), line)
verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
verify(c5 == NFD(c4) == NFD(c5), line)
verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), line)
verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), line)
# Record part 1 data
if part == "@Part1":
part1_data[c1] = 1
# Perform tests for all other data
for c in range(sys.maxunicode+1):
X = unichr(c)
if X in part1_data:
continue
assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c

View File

@ -317,8 +317,8 @@ Extension modules
available in source code, but not built automatically anymore, and
is now named bsddb185.
- unicodedata was updated to Unicode 3.2. In now also supports names
for Hangul syllables and CJK unified ideographs.
- unicodedata was updated to Unicode 3.2. It supports normalization
and names for Hangul syllables and CJK unified ideographs.
- resource.getrlimit() now returns longs instead of ints.

View File

@ -30,13 +30,9 @@ typedef struct {
#include "unicodedata_db.h"
static const _PyUnicode_DatabaseRecord*
_getrecord(PyUnicodeObject* v)
_getrecord_ex(Py_UCS4 code)
{
int code;
int index;
code = (int) *PyUnicode_AS_UNICODE(v);
if (code < 0 || code >= 0x110000)
index = 0;
else {
@ -47,6 +43,12 @@ _getrecord(PyUnicodeObject* v)
return &_PyUnicode_Database_Records[index];
}
static const _PyUnicode_DatabaseRecord*
_getrecord(PyUnicodeObject* v)
{
return _getrecord_ex(*PyUnicode_AS_UNICODE(v));
}
/* --- Module API --------------------------------------------------------- */
static PyObject *
@ -253,6 +255,276 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
return PyString_FromString(decomp);
}
void
get_decomp_record(Py_UCS4 code, int *index, int *prefix, int *count)
{
if (code < 0 || code >= 0x110000) {
*index = 0;
}
else {
*index = decomp_index1[(code>>DECOMP_SHIFT)];
*index = decomp_index2[(*index<<DECOMP_SHIFT)+
(code&((1<<DECOMP_SHIFT)-1))];
}
/* high byte is number of hex bytes (usually one or two), low byte
is prefix code (from*/
*count = decomp_data[*index] >> 8;
*prefix = decomp_data[*index] & 255;
(*index)++;
}
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount*TCount)
#define SCount (LCount*NCount)
static PyObject*
nfd_nfkd(PyObject *input, int k)
{
PyObject *result;
Py_UNICODE *i, *end, *o;
/* Longest decomposition in Unicode 3.2: U+FDFA */
Py_UNICODE stack[20];
int space, stackptr, isize;
int index, prefix, count;
unsigned char prev, cur;
stackptr = 0;
isize = PyUnicode_GET_SIZE(input);
/* Overallocate atmost 10 characters. */
space = (isize > 10 ? 10 : isize) + isize;
result = PyUnicode_FromUnicode(NULL, space);
if (!result)
return NULL;
i = PyUnicode_AS_UNICODE(input);
end = i + isize;
o = PyUnicode_AS_UNICODE(result);
while (i < end) {
stack[stackptr++] = *i++;
while(stackptr) {
Py_UNICODE code = stack[--stackptr];
if (!space) {
space = PyString_GET_SIZE(result) + 10;
if (PyUnicode_Resize(&result, space) == -1)
return NULL;
o = PyUnicode_AS_UNICODE(result) + space - 10;
space = 10;
}
/* Hangul Decomposition. */
if (SBase <= code && code < (SBase+SCount)) {
int SIndex = code - SBase;
int L = LBase + SIndex / NCount;
int V = VBase + (SIndex % NCount) / TCount;
int T = TBase + SIndex % TCount;
*o++ = L;
*o++ = V;
space -= 2;
if (T != TBase) {
*o++ = T;
space --;
}
continue;
}
/* Other decompoistions. */
get_decomp_record(code, &index, &prefix, &count);
/* Copy character if it is not decomposable, or has a
compatibility decomposition, but we do NFD. */
if (!count || (prefix && !k)) {
*o++ = code;
space--;
continue;
}
/* Copy decomposition onto the stack, in reverse
order. */
while(count) {
code = decomp_data[index + (--count)];
stack[stackptr++] = code;
}
}
}
/* Drop overallocation. Cannot fail. */
PyUnicode_Resize(&result, PyUnicode_GET_SIZE(result) - space);
/* Sort canonically. */
i = PyUnicode_AS_UNICODE(result);
prev = _getrecord_ex(*i)->combining;
end = i + PyUnicode_GET_SIZE(result);
for (i++; i < end; i++) {
cur = _getrecord_ex(*i)->combining;
if (prev == 0 || cur == 0 || prev <= cur) {
prev = cur;
continue;
}
/* Non-canonical order. Need to switch *i with previous. */
o = i - 1;
while (1) {
Py_UNICODE tmp = o[1];
o[1] = o[0];
o[0] = tmp;
o--;
if (o < PyUnicode_AS_UNICODE(result))
break;
prev = _getrecord_ex(*o)->combining;
if (prev == 0 || prev <= cur)
break;
}
prev = _getrecord_ex(*i)->combining;
}
return result;
}
static int
find_nfc_index(struct reindex* nfc, Py_UNICODE code)
{
int index;
for (index = 0; nfc[index].start; index++) {
int start = nfc[index].start;
if (code < start)
return -1;
if (code <= start + nfc[index].count) {
int delta = code - start;
return nfc[index].index + delta;
}
}
return -1;
}
static PyObject*
nfc_nfkc(PyObject *input, int k)
{
PyObject *result;
Py_UNICODE *i, *i1, *o, *end;
int f,l,index,index1,comb;
Py_UNICODE code;
Py_UNICODE *skipped[20];
int cskipped = 0;
result = nfd_nfkd(input, k);
if (!result)
return NULL;
/* We are going to modify result in-place.
If nfd_nfkd is changed to sometimes return the input,
this code needs to be reviewed. */
assert(result != input);
i = PyUnicode_AS_UNICODE(result);
end = i + PyUnicode_GET_SIZE(result);
o = PyUnicode_AS_UNICODE(result);
again:
while (i < end) {
for (index = 0; index < cskipped; index++) {
if (skipped[index] == i) {
/* *i character is skipped.
Remove from list. */
skipped[index] = skipped[cskipped-1];
cskipped--;
i++;
goto again; // continue while
}
}
/* Hangul Composition. We don't need to check for <LV,T>
pairs, since we always have decomposed data. */
if (LBase <= *i && *i < (LBase+LCount) &&
i + 1 < end &&
VBase <= i[1] && i[1] <= (VBase+VCount)) {
int LIndex, VIndex;
LIndex = i[0] - LBase;
VIndex = i[1] - VBase;
code = SBase + (LIndex*VCount+VIndex)*TCount;
i+=2;
if (i < end &&
TBase <= *i && *i <= (TBase+TCount)) {
code += *i-TBase;
i++;
}
*o++ = code;
continue;
}
f = find_nfc_index(nfc_first, *i);
if (f == -1) {
*o++ = *i++;
continue;
}
/* Find next unblocked character. */
i1 = i+1;
comb = 0;
while (i1 < end) {
int comb1 = _getrecord_ex(*i1)->combining;
if (comb1 && comb == comb1) {
/* Character is blocked. */
i1++;
continue;
}
l = find_nfc_index(nfc_last, *i1);
/* *i1 cannot be combined with *i. If *i1
is a starter, we don't need to look further.
Otherwise, record the combining class. */
if (l == -1) {
not_combinable:
if (comb1 == 0)
break;
comb = comb1;
i1++;
continue;
}
index = f*TOTAL_LAST + l;
index1 = comp_index[index >> COMP_SHIFT];
code = comp_data[(index1<<COMP_SHIFT)+
(index&((1<<COMP_SHIFT)-1))];
if (code == 0)
goto not_combinable;
/* Replace the original character. */
*i = code;
/* Mark the second character unused. */
skipped[cskipped++] = i1;
i1++;
f = find_nfc_index(nfc_first, *i);
if (f == -1)
break;
}
*o++ = *i++;
}
if (o != end)
PyUnicode_Resize(&result, o - PyUnicode_AS_UNICODE(result));
return result;
}
static PyObject*
unicodedata_normalize(PyObject *self, PyObject *args)
{
char *form;
PyObject *input;
if(!PyArg_ParseTuple(args, "sO!:normalized",
&form, &PyUnicode_Type, &input))
return NULL;
if (strcmp(form, "NFC") == 0)
return nfc_nfkc(input, 0);
if (strcmp(form, "NFKC") == 0)
return nfc_nfkc(input, 1);
if (strcmp(form, "NFD") == 0)
return nfd_nfkd(input, 0);
if (strcmp(form, "NFKD") == 0)
return nfd_nfkd(input, 1);
PyErr_SetString(PyExc_ValueError, "invalid normalization form");
return NULL;
}
/* -------------------------------------------------------------------- */
/* unicode character name tables */
@ -277,16 +549,6 @@ _gethash(const char *s, int len, int scale)
return h;
}
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount*TCount)
#define SCount (LCount*NCount)
static char *hangul_syllables[][3] = {
{ "G", "A", "" },
{ "GG", "AE", "G" },
@ -594,6 +856,7 @@ static PyMethodDef unicodedata_functions[] = {
{"decomposition",unicodedata_decomposition, METH_VARARGS},
{"name", unicodedata_name, METH_VARARGS},
{"lookup", unicodedata_lookup, METH_VARARGS},
{"normalize", unicodedata_normalize, METH_VARARGS},
{NULL, NULL} /* sentinel */
};
@ -618,5 +881,6 @@ initunicodedata(void)
/*
Local variables:
c-basic-offset: 4
indent-tabs-mode: nil
End:
*/

View File

@ -127,6 +127,241 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {
{27, 0, 1, 0},
};
/* Reindexing of NFC first characters. */
#define TOTAL_FIRST 356
#define TOTAL_LAST 53
struct reindex{int start;short count,index;};
struct reindex nfc_first[] = {
{ 60, 2, 0},
{ 65, 15, 3},
{ 82, 8, 19},
{ 97, 15, 28},
{ 114, 8, 44},
{ 168, 0, 53},
{ 194, 0, 54},
{ 196, 3, 55},
{ 202, 0, 59},
{ 207, 0, 60},
{ 212, 2, 61},
{ 216, 0, 64},
{ 220, 0, 65},
{ 226, 0, 66},
{ 228, 3, 67},
{ 234, 0, 71},
{ 239, 0, 72},
{ 244, 2, 73},
{ 248, 0, 76},
{ 252, 0, 77},
{ 258, 1, 78},
{ 274, 1, 80},
{ 332, 1, 82},
{ 346, 1, 84},
{ 352, 1, 86},
{ 360, 3, 88},
{ 383, 0, 92},
{ 416, 1, 93},
{ 431, 1, 95},
{ 439, 0, 97},
{ 490, 1, 98},
{ 550, 3, 100},
{ 558, 1, 104},
{ 658, 0, 106},
{ 913, 0, 107},
{ 917, 0, 108},
{ 919, 0, 109},
{ 921, 0, 110},
{ 927, 0, 111},
{ 929, 0, 112},
{ 933, 0, 113},
{ 937, 0, 114},
{ 940, 0, 115},
{ 942, 0, 116},
{ 945, 0, 117},
{ 949, 0, 118},
{ 951, 0, 119},
{ 953, 0, 120},
{ 959, 0, 121},
{ 961, 0, 122},
{ 965, 0, 123},
{ 969, 2, 124},
{ 974, 0, 127},
{ 978, 0, 128},
{ 1030, 0, 129},
{ 1040, 0, 130},
{ 1043, 0, 131},
{ 1045, 3, 132},
{ 1050, 0, 136},
{ 1054, 0, 137},
{ 1059, 0, 138},
{ 1063, 0, 139},
{ 1067, 0, 140},
{ 1069, 0, 141},
{ 1072, 0, 142},
{ 1075, 0, 143},
{ 1077, 3, 144},
{ 1082, 0, 148},
{ 1086, 0, 149},
{ 1091, 0, 150},
{ 1095, 0, 151},
{ 1099, 0, 152},
{ 1101, 0, 153},
{ 1110, 0, 154},
{ 1140, 1, 155},
{ 1240, 1, 157},
{ 1256, 1, 159},
{ 1575, 0, 161},
{ 1608, 0, 162},
{ 1610, 0, 163},
{ 1729, 0, 164},
{ 1746, 0, 165},
{ 1749, 0, 166},
{ 2344, 0, 167},
{ 2352, 0, 168},
{ 2355, 0, 169},
{ 2503, 0, 170},
{ 2887, 0, 171},
{ 2962, 0, 172},
{ 3014, 1, 173},
{ 3142, 0, 175},
{ 3263, 0, 176},
{ 3270, 0, 177},
{ 3274, 0, 178},
{ 3398, 1, 179},
{ 3545, 0, 181},
{ 3548, 0, 182},
{ 4133, 0, 183},
{ 7734, 1, 184},
{ 7770, 1, 186},
{ 7778, 1, 188},
{ 7840, 1, 190},
{ 7864, 1, 192},
{ 7884, 1, 194},
{ 7936, 17, 196},
{ 7960, 1, 214},
{ 7968, 17, 216},
{ 7992, 1, 234},
{ 8000, 1, 236},
{ 8008, 1, 238},
{ 8016, 1, 240},
{ 8025, 0, 242},
{ 8032, 16, 243},
{ 8052, 0, 260},
{ 8060, 0, 261},
{ 8118, 0, 262},
{ 8127, 0, 263},
{ 8134, 0, 264},
{ 8182, 0, 265},
{ 8190, 0, 266},
{ 8592, 0, 267},
{ 8594, 0, 268},
{ 8596, 0, 269},
{ 8656, 0, 270},
{ 8658, 0, 271},
{ 8660, 0, 272},
{ 8707, 0, 273},
{ 8712, 0, 274},
{ 8715, 0, 275},
{ 8739, 0, 276},
{ 8741, 0, 277},
{ 8764, 0, 278},
{ 8771, 0, 279},
{ 8773, 0, 280},
{ 8776, 0, 281},
{ 8781, 0, 282},
{ 8801, 0, 283},
{ 8804, 1, 284},
{ 8818, 1, 286},
{ 8822, 1, 288},
{ 8826, 3, 290},
{ 8834, 1, 294},
{ 8838, 1, 296},
{ 8849, 1, 298},
{ 8866, 0, 300},
{ 8872, 1, 301},
{ 8875, 0, 303},
{ 8882, 3, 304},
{ 12358, 0, 308},
{ 12363, 0, 309},
{ 12365, 0, 310},
{ 12367, 0, 311},
{ 12369, 0, 312},
{ 12371, 0, 313},
{ 12373, 0, 314},
{ 12375, 0, 315},
{ 12377, 0, 316},
{ 12379, 0, 317},
{ 12381, 0, 318},
{ 12383, 0, 319},
{ 12385, 0, 320},
{ 12388, 0, 321},
{ 12390, 0, 322},
{ 12392, 0, 323},
{ 12399, 0, 324},
{ 12402, 0, 325},
{ 12405, 0, 326},
{ 12408, 0, 327},
{ 12411, 0, 328},
{ 12445, 0, 329},
{ 12454, 0, 330},
{ 12459, 0, 331},
{ 12461, 0, 332},
{ 12463, 0, 333},
{ 12465, 0, 334},
{ 12467, 0, 335},
{ 12469, 0, 336},
{ 12471, 0, 337},
{ 12473, 0, 338},
{ 12475, 0, 339},
{ 12477, 0, 340},
{ 12479, 0, 341},
{ 12481, 0, 342},
{ 12484, 0, 343},
{ 12486, 0, 344},
{ 12488, 0, 345},
{ 12495, 0, 346},
{ 12498, 0, 347},
{ 12501, 0, 348},
{ 12504, 0, 349},
{ 12507, 0, 350},
{ 12527, 3, 351},
{ 12541, 0, 355},
{0,0,0}
};
struct reindex nfc_last[] = {
{ 768, 4, 0},
{ 774, 6, 5},
{ 783, 0, 12},
{ 785, 0, 13},
{ 787, 1, 14},
{ 795, 0, 16},
{ 803, 5, 17},
{ 813, 1, 23},
{ 816, 1, 25},
{ 824, 0, 27},
{ 834, 0, 28},
{ 837, 0, 29},
{ 1619, 2, 30},
{ 2364, 0, 33},
{ 2494, 0, 34},
{ 2519, 0, 35},
{ 2878, 0, 36},
{ 2902, 1, 37},
{ 3006, 0, 39},
{ 3031, 0, 40},
{ 3158, 0, 41},
{ 3266, 0, 42},
{ 3285, 1, 43},
{ 3390, 0, 45},
{ 3415, 0, 46},
{ 3530, 0, 47},
{ 3535, 0, 48},
{ 3551, 0, 49},
{ 4142, 0, 50},
{ 12441, 1, 51},
{0,0,0}
};
/* string literals */
const char *_PyUnicode_CategoryNames[] = {
"Cn",
@ -3209,3 +3444,345 @@ static unsigned short decomp_index2[] = {
0, 0,
};
/* NFC pairs */
#define COMP_SHIFT 3
static unsigned short comp_index[] = {
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 4, 5, 6, 7, 0,
0, 0, 0, 8, 9, 10, 0, 0, 0, 11, 12, 13, 0, 0, 0, 0, 14, 15, 16, 17, 0, 0,
18, 19, 20, 21, 0, 0, 0, 22, 0, 0, 0, 0, 0, 23, 24, 25, 26, 0, 0, 0, 27,
28, 29, 30, 0, 0, 31, 32, 33, 34, 35, 0, 0, 36, 0, 0, 0, 0, 0, 0, 37, 38,
39, 40, 0, 0, 41, 0, 42, 43, 44, 0, 0, 45, 46, 47, 0, 0, 0, 0, 48, 49,
50, 51, 0, 0, 52, 53, 54, 55, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 58, 59, 60,
61, 0, 0, 62, 63, 64, 65, 0, 0, 0, 66, 67, 68, 69, 0, 0, 70, 71, 72, 73,
0, 0, 0, 74, 0, 75, 0, 0, 0, 0, 76, 0, 77, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0,
79, 80, 81, 0, 0, 0, 0, 82, 83, 84, 85, 0, 0, 86, 87, 88, 89, 0, 0, 0,
90, 0, 91, 92, 0, 0, 93, 94, 95, 96, 0, 0, 0, 0, 97, 98, 99, 0, 0, 0,
100, 101, 102, 103, 0, 0, 0, 104, 0, 0, 0, 0, 0, 105, 106, 107, 0, 0, 0,
0, 108, 109, 110, 111, 0, 0, 112, 113, 114, 115, 0, 0, 0, 116, 117, 0, 0,
0, 0, 118, 0, 119, 120, 121, 0, 0, 122, 123, 124, 125, 0, 0, 0, 126, 0,
127, 0, 0, 0, 128, 129, 130, 131, 0, 0, 0, 132, 133, 134, 135, 0, 0, 0,
136, 0, 0, 0, 0, 0, 137, 138, 139, 140, 0, 0, 0, 141, 142, 143, 0, 0, 0,
0, 144, 145, 146, 147, 0, 0, 148, 149, 150, 151, 0, 0, 0, 152, 0, 153, 0,
0, 0, 154, 155, 156, 0, 0, 0, 0, 0, 157, 0, 0, 0, 0, 158, 159, 160, 161,
0, 0, 0, 162, 163, 164, 165, 0, 0, 0, 166, 0, 0, 167, 0, 0, 168, 169, 0,
0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 171, 0, 0, 0, 0, 0, 172, 173, 0, 0, 0,
0, 0, 174, 0, 0, 0, 0, 0, 175, 176, 0, 0, 0, 0, 0, 177, 0, 0, 0, 0, 0, 0,
178, 179, 0, 0, 0, 0, 180, 181, 0, 0, 0, 0, 0, 182, 0, 0, 0, 0, 0, 0,
183, 0, 0, 0, 0, 0, 184, 185, 186, 0, 0, 0, 0, 187, 188, 0, 0, 0, 0, 0,
189, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 191, 0, 0, 0, 0, 0, 192, 0, 0,
0, 0, 0, 0, 193, 194, 0, 0, 0, 0, 0, 195, 0, 0, 0, 0, 0, 196, 197, 0, 0,
0, 0, 0, 198, 199, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 201, 0, 0, 0, 0, 0,
0, 202, 203, 0, 0, 0, 0, 204, 205, 0, 0, 0, 0, 0, 206, 207, 0, 0, 0, 0,
0, 208, 0, 0, 0, 0, 0, 209, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 211,
212, 0, 0, 0, 0, 0, 0, 213, 0, 0, 0, 0, 0, 214, 0, 0, 0, 0, 0, 0, 215, 0,
0, 0, 0, 0, 0, 216, 0, 0, 0, 0, 0, 217, 0, 0, 0, 0, 0, 218, 0, 0, 0, 0,
0, 0, 0, 219, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 221, 0, 0, 0, 0, 0,
222, 223, 224, 0, 0, 0, 225, 226, 227, 0, 0, 0, 0, 228, 229, 230, 0, 0,
0, 0, 231, 232, 233, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 235, 0, 0, 0, 0, 0,
0, 236, 0, 0, 0, 0, 0, 0, 237, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 239,
0, 0, 0, 0, 0, 0, 240, 0, 0, 0, 0, 0, 241, 0, 0, 0, 0, 0, 0, 242, 0, 0,
0, 0, 0, 0, 243, 0, 0, 0, 0, 244, 245, 246, 0, 247, 0, 0, 248, 0, 249, 0,
0, 0, 0, 250, 251, 252, 253, 0, 0, 254, 255, 256, 0, 0, 0, 0, 257, 0,
258, 0, 0, 0, 0, 0, 259, 0, 0, 0, 0, 260, 261, 262, 0, 0, 0, 0, 263, 0,
264, 265, 0, 0, 0, 0, 0, 0, 266, 0, 0, 0, 0, 0, 0, 267, 0, 0, 268, 269,
270, 271, 0, 0, 272, 0, 273, 0, 0, 0, 0, 274, 0, 275, 276, 277, 0, 0,
278, 279, 0, 280, 0, 0, 281, 0, 282, 0, 0, 0, 0, 0, 0, 283, 0, 0, 0, 284,
285, 286, 0, 287, 0, 0, 288, 0, 289, 0, 290, 0, 0, 291, 0, 0, 292, 0, 0,
293, 0, 0, 0, 294, 0, 0, 0, 0, 0, 0, 295, 0, 0, 296, 0, 0, 0, 0, 0, 0,
297, 0, 0, 0, 0, 0, 298, 299, 0, 0, 0, 0, 0, 300, 0, 0, 0, 0, 0, 301,
302, 0, 0, 0, 0, 0, 303, 304, 0, 0, 0, 0, 0, 305, 0, 0, 0, 0, 0, 306,
307, 0, 0, 0, 0, 0, 308, 0, 0, 0, 0, 0, 0, 309, 0, 0, 0, 0, 0, 310, 311,
0, 0, 0, 0, 0, 312, 0, 0, 0, 0, 0, 0, 313, 0, 0, 0, 0, 0, 0, 314, 0, 0,
0, 0, 0, 315, 0, 0, 0, 0, 0, 316, 0, 0, 0, 0, 0, 0, 317, 0, 0, 0, 0, 0,
0, 318, 0, 0, 0, 0, 0, 0, 319, 0, 0, 0, 0, 320, 321, 0, 0, 0, 0, 0, 322,
0, 0, 0, 0, 0, 0, 0, 323, 0, 0, 0, 0, 0, 324, 325, 0, 0, 0, 0, 0, 326, 0,
0, 0, 0, 0, 327, 0, 0, 0, 0, 0, 0, 328, 0, 0, 0, 0, 0, 0, 329, 0, 0, 0,
0, 0, 0, 330, 0, 0, 0, 0, 0, 0, 331, 0, 0, 0, 0, 0, 332, 0, 0, 0, 0, 0,
333, 0, 0, 0, 0, 0, 0, 334, 0, 0, 0, 0, 0, 335, 0, 0, 0, 0, 0, 0, 0, 0,
0, 336, 0, 0, 0, 0, 0, 0, 337, 0, 0, 0, 0, 0, 338, 0, 0, 0, 0, 0, 0, 339,
0, 0, 0, 0, 0, 0, 340, 0, 0, 0, 0, 0, 341, 0, 0, 0, 0, 0, 0, 342, 0, 0,
0, 0, 0, 0, 343, 0, 0, 0, 0, 0, 344, 0, 0, 0, 0, 0, 0, 345, 0, 0, 0, 0,
0, 0, 346, 0, 0, 0, 0, 0, 0, 347, 0, 0, 0, 0, 0, 0, 348, 0, 0, 0, 0, 0,
349, 0, 0, 0, 0, 0, 0, 350, 0, 0, 0, 0, 0, 0, 351, 0, 0, 0, 0, 0, 352,
353, 0, 0, 0, 0, 0, 354, 0, 0, 0, 0, 0, 0, 355, 0, 0, 0, 0, 0, 0, 356, 0,
0, 0, 0, 0, 0, 357, 0, 0, 0, 0, 0, 358, 0, 0, 0, 0, 0, 0, 359, 360, 0, 0,
0, 0, 0, 0, 361, 0, 0, 0, 0, 0, 362, 0, 0, 0, 0, 0, 0, 363, 0, 0, 0, 0,
0, 0, 364, 0, 0, 0, 0, 0, 365, 0, 0, 0, 0, 0, 0, 366, 0, 0, 0, 0, 0, 367,
368, 0, 0, 0, 0, 0, 369, 0, 0, 0, 0, 0, 370, 0, 0, 0, 0, 0, 0, 371, 0, 0,
0, 0, 0, 0, 372, 0, 0, 0, 0, 0, 373, 0, 0, 0, 374, 0, 0, 375, 0, 0, 376,
0, 0, 0, 0, 0, 0, 377, 0, 0, 0, 0, 0, 0, 378, 0, 0, 0, 0, 0, 379, 0, 0,
0, 0, 0, 0, 380, 0, 0, 0, 0, 0, 381, 0, 0, 0, 0, 0, 0, 382, 0, 0, 383, 0,
0, 0, 384, 0, 0, 385, 0, 0, 386, 0, 0, 0, 0, 0, 0, 387, 0, 0, 0, 0, 0, 0,
388, 0, 0, 0, 0, 0, 389, 0, 0, 0, 0, 0, 0, 390, 0, 0, 0, 0, 0, 391, 0, 0,
0, 0, 0, 0, 392, 0, 0, 393, 0, 0, 0, 0, 0, 0, 394, 0, 0, 0, 0, 0, 395, 0,
0, 0, 0, 0, 0, 396, 0, 0, 0, 0, 0, 0, 397, 0, 0, 398, 0, 0, 399, 0, 0, 0,
400, 0, 0, 0, 0, 0, 401, 0, 0, 0, 0, 0, 0, 402, 0, 0, 0, 0, 0, 0, 403, 0,
0, 0, 0, 0, 404, 0, 0, 0, 0, 0, 0, 405, 0, 0, 0, 0, 0, 0, 406, 0, 0, 407,
0, 0, 408, 0, 0, 409, 0, 0, 0, 410, 0, 0, 0, 0, 0, 411, 0, 0, 0, 0, 0, 0,
412, 0, 0, 0, 0, 0, 0, 413, 0, 0, 0, 0, 0, 414, 0, 0, 0, 0, 0, 0, 415, 0,
0, 0, 0, 0, 0, 416, 0, 0, 417, 0, 0, 418, 0, 0, 419, 0, 0, 0, 420, 0, 0,
421, 0, 0, 422, 0, 0, 423, 424, 0, 0, 425, 0, 0, 426, 0, 0, 0, 0, 0, 0,
427, 0, 0, 0, 0, 0, 428, 0, 0, 0, 0, 0, 0, 429, 0, 0, 0, 0, 0, 0, 430, 0,
0, 431, 0, 0, 432, 0, 0, 0, 433, 0, 0, 434, 0, 0, 435, 0, 0, 436, 437, 0,
0, 438, 0, 0, 439, 0, 0, 0, 440, 0, 0, 0, 0, 0, 441, 0, 0, 0, 0, 0, 0,
442, 0, 0, 0, 0, 0, 0, 443, 0, 0, 0, 0, 0, 444, 0, 0, 0, 0, 0, 0, 445, 0,
0, 0, 0, 0, 446, 0, 0, 447, 448, 0, 0, 449, 0, 0, 450, 0, 0, 0, 451, 0,
0, 0, 0, 0, 452, 0, 0, 0, 0, 0, 0, 453, 0, 0, 0, 0, 0, 0, 454, 0, 0, 0,
0, 0, 455, 0, 0, 0, 0, 0, 0, 456, 0, 0, 0, 0, 0, 457, 0, 0, 0, 0, 0, 0,
458, 0, 0, 0, 0, 0, 0, 459, 0, 0, 0, 0, 0, 460, 0, 0, 0, 0, 0, 0, 461, 0,
0, 462, 0, 0, 463, 0, 0, 0, 0, 0, 0, 464, 0, 0, 0, 0, 0, 0, 465, 0, 0,
466, 0, 0, 467, 0, 0, 0, 0, 0, 0, 468, 0, 0, 0, 0, 0, 469, 0, 0, 0, 0, 0,
0, 470, 0, 0, 0, 0, 0, 0, 471, 0, 0, 0, 0, 0, 472, 0, 0, 0, 0, 0, 0, 473,
0, 0, 0, 0, 0, 0, 474, 0, 0, 0, 0, 0, 475, 0, 0, 0, 0, 0, 0, 476, 0, 0,
0, 0, 0, 477, 0, 0, 0, 0, 0, 0, 478, 0, 0, 0, 0, 0, 0, 479, 0, 0, 0, 0,
0, 480, 0, 0, 0, 0, 0, 0, 481, 0, 0, 0, 0, 0, 0, 482, 0, 0, 0, 0, 0, 483,
0, 0, 0, 0, 0, 0, 484, 0, 0, 0, 0, 0, 485, 0, 0, 0, 0, 0, 0, 486, 0, 0,
0, 0, 0, 0, 487, 0, 0, 0, 0, 0, 488, 0, 0, 0, 0, 0, 0, 489, 0, 0, 0, 0,
0, 0, 490, 0, 0, 0, 0, 0, 491, 0, 0, 0, 0, 0, 0, 492, 0, 0, 0, 0, 0, 493,
0, 0, 0, 0, 0, 0, 494, 0, 0, 0, 0, 0, 0, 495, 0, 0, 0, 0, 0, 496, 0, 0,
0, 0, 0, 0, 497, 0, 0, 0, 0, 0, 0, 498, 0, 0, 0, 0, 0, 499, 0, 0, 0, 0,
0, 0, 500, 0, 0, 0, 0, 0, 501, 0, 0, 0, 0, 0, 0, 502, 0, 0, 0, 0, 0, 0,
503, 0, 0, 0, 0, 0, 504, 0, 0, 0, 0, 0, 0, 505, 0, 0, 0, 0, 0, 0, 506, 0,
0, 0, 0, 0, 507, 0, 0, 0, 0, 0, 0, 508, 0, 0, 0, 0, 0, 0, 0, 0, 509, 0,
0, 0, 0, 0, 0, 510, 0, 0, 0, 0, 0, 0, 511, 0, 0, 0, 0, 0, 512, 0, 0, 0,
0, 0, 0, 513, 0, 0, 0, 0, 0, 0, 514, 0, 0, 0, 0, 0, 515, 0, 0, 0, 0, 0,
0, 516, 0, 0, 0, 0, 0, 517, 0, 0, 0, 0, 0, 0, 518, 0, 0, 0, 0, 0, 0, 519,
0, 0, 0, 0, 0, 520, 0, 0, 0, 0, 0, 0, 521, 0, 0, 0, 0, 0, 0, 522, 0, 0,
0, 0, 0, 523, 0, 0, 0, 0, 0, 0, 524, 0, 0, 0, 0, 0, 525, 526, 0, 0, 0, 0,
0, 527, 0, 0, 0, 0, 0, 0, 528, 0, 0, 0, 0, 0, 529, 0, 0, 0, 0, 0, 0, 530,
0, 0, 0, 0, 0, 0, 531, 0, 0, 0, 0, 0, 532, 0, 0, 0, 0, 0, 0, 533, 0, 0,
0, 0, 0, 534, 0, 0, 0, 0, 0, 0, 535, 0, 0, 0, 0, 0, 0, 536, 0, 0, 0, 0,
0, 537, 0, 0, 0, 0, 0, 0, 538, 0, 0, 0, 0, 0, 0, 539, 0, 0, 0, 0, 0, 540,
0, 0, 0, 0, 0, 0, 541, 0, 0, 0, 0, 0, 542, 0, 0, 0, 0, 0, 0, 543, 0, 0,
0, 0, 0, 0, 544, 0, 0, 0, 0, 0, 545, 0, 0, 0, 0, 0, 0, 546, 0, 0, 0, 0,
0, 0, 547, 0, 0, 0, 0, 0, 548, 0, 0, 0, 0, 0, 0, 549, 0, 0, 0, 0, 0, 550,
551, 0, 0, 0, 0, 0, 552, 0, 0, 0, 0, 0, 0, 553, 0, 0, 0, 0, 0, 554, 0, 0,
0, 0, 0, 0, 555, 0, 0, 0, 0, 0, 0, 556, 0, 0, 0, 0, 0, 557, 0, 0, 0, 0,
0, 0, 558,
};
static unsigned short comp_data[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8814, 0, 0, 0, 0, 8800, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 8815, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 193, 194, 195,
256, 258, 550, 196, 7842, 197, 0, 461, 512, 514, 0, 0, 0, 7840, 0, 7680,
0, 0, 260, 0, 0, 0, 0, 7682, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7684, 0, 0, 0,
0, 0, 0, 0, 0, 7686, 0, 0, 0, 262, 264, 0, 0, 0, 266, 0, 0, 0, 0, 268, 0,
0, 0, 0, 0, 0, 0, 0, 0, 199, 0, 0, 0, 0, 0, 7690, 0, 0, 0, 0, 270, 0, 0,
0, 0, 0, 7692, 0, 0, 0, 7696, 0, 7698, 0, 0, 7694, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 200, 201, 202, 7868, 274, 276, 278, 203, 7866, 0, 0, 282, 516,
518, 0, 0, 0, 7864, 0, 0, 0, 552, 280, 7704, 0, 7706, 0, 0, 0, 0, 0, 0,
0, 0, 0, 7710, 0, 0, 0, 0, 0, 0, 0, 500, 284, 0, 7712, 286, 288, 0, 0, 0,
0, 486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 290, 0, 0, 0, 0, 0, 0, 0, 0, 0, 292,
0, 0, 0, 7714, 7718, 0, 0, 0, 542, 0, 0, 0, 0, 0, 7716, 0, 0, 0, 7720, 0,
0, 7722, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 205, 206, 296, 298,
300, 304, 207, 7880, 0, 0, 463, 520, 522, 0, 0, 0, 7882, 0, 0, 0, 0, 302,
0, 0, 7724, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 308, 0, 0, 0, 7728, 0,
0, 0, 0, 0, 0, 0, 0, 0, 488, 0, 0, 0, 0, 0, 7730, 0, 0, 0, 310, 0, 0, 0,
0, 7732, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 0, 317, 0, 0, 0, 0, 0,
7734, 0, 0, 0, 315, 0, 7740, 0, 0, 7738, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7742, 0, 0, 0, 0, 7744, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7746, 0, 0, 0, 504,
323, 0, 209, 0, 0, 7748, 0, 0, 0, 0, 327, 0, 0, 0, 0, 0, 7750, 0, 0, 0,
325, 0, 7754, 0, 0, 7752, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210, 211, 212,
213, 332, 334, 558, 214, 7886, 0, 336, 465, 524, 526, 0, 0, 416, 7884, 0,
0, 0, 0, 490, 0, 0, 0, 0, 0, 0, 0, 7764, 0, 0, 0, 0, 7766, 0, 0, 0, 0, 0,
0, 0, 340, 0, 0, 0, 0, 7768, 0, 0, 0, 0, 344, 528, 530, 0, 0, 0, 7770, 0,
0, 0, 342, 0, 0, 0, 0, 7774, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 346, 348,
0, 0, 0, 7776, 0, 0, 0, 0, 352, 0, 0, 0, 0, 0, 7778, 0, 0, 536, 350, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7786, 0, 0, 0, 0, 356, 0, 0, 0, 0, 0,
7788, 0, 0, 538, 354, 0, 7792, 0, 0, 7790, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217, 218, 219, 360, 362, 364, 0, 220, 7910, 366, 368, 467, 532, 534, 0,
0, 431, 7908, 7794, 0, 0, 0, 370, 7798, 0, 7796, 0, 0, 0, 0, 0, 0, 7804,
0, 0, 0, 0, 0, 7806, 0, 0, 0, 7808, 7810, 372, 0, 0, 0, 7814, 7812, 0,
7816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7818, 7820, 0, 0, 0, 0, 0, 7922, 221,
374, 7928, 562, 0, 7822, 376, 7926, 0, 0, 0, 0, 0, 0, 0, 0, 7924, 0, 0,
0, 0, 377, 7824, 0, 0, 0, 379, 0, 0, 0, 0, 381, 0, 0, 0, 0, 0, 7826, 0,
0, 0, 0, 0, 0, 0, 0, 7828, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224, 225, 226,
227, 257, 259, 551, 228, 7843, 229, 0, 462, 513, 515, 0, 0, 0, 7841, 0,
7681, 0, 0, 261, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7683, 0, 0, 7685, 0,
0, 0, 0, 0, 0, 0, 0, 7687, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 263, 265, 0,
0, 0, 267, 0, 0, 0, 0, 269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 231, 0, 0, 0, 0,
0, 7691, 0, 0, 0, 0, 271, 0, 0, 0, 0, 0, 7693, 0, 0, 0, 7697, 0, 7699, 0,
0, 7695, 0, 0, 232, 233, 234, 7869, 275, 277, 279, 235, 7867, 0, 0, 283,
517, 519, 0, 0, 0, 7865, 0, 0, 0, 553, 281, 7705, 0, 7707, 0, 0, 0, 0, 0,
0, 0, 0, 0, 7711, 0, 0, 0, 0, 0, 0, 0, 501, 285, 0, 7713, 287, 289, 0, 0,
0, 0, 487, 0, 0, 0, 0, 0, 0, 0, 0, 0, 291, 0, 293, 0, 0, 0, 7715, 7719,
0, 0, 0, 543, 0, 0, 0, 0, 0, 7717, 0, 0, 0, 7721, 0, 0, 7723, 0, 7830, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 236, 237, 238, 297, 299, 301, 0, 239, 7881, 0,
0, 464, 521, 523, 0, 0, 0, 7883, 0, 0, 0, 0, 303, 0, 0, 7725, 0, 0, 0, 0,
0, 309, 0, 0, 0, 0, 0, 0, 0, 0, 496, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7729,
0, 489, 0, 0, 0, 0, 0, 7731, 0, 0, 0, 311, 0, 0, 0, 0, 7733, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 318, 0, 0, 0, 0, 0,
7735, 0, 0, 0, 316, 0, 7741, 0, 0, 7739, 0, 0, 0, 7743, 0, 0, 0, 0, 7745,
0, 0, 7747, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 505, 324, 0, 241, 0, 0,
7749, 0, 0, 0, 0, 328, 0, 0, 0, 0, 0, 7751, 0, 0, 0, 326, 0, 7755, 0, 0,
7753, 0, 0, 242, 243, 244, 245, 333, 335, 559, 246, 7887, 0, 337, 466,
525, 527, 0, 0, 417, 7885, 0, 0, 0, 0, 491, 0, 0, 0, 0, 0, 0, 0, 7765, 0,
0, 0, 0, 7767, 0, 0, 0, 0, 0, 0, 0, 341, 0, 0, 0, 0, 7769, 0, 0, 0, 0,
345, 529, 531, 0, 0, 0, 7771, 0, 0, 0, 343, 0, 0, 0, 0, 7775, 0, 0, 0,
347, 349, 0, 0, 0, 7777, 0, 0, 0, 0, 353, 0, 0, 0, 0, 0, 7779, 0, 0, 537,
351, 0, 0, 0, 0, 0, 7787, 7831, 0, 0, 0, 357, 0, 0, 0, 0, 0, 7789, 0, 0,
539, 355, 0, 7793, 0, 0, 7791, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 250,
251, 361, 363, 365, 0, 252, 7911, 367, 369, 468, 533, 535, 0, 0, 432,
7909, 7795, 0, 0, 0, 371, 7799, 0, 7797, 0, 0, 0, 0, 0, 0, 7805, 0, 0, 0,
0, 0, 7807, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7809, 7811, 373, 0, 0, 0,
7815, 7813, 0, 7832, 0, 0, 0, 0, 0, 0, 0, 7817, 0, 7819, 7821, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7923, 253, 375, 7929, 563, 0, 7823, 255,
7927, 7833, 0, 0, 0, 0, 0, 0, 0, 7925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 378, 7825, 0, 0, 0, 380, 0, 0, 0, 0, 382, 0, 0, 0, 0, 0, 7827, 0, 0,
0, 0, 0, 0, 0, 0, 7829, 0, 0, 8173, 901, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8129, 0, 0, 0, 0, 0, 0, 0, 0, 7846, 7844, 0, 7850, 0, 0, 0, 0, 7848, 0,
0, 0, 0, 0, 0, 0, 0, 478, 0, 506, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508, 0, 0, 482, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7688, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 7872, 7870, 0, 7876, 0, 0, 0, 0, 7874, 0, 0, 0, 0, 0, 7726, 0,
0, 0, 7890, 7888, 0, 7894, 0, 0, 0, 0, 7892, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 7756, 0, 0, 556, 0, 0, 7758, 0, 0, 0, 0, 0, 0, 0, 0, 0, 554,
0, 510, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 475, 471, 0, 0, 469, 0, 0, 0, 0,
0, 0, 473, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7847, 7845, 0, 7851, 0, 0, 0, 0,
7849, 0, 0, 0, 0, 0, 0, 0, 0, 479, 0, 0, 0, 0, 0, 0, 0, 0, 0, 507, 0, 0,
0, 0, 509, 0, 0, 483, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7689, 0, 0, 0, 7873,
7871, 0, 7877, 0, 0, 0, 0, 7875, 0, 0, 0, 0, 0, 7727, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 7891, 7889, 0, 7895, 0, 0, 0, 0, 7893, 0, 0, 0, 0, 0,
7757, 0, 0, 557, 0, 0, 7759, 0, 0, 0, 0, 0, 0, 0, 0, 0, 555, 0, 0, 0, 0,
0, 0, 0, 0, 0, 511, 0, 0, 0, 476, 472, 0, 0, 470, 0, 0, 0, 0, 0, 0, 474,
0, 0, 0, 0, 0, 0, 0, 0, 0, 7856, 7854, 0, 7860, 0, 0, 0, 0, 7858, 0, 0,
0, 0, 7857, 7855, 0, 7861, 0, 0, 0, 0, 7859, 0, 0, 0, 0, 7700, 7702, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7701, 7703, 0, 0, 0, 7760, 7762, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 7761, 7763, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7780, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7781, 0, 0, 0, 0, 7782, 0, 0, 0, 0,
7783, 0, 0, 0, 0, 0, 0, 0, 7800, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7801, 0, 0, 7802, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7803, 0, 0, 0,
7835, 0, 0, 0, 0, 0, 0, 7900, 7898, 0, 7904, 0, 0, 0, 0, 7902, 0, 0, 0,
0, 0, 0, 0, 0, 7906, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7901, 7899, 0,
7905, 0, 0, 0, 0, 7903, 0, 0, 0, 0, 0, 0, 0, 0, 7907, 0, 0, 0, 7914,
7912, 0, 7918, 0, 0, 0, 0, 7916, 0, 0, 0, 0, 0, 0, 0, 0, 7920, 0, 0, 0,
7915, 7913, 0, 7919, 0, 0, 0, 0, 7917, 0, 0, 0, 0, 0, 0, 0, 0, 7921, 0,
0, 0, 0, 0, 0, 494, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 492, 0, 0, 0,
0, 493, 0, 0, 0, 0, 480, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 481, 0, 0,
0, 0, 0, 7708, 0, 0, 0, 0, 7709, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 560, 0,
0, 0, 0, 561, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 495, 0, 0, 0, 0, 0, 0, 0,
0, 0, 8122, 902, 0, 0, 8121, 8120, 0, 0, 0, 0, 0, 0, 0, 0, 7944, 7945, 0,
0, 0, 0, 0, 8124, 0, 0, 0, 0, 0, 0, 0, 8136, 904, 0, 0, 0, 0, 7960, 7961,
0, 0, 0, 0, 0, 8138, 905, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7976, 7977,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8140, 0, 0, 0, 0, 0, 0, 0, 8154,
906, 0, 0, 8153, 8152, 0, 938, 0, 0, 0, 0, 0, 0, 7992, 7993, 0, 0, 0, 0,
0, 8184, 908, 0, 0, 0, 0, 8008, 8009, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8172, 0, 0, 0, 0, 0, 8170, 910, 0, 0, 8169, 8168, 0, 939, 0, 0, 0, 0, 0,
0, 0, 8025, 0, 0, 0, 0, 0, 8186, 911, 0, 0, 0, 0, 8040, 8041, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 8188, 0, 0, 0, 0, 8116, 0, 0, 0, 0, 8132, 0,
0, 0, 0, 0, 0, 0, 8048, 940, 0, 0, 8113, 8112, 0, 0, 0, 0, 0, 0, 0, 0,
7936, 7937, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8118, 8115, 0, 0, 0, 0,
0, 0, 0, 8050, 941, 0, 0, 0, 0, 7952, 7953, 0, 0, 0, 0, 0, 8052, 942, 0,
0, 0, 0, 7968, 7969, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8134, 8131, 0,
0, 0, 0, 0, 0, 0, 8054, 943, 0, 0, 8145, 8144, 0, 970, 0, 0, 0, 0, 0, 0,
7984, 7985, 0, 0, 0, 0, 8150, 0, 0, 0, 0, 0, 0, 0, 0, 8056, 972, 0, 0, 0,
0, 8000, 8001, 0, 0, 0, 8164, 8165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 8058, 973, 0, 0, 8161, 8160, 0, 971, 0, 0, 0, 0, 0, 0, 8016, 8017, 0,
0, 0, 0, 8166, 0, 0, 0, 0, 0, 0, 0, 0, 8060, 974, 0, 0, 0, 0, 8032, 8033,
0, 0, 0, 0, 8182, 8179, 0, 0, 0, 0, 0, 0, 0, 8146, 912, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 8151, 0, 0, 0, 0, 0, 0, 0, 0, 8162, 944, 0, 0, 8167, 0, 0, 0,
0, 0, 8180, 0, 0, 0, 0, 0, 0, 0, 0, 979, 0, 0, 0, 0, 0, 980, 0, 0, 0, 0,
1031, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1232, 0, 1234, 0, 0, 0, 0, 0, 0,
1027, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1024, 0, 0, 0, 0, 1238, 0, 1025,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1217, 0, 1244, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1246, 0, 0, 0, 0, 0, 1037, 0, 0, 0, 1250, 1049, 0, 1252, 0, 0,
0, 0, 0, 0, 1036, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1254, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1262, 1038, 0, 1264, 0, 0, 1266, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1268, 0, 0, 0, 0, 1272, 0, 0, 0, 0, 1260, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1233, 0, 1235, 0, 0, 0, 0, 0, 0, 1107, 0, 0, 0, 1104, 0, 0, 0, 0, 1239,
0, 1105, 0, 0, 1218, 0, 1245, 0, 0, 0, 0, 1247, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1117, 0, 0, 0, 1251, 1081, 0, 1253, 0, 0, 0, 0, 0, 0,
1116, 0, 0, 1255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1263, 1118, 0, 1265, 0, 0,
1267, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1273, 0, 0, 0, 0, 1261, 0, 0, 0, 0, 1111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1142, 0, 0, 0, 0, 1143, 0, 0, 0, 0, 0, 0, 0, 1242, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1243, 0, 0, 0, 0, 1258, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1259, 0, 0, 0, 1570, 1571, 1573, 0, 0, 0, 1572, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1574, 0, 0, 0, 0, 1730, 0, 0, 0, 0, 1747, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1728, 0, 0, 0, 0, 0, 0, 2345, 0, 0, 0, 0, 2353, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2356, 0, 0, 0, 0, 0, 2507, 2508, 0, 0,
0, 0, 0, 2891, 2888, 2892, 0, 0, 0, 0, 0, 0, 2964, 0, 0, 0, 3018, 3020,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3019, 0, 0, 0, 0, 0, 0, 3144, 0, 0, 0,
0, 0, 0, 3264, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3274, 3271, 3272, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 3275, 0, 0, 0, 0, 0, 0, 3402, 3404, 0, 0, 0,
3403, 0, 0, 0, 0, 0, 0, 3546, 3548, 3550, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3549, 0, 0, 0, 0, 0, 0, 0, 4134, 0, 0, 0, 0, 0, 0, 7736, 0, 0, 0, 0,
7737, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7772, 0, 0, 0, 0, 7773, 0, 0,
0, 0, 0, 0, 7784, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7785, 7852, 0, 0,
7862, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7853, 0, 0, 7863, 0, 0, 0, 0, 0, 0, 0,
0, 0, 7878, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7879, 0, 0, 0, 0, 7896,
0, 0, 0, 0, 7897, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7938, 7940, 0, 0, 7942,
8064, 0, 0, 0, 0, 0, 0, 0, 7939, 7941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7943, 8065, 0, 0, 0, 0, 8066, 0, 0, 0, 0, 8067, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 8068, 0, 0, 0, 0, 8069, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8070, 0, 0, 0, 0, 8071, 0, 0, 0, 0, 0, 0, 0, 7946, 7948, 0, 0, 7950,
8072, 0, 0, 0, 0, 0, 0, 0, 7947, 7949, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7951, 8073, 0, 0, 0, 0, 8074, 0, 0, 0, 0, 8075, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 8076, 0, 0, 0, 0, 8077, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8078, 0, 0, 0, 0, 8079, 0, 0, 0, 0, 0, 0, 0, 7954, 7956, 0, 0, 0, 7955,
7957, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7962, 7964, 0, 0, 0, 7963, 7965,
0, 0, 0, 7970, 7972, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7974, 8080, 0, 0, 0,
0, 0, 0, 0, 7971, 7973, 0, 0, 7975, 8081, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 8082, 0, 0, 0, 0, 8083, 0, 0, 0, 0, 8084, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 8085, 0, 0, 0, 0, 8086, 0, 0, 0, 0, 8087, 0, 0, 0, 0, 0, 0,
0, 7978, 7980, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7982, 8088, 0, 0, 0, 0, 0,
0, 0, 7979, 7981, 0, 0, 7983, 8089, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8090, 0, 0, 0, 0, 8091, 0, 0, 0, 0, 8092, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 8093, 0, 0, 0, 0, 8094, 0, 0, 0, 0, 8095, 0, 0, 0, 0, 0, 0, 0,
7986, 7988, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7990, 0, 0, 0, 0, 0, 0, 0, 0,
7987, 7989, 0, 0, 7991, 0, 0, 0, 0, 0, 0, 0, 0, 7994, 7996, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 7998, 0, 0, 0, 0, 0, 0, 0, 0, 7995, 7997, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 7999, 0, 0, 0, 0, 0, 0, 0, 0, 8002, 8004, 0, 0, 0,
8003, 8005, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8010, 8012, 0, 0, 0, 8011,
8013, 0, 0, 0, 8018, 8020, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8022, 0, 0, 0,
0, 0, 0, 0, 0, 8019, 8021, 0, 0, 8023, 0, 0, 0, 0, 0, 0, 0, 0, 8027,
8029, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8031, 0, 0, 0, 0, 0, 0, 0, 0, 8034,
8036, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8038, 8096, 0, 0, 0, 0, 0, 0, 0,
8035, 8037, 0, 0, 8039, 8097, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8098,
0, 0, 0, 0, 8099, 0, 0, 0, 0, 8100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8101, 0, 0, 0, 0, 8102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8103, 0, 0,
0, 0, 0, 0, 0, 8042, 8044, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8046, 8104, 0,
0, 0, 0, 0, 0, 0, 8043, 8045, 0, 0, 8047, 8105, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 8106, 0, 0, 0, 0, 8107, 0, 0, 0, 0, 8108, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 8109, 0, 0, 0, 0, 8110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 8111, 0, 0, 0, 0, 8114, 0, 0, 0, 0, 8130, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 8178, 0, 0, 0, 0, 8119, 0, 0, 0, 0, 0, 0, 0, 8141, 8142, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 8143, 0, 0, 0, 0, 0, 8135, 0, 0, 0, 0, 8183,
0, 0, 0, 0, 0, 0, 0, 8157, 8158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8159, 0,
0, 0, 8602, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8603, 0, 0, 0, 0, 8622,
0, 0, 0, 0, 8653, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8655, 0, 0, 0, 0,
8654, 0, 0, 0, 0, 8708, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8713, 0, 0,
0, 0, 8716, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8740, 0, 0, 0, 0, 8742,
0, 0, 0, 0, 8769, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8772, 0, 0, 0, 0,
8775, 0, 0, 0, 0, 8777, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8813, 0, 0,
0, 0, 8802, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8816, 0, 0, 0, 0, 8817,
0, 0, 0, 0, 8820, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8821, 0, 0, 0, 0,
8824, 0, 0, 0, 0, 8825, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8832, 0, 0,
0, 0, 8833, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8928, 0, 0, 0, 0, 8929,
0, 0, 0, 0, 8836, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8837, 0, 0, 0, 0,
8840, 0, 0, 0, 0, 8841, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8930, 0, 0,
0, 0, 8931, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8876, 0, 0, 0, 0, 8877,
0, 0, 0, 0, 8878, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8879, 0, 0, 0, 0,
8938, 0, 0, 0, 0, 8939, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8940, 0, 0,
0, 0, 8941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12436, 0, 0, 0, 0, 12364,
0, 0, 0, 0, 12366, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12368, 0, 0, 0, 0,
12370, 0, 0, 0, 0, 12372, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12374, 0,
0, 0, 0, 12376, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12378, 0, 0, 0, 0,
12380, 0, 0, 0, 0, 12382, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12384, 0,
0, 0, 0, 12386, 0, 0, 0, 0, 12389, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12391, 0, 0, 0, 0, 12393, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12400,
12401, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12403, 12404, 0, 0, 0, 12406,
12407, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12409, 12410, 0, 0, 0, 12412,
12413, 0, 0, 0, 12446, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12532, 0, 0,
0, 0, 12460, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12462, 0, 0, 0, 0,
12464, 0, 0, 0, 0, 12466, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12468, 0,
0, 0, 0, 12470, 0, 0, 0, 0, 12472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12474, 0, 0, 0, 0, 12476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12478, 0,
0, 0, 0, 12480, 0, 0, 0, 0, 12482, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12485, 0, 0, 0, 0, 12487, 0, 0, 0, 0, 12489, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 12496, 12497, 0, 0, 0, 12499, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 12502, 12503, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12505, 12506, 0, 0,
0, 12508, 12509, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12535, 0, 0, 0, 0,
12536, 0, 0, 0, 0, 12537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12538, 0,
0, 0, 0, 12542, 0,
};

View File

@ -13,6 +13,9 @@
# 2000-11-03 fl expand first/last ranges
# 2001-01-19 fl added character name tables (2.1)
# 2001-01-21 fl added decomp compression; dynamic phrasebook threshold
# 2002-09-11 wd use string methods
# 2002-10-18 mvl update to Unicode 3.2
# 2002-10-22 mvl generate NFC tables
#
# written by Fredrik Lundh (fredrik@pythonware.com)
#
@ -22,7 +25,8 @@ import sys
SCRIPT = sys.argv[0]
VERSION = "2.1"
UNICODE_DATA = "UnicodeData-Latest.txt"
UNICODE_DATA = "UnicodeData.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions.txt"
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
"Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm",
@ -47,7 +51,7 @@ def maketables(trace=0):
print "--- Reading", UNICODE_DATA, "..."
unicode = UnicodeData(UNICODE_DATA)
unicode = UnicodeData(UNICODE_DATA, COMPOSITION_EXCLUSIONS)
print len(filter(None, unicode.table)), "characters"
@ -96,6 +100,10 @@ def makeunicodedata(unicode, trace):
decomp_index = [0] * len(unicode.chars)
decomp_size = 0
comp_pairs = []
comp_first = [None] * len(unicode.chars)
comp_last = [None] * len(unicode.chars)
for char in unicode.chars:
record = unicode.table[char]
if record:
@ -116,6 +124,14 @@ def makeunicodedata(unicode, trace):
# content
decomp = [prefix + (len(decomp)<<8)] +\
map(lambda s: int(s, 16), decomp)
# Collect NFC pairs
if not prefix and len(decomp) == 3 and \
char not in unicode.exclusions and \
unicode.table[decomp[1]][3] == "0":
p, l, r = decomp
comp_first[l] = 1
comp_last[r] = 1
comp_pairs.append((l,r,char))
try:
i = decomp_data.index(decomp)
except ValueError:
@ -126,10 +142,49 @@ def makeunicodedata(unicode, trace):
i = 0
decomp_index[char] = i
f = l = 0
comp_first_ranges = []
comp_last_ranges = []
prev_f = prev_l = None
for i in unicode.chars:
if comp_first[i] is not None:
comp_first[i] = f
f += 1
if prev_f is None:
prev_f = (i,i)
elif prev_f[1]+1 == i:
prev_f = prev_f[0],i
else:
comp_first_ranges.append(prev_f)
prev_f = (i,i)
if comp_last[i] is not None:
comp_last[i] = l
l += 1
if prev_l is None:
prev_l = (i,i)
elif prev_l[1]+1 == i:
prev_l = prev_l[0],i
else:
comp_last_ranges.append(prev_l)
prev_l = (i,i)
comp_first_ranges.append(prev_f)
comp_last_ranges.append(prev_l)
total_first = f
total_last = l
comp_data = [0]*(total_first*total_last)
for f,l,char in comp_pairs:
f = comp_first[f]
l = comp_last[l]
comp_data[f*total_last+l] = char
print len(table), "unique properties"
print len(decomp_prefix), "unique decomposition prefixes"
print len(decomp_data), "unique decomposition entries:",
print decomp_size, "bytes"
print total_first, "first characters in NFC"
print total_last, "last characters in NFC"
print len(comp_pairs), "NFC pairs"
print "--- Writing", FILE, "..."
@ -144,6 +199,21 @@ def makeunicodedata(unicode, trace):
print >>fp, "};"
print >>fp
print >>fp, "/* Reindexing of NFC first characters. */"
print >>fp, "#define TOTAL_FIRST",total_first
print >>fp, "#define TOTAL_LAST",total_last
print >>fp, "struct reindex{int start;short count,index;};"
print >>fp, "struct reindex nfc_first[] = {"
for start,end in comp_first_ranges:
print >>fp," { %d, %d, %d}," % (start,end-start,comp_first[start])
print >>fp," {0,0,0}"
print >>fp,"};\n"
print >>fp, "struct reindex nfc_last[] = {"
for start,end in comp_last_ranges:
print >>fp," { %d, %d, %d}," % (start,end-start,comp_last[start])
print >>fp," {0,0,0}"
print >>fp,"};\n"
# FIXME: <fl> the following tables could be made static, and
# the support code moved into unicodedatabase.c
@ -185,6 +255,12 @@ def makeunicodedata(unicode, trace):
Array("decomp_index1", index1).dump(fp, trace)
Array("decomp_index2", index2).dump(fp, trace)
index, index2, shift = splitbins(comp_data, trace)
print >>fp, "/* NFC pairs */"
print >>fp, "#define COMP_SHIFT", shift
Array("comp_index", index).dump(fp, trace)
Array("comp_data", index2).dump(fp, trace)
fp.close()
# --------------------------------------------------------------------
@ -454,7 +530,7 @@ import sys
class UnicodeData:
def __init__(self, filename, expand=1):
def __init__(self, filename, exclusions, expand=1):
file = open(filename)
table = [None] * 0x110000
while 1:
@ -486,6 +562,17 @@ class UnicodeData:
self.table = table
self.chars = range(0x110000) # unicode 3.2
file = open(exclusions)
self.exclusions = {}
for s in file:
s = s.strip()
if not s:
continue
if s[0] == '#':
continue
char = int(s.split()[0],16)
self.exclusions[char] = 1
def uselatin1(self):
# restrict character range to ISO Latin 1
self.chars = range(256)