upgrade unicode db to 6.3.0 (closes #19221)

This commit is contained in:
Benjamin Peterson 2013-10-10 17:24:45 -04:00
parent 78d692f98e
commit 94d08d908b
8 changed files with 17570 additions and 17528 deletions

View File

@ -15,8 +15,8 @@
This module provides access to the Unicode Character Database (UCD) which
defines character properties for all Unicode characters. The data contained in
this database is compiled from the `UCD version 6.2.0
<http://www.unicode.org/Public/6.2.0/ucd>`_.
this database is compiled from the `UCD version 6.3.0
<http://www.unicode.org/Public/6.3.0/ucd>`_.
The module uses the same names and symbols as defined by Unicode
Standard Annex #44, `"Unicode Character Database"
@ -166,6 +166,6 @@ Examples:
.. rubric:: Footnotes
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NamedSequences.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NamedSequences.txt

View File

@ -21,7 +21,7 @@ errors = 'surrogatepass'
class UnicodeMethodsTest(unittest.TestCase):
# update this, if the database changes
expectedchecksum = 'bf7a78f1a532421b5033600102e23a92044dbba9'
expectedchecksum = 'e74e878de71b6e780ffac271785c3cb58f6251f3'
def test_method_checksum(self):
h = hashlib.sha1()

View File

@ -10,6 +10,8 @@ Projected release date: 2013-10-20
Core and Builtins
-----------------
- Issue #19221: Upgrade Unicode database to version 6.3.0.
- Issue #16742: The result of the C callback PyOS_ReadlineFunctionPointer must
now be a string allocated by PyMem_RawMalloc() or PyMem_RawRealloc() (or NULL
if an error occurred), instead of a string allocated by PyMem_Malloc() or

View File

@ -1322,10 +1322,10 @@ PyDoc_STRVAR(unicodedata_docstring,
"This module provides access to the Unicode Character Database which\n\
defines character properties for all Unicode characters. The data in\n\
this database is based on the UnicodeData.txt file version\n\
6.0.0 which is publically available from ftp://ftp.unicode.org/.\n\
6.3.0 which is publically available from ftp://ftp.unicode.org/.\n\
\n\
The module uses the same names and symbols as defined by the\n\
UnicodeData File Format 6.0.0 (see\n\
UnicodeData File Format 6.3.0 (see\n\
http://www.unicode.org/reports/tr44/tr44-6.html).");

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1589,7 +1589,7 @@ static unsigned short index2[] = {
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0,
0, 0, 0, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21,
21, 21, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 5, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8,
@ -1801,7 +1801,7 @@ static unsigned short index2[] = {
25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 96, 5, 5, 5, 5, 55, 25, 0, 0, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
25, 25, 25, 2, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
@ -1828,7 +1828,7 @@ static unsigned short index2[] = {
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 132, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 18, 0, 0, 5, 5, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25,
@ -1915,7 +1915,7 @@ static unsigned short index2[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21,
21, 21, 21, 21, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246,
247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95,
95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@ -2925,9 +2925,6 @@ static unsigned short index2[] = {
double _PyUnicode_ToNumeric(Py_UCS4 ch)
{
switch (ch) {
case 0x12456:
case 0x12457:
return (double) -1.0;
case 0x0F33:
return (double) -1.0/2.0;
case 0x0030:
@ -3383,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case 0x12435:
case 0x1244A:
case 0x12450:
case 0x12456:
case 0x12459:
case 0x1D361:
case 0x1D7D0:
@ -3539,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case 0x1243B:
case 0x1244B:
case 0x12451:
case 0x12457:
case 0x1D362:
case 0x1D7D1:
case 0x1D7DB:
@ -4294,7 +4293,6 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
case 0x0085:
case 0x00A0:
case 0x1680:
case 0x180E:
case 0x2000:
case 0x2001:
case 0x2002:

View File

@ -37,7 +37,7 @@ SCRIPT = sys.argv[0]
VERSION = "3.2"
# The Unicode Database
UNIDATA_VERSION = "6.2.0"
UNIDATA_VERSION = "6.3.0"
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@ -68,7 +68,7 @@ CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
"ON" ]
"ON", "LRI", "RLI", "FSI", "PDI" ]
EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]