mirror of https://github.com/python/cpython
upgrade unicode db to 6.3.0 (closes #19221)
This commit is contained in:
parent
78d692f98e
commit
94d08d908b
|
@ -15,8 +15,8 @@
|
|||
|
||||
This module provides access to the Unicode Character Database (UCD) which
|
||||
defines character properties for all Unicode characters. The data contained in
|
||||
this database is compiled from the `UCD version 6.2.0
|
||||
<http://www.unicode.org/Public/6.2.0/ucd>`_.
|
||||
this database is compiled from the `UCD version 6.3.0
|
||||
<http://www.unicode.org/Public/6.3.0/ucd>`_.
|
||||
|
||||
The module uses the same names and symbols as defined by Unicode
|
||||
Standard Annex #44, `"Unicode Character Database"
|
||||
|
@ -166,6 +166,6 @@ Examples:
|
|||
|
||||
.. rubric:: Footnotes
|
||||
|
||||
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NameAliases.txt
|
||||
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NameAliases.txt
|
||||
|
||||
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NamedSequences.txt
|
||||
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NamedSequences.txt
|
||||
|
|
|
@ -21,7 +21,7 @@ errors = 'surrogatepass'
|
|||
class UnicodeMethodsTest(unittest.TestCase):
|
||||
|
||||
# update this, if the database changes
|
||||
expectedchecksum = 'bf7a78f1a532421b5033600102e23a92044dbba9'
|
||||
expectedchecksum = 'e74e878de71b6e780ffac271785c3cb58f6251f3'
|
||||
|
||||
def test_method_checksum(self):
|
||||
h = hashlib.sha1()
|
||||
|
|
|
@ -10,6 +10,8 @@ Projected release date: 2013-10-20
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #19221: Upgrade Unicode database to version 6.3.0.
|
||||
|
||||
- Issue #16742: The result of the C callback PyOS_ReadlineFunctionPointer must
|
||||
now be a string allocated by PyMem_RawMalloc() or PyMem_RawRealloc() (or NULL
|
||||
if an error occurred), instead of a string allocated by PyMem_Malloc() or
|
||||
|
|
|
@ -1322,10 +1322,10 @@ PyDoc_STRVAR(unicodedata_docstring,
|
|||
"This module provides access to the Unicode Character Database which\n\
|
||||
defines character properties for all Unicode characters. The data in\n\
|
||||
this database is based on the UnicodeData.txt file version\n\
|
||||
6.0.0 which is publically available from ftp://ftp.unicode.org/.\n\
|
||||
6.3.0 which is publicly available from ftp://ftp.unicode.org/.\n\
|
||||
\n\
|
||||
The module uses the same names and symbols as defined by the\n\
|
||||
UnicodeData File Format 6.0.0 (see\n\
|
||||
UnicodeData File Format 6.3.0 (see\n\
|
||||
http://www.unicode.org/reports/tr44/tr44-6.html).");
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
32786
Modules/unicodename_db.h
32786
Modules/unicodename_db.h
File diff suppressed because it is too large
Load Diff
|
@ -1589,7 +1589,7 @@ static unsigned short index2[] = {
|
|||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0,
|
||||
0, 0, 0, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21,
|
||||
21, 21, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 5, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8,
|
||||
|
@ -1801,7 +1801,7 @@ static unsigned short index2[] = {
|
|||
25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 96, 5, 5, 5, 5, 55, 25, 0, 0, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27,
|
||||
27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
25, 25, 25, 2, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
|
||||
25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
|
@ -1828,7 +1828,7 @@ static unsigned short index2[] = {
|
|||
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 132, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 18, 0, 0, 5, 5, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
|
||||
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25,
|
||||
|
@ -1915,7 +1915,7 @@ static unsigned short index2[] = {
|
|||
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21,
|
||||
21, 21, 21, 21, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
|
||||
21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
|
||||
246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246,
|
||||
247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95,
|
||||
95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
|
@ -2925,9 +2925,6 @@ static unsigned short index2[] = {
|
|||
double _PyUnicode_ToNumeric(Py_UCS4 ch)
|
||||
{
|
||||
switch (ch) {
|
||||
case 0x12456:
|
||||
case 0x12457:
|
||||
return (double) -1.0;
|
||||
case 0x0F33:
|
||||
return (double) -1.0/2.0;
|
||||
case 0x0030:
|
||||
|
@ -3383,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
|
|||
case 0x12435:
|
||||
case 0x1244A:
|
||||
case 0x12450:
|
||||
case 0x12456:
|
||||
case 0x12459:
|
||||
case 0x1D361:
|
||||
case 0x1D7D0:
|
||||
|
@ -3539,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
|
|||
case 0x1243B:
|
||||
case 0x1244B:
|
||||
case 0x12451:
|
||||
case 0x12457:
|
||||
case 0x1D362:
|
||||
case 0x1D7D1:
|
||||
case 0x1D7DB:
|
||||
|
@ -4294,7 +4293,6 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
|
|||
case 0x0085:
|
||||
case 0x00A0:
|
||||
case 0x1680:
|
||||
case 0x180E:
|
||||
case 0x2000:
|
||||
case 0x2001:
|
||||
case 0x2002:
|
||||
|
|
|
@ -37,7 +37,7 @@ SCRIPT = sys.argv[0]
|
|||
VERSION = "3.2"
|
||||
|
||||
# The Unicode Database
|
||||
UNIDATA_VERSION = "6.2.0"
|
||||
UNIDATA_VERSION = "6.3.0"
|
||||
UNICODE_DATA = "UnicodeData%s.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
|
||||
|
@ -68,7 +68,7 @@ CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
|
|||
|
||||
BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
|
||||
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
|
||||
"ON" ]
|
||||
"ON", "LRI", "RLI", "FSI", "PDI" ]
|
||||
|
||||
EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
|
||||
|
||||
|
|
Loading…
Reference in New Issue