Unicode 9.0.0
Not completely mechanical, since support for the East Asian Width changes (emoji codepoints became Wide) had to be added to unicodedata.
parent 7ec64562b2
commit 6775231597
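As a quick illustration of the East Asian Width change mentioned above, here is a minimal Python sketch of the observable behaviour; U+231A (WATCH) is the codepoint exercised by the new test added below, and the output assumes an interpreter built with this commit:

    import unicodedata

    # With the Unicode 9.0.0 database, U+231A (WATCH) is reported as Wide,
    # while the frozen 3.2.0 view still reports it as Neutral.
    print(unicodedata.east_asian_width('\u231a'))            # 'W'
    print(unicodedata.ucd_3_2_0.east_asian_width('\u231a'))  # 'N'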
Doc/library/unicodedata.rst

@@ -17,8 +17,8 @@
 This module provides access to the Unicode Character Database (UCD) which
 defines character properties for all Unicode characters. The data contained in
-this database is compiled from the `UCD version 8.0.0
-<http://www.unicode.org/Public/8.0.0/ucd>`_.
+this database is compiled from the `UCD version 9.0.0
+<http://www.unicode.org/Public/9.0.0/ucd>`_.
 
 The module uses the same names and symbols as defined by Unicode
 Standard Annex #44, `"Unicode Character Database"
 
@@ -168,6 +168,6 @@ Examples:
 
 .. rubric:: Footnotes
 
-.. [#] http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt
+.. [#] http://www.unicode.org/Public/9.0.0/ucd/NameAliases.txt
 
-.. [#] http://www.unicode.org/Public/8.0.0/ucd/NamedSequences.txt
+.. [#] http://www.unicode.org/Public/9.0.0/ucd/NamedSequences.txt
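A quick runtime check of the documented version bump (a minimal sketch; the printed value assumes a build that includes the regenerated 9.0.0 database):

    import unicodedata

    # unidata_version reports the UCD release the module was generated from.
    print(unicodedata.unidata_version)   # '9.0.0' on a build containing this change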
Doc/whatsnew/3.6.rst

@@ -966,6 +966,13 @@ representing :class:`contextlib.AbstractContextManager`.
 (Contributed by Brett Cannon in :issue:`25609`.)
 
 
+unicodedata
+-----------
+
+The internal database has been upgraded to use Unicode 9.0.0. (Contributed by
+Benjamin Peterson.)
+
+
 unittest.mock
 -------------
 
Lib/test/test_unicodedata.py

@@ -20,7 +20,7 @@ errors = 'surrogatepass'
 class UnicodeMethodsTest(unittest.TestCase):
 
     # update this, if the database changes
-    expectedchecksum = '5971760872b2f98bb9c701e6c0db3273d756b3ec'
+    expectedchecksum = 'c1fa98674a683aa8a8d8dee0c84494f8d36346e6'
 
     def test_method_checksum(self):
         h = hashlib.sha1()
@@ -80,7 +80,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
 
     # Update this if the database changes. Make sure to do a full rebuild
     # (e.g. 'make distclean && make') to get the correct checksum.
-    expectedchecksum = '5e74827cd07f9e546a30f34b7bcf6cc2eac38c8c'
+    expectedchecksum = 'f891b1e6430c712531b9bc935a38e22d78ba1bf3'
     def test_function_checksum(self):
         data = []
         h = hashlib.sha1()
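The expected checksums above exist to catch accidental database changes. A simplified sketch of the idea (this is not the exact algorithm of test_method_checksum or test_function_checksum, which hash many more properties and string-method results):

    import hashlib
    import unicodedata

    # Hash one property for every codepoint; any change to the underlying
    # database produces a different digest, which is why the expected values
    # must be updated whenever the UCD version is bumped.
    h = hashlib.sha1()
    for cp in range(0x110000):
        h.update(unicodedata.east_asian_width(chr(cp)).encode('ascii'))
    print(h.hexdigest())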
@@ -222,6 +222,10 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
         self.assertEqual(eaw('\u2010'), 'A')
         self.assertEqual(eaw('\U00020000'), 'W')
 
+    def test_east_asian_width_9_0_changes(self):
+        self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
+        self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
+
 class UnicodeMiscTest(UnicodeDatabaseTest):
 
     def test_failed_import_during_compiling(self):
Misc/NEWS

@@ -10,6 +10,8 @@ What's New in Python 3.6.0 beta 2
 Core and Builtins
 -----------------
 
+- Upgrade internal unicode databases to Unicode version 9.0.0.
+
 - Issue #28131: Fix a regression in zipimport's compile_source(). zipimport
   should use the same optimization level as the interpreter.
 
Modules/unicodedata.c

@@ -45,6 +45,7 @@ typedef struct change_record {
     const unsigned char category_changed;
     const unsigned char decimal_changed;
    const unsigned char mirrored_changed;
+    const unsigned char east_asian_width_changed;
     const double numeric_changed;
 } change_record;
 
@@ -375,6 +376,8 @@ unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
         const change_record *old = get_old_record(self, c);
         if (old->category_changed == 0)
             index = 0; /* unassigned */
+        else if (old->east_asian_width_changed != 0xFF)
+            index = old->east_asian_width_changed;
     }
     return PyUnicode_FromString(_PyUnicode_EastAsianWidthNames[index]);
 }
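A rough Python rendering of the lookup logic added to unicodedata_UCD_east_asian_width_impl above (illustrative only; the field names and the 0xFF sentinel mirror the C change and the generator changes further down):

    NO_CHANGE = 0xFF  # sentinel meaning "property unchanged relative to the current database"

    def old_east_asian_width_index(record, current_index):
        # Mirrors the new C branch: characters unassigned in Unicode 3.2.0 map
        # to index 0, otherwise a recorded old width overrides the current one.
        if record['category_changed'] == 0:
            return 0  # unassigned
        if record['east_asian_width_changed'] != NO_CHANGE:
            return record['east_asian_width_changed']
        return current_index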
Modules/unicodename_db.h (45876 changed lines): diff suppressed because it is too large.
(Other large generated-file diffs were also suppressed.)
Tools/unicode/makeunicodedata.py

@@ -42,7 +42,7 @@ VERSION = "3.2"
 # * Doc/library/stdtypes.rst, and
 # * Doc/library/unicodedata.rst
 # * Doc/reference/lexical_analysis.rst (two occurrences)
-UNIDATA_VERSION = "8.0.0"
+UNIDATA_VERSION = "9.0.0"
 UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@@ -796,6 +796,7 @@ def merge_old_version(version, new, old):
     category_changes = [0xFF]*0x110000
     decimal_changes = [0xFF]*0x110000
     mirrored_changes = [0xFF]*0x110000
+    east_asian_width_changes = [0xFF]*0x110000
     # In numeric data, 0 means "no change",
     # -1 means "did not have a numeric value
     numeric_changes = [0] * 0x110000
@@ -862,6 +863,9 @@ def merge_old_version(version, new, old):
                     elif k == 14:
                         # change to simple titlecase mapping; ignore
                         pass
+                    elif k == 15:
+                        # change to east asian width
+                        east_asian_width_changes[i] = EASTASIANWIDTH_NAMES.index(value)
                     elif k == 16:
                         # derived property changes; not yet
                         pass
@@ -873,8 +877,9 @@ def merge_old_version(version, new, old):
                         class Difference(Exception):pass
                         raise Difference(hex(i), k, old.table[i], new.table[i])
     new.changed.append((version, list(zip(bidir_changes, category_changes,
-                                          decimal_changes, mirrored_changes,
-                                          numeric_changes)),
+                                          decimal_changes, mirrored_changes,
+                                          east_asian_width_changes,
+                                          numeric_changes)),
                        normalization_changes))
 
 def open_data(template, version):
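The merge_old_version changes above record, for every codepoint, how the old 3.2.0 width differs from the current database, using 0xFF as the "no change" sentinel. A simplified standalone sketch of that bookkeeping (the EASTASIANWIDTH_NAMES ordering here is illustrative; the real script uses its own table):

    EASTASIANWIDTH_NAMES = ["F", "H", "W", "Na", "A", "N"]  # illustrative ordering

    def east_asian_width_deltas(old_widths, new_widths):
        # One slot per codepoint; 0xFF means "same width in both versions".
        changes = [0xFF] * 0x110000
        for cp in range(0x110000):
            if old_widths[cp] != new_widths[cp]:
                changes[cp] = EASTASIANWIDTH_NAMES.index(old_widths[cp])
        return changes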
setup.py (3 changed lines)

@@ -652,7 +652,8 @@ class PyBuildExt(build_ext):
         # profiler (_lsprof is for cProfile.py)
         exts.append( Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']) )
         # static Unicode character database
-        exts.append( Extension('unicodedata', ['unicodedata.c']) )
+        exts.append( Extension('unicodedata', ['unicodedata.c'],
+                               depends=['unicodedata_db.h', 'unicodename_db.h']) )
         # _opcode module
         exts.append( Extension('_opcode', ['_opcode.c']) )
 
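The setup.py change lists the generated headers as dependencies so that distutils rebuilds the extension whenever they are regenerated. A minimal standalone sketch of the same pattern (file names are simply the ones from the diff; any project-local headers would work):

    from distutils.core import setup, Extension

    setup(
        name='unicodedata-example',
        ext_modules=[
            # 'depends' lists files that are not compiled directly but should
            # trigger a rebuild of the extension when they change.
            Extension('unicodedata', ['unicodedata.c'],
                      depends=['unicodedata_db.h', 'unicodename_db.h']),
        ],
    )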