update to Unicode 6.1
This commit is contained in:
parent
16fa2a1097
commit
71f660e00f
|
@ -21,7 +21,7 @@ errors = 'surrogatepass'
|
|||
class UnicodeMethodsTest(unittest.TestCase):
|
||||
|
||||
# update this, if the database changes
|
||||
expectedchecksum = 'df0b3ca6785a070b21f837b227dbdbdff3c2e921'
|
||||
expectedchecksum = 'bf7a78f1a532421b5033600102e23a92044dbba9'
|
||||
|
||||
def test_method_checksum(self):
|
||||
h = hashlib.sha1()
|
||||
|
@ -80,7 +80,7 @@ class UnicodeDatabaseTest(unittest.TestCase):
|
|||
class UnicodeFunctionsTest(UnicodeDatabaseTest):
|
||||
|
||||
# update this, if the database changes
|
||||
expectedchecksum = 'c23dfc0b5eaf3ca2aad32d733de96bb182ccda50'
|
||||
expectedchecksum = '17fe2f12b788e4fff5479b469c4404bb6ecf841f'
|
||||
def test_function_checksum(self):
|
||||
data = []
|
||||
h = hashlib.sha1()
|
||||
|
|
|
@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Upgrade Unicode data to Unicode 6.1.
|
||||
|
||||
- Issue #14040: Remove rarely used file name suffixes for C extensions
|
||||
(under POSIX mainly).
|
||||
|
||||
|
|
|
@ -921,7 +921,7 @@ is_unified_ideograph(Py_UCS4 code)
|
|||
{
|
||||
return
|
||||
(0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
|
||||
(0x4E00 <= code && code <= 0x9FCB) || /* CJK Ideograph */
|
||||
(0x4E00 <= code && code <= 0x9FCC) || /* CJK Ideograph */
|
||||
(0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
|
||||
(0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
|
||||
(0x2B740 <= code && code <= 0x2B81D); /* CJK Ideograph Extension D */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
37828
Modules/unicodename_db.h
37828
Modules/unicodename_db.h
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -38,7 +38,7 @@ SCRIPT = sys.argv[0]
|
|||
VERSION = "3.2"
|
||||
|
||||
# The Unicode Database
|
||||
UNIDATA_VERSION = "6.0.0"
|
||||
UNIDATA_VERSION = "6.1.0"
|
||||
UNICODE_DATA = "UnicodeData%s.txt"
|
||||
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
|
||||
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
|
||||
|
@ -58,7 +58,7 @@ PUA_16 = range(0x100000, 0x10FFFE)
|
|||
|
||||
# we use these ranges of PUA_15 to store name aliases and named sequences
|
||||
NAME_ALIASES_START = 0xF0000
|
||||
NAMED_SEQUENCES_START = 0xF0100
|
||||
NAMED_SEQUENCES_START = 0xF0200
|
||||
|
||||
old_versions = ["3.2.0"]
|
||||
|
||||
|
@ -95,7 +95,7 @@ EXTENDED_CASE_MASK = 0x4000
|
|||
# these ranges need to match unicodedata.c:is_unified_ideograph
|
||||
cjk_ranges = [
|
||||
('3400', '4DB5'),
|
||||
('4E00', '9FCB'),
|
||||
('4E00', '9FCC'),
|
||||
('20000', '2A6D6'),
|
||||
('2A700', '2B734'),
|
||||
('2B740', '2B81D')
|
||||
|
@ -958,7 +958,7 @@ class UnicodeData:
|
|||
s = s.strip()
|
||||
if not s or s.startswith('#'):
|
||||
continue
|
||||
char, name = s.split(';')
|
||||
char, name, abbrev = s.split(';')
|
||||
char = int(char, 16)
|
||||
self.aliases.append((name, char))
|
||||
# also store the name in the PUA 1
|
||||
|
@ -971,6 +971,7 @@ class UnicodeData:
|
|||
# in order to take advantage of the compression and lookup
|
||||
# algorithms used for the other characters.
|
||||
|
||||
assert pua_index < NAMED_SEQUENCES_START
|
||||
pua_index = NAMED_SEQUENCES_START
|
||||
with open_data(NAMED_SEQUENCES, version) as file:
|
||||
for s in file:
|
||||
|
|
Loading…
Reference in New Issue