Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
makeunicodedata.py and regenerated the Unicode database. This fixes the bug where u'\u1d79'.lower() incorrectly returned '\x00' instead of u'\u1d79'.
This commit is contained in:
parent
140d9d673e
commit
5d98ec76bb
|
@ -20,7 +20,7 @@ encoding = 'utf-8'
|
||||||
class UnicodeMethodsTest(unittest.TestCase):
|
class UnicodeMethodsTest(unittest.TestCase):
|
||||||
|
|
||||||
# update this, if the database changes
|
# update this, if the database changes
|
||||||
expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
|
expectedchecksum = 'b7db9b5f1d804976fa921d2009cbef6f025620c1'
|
||||||
|
|
||||||
def test_method_checksum(self):
|
def test_method_checksum(self):
|
||||||
h = hashlib.sha1()
|
h = hashlib.sha1()
|
||||||
|
@ -257,6 +257,19 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
|
||||||
# the upper-case mapping: as delta, or as absolute value
|
# the upper-case mapping: as delta, or as absolute value
|
||||||
self.assert_(u"a".upper()==u'A')
|
self.assert_(u"a".upper()==u'A')
|
||||||
self.assert_(u"\u1d79".upper()==u'\ua77d')
|
self.assert_(u"\u1d79".upper()==u'\ua77d')
|
||||||
|
self.assert_(u".".upper()==u".")
|
||||||
|
|
||||||
|
def test_bug_5828(self):
|
||||||
|
self.assertEqual(u"\u1d79".lower(), u"\u1d79")
|
||||||
|
# Only U+0000 should have U+0000 as its upper/lower/titlecase variant
|
||||||
|
self.assertEqual(
|
||||||
|
[
|
||||||
|
c for c in range(sys.maxunicode+1)
|
||||||
|
if u"\x00" in unichr(c).lower()+unichr(c).upper()+unichr(c).title()
|
||||||
|
],
|
||||||
|
[0]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test.test_support.run_unittest(
|
test.test_support.run_unittest(
|
||||||
|
|
|
@ -773,6 +773,10 @@ Library
|
||||||
- Issue #2703: SimpleXMLRPCDispatcher.__init__: Provide default values for
|
- Issue #2703: SimpleXMLRPCDispatcher.__init__: Provide default values for
|
||||||
new arguments introduced in 2.5.
|
new arguments introduced in 2.5.
|
||||||
|
|
||||||
|
- Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
|
||||||
|
makeunicodedata.py and regenerated the Unicode database (This fixes
|
||||||
|
u'\u1d79'.lower() == '\x00').
|
||||||
|
|
||||||
Tools/Demos
|
Tools/Demos
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
|
|
@ -118,7 +118,7 @@ const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {
|
||||||
{0, 0, 0, 0, 7, 4},
|
{0, 0, 0, 0, 7, 4},
|
||||||
{0, 0, 0, 0, 8, 4},
|
{0, 0, 0, 0, 8, 4},
|
||||||
{0, 0, 0, 0, 9, 4},
|
{0, 0, 0, 0, 9, 4},
|
||||||
{42877, 0, 42877, 0, 0, 265},
|
{42877, 7545, 42877, 0, 0, 265},
|
||||||
{3814, 0, 3814, 0, 0, 9},
|
{3814, 0, 3814, 0, 0, 9},
|
||||||
{65477, 0, 65477, 0, 0, 9},
|
{65477, 0, 65477, 0, 0, 9},
|
||||||
{0, 57921, 0, 0, 0, 129},
|
{0, 57921, 0, 0, 0, 129},
|
||||||
|
@ -159,7 +159,7 @@ const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {
|
||||||
{0, 54787, 0, 0, 0, 129},
|
{0, 54787, 0, 0, 0, 129},
|
||||||
{0, 54753, 0, 0, 0, 129},
|
{0, 54753, 0, 0, 0, 129},
|
||||||
{58272, 0, 58272, 0, 0, 9},
|
{58272, 0, 58272, 0, 0, 9},
|
||||||
{0, 7545, 0, 0, 0, 385},
|
{42877, 7545, 42877, 0, 0, 385},
|
||||||
{0, 40, 0, 0, 0, 129},
|
{0, 40, 0, 0, 0, 129},
|
||||||
{65496, 0, 65496, 0, 0, 9},
|
{65496, 0, 65496, 0, 0, 9},
|
||||||
};
|
};
|
||||||
|
|
|
@ -371,33 +371,32 @@ def makeunicodetype(unicode, trace):
|
||||||
flags |= UPPER_MASK
|
flags |= UPPER_MASK
|
||||||
# use delta predictor for upper/lower/title if it fits
|
# use delta predictor for upper/lower/title if it fits
|
||||||
if record[12]:
|
if record[12]:
|
||||||
upper = int(record[12], 16) - char
|
upper = int(record[12], 16)
|
||||||
if -32768 <= upper <= 32767 and delta:
|
|
||||||
upper = upper & 0xffff
|
|
||||||
else:
|
|
||||||
upper += char
|
|
||||||
delta = False
|
|
||||||
else:
|
else:
|
||||||
upper = 0
|
upper = char
|
||||||
if record[13]:
|
if record[13]:
|
||||||
lower = int(record[13], 16) - char
|
lower = int(record[13], 16)
|
||||||
if -32768 <= lower <= 32767 and delta:
|
|
||||||
lower = lower & 0xffff
|
|
||||||
else:
|
|
||||||
lower += char
|
|
||||||
delta = False
|
|
||||||
else:
|
else:
|
||||||
lower = 0
|
lower = char
|
||||||
if record[14]:
|
if record[14]:
|
||||||
title = int(record[14], 16) - char
|
title = int(record[14], 16)
|
||||||
if -32768 <= lower <= 32767 and delta:
|
else:
|
||||||
title = title & 0xffff
|
# UCD.html says that a missing title char means that
|
||||||
else:
|
# it defaults to the uppercase character, not to the
|
||||||
title += char
|
# character itself. Apparently, in the current UCD (5.x)
|
||||||
delta = False
|
# this feature is never used
|
||||||
|
title = upper
|
||||||
|
upper_d = upper - char
|
||||||
|
lower_d = lower - char
|
||||||
|
title_d = title - char
|
||||||
|
if -32768 <= upper_d <= 32767 and \
|
||||||
|
-32768 <= lower_d <= 32767 and \
|
||||||
|
-32768 <= title_d <= 32767:
|
||||||
|
# use deltas
|
||||||
|
upper = upper_d & 0xffff
|
||||||
|
lower = lower_d & 0xffff
|
||||||
|
title = title_d & 0xffff
|
||||||
else:
|
else:
|
||||||
title = 0
|
|
||||||
if not delta:
|
|
||||||
flags |= NODELTA_MASK
|
flags |= NODELTA_MASK
|
||||||
# decimal digit, integer digit
|
# decimal digit, integer digit
|
||||||
decimal = 0
|
decimal = 0
|
||||||
|
|
Loading…
Reference in New Issue