mirror of https://github.com/python/cpython
Issue #13054: Fix usage of sys.maxunicode after PEP 393.
This commit is contained in:
parent
b7591d4780
commit
a9860aeb08
|
@ -318,11 +318,13 @@ def _optimize_unicode(charset, fixup):
|
||||||
# XXX: could expand category
|
# XXX: could expand category
|
||||||
return charset # cannot compress
|
return charset # cannot compress
|
||||||
except IndexError:
|
except IndexError:
|
||||||
# non-BMP characters
|
# non-BMP characters; XXX now they should work
|
||||||
return charset
|
return charset
|
||||||
if negate:
|
if negate:
|
||||||
if sys.maxunicode != 65535:
|
if sys.maxunicode != 65535:
|
||||||
# XXX: negation does not work with big charsets
|
# XXX: negation does not work with big charsets
|
||||||
|
# XXX2: now they should work, but removing this will make the
|
||||||
|
# charmap 17 times bigger
|
||||||
return charset
|
return charset
|
||||||
for i in range(65536):
|
for i in range(65536):
|
||||||
charmap[i] = not charmap[i]
|
charmap[i] = not charmap[i]
|
||||||
|
|
|
@ -249,8 +249,7 @@ class BuiltinTest(unittest.TestCase):
|
||||||
self.assertEqual(chr(0xff), '\xff')
|
self.assertEqual(chr(0xff), '\xff')
|
||||||
self.assertRaises(ValueError, chr, 1<<24)
|
self.assertRaises(ValueError, chr, 1<<24)
|
||||||
self.assertEqual(chr(sys.maxunicode),
|
self.assertEqual(chr(sys.maxunicode),
|
||||||
str(('\\U%08x' % (sys.maxunicode)).encode("ascii"),
|
str('\\U0010ffff'.encode("ascii"), 'unicode-escape'))
|
||||||
'unicode-escape'))
|
|
||||||
self.assertRaises(TypeError, chr)
|
self.assertRaises(TypeError, chr)
|
||||||
self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
|
self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
|
||||||
self.assertEqual(chr(0x00010000), "\U00010000")
|
self.assertEqual(chr(0x00010000), "\U00010000")
|
||||||
|
|
|
@ -138,22 +138,14 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
def test_backslashescape(self):
|
def test_backslashescape(self):
|
||||||
# Does the same as the "unicode-escape" encoding, but with different
|
# Does the same as the "unicode-escape" encoding, but with different
|
||||||
# base encodings.
|
# base encodings.
|
||||||
sin = "a\xac\u1234\u20ac\u8000"
|
sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
|
||||||
if sys.maxunicode > 0xffff:
|
sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
|
||||||
sin += chr(sys.maxunicode)
|
|
||||||
sout = b"a\\xac\\u1234\\u20ac\\u8000"
|
|
||||||
if sys.maxunicode > 0xffff:
|
|
||||||
sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
|
|
||||||
self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
|
self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
|
||||||
|
|
||||||
sout = b"a\xac\\u1234\\u20ac\\u8000"
|
sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff"
|
||||||
if sys.maxunicode > 0xffff:
|
|
||||||
sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
|
|
||||||
self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
|
self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
|
||||||
|
|
||||||
sout = b"a\xac\\u1234\xa4\\u8000"
|
sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
|
||||||
if sys.maxunicode > 0xffff:
|
|
||||||
sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
|
|
||||||
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
|
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
|
||||||
|
|
||||||
def test_decoding_callbacks(self):
|
def test_decoding_callbacks(self):
|
||||||
|
|
|
@ -247,14 +247,9 @@ class Test_ISO2022(unittest.TestCase):
|
||||||
self.assertFalse(any(x > 0x80 for x in e))
|
self.assertFalse(any(x > 0x80 for x in e))
|
||||||
|
|
||||||
def test_bug1572832(self):
|
def test_bug1572832(self):
|
||||||
if sys.maxunicode >= 0x10000:
|
|
||||||
myunichr = chr
|
|
||||||
else:
|
|
||||||
myunichr = lambda x: chr(0xD7C0+(x>>10)) + chr(0xDC00+(x&0x3FF))
|
|
||||||
|
|
||||||
for x in range(0x10000, 0x110000):
|
for x in range(0x10000, 0x110000):
|
||||||
# Any ISO 2022 codec will cause the segfault
|
# Any ISO 2022 codec will cause the segfault
|
||||||
myunichr(x).encode('iso_2022_jp', 'ignore')
|
chr(x).encode('iso_2022_jp', 'ignore')
|
||||||
|
|
||||||
class TestStateful(unittest.TestCase):
|
class TestStateful(unittest.TestCase):
|
||||||
text = '\u4E16\u4E16'
|
text = '\u4E16\u4E16'
|
||||||
|
|
|
@ -13,10 +13,6 @@ import warnings
|
||||||
from test import support, string_tests
|
from test import support, string_tests
|
||||||
import _string
|
import _string
|
||||||
|
|
||||||
# decorator to skip tests on narrow builds
|
|
||||||
requires_wide_build = unittest.skipIf(sys.maxunicode == 65535,
|
|
||||||
'requires wide build')
|
|
||||||
|
|
||||||
# Error handling (bad decoder return)
|
# Error handling (bad decoder return)
|
||||||
def search_function(encoding):
|
def search_function(encoding):
|
||||||
def decode1(input, errors="strict"):
|
def decode1(input, errors="strict"):
|
||||||
|
@ -519,7 +515,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
|
self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
|
||||||
|
|
||||||
|
|
||||||
@requires_wide_build
|
|
||||||
def test_lower(self):
|
def test_lower(self):
|
||||||
string_tests.CommonTest.test_lower(self)
|
string_tests.CommonTest.test_lower(self)
|
||||||
self.assertEqual('\U00010427'.lower(), '\U0001044F')
|
self.assertEqual('\U00010427'.lower(), '\U0001044F')
|
||||||
|
@ -530,7 +525,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('X\U00010427x\U0001044F'.lower(),
|
self.assertEqual('X\U00010427x\U0001044F'.lower(),
|
||||||
'x\U0001044Fx\U0001044F')
|
'x\U0001044Fx\U0001044F')
|
||||||
|
|
||||||
@requires_wide_build
|
|
||||||
def test_upper(self):
|
def test_upper(self):
|
||||||
string_tests.CommonTest.test_upper(self)
|
string_tests.CommonTest.test_upper(self)
|
||||||
self.assertEqual('\U0001044F'.upper(), '\U00010427')
|
self.assertEqual('\U0001044F'.upper(), '\U00010427')
|
||||||
|
@ -541,7 +535,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('X\U00010427x\U0001044F'.upper(),
|
self.assertEqual('X\U00010427x\U0001044F'.upper(),
|
||||||
'X\U00010427X\U00010427')
|
'X\U00010427X\U00010427')
|
||||||
|
|
||||||
@requires_wide_build
|
|
||||||
def test_capitalize(self):
|
def test_capitalize(self):
|
||||||
string_tests.CommonTest.test_capitalize(self)
|
string_tests.CommonTest.test_capitalize(self)
|
||||||
self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
|
self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
|
||||||
|
@ -554,7 +547,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('X\U00010427x\U0001044F'.capitalize(),
|
self.assertEqual('X\U00010427x\U0001044F'.capitalize(),
|
||||||
'X\U0001044Fx\U0001044F')
|
'X\U0001044Fx\U0001044F')
|
||||||
|
|
||||||
@requires_wide_build
|
|
||||||
def test_title(self):
|
def test_title(self):
|
||||||
string_tests.MixinStrUnicodeUserStringTest.test_title(self)
|
string_tests.MixinStrUnicodeUserStringTest.test_title(self)
|
||||||
self.assertEqual('\U0001044F'.title(), '\U00010427')
|
self.assertEqual('\U0001044F'.title(), '\U00010427')
|
||||||
|
@ -569,7 +561,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('X\U00010427x\U0001044F X\U00010427x\U0001044F'.title(),
|
self.assertEqual('X\U00010427x\U0001044F X\U00010427x\U0001044F'.title(),
|
||||||
'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F')
|
'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F')
|
||||||
|
|
||||||
@requires_wide_build
|
|
||||||
def test_swapcase(self):
|
def test_swapcase(self):
|
||||||
string_tests.CommonTest.test_swapcase(self)
|
string_tests.CommonTest.test_swapcase(self)
|
||||||
self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
|
self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
|
||||||
|
@ -1114,15 +1105,12 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
def test_codecs_utf8(self):
|
def test_codecs_utf8(self):
|
||||||
self.assertEqual(''.encode('utf-8'), b'')
|
self.assertEqual(''.encode('utf-8'), b'')
|
||||||
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
|
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
|
||||||
if sys.maxunicode == 65535:
|
self.assertEqual('\U00010002'.encode('utf-8'), b'\xf0\x90\x80\x82')
|
||||||
self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
|
self.assertEqual('\U00023456'.encode('utf-8'), b'\xf0\xa3\x91\x96')
|
||||||
self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
|
|
||||||
self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
|
self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
|
||||||
self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
|
self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
|
||||||
if sys.maxunicode == 65535:
|
self.assertEqual(('\U00010002'*10).encode('utf-8'),
|
||||||
self.assertEqual(
|
b'\xf0\x90\x80\x82'*10)
|
||||||
('\ud800\udc02'*1000).encode('utf-8'),
|
|
||||||
b'\xf0\x90\x80\x82'*1000)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
|
'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
|
||||||
'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
|
'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
|
||||||
|
|
|
@ -107,6 +107,7 @@ def get_machine_details():
|
||||||
print('Getting machine details...')
|
print('Getting machine details...')
|
||||||
buildno, builddate = platform.python_build()
|
buildno, builddate = platform.python_build()
|
||||||
python = platform.python_version()
|
python = platform.python_version()
|
||||||
|
# XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+?
|
||||||
if sys.maxunicode == 65535:
|
if sys.maxunicode == 65535:
|
||||||
# UCS2 build (standard)
|
# UCS2 build (standard)
|
||||||
unitype = 'UCS2'
|
unitype = 'UCS2'
|
||||||
|
|
|
@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2):
|
||||||
print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
|
print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
|
||||||
mismatch = 0
|
mismatch = 0
|
||||||
# Check encoding
|
# Check encoding
|
||||||
for i in range(sys.maxunicode):
|
for i in range(sys.maxunicode+1):
|
||||||
u = chr(i)
|
u = chr(i)
|
||||||
try:
|
try:
|
||||||
c1 = u.encode(encoding1)
|
c1 = u.encode(encoding1)
|
||||||
|
|
Loading…
Reference in New Issue