From 84fc66dd020931c14be8b13fcbdb9a8f295141c9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 3 May 2007 17:18:26 +0000 Subject: [PATCH] Rename 'unicode' to 'str' in its tp_name field. Rename 'str' to 'str8'. Change all occurrences of unichr to chr. --- Lib/HTMLParser.py | 4 ++-- Lib/encodings/punycode.py | 2 +- Lib/sre_parse.py | 2 +- Lib/test/test_builtin.py | 26 ++++++++++++------------ Lib/test/test_codeccallbacks.py | 6 +++--- Lib/test/test_codecmaps_jp.py | 2 +- Lib/test/test_descr.py | 2 +- Lib/test/test_multibytecodec.py | 4 ++-- Lib/test/test_multibytecodec_support.py | 6 +++--- Lib/test/test_normalization.py | 4 ++-- Lib/test/test_ucn.py | 2 +- Lib/test/test_unicode.py | 10 ++++----- Lib/test/test_unicodedata.py | 8 ++++---- Lib/urllib.py | 2 +- Objects/stringobject.c | 2 +- Objects/unicodeobject.c | 2 +- Python/bltinmodule.c | 27 +++---------------------- 17 files changed, 45 insertions(+), 66 deletions(-) diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index b6a26d326f8..f0e520ca227 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -370,7 +370,7 @@ class HTMLParser(markupbase.ParserBase): c = int(s[1:], 16) else: c = int(s) - return unichr(c) + return chr(c) else: # Cannot use name2codepoint directly, because HTMLParser supports apos, # which is not part of HTML 4 @@ -378,7 +378,7 @@ class HTMLParser(markupbase.ParserBase): if HTMLParser.entitydefs is None: entitydefs = HTMLParser.entitydefs = {'apos':"'"} for k, v in htmlentitydefs.name2codepoint.items(): - entitydefs[k] = unichr(v) + entitydefs[k] = chr(v) try: return self.entitydefs[s] except KeyError: diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index c86aad72b17..89906ae2827 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -176,7 +176,7 @@ def insertion_sort(base, extended, errors): raise UnicodeError, ("Invalid character U+%x" % char) char = ord('?') pos = pos % (len(base) + 1) - base = base[:pos] + unichr(char) + base[pos:] + base = base[:pos] + chr(char) + base[pos:] bias = adapt(delta, (extpos == 0), len(base)) extpos = newpos return base diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 49dc080ed40..d53d37cf752 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -712,7 +712,7 @@ def parse_template(source, pattern): if type(sep) is type(""): makechar = chr else: - makechar = unichr + makechar = chr while 1: this = sget() if this is None: diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index b7fba5c1efb..9233871147f 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -90,7 +90,7 @@ if have_unicode: (str(''), ValueError), (str(' '), ValueError), (str(' \t\t '), ValueError), - (unichr(0x200), ValueError), + (chr(0x200), ValueError), ] class TestFailingBool: @@ -221,7 +221,7 @@ class BuiltinTest(unittest.TestCase): mode='eval', source='0', filename='tmp') if have_unicode: compile(str(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec') - self.assertRaises(TypeError, compile, unichr(0), 'f', 'exec') + self.assertRaises(TypeError, compile, chr(0), 'f', 'exec') self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad') @@ -557,7 +557,7 @@ class BuiltinTest(unittest.TestCase): class shiftunicode(str): def __getitem__(self, index): - return unichr(ord(str.__getitem__(self, index))+1) + return chr(ord(str.__getitem__(self, index))+1) self.assertEqual( filter(lambda x: x>=str("3"), shiftunicode("1234")), str("345") @@ -676,7 +676,7 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, getattr, sys, 1, "foo") self.assertRaises(TypeError, getattr) if have_unicode: - self.assertRaises(UnicodeError, getattr, sys, unichr(sys.maxunicode)) + self.assertRaises(UnicodeError, getattr, sys, chr(sys.maxunicode)) def test_hasattr(self): import sys @@ -684,7 +684,7 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, hasattr, sys, 1) self.assertRaises(TypeError, hasattr) if have_unicode: - self.assertRaises(UnicodeError, hasattr, sys, unichr(sys.maxunicode)) + self.assertRaises(UnicodeError, hasattr, sys, chr(sys.maxunicode)) def test_hash(self): hash(None) @@ -789,7 +789,7 @@ class BuiltinTest(unittest.TestCase): self.assert_(isinstance(x, int)) if have_unicode: - x = int(unichr(0x661) * 600) + x = int(chr(0x661) * 600) self.assert_(isinstance(x, int)) self.assertRaises(TypeError, int, 1, 12) @@ -1387,7 +1387,7 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(ord('A'), 65) self.assertEqual(ord('a'), 97) if have_unicode: - self.assertEqual(ord(unichr(sys.maxunicode)), sys.maxunicode) + self.assertEqual(ord(chr(sys.maxunicode)), sys.maxunicode) self.assertRaises(TypeError, ord, 42) if have_unicode: self.assertRaises(TypeError, ord, str("12")) @@ -1668,15 +1668,15 @@ class BuiltinTest(unittest.TestCase): def test_unichr(self): if have_unicode: - self.assertEqual(unichr(32), str(' ')) - self.assertEqual(unichr(65), str('A')) - self.assertEqual(unichr(97), str('a')) + self.assertEqual(chr(32), str(' ')) + self.assertEqual(chr(65), str('A')) + self.assertEqual(chr(97), str('a')) self.assertEqual( - unichr(sys.maxunicode), + chr(sys.maxunicode), str(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape') ) - self.assertRaises(ValueError, unichr, sys.maxunicode+1) - self.assertRaises(TypeError, unichr) + self.assertRaises(ValueError, chr, sys.maxunicode+1) + self.assertRaises(TypeError, chr) # We don't want self in vars(), so these are static methods diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py index b9340738e62..5215b87254e 100644 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -137,7 +137,7 @@ class CodecCallbackTest(unittest.TestCase): # base encodings. sin = "a\xac\u1234\u20ac\u8000" if sys.maxunicode > 0xffff: - sin += unichr(sys.maxunicode) + sin += chr(sys.maxunicode) sout = "a\\xac\\u1234\\u20ac\\u8000" if sys.maxunicode > 0xffff: sout += "\\U%08x" % sys.maxunicode @@ -509,7 +509,7 @@ class CodecCallbackTest(unittest.TestCase): ) # Use the correct exception cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) - s = "".join(unichr(c) for c in cs) + s = "".join(chr(c) for c in cs) self.assertEquals( codecs.xmlcharrefreplace_errors( UnicodeEncodeError("ascii", s, 0, len(s), "ouch") @@ -650,7 +650,7 @@ class CodecCallbackTest(unittest.TestCase): v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) if sys.maxunicode>=100000: v += (100000, 500000, 1000000) - s = "".join([unichr(x) for x in v]) + s = "".join([chr(x) for x in v]) codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) for enc in ("ascii", "iso-8859-15"): for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): diff --git a/Lib/test/test_codecmaps_jp.py b/Lib/test/test_codecmaps_jp.py index 58abb30a2ba..31b80eb8e95 100644 --- a/Lib/test/test_codecmaps_jp.py +++ b/Lib/test/test_codecmaps_jp.py @@ -21,7 +21,7 @@ class TestCP932Map(test_multibytecodec_support.TestBase_Mapping, ('\xff', '\uf8f3'), ] for i in range(0xa1, 0xe0): - supmaps.append((chr(i), unichr(i+0xfec0))) + supmaps.append((chr(i), chr(i+0xfec0))) class TestEUCJPCOMPATMap(test_multibytecodec_support.TestBase_Mapping, diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 2132b8d4b03..4ae8f6098c8 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1122,7 +1122,7 @@ def slots(): # this used to leak references try: class C(object): - __slots__ = [unichr(128)] + __slots__ = [chr(128)] except (TypeError, UnicodeEncodeError): pass else: diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index 8ea4bf9b46a..0b2021a5256 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -210,9 +210,9 @@ class Test_ISO2022(unittest.TestCase): def test_bug1572832(self): if sys.maxunicode >= 0x10000: - myunichr = unichr + myunichr = chr else: - myunichr = lambda x: unichr(0xD7C0+(x>>10)) + unichr(0xDC00+(x&0x3FF)) + myunichr = lambda x: chr(0xD7C0+(x>>10)) + chr(0xDC00+(x&0x3FF)) for x in xrange(0x10000, 0x110000): # Any ISO 2022 codec will cause the segfault diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/test_multibytecodec_support.py index 6abcdd63a12..acfb2f1aa81 100644 --- a/Lib/test/test_multibytecodec_support.py +++ b/Lib/test/test_multibytecodec_support.py @@ -244,8 +244,8 @@ class TestBase: self.assertEqual(ostream.getvalue(), self.tstring[0]) if len('\U00012345') == 2: # ucs2 build - _unichr = unichr - def unichr(v): + _unichr = chr + def chr(v): if v >= 0x10000: return _unichr(0xd800 + ((v - 0x10000) >> 10)) + \ _unichr(0xdc00 + ((v - 0x10000) & 0x3ff)) @@ -272,7 +272,7 @@ class TestBase_Mapping(unittest.TestCase): return test_support.open_urlresource(self.mapfileurl) def test_mapping_file(self): - unichrs = lambda s: ''.join(map(unichr, map(eval, s.split('+')))) + unichrs = lambda s: ''.join(map(chr, map(eval, s.split('+')))) urt_wa = {} for line in self.open_mapping_file(): diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py index a48af4de422..1331e5165b5 100644 --- a/Lib/test/test_normalization.py +++ b/Lib/test/test_normalization.py @@ -28,7 +28,7 @@ def unistr(data): for x in data: if x > sys.maxunicode: raise RangeError - return "".join([unichr(x) for x in data]) + return "".join([chr(x) for x in data]) class NormalizationTest(unittest.TestCase): def test_main(self): @@ -77,7 +77,7 @@ class NormalizationTest(unittest.TestCase): # Perform tests for all other data for c in range(sys.maxunicode+1): - X = unichr(c) + X = chr(c) if X in part1_data: continue self.failUnless(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c) diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index 4472e9062d3..485e124947c 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -96,7 +96,7 @@ class UnicodeNamesTest(unittest.TestCase): import unicodedata count = 0 for code in xrange(0x10000): - char = unichr(code) + char = chr(code) name = unicodedata.name(char, None) if name is not None: self.assertEqual(unicodedata.lookup(name), char) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 125fd562994..3dd92aec507 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -90,7 +90,7 @@ class UnicodeTest( "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef" "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd" "\\xfe\\xff'") - testrepr = repr(''.join(map(unichr, xrange(256)))) + testrepr = repr(''.join(map(chr, xrange(256)))) self.assertEqual(testrepr, latin1repr) # Test repr works on wide unicode escapes without overflow. self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096), @@ -632,7 +632,7 @@ class UnicodeTest( # Roundtrip safety for BMP (just the first 1024 chars) for c in xrange(1024): - u = unichr(c) + u = chr(c) for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): @@ -640,13 +640,13 @@ class UnicodeTest( # Roundtrip safety for BMP (just the first 256 chars) for c in xrange(256): - u = unichr(c) + u = chr(c) for encoding in ('latin-1',): self.assertEqual(str(u.encode(encoding),encoding), u) # Roundtrip safety for BMP (just the first 128 chars) for c in xrange(128): - u = unichr(c) + u = chr(c) for encoding in ('ascii',): self.assertEqual(str(u.encode(encoding),encoding), u) @@ -661,7 +661,7 @@ class UnicodeTest( # This excludes surrogates: in the full range, there would be # a surrogate pair (\udbff\udc00), which gets converted back # to a non-BMP character (\U0010fc00) - u = ''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000))) + u = ''.join(map(chr, range(0,0xd800)+range(0xe000,0x10000))) for encoding in ('utf-8',): self.assertEqual(str(u.encode(encoding),encoding), u) diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 227aa5a8b6d..dc4be19157f 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -21,7 +21,7 @@ class UnicodeMethodsTest(unittest.TestCase): def test_method_checksum(self): h = hashlib.sha1() for i in range(65536): - char = unichr(i) + char = chr(i) data = [ # Predicates (single char) "01"[char.isalnum()], @@ -82,7 +82,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): h = hashlib.sha1() for i in range(0x10000): - char = unichr(i) + char = chr(i) data = [ # Properties str(self.db.digit(char, -1)), @@ -194,7 +194,7 @@ class UnicodeMiscTest(UnicodeDatabaseTest): # its numeric value should be the same. count = 0 for i in xrange(0x10000): - c = unichr(i) + c = chr(i) dec = self.db.decimal(c, -1) if dec != -1: self.assertEqual(dec, self.db.numeric(c)) @@ -207,7 +207,7 @@ class UnicodeMiscTest(UnicodeDatabaseTest): # its numeric value should be the same. count = 0 for i in xrange(0x10000): - c = unichr(i) + c = chr(i) dec = self.db.digit(c, -1) if dec != -1: self.assertEqual(dec, self.db.numeric(c)) diff --git a/Lib/urllib.py b/Lib/urllib.py index 6d9460579f7..fe1c74feccb 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1158,7 +1158,7 @@ def unquote(s): except KeyError: res[i] = '%' + item except UnicodeDecodeError: - res[i] = unichr(int(item[:2], 16)) + item[2:] + res[i] = chr(int(item[:2], 16)) + item[2:] return "".join(res) def unquote_plus(s): diff --git a/Objects/stringobject.c b/Objects/stringobject.c index ee29c70d5ad..68bf70370a0 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -4009,7 +4009,7 @@ static PyObject *str_iter(PyObject *seq); PyTypeObject PyString_Type = { PyObject_HEAD_INIT(&PyType_Type) 0, - "str", + "str8", sizeof(PyStringObject), sizeof(char), string_dealloc, /* tp_dealloc */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e2f1b2fd206..d4a7e7e48e2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7954,7 +7954,7 @@ static PyObject *unicode_iter(PyObject *seq); PyTypeObject PyUnicode_Type = { PyObject_HEAD_INIT(&PyType_Type) 0, /* ob_size */ - "unicode", /* tp_name */ + "str", /* tp_name */ sizeof(PyUnicodeObject), /* tp_size */ 0, /* tp_itemsize */ /* Slots */ diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index fef001d051c..7988c89df6a 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -380,28 +380,6 @@ PyDoc_STRVAR(filter_doc, "function is None, return the items that are true. If sequence is a tuple\n" "or string, return the same type, else return a list."); -static PyObject * -builtin_chr(PyObject *self, PyObject *args) -{ - long x; - char s[1]; - - if (!PyArg_ParseTuple(args, "l:chr", &x)) - return NULL; - if (x < 0 || x >= 256) { - PyErr_SetString(PyExc_ValueError, - "chr() arg not in range(256)"); - return NULL; - } - s[0] = (char)x; - return PyString_FromStringAndSize(s, 1); -} - -PyDoc_STRVAR(chr_doc, -"chr(i) -> character\n\ -\n\ -Return a string of one character with ordinal i; 0 <= i < 256."); - #ifdef Py_USING_UNICODE static PyObject * @@ -416,7 +394,7 @@ builtin_unichr(PyObject *self, PyObject *args) } PyDoc_STRVAR(unichr_doc, -"unichr(i) -> Unicode character\n\ +"chr(i) -> Unicode character\n\ \n\ Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."); #endif @@ -2270,7 +2248,7 @@ static PyMethodDef builtin_methods[] = { {"all", builtin_all, METH_O, all_doc}, {"any", builtin_any, METH_O, any_doc}, {"callable", builtin_callable, METH_O, callable_doc}, - {"chr", builtin_unichr, METH_VARARGS, chr_doc}, + {"chr", builtin_unichr, METH_VARARGS, unichr_doc}, {"cmp", builtin_cmp, METH_VARARGS, cmp_doc}, {"compile", (PyCFunction)builtin_compile, METH_VARARGS | METH_KEYWORDS, compile_doc}, {"delattr", builtin_delattr, METH_VARARGS, delattr_doc}, @@ -2376,6 +2354,7 @@ _PyBuiltin_Init(void) SETBUILTIN("slice", &PySlice_Type); SETBUILTIN("staticmethod", &PyStaticMethod_Type); SETBUILTIN("str", &PyUnicode_Type); + SETBUILTIN("str8", &PyString_Type); SETBUILTIN("super", &PySuper_Type); SETBUILTIN("tuple", &PyTuple_Type); SETBUILTIN("type", &PyType_Type);