Changes in anticipation of stricter str vs. bytes enforcement.

2007-08-27 17:23:59 +00:00 · 2007-08-27 17:23:59 +00:00 · 39478e8528
parent 85825dc1ff
commit 39478e8528
15 changed files with 78 additions and 76 deletions
--- a/Lib/ctypes/test/test_internals.py
+++ b/Lib/ctypes/test/test_internals.py
@@ -76,11 +76,13 @@ class ObjectsTestCase(unittest.TestCase):
        x = X()
        x.a = s1
        x.b = s2
-        self.failUnlessEqual(x._objects, {"0": bytes(s1), "1": bytes(s2)})
+        self.failUnlessEqual(x._objects, {"0": bytes(s1, "ascii"),
+                                          "1": bytes(s2, "ascii")})

        y = Y()
        y.x = x
-        self.failUnlessEqual(y._objects, {"0": {"0": bytes(s1), "1": bytes(s2)}})
+        self.failUnlessEqual(y._objects, {"0": {"0": bytes(s1, "ascii"),
+                                                "1": bytes(s2, "ascii")}})
 ##        x = y.x
 ##        del y
 ##        print x._b_base_._objects
--- a/Lib/hmac.py
+++ b/Lib/hmac.py
@@ -37,10 +37,7 @@ class HMAC:
        if key is _secret_backdoor_key: # cheap
            return

-        if not isinstance(key, bytes):
-            if hasattr(key, "__index__"):
-                raise TypeError("key can't be a number")
-            key = bytes(key)
+        assert isinstance(key, bytes), repr(key)

        if digestmod is None:
            import hashlib
@@ -71,10 +68,7 @@ class HMAC:
    def update(self, msg):
        """Update this hashing object with the string msg.
        """
-        if not isinstance(msg, bytes):
-            if hasattr(msg, "__index__"):
-                raise TypeError("msg can't be a number")
-            msg = bytes(msg)
+        assert isinstance(msg, bytes), repr(msg)
        self.inner.update(msg)

    def copy(self):
--- a/Lib/mimetools.py
+++ b/Lib/mimetools.py
@@ -158,7 +158,7 @@ def decode(input, output, encoding):
        import uu
        return uu.decode(input, output)
    if encoding in ('7bit', '8bit'):
-        return output.write(input.read().decode("Latin-1"))
+        return output.write(input.read())
    if encoding in decodetab:
        pipethrough(input, decodetab[encoding], output)
    else:
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -248,7 +248,7 @@ class Pickler:
            else:
                return LONG_BINPUT + pack("<i", i)

-        return PUT + bytes(repr(i)) + b'\n'
+        return PUT + repr(i).encode("ascii") + b'\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i, pack=struct.pack):
@@ -258,7 +258,7 @@ class Pickler:
            else:
                return LONG_BINGET + pack("<i", i)

-        return GET + bytes(repr(i)) + b'\n'
+        return GET + repr(i).encode("ascii") + b'\n'

    def save(self, obj):
        # Check for persistent id (defined by a subclass)
@@ -334,7 +334,7 @@ class Pickler:
            self.save(pid)
            self.write(BINPERSID)
        else:
-            self.write(PERSID + bytes(str(pid)) + b'\n')
+            self.write(PERSID + str(pid).encode("ascii") + b'\n')

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
@@ -449,7 +449,7 @@ class Pickler:
                self.write(BININT + pack("<i", obj))
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
-        self.write(INT + bytes(repr(obj)) + b'\n')
+        self.write(INT + repr(obj).encode("ascii") + b'\n')
    # XXX save_int is merged into save_long
    # dispatch[int] = save_int

@@ -481,14 +481,14 @@ class Pickler:
            else:
                self.write(LONG4 + pack("<i", n) + encoded)
            return
-        self.write(LONG + bytes(repr(obj)) + b'\n')
+        self.write(LONG + repr(obj).encode("ascii") + b'\n')
    dispatch[int] = save_long

    def save_float(self, obj, pack=struct.pack):
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
-            self.write(FLOAT + bytes(repr(obj)) + b'\n')
+            self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
    dispatch[float] = save_float

    def save_string(self, obj, pack=struct.pack):
@@ -500,7 +500,7 @@ class Pickler:
                self.write(BINSTRING + pack("<i", n) + bytes(obj))
        else:
            # Strip leading 's' due to repr() of str8() returning s'...'
-            self.write(STRING + bytes(repr(obj).lstrip("s")) + b'\n')
+            self.write(STRING + repr(obj).lstrip("s").encode("ascii") + b'\n')
        self.memoize(obj)
    dispatch[str8] = save_string

@@ -710,7 +710,8 @@ class Pickler:
                    write(EXT4 + pack("<i", code))
                return

-        write(GLOBAL + bytes(module) + b'\n' + bytes(name) + b'\n')
+        write(GLOBAL + bytes(module, "utf-8") + b'\n' +
+              bytes(name, "utf-8") + b'\n')
        self.memoize(obj)

    dispatch[FunctionType] = save_global
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -461,18 +461,19 @@ class AbstractPickleTests(unittest.TestCase):
        self.assertRaises(self.error, self.loads, b'garyp')

    def test_insecure_strings(self):
-        insecure = ["abc", "2 + 2", # not quoted
-                    #"'abc' + 'def'", # not a single quoted string
-                    "'abc", # quote is not closed
-                    "'abc\"", # open quote and close quote don't match
-                    "'abc'   ?", # junk after close quote
-                    "'\\'", # trailing backslash
+        # XXX Some of these tests are temporarily disabled
+        insecure = [b"abc", b"2 + 2", # not quoted
+                    ## b"'abc' + 'def'", # not a single quoted string
+                    b"'abc", # quote is not closed
+                    b"'abc\"", # open quote and close quote don't match
+                    b"'abc'   ?", # junk after close quote
+                    b"'\\'", # trailing backslash
                    # some tests of the quoting rules
-                    #"'abc\"\''",
-                    #"'\\\\a\'\'\'\\\'\\\\\''",
+                    ## b"'abc\"\''",
+                    ## b"'\\\\a\'\'\'\\\'\\\\\''",
                    ]
-        for s in insecure:
-            buf = b"S" + bytes(s) + b"\012p0\012."
+        for b in insecure:
+            buf = b"S" + b + b"\012p0\012."
            self.assertRaises(ValueError, self.loads, buf)

    def test_unicode(self):
@@ -496,12 +497,12 @@ class AbstractPickleTests(unittest.TestCase):

    def test_maxint64(self):
        maxint64 = (1 << 63) - 1
-        data = b'I' + bytes(str(maxint64)) + b'\n.'
+        data = b'I' + str(maxint64).encode("ascii") + b'\n.'
        got = self.loads(data)
        self.assertEqual(got, maxint64)

        # Try too with a bogus literal.
-        data = b'I' + bytes(str(maxint64)) + b'JUNK\n.'
+        data = b'I' + str(maxint64).encode("ascii") + b'JUNK\n.'
        self.assertRaises(ValueError, self.loads, data)

    def test_long(self):
@@ -699,7 +700,7 @@ class AbstractPickleTests(unittest.TestCase):

            # Dump using protocol 1 for comparison.
            s1 = self.dumps(x, 1)
-            self.assert_(bytes(__name__) in s1)
+            self.assert_(__name__.encode("utf-8") in s1)
            self.assert_(b"MyList" in s1)
            self.assertEqual(opcode_in_pickle(opcode, s1), False)

@@ -709,7 +710,7 @@ class AbstractPickleTests(unittest.TestCase):

            # Dump using protocol 2 for test.
            s2 = self.dumps(x, 2)
-            self.assert_(bytes(__name__) not in s2)
+            self.assert_(__name__.encode("utf-8") not in s2)
            self.assert_(b"MyList" not in s2)
            self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))

--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -498,10 +498,12 @@ class BaseTest(unittest.TestCase):
        EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")
        EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby")

-        ba = buffer('a')
-        bb = buffer('b')
-        EQ("bbc", "abc", "replace", ba, bb)
-        EQ("aac", "abc", "replace", bb, ba)
+        # XXX Commented out. Is there any reason to support buffer objects
+        # as arguments for str.replace()?  GvR
+##         ba = buffer('a')
+##         bb = buffer('b')
+##         EQ("bbc", "abc", "replace", ba, bb)
+##         EQ("aac", "abc", "replace", bb, ba)

        #
        self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
--- a/Lib/test/test_complex.py
+++ b/Lib/test/test_complex.py
@@ -322,7 +322,7 @@ class ComplexTest(unittest.TestCase):
            print(a, b, file=fo)
            fo.close()
            fo = open(test_support.TESTFN, "rb")
-            self.assertEqual(fo.read(), bytes("%s %s\n" % (a, b)))
+            self.assertEqual(fo.read(), ("%s %s\n" % (a, b)).encode("ascii"))
        finally:
            if (fo is not None) and (not fo.closed):
                fo.close()
--- a/Lib/test/test_datetime.py
+++ b/Lib/test/test_datetime.py
@@ -1085,7 +1085,7 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
            # This shouldn't blow up because of the month byte alone.  If
            # the implementation changes to do more-careful checking, it may
            # blow up because other fields are insane.
-            self.theclass(bytes(base[:2] + chr(ord_byte) + base[3:]))
+            self.theclass(bytes(base[:2] + chr(ord_byte) + base[3:], "ascii"))

 #############################################################################
 # datetime tests
--- a/Lib/test/test_gettext.py
+++ b/Lib/test/test_gettext.py
@@ -13,7 +13,7 @@ from test import test_support
 #    has no sense, it would have if we were testing a parser (i.e. pygettext)
 #  - Tests should have only one assert.

-GNU_MO_DATA = '''\
+GNU_MO_DATA = b'''\
 3hIElQAAAAAGAAAAHAAAAEwAAAALAAAAfAAAAAAAAACoAAAAFQAAAKkAAAAjAAAAvwAAAKEAAADj
 AAAABwAAAIUBAAALAAAAjQEAAEUBAACZAQAAFgAAAN8CAAAeAAAA9gIAAKEAAAAVAwAABQAAALcD
 AAAJAAAAvQMAAAEAAAADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAABQAAAAYAAAACAAAAAFJh
@@ -33,7 +33,7 @@ IHNiZSBsYmhlIENsZ3ViYSBjZWJ0ZW56ZiBvbCBjZWJpdnF2YXQgbmEgdmFncmVzbnByIGdiIGd1
 ciBUQUgKdHJnZ3JrZyB6cmZmbnRyIHBuZ255YnQgeXZvZW5lbC4AYmFjb24Ad2luayB3aW5rAA==
 '''

-UMO_DATA = '''\
+UMO_DATA = b'''\
 3hIElQAAAAACAAAAHAAAACwAAAAFAAAAPAAAAAAAAABQAAAABAAAAFEAAAAPAQAAVgAAAAQAAABm
 AQAAAQAAAAIAAAAAAAAAAAAAAAAAAAAAYWLDngBQcm9qZWN0LUlkLVZlcnNpb246IDIuMApQTy1S
 ZXZpc2lvbi1EYXRlOiAyMDAzLTA0LTExIDEyOjQyLTA0MDAKTGFzdC1UcmFuc2xhdG9yOiBCYXJy
@@ -43,7 +43,7 @@ bjsgY2hhcnNldD11dGYtOApDb250ZW50LVRyYW5zZmVyLUVuY29kaW5nOiA3Yml0CkdlbmVyYXRl
 ZC1CeTogbWFudWFsbHkKAMKkeXoA
 '''

-MMO_DATA = '''\
+MMO_DATA = b'''\
 3hIElQAAAAABAAAAHAAAACQAAAADAAAALAAAAAAAAAA4AAAAeAEAADkAAAABAAAAAAAAAAAAAAAA
 UHJvamVjdC1JZC1WZXJzaW9uOiBObyBQcm9qZWN0IDAuMApQT1QtQ3JlYXRpb24tRGF0ZTogV2Vk
 IERlYyAxMSAwNzo0NDoxNSAyMDAyClBPLVJldmlzaW9uLURhdGU6IDIwMDItMDgtMTQgMDE6MTg6
--- a/Lib/test/test_hmac.py
+++ b/Lib/test/test_hmac.py
@@ -24,21 +24,21 @@ class TestVectorsTestCase(unittest.TestCase):
                b"\xdd" * 50,
                "56be34521d144c88dbb8c733f0e8b3f6")

-        md5test("".join([chr(i) for i in range(1, 26)]),
+        md5test(bytes(range(1, 26)),
                b"\xcd" * 50,
                "697eaf0aca3a3aea3a75164746ffaa79")

-        md5test(chr(0x0C) * 16,
-                "Test With Truncation",
+        md5test(b"\x0C" * 16,
+                b"Test With Truncation",
                "56461ef2342edc00f9bab995690efd4c")

        md5test(b"\xaa" * 80,
-                "Test Using Larger Than Block-Size Key - Hash Key First",
+                b"Test Using Larger Than Block-Size Key - Hash Key First",
                "6b1ab7fe4bd7bf8f0b62e6ce61b9d0cd")

        md5test(b"\xaa" * 80,
-                ("Test Using Larger Than Block-Size Key "
-                 "and Larger Than One Block-Size Data"),
+                (b"Test Using Larger Than Block-Size Key "
+                 b"and Larger Than One Block-Size Data"),
                "6f630fad67cda0ee1fb1f562db3aa53e")

    def test_sha_vectors(self):
@@ -62,8 +62,8 @@ class TestVectorsTestCase(unittest.TestCase):
                b"\xCD" * 50,
                "4c9007f4026250c6bc8414f9bf50c86c2d7235da")

-        shatest(chr(0x0C) * 20,
-                "Test With Truncation",
+        shatest(b"\x0C" * 20,
+                b"Test With Truncation",
                "4c1a03424b55e07fe7f27be1d58bb9324a9a5a04")

        shatest(b"\xAA" * 80,
@@ -82,14 +82,14 @@ class ConstructorTestCase(unittest.TestCase):
        # Standard constructor call.
        failed = 0
        try:
-            h = hmac.HMAC("key")
+            h = hmac.HMAC(b"key")
        except:
            self.fail("Standard constructor call raised exception.")

    def test_withtext(self):
        # Constructor call with text.
        try:
-            h = hmac.HMAC("key", "hash this!")
+            h = hmac.HMAC(b"key", b"hash this!")
        except:
            self.fail("Constructor call with text argument raised exception.")

@@ -97,7 +97,7 @@ class ConstructorTestCase(unittest.TestCase):
        # Constructor call with text and digest module.
        from hashlib import sha1
        try:
-            h = hmac.HMAC("key", "", sha1)
+            h = hmac.HMAC(b"key", b"", sha1)
        except:
            self.fail("Constructor call with hashlib.sha1 raised exception.")

@@ -107,15 +107,15 @@ class SanityTestCase(unittest.TestCase):
        # Testing if HMAC defaults to MD5 algorithm.
        # NOTE: this whitebox test depends on the hmac class internals
        import hashlib
-        h = hmac.HMAC("key")
+        h = hmac.HMAC(b"key")
        self.assertEqual(h.digest_cons, hashlib.md5)

    def test_exercise_all_methods(self):
        # Exercising all methods once.
        # This must not raise any exceptions
        try:
-            h = hmac.HMAC("my secret key")
-            h.update("compute the hash of this text!")
+            h = hmac.HMAC(b"my secret key")
+            h.update(b"compute the hash of this text!")
            dig = h.digest()
            dig = h.hexdigest()
            h2 = h.copy()
@@ -126,7 +126,7 @@ class CopyTestCase(unittest.TestCase):

    def test_attributes(self):
        # Testing if attributes are of same type.
-        h1 = hmac.HMAC("key")
+        h1 = hmac.HMAC(b"key")
        h2 = h1.copy()
        self.failUnless(h1.digest_cons == h2.digest_cons,
            "digest constructors don't match.")
@@ -137,7 +137,7 @@ class CopyTestCase(unittest.TestCase):

    def test_realcopy(self):
        # Testing if the copy method created a real copy.
-        h1 = hmac.HMAC("key")
+        h1 = hmac.HMAC(b"key")
        h2 = h1.copy()
        # Using id() in case somebody has overridden __cmp__.
        self.failUnless(id(h1) != id(h2), "No real copy of the HMAC instance.")
@@ -148,8 +148,8 @@ class CopyTestCase(unittest.TestCase):

    def test_equality(self):
        # Testing if the copy has the same digests.
-        h1 = hmac.HMAC("key")
-        h1.update("some random text")
+        h1 = hmac.HMAC(b"key")
+        h1.update(b"some random text")
        h2 = h1.copy()
        self.assertEqual(h1.digest(), h2.digest(),
            "Digest of copy doesn't match original digest.")
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -10,7 +10,7 @@ from test import test_support
 class FakeSocket:
    def __init__(self, text, fileclass=io.BytesIO):
        if isinstance(text, str):
-            text = bytes(text)
+            text = text.encode("ascii")
        self.text = text
        self.fileclass = fileclass
        self.data = b''
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -305,7 +305,9 @@ class MemorySeekTestMixin:


 class BytesIOTest(MemorySeekTestMixin, unittest.TestCase):
-    buftype = bytes
+    @staticmethod
+    def buftype(s):
+        return s.encode("utf-8")
    ioclass = io.BytesIO
    EOF = b""

--- a/Lib/test/test_tempfile.py
+++ b/Lib/test/test_tempfile.py
@@ -660,7 +660,7 @@ class test_SpooledTemporaryFile(TC):
        try:
            f = tempfile.SpooledTemporaryFile(max_size=10, dir=dir)
            self.failIf(f._rolled)
-            f.write('blat ' * 5)
+            f.write(b'blat ' * 5)
            self.failUnless(f._rolled)
            filename = f.name
            f.close()
@@ -675,7 +675,7 @@ class test_SpooledTemporaryFile(TC):
        self.failIf(f._rolled)
        for i in range(5):
            f.seek(0, 0)
-            f.write('x' * 20)
+            f.write(b'x' * 20)
        self.failIf(f._rolled)

    def test_write_sequential(self):
@@ -683,11 +683,11 @@ class test_SpooledTemporaryFile(TC):
        # over afterward
        f = self.do_create(max_size=30)
        self.failIf(f._rolled)
-        f.write('x' * 20)
+        f.write(b'x' * 20)
        self.failIf(f._rolled)
-        f.write('x' * 10)
+        f.write(b'x' * 10)
        self.failIf(f._rolled)
-        f.write('x')
+        f.write(b'x')
        self.failUnless(f._rolled)

    def test_sparse(self):
@@ -697,7 +697,7 @@ class test_SpooledTemporaryFile(TC):
        self.failIf(f._rolled)
        f.seek(100, 0)
        self.failIf(f._rolled)
-        f.write('x')
+        f.write(b'x')
        self.failUnless(f._rolled)

    def test_fileno(self):
@@ -710,7 +710,7 @@ class test_SpooledTemporaryFile(TC):
    def test_multiple_close(self):
        # A SpooledTemporaryFile can be closed many times without error
        f = tempfile.SpooledTemporaryFile()
-        f.write('abc\n')
+        f.write(b'abc\n')
        f.close()
        try:
            f.close()
@@ -727,8 +727,8 @@ class test_SpooledTemporaryFile(TC):
        write = f.write
        seek = f.seek

-        write("a" * 35)
-        write("b" * 35)
+        write(b"a" * 35)
+        write(b"b" * 35)
        seek(0, 0)
        self.assertEqual(read(70), b'a'*35 + b'b'*35)

--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -701,7 +701,7 @@ class UnicodeTest(
        if not sys.platform.startswith('java'):
            self.assertEqual(
                str(
-                    buffer('character buffers are decoded to unicode'),
+                    buffer(b'character buffers are decoded to unicode'),
                    'utf-8',
                    'strict'
                ),
@@ -791,7 +791,7 @@ class UnicodeTest(
        self.assertEqual(str(b'Andr\202 x', 'ascii', 'replace'), 'Andr\uFFFD x')

        # Error handling (unknown character names)
-        self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
+        self.assertEqual(b"\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")

        # Error handling (truncated escape sequence)
        self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")
--- a/Lib/uu.py
+++ b/Lib/uu.py
@@ -72,12 +72,12 @@ def encode(in_file, out_file, name=None, mode=None):
    #
    # Write the data
    #
-    out_file.write('begin %o %s\n' % ((mode & 0o777),name))
+    out_file.write(('begin %o %s\n' % ((mode & 0o777), name)).encode("ascii"))
    data = in_file.read(45)
    while len(data) > 0:
-        out_file.write(str(binascii.b2a_uu(data), "ascii"))
+        out_file.write(binascii.b2a_uu(data))
        data = in_file.read(45)
-    out_file.write(' \nend\n')
+    out_file.write(b' \nend\n')


 def decode(in_file, out_file=None, mode=None, quiet=0):