Patch #1303: Adapt the str8 constructor to match the bytes (now buffer) constructor.

This commit is contained in:
Georg Brandl 2007-10-24 18:55:37 +00:00
parent 97f9d4f312
commit bd1c68c94f
15 changed files with 212 additions and 72 deletions

View File

@ -17,12 +17,12 @@ else:
READ_MODE = "r"
# XXX Clean up once str8's constructor matches that of bytes.
LOAD_CONST = str8(chr(dis.opname.index('LOAD_CONST')))
IMPORT_NAME = str8(chr(dis.opname.index('IMPORT_NAME')))
STORE_NAME = str8(chr(dis.opname.index('STORE_NAME')))
STORE_GLOBAL = str8(chr(dis.opname.index('STORE_GLOBAL')))
LOAD_CONST = str8([dis.opname.index('LOAD_CONST')])
IMPORT_NAME = str8([dis.opname.index('IMPORT_NAME')])
STORE_NAME = str8([dis.opname.index('STORE_NAME')])
STORE_GLOBAL = str8([dis.opname.index('STORE_GLOBAL')])
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
HAVE_ARGUMENT = str8(chr(dis.HAVE_ARGUMENT))
HAVE_ARGUMENT = str8([dis.HAVE_ARGUMENT])
# Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there
@ -368,7 +368,7 @@ class ModuleFinder:
consts = co.co_consts
LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
while code:
c = str8(chr(code[0]))
c = str8([code[0]])
if c in STORE_OPS:
oparg, = unpack('<H', code[1:3])
yield "store", (names[oparg],)

View File

@ -1978,7 +1978,7 @@ class _Example:
_dis_test = r"""
>>> import pickle
>>> x = [1, 2, (3, 4), {str8('abc'): "def"}]
>>> x = [1, 2, (3, 4), {str8(b'abc'): "def"}]
>>> pkl = pickle.dumps(x, 0)
>>> dis(pkl)
0: ( MARK

View File

@ -36,7 +36,7 @@ from _struct import Struct as _Struct, error
class Struct(_Struct):
def __init__(self, fmt):
if isinstance(fmt, str):
fmt = str8(fmt)
fmt = str8(fmt, 'latin1')
_Struct.__init__(self, fmt)
_MAXCACHE = 100

View File

@ -580,7 +580,8 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(hash(1), hash(1))
self.assertEqual(hash(1), hash(1.0))
hash('spam')
self.assertEqual(hash('spam'), hash(str8('spam')))
self.assertEqual(hash('spam'), hash(str8(b'spam'))) # remove str8()
# when b"" is immutable
hash((0,1,2,3))
def f(): pass
self.assertRaises(TypeError, hash, [])

View File

@ -103,33 +103,33 @@ class BytesTest(unittest.TestCase):
self.failIf(b3 <= b2)
def test_compare_to_str(self):
self.assertEqual(b"abc" == str8("abc"), True)
self.assertEqual(b"ab" != str8("abc"), True)
self.assertEqual(b"ab" <= str8("abc"), True)
self.assertEqual(b"ab" < str8("abc"), True)
self.assertEqual(b"abc" >= str8("ab"), True)
self.assertEqual(b"abc" > str8("ab"), True)
self.assertEqual(b"abc" == str8(b"abc"), True)
self.assertEqual(b"ab" != str8(b"abc"), True)
self.assertEqual(b"ab" <= str8(b"abc"), True)
self.assertEqual(b"ab" < str8(b"abc"), True)
self.assertEqual(b"abc" >= str8(b"ab"), True)
self.assertEqual(b"abc" > str8(b"ab"), True)
self.assertEqual(b"abc" != str8("abc"), False)
self.assertEqual(b"ab" == str8("abc"), False)
self.assertEqual(b"ab" > str8("abc"), False)
self.assertEqual(b"ab" >= str8("abc"), False)
self.assertEqual(b"abc" < str8("ab"), False)
self.assertEqual(b"abc" <= str8("ab"), False)
self.assertEqual(b"abc" != str8(b"abc"), False)
self.assertEqual(b"ab" == str8(b"abc"), False)
self.assertEqual(b"ab" > str8(b"abc"), False)
self.assertEqual(b"ab" >= str8(b"abc"), False)
self.assertEqual(b"abc" < str8(b"ab"), False)
self.assertEqual(b"abc" <= str8(b"ab"), False)
self.assertEqual(str8("abc") == b"abc", True)
self.assertEqual(str8("ab") != b"abc", True)
self.assertEqual(str8("ab") <= b"abc", True)
self.assertEqual(str8("ab") < b"abc", True)
self.assertEqual(str8("abc") >= b"ab", True)
self.assertEqual(str8("abc") > b"ab", True)
self.assertEqual(str8(b"abc") == b"abc", True)
self.assertEqual(str8(b"ab") != b"abc", True)
self.assertEqual(str8(b"ab") <= b"abc", True)
self.assertEqual(str8(b"ab") < b"abc", True)
self.assertEqual(str8(b"abc") >= b"ab", True)
self.assertEqual(str8(b"abc") > b"ab", True)
self.assertEqual(str8("abc") != b"abc", False)
self.assertEqual(str8("ab") == b"abc", False)
self.assertEqual(str8("ab") > b"abc", False)
self.assertEqual(str8("ab") >= b"abc", False)
self.assertEqual(str8("abc") < b"ab", False)
self.assertEqual(str8("abc") <= b"ab", False)
self.assertEqual(str8(b"abc") != b"abc", False)
self.assertEqual(str8(b"ab") == b"abc", False)
self.assertEqual(str8(b"ab") > b"abc", False)
self.assertEqual(str8(b"ab") >= b"abc", False)
self.assertEqual(str8(b"abc") < b"ab", False)
self.assertEqual(str8(b"abc") <= b"ab", False)
# Byte comparisons with unicode should always fail!
# Test this for all expected byte orders and Unicode character sizes
@ -345,7 +345,7 @@ class BytesTest(unittest.TestCase):
self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
def test_from_buffer(self):
sample = str8("Hello world\n\x80\x81\xfe\xff")
sample = str8(b"Hello world\n\x80\x81\xfe\xff")
buf = memoryview(sample)
b = bytes(buf)
self.assertEqual(b, bytes(sample))
@ -367,8 +367,8 @@ class BytesTest(unittest.TestCase):
b1 = b"abc"
b2 = b"def"
self.assertEqual(b1 + b2, b"abcdef")
self.assertEqual(b1 + str8("def"), b"abcdef")
self.assertEqual(str8("def") + b1, b"defabc")
self.assertEqual(b1 + str8(b"def"), b"abcdef")
self.assertEqual(str8(b"def") + b1, b"defabc")
self.assertRaises(TypeError, lambda: b1 + "def")
self.assertRaises(TypeError, lambda: "abc" + b2)
@ -391,7 +391,7 @@ class BytesTest(unittest.TestCase):
self.assertEqual(b, b"abcdef")
self.assertEqual(b, b1)
self.failUnless(b is b1)
b += str8("xyz")
b += str8(b"xyz")
self.assertEqual(b, b"abcdefxyz")
try:
b += ""

View File

@ -181,7 +181,7 @@ class CodecCallbackTest(unittest.TestCase):
# mapped through the encoding again. This means, that
# to be able to use e.g. the "replace" handler, the
# charmap has to have a mapping for "?".
charmap = dict((ord(c), str8(2*c.upper())) for c in "abcdefgh")
charmap = dict((ord(c), str8(2*c.upper(), 'ascii')) for c in "abcdefgh")
sin = "abc"
sout = b"AABBCC"
self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
@ -189,7 +189,7 @@ class CodecCallbackTest(unittest.TestCase):
sin = "abcA"
self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
charmap[ord("?")] = str8("XYZ")
charmap[ord("?")] = str8(b"XYZ")
sin = "abcDEF"
sout = b"AABBCCXYZXYZXYZ"
self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
@ -309,7 +309,7 @@ class CodecCallbackTest(unittest.TestCase):
# check with one argument too much
self.assertRaises(TypeError, exctype, *(args + ["too much"]))
# check with one argument of the wrong type
wrongargs = [ "spam", str8("eggs"), b"spam", 42, 1.0, None ]
wrongargs = [ "spam", str8(b"eggs"), b"spam", 42, 1.0, None ]
for i in range(len(args)):
for wrongarg in wrongargs:
if type(wrongarg) is type(args[i]):

View File

@ -157,7 +157,7 @@ if 1:
s256 = "".join(["\n"] * 256 + ["spam"])
co = compile(s256, 'fn', 'exec')
self.assertEqual(co.co_firstlineno, 257)
self.assertEqual(co.co_lnotab, str8(''))
self.assertEqual(co.co_lnotab, str8())
def test_literals_with_leading_zeroes(self):
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",

View File

@ -88,7 +88,7 @@ class IOTest(unittest.TestCase):
self.assertEqual(f.tell(), 6)
self.assertEqual(f.seek(-1, 1), 5)
self.assertEqual(f.tell(), 5)
self.assertEqual(f.write(str8(" world\n\n\n")), 9)
self.assertEqual(f.write(str8(b" world\n\n\n")), 9)
self.assertEqual(f.seek(0), 0)
self.assertEqual(f.write(b"h"), 1)
self.assertEqual(f.seek(-1, 2), 13)

View File

@ -82,7 +82,7 @@ finally:
# Test BSD Rune locale's bug for isctype functions.
def teststrop(s, method, output):
s = str8(s)
s = str8(s, 'latin1') # XXX
if verbose:
print("%s.%s() =? %s ..." % (repr(s), method, repr(output)), end=' ')
result = getattr(s, method)()

View File

@ -101,7 +101,7 @@ s = struct.pack('ii', 1, 2)
simple_err(struct.unpack, 'iii', s)
simple_err(struct.unpack, 'i', s)
c = str8('a')
c = str8(b'a')
b = 1
h = 255
i = 65535
@ -186,7 +186,7 @@ for fmt, arg, big, lil, asy in tests:
if isinstance(arg, str):
# Strings are returned as str8 since you can't know the encoding of
# the string when packed.
arg = str8(arg)
arg = str8(arg, 'latin1')
if rev != arg and not asy:
raise TestFailed("unpack(%r, %r) -> (%r,) # expected (%r,)" % (
fmt, res, rev, arg))
@ -428,14 +428,14 @@ for args in [("bB", 1),
def test_p_code():
for code, input, expected, expectedback in [
('p','abc', '\x00', str8('')),
('1p', 'abc', '\x00', str8('')),
('2p', 'abc', '\x01a', str8('a')),
('3p', 'abc', '\x02ab', str8('ab')),
('4p', 'abc', '\x03abc', str8('abc')),
('5p', 'abc', '\x03abc\x00', str8('abc')),
('6p', 'abc', '\x03abc\x00\x00', str8('abc')),
('1000p', 'x'*1000, '\xff' + 'x'*999, str8('x'*255))]:
('p','abc', '\x00', str8()),
('1p', 'abc', '\x00', str8()),
('2p', 'abc', '\x01a', str8(b'a')),
('3p', 'abc', '\x02ab', str8(b'ab')),
('4p', 'abc', '\x03abc', str8(b'abc')),
('5p', 'abc', '\x03abc\x00', str8(b'abc')),
('6p', 'abc', '\x03abc\x00\x00', str8(b'abc')),
('1000p', 'x'*1000, '\xff' + 'x'*999, str8(b'x'*255))]:
expected = bytes(expected, "latin-1")
got = struct.pack(code, input)
if got != expected:
@ -564,20 +564,24 @@ def test_unpack_from():
if verbose:
print("test_unpack_from using", cls.__name__)
data = cls(test_string)
vereq(s.unpack_from(data), (str8('abcd'),))
vereq(s.unpack_from(data, 2), (str8('cd01'),))
vereq(s.unpack_from(data, 4), (str8('0123'),))
if not isinstance(data, (str8, bytes)):
bytes_data = str8(data, 'latin1')
else:
bytes_data = data
vereq(s.unpack_from(data), (str8(b'abcd'),))
vereq(s.unpack_from(data, 2), (str8(b'cd01'),))
vereq(s.unpack_from(data, 4), (str8(b'0123'),))
for i in range(6):
vereq(s.unpack_from(data, i), (str8(data[i:i+4]),))
vereq(s.unpack_from(data, i), (bytes_data[i:i+4],))
for i in range(6, len(test_string) + 1):
simple_err(s.unpack_from, data, i)
for cls in (str, str8, bytes): # XXX + memoryview
data = cls(test_string)
vereq(struct.unpack_from(fmt, data), (str8('abcd'),))
vereq(struct.unpack_from(fmt, data, 2), (str8('cd01'),))
vereq(struct.unpack_from(fmt, data, 4), (str8('0123'),))
vereq(struct.unpack_from(fmt, data), (str8(b'abcd'),))
vereq(struct.unpack_from(fmt, data, 2), (str8(b'cd01'),))
vereq(struct.unpack_from(fmt, data, 4), (str8(b'0123'),))
for i in range(6):
vereq(struct.unpack_from(fmt, data, i), (str8(data[i:i+4]),))
vereq(struct.unpack_from(fmt, data, i), (bytes_data[i:i+4],))
for i in range(6, len(test_string) + 1):
simple_err(struct.unpack_from, fmt, data, i)

View File

@ -300,7 +300,7 @@ class SysModuleTest(unittest.TestCase):
def test_intern(self):
self.assertRaises(TypeError, sys.intern)
s = str8("never interned before")
s = str8(b"never interned before")
self.assert_(sys.intern(s) is s)
s2 = s.swapcase().swapcase()
self.assert_(sys.intern(s2) is s)
@ -314,7 +314,7 @@ class SysModuleTest(unittest.TestCase):
def __hash__(self):
return 123
self.assertRaises(TypeError, sys.intern, S("abc"))
self.assertRaises(TypeError, sys.intern, S(b"abc"))
s = "never interned as unicode before"
self.assert_(sys.intern(s) is s)

View File

@ -203,8 +203,8 @@ class UnicodeTest(
self.assertRaises(TypeError, 'replace'.replace, "r", 42)
def test_str8_comparison(self):
self.assertEqual('abc' == str8('abc'), False)
self.assertEqual('abc' != str8('abc'), True)
self.assertEqual('abc' == str8(b'abc'), False)
self.assertEqual('abc' != str8(b'abc'), True)
def test_comparison(self):
# Comparisons:

View File

@ -176,7 +176,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
def test_east_asian_width(self):
eaw = self.db.east_asian_width
self.assertRaises(TypeError, eaw, str8('a'))
self.assertRaises(TypeError, eaw, str8(b'a'))
self.assertRaises(TypeError, eaw, '')
self.assertRaises(TypeError, eaw, 'ra')
self.assertEqual(eaw('\x1e'), 'N')

View File

@ -36,7 +36,7 @@ def getregentry():
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x78: "abc", # 1-n decoding mapping
str8("abc"): 0x0078,# 1-n encoding mapping
str8(b"abc"): 0x0078,# 1-n encoding mapping
0x01: None, # decoding mapping to <undefined>
0x79: "", # decoding mapping to <remove character>
})

View File

@ -3020,16 +3020,151 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
/* Constructor for str8: str8([object[, encoding[, errors]]]).
 *
 * Dispatch, in order:
 *   - subtypes are delegated to str_subtype_new();
 *   - no argument: returns the empty string (encoding/errors are rejected);
 *   - unicode argument: an encoding is mandatory; the text is encoded via
 *     the codec registry and the result is returned as a str8;
 *   - any other argument must not be accompanied by encoding/errors:
 *       * objects supporting the buffer interface are copied byte for byte;
 *       * everything else is iterated, each item is converted to an
 *         integer in range(0, 256) and stored as one byte.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyObject *x = NULL, *it = NULL;
    const char *encoding = NULL;
    const char *errors = NULL;
    PyObject *new = NULL;
    Py_ssize_t i, size;
    static char *kwlist[] = {"object", "encoding", "errors", 0};

    if (type != &PyString_Type)
        return str_subtype_new(type, args, kwds);
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str8", kwlist, &x,
                                     &encoding, &errors))
        return NULL;

    if (x == NULL) {
        if (encoding != NULL || errors != NULL) {
            PyErr_SetString(PyExc_TypeError,
                            "encoding or errors without sequence "
                            "argument");
            return NULL;
        }
        return PyString_FromString("");
    }

    if (PyUnicode_Check(x)) {
        /* Encode via the codec registry */
        if (encoding == NULL) {
            PyErr_SetString(PyExc_TypeError,
                            "string argument without an encoding");
            return NULL;
        }
        new = PyCodec_Encode(x, encoding, errors);
        if (new == NULL)
            return NULL;
        /* XXX(gb): must accept bytes here since codecs output bytes
           at the moment */
        if (PyBytes_Check(new)) {
            PyObject *str;
            /* Copy with an explicit length: encoded data may contain
               NUL bytes (e.g. UTF-16), which a C-string copy would
               silently truncate. */
            str = PyString_FromStringAndSize(PyBytes_AsString(new),
                                             PyBytes_Size(new));
            Py_DECREF(new);
            return str;  /* NULL on failure, exception already set */
        }
        if (!PyString_Check(new)) {
            PyErr_Format(PyExc_TypeError,
                         "encoder did not return a str8 "
                         "object (type=%.400s)",
                         Py_Type(new)->tp_name);
            Py_DECREF(new);
            return NULL;
        }
        return new;
    }

    /* If it's not unicode, there can't be encoding or errors */
    if (encoding != NULL || errors != NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding or errors without a string argument");
        return NULL;
    }

    /* Use the modern buffer interface */
    if (PyObject_CheckBuffer(x)) {
        Py_buffer view;
        if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
            return NULL;
        new = PyString_FromStringAndSize(NULL, view.len);
        if (!new)
            goto fail;
        /* XXX(brett.cannon): Better way to get to internal buffer? */
        if (PyBuffer_ToContiguous(((PyStringObject *)new)->ob_sval,
                                  &view, view.len, 'C') < 0)
            goto fail;
        PyObject_ReleaseBuffer(x, &view);
        return new;
      fail:
        Py_XDECREF(new);
        PyObject_ReleaseBuffer(x, &view);
        return NULL;
    }

    /* For the iterator version, create a string object and resize as
       needed. */
    /* XXX(gb): is 64 a good value? also, optimize this if length is known */
    size = 64;
    new = PyString_FromStringAndSize(NULL, size);
    if (new == NULL)
        return NULL;

    /* XXX Optimize this if the argument is a list or tuple */

    /* Get the iterator */
    it = PyObject_GetIter(x);
    if (it == NULL)
        goto error;

    /* Run the iterator to exhaustion */
    for (i = 0; ; i++) {
        PyObject *item;
        Py_ssize_t value;

        /* PyIter_Next() returns NULL without an exception when the
           iterator is exhausted, and NULL with one set on error. */
        item = PyIter_Next(it);
        if (item == NULL) {
            if (PyErr_Occurred())
                goto error;
            break;
        }

        /* Interpret it as an int (__index__) */
        value = PyNumber_AsSsize_t(item, PyExc_ValueError);
        Py_DECREF(item);
        if (value == -1 && PyErr_Occurred())
            goto error;

        /* Range check */
        if (value < 0 || value >= 256) {
            PyErr_SetString(PyExc_ValueError,
                            "bytes must be in range(0, 256)");
            goto error;
        }

        /* Append the byte, doubling the buffer when it is full */
        if (i >= size) {
            if (size > PY_SSIZE_T_MAX / 2) {
                PyErr_NoMemory();
                goto error;
            }
            size *= 2;
            /* On failure _PyString_Resize() releases the string and
               sets new to NULL; the error path copes with that. */
            if (_PyString_Resize(&new, size) < 0)
                goto error;
        }
        ((PyStringObject *)new)->ob_sval[i] = (char)value;
    }

    /* Trim the over-allocated buffer to the number of bytes stored */
    if (_PyString_Resize(&new, i) < 0)
        goto error;

    /* Clean up and return success */
    Py_DECREF(it);
    return new;

  error:
    /* it may be NULL (PyObject_GetIter failed) and new may be NULL
       (a failed resize already released it). */
    Py_XDECREF(it);
    Py_XDECREF(new);
    return NULL;
}
static PyObject *