Added much functionality to the bytes type.

Change file.readinto() to require binary mode.
This commit is contained in:
Guido van Rossum 2006-04-24 13:47:05 +00:00
parent e06b6b8ff5
commit d624f18a21
5 changed files with 709 additions and 198 deletions

View File

@ -21,7 +21,7 @@ extern "C" {
/* Object layout */ /* Object layout */
typedef struct { typedef struct {
PyObject_VAR_HEAD PyObject_VAR_HEAD
char *ob_sval; char *ob_bytes;
} PyBytesObject; } PyBytesObject;
/* Type object */ /* Type object */
@ -32,13 +32,14 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type) #define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
/* Direct API functions */ /* Direct API functions */
PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t); PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *); PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *); PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t); PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
/* Macros, trading safety for speed */ /* Macros, trading safety for speed */
#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval) #define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_bytes)
#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size) #define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size)
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -1,6 +1,9 @@
"""Unit tests for the bytes type.""" """Unit tests for the bytes type."""
import os
import re
import sys import sys
import tempfile
import unittest import unittest
import test.test_support import test.test_support
@ -45,7 +48,7 @@ class BytesTest(unittest.TestCase):
self.assertRaises(ValueError, bytes, [C(256)]) self.assertRaises(ValueError, bytes, [C(256)])
def test_constructor_type_errors(self): def test_constructor_type_errors(self):
self.assertRaises(TypeError, bytes, 0) self.assertRaises(TypeError, bytes, 0.0)
class C: class C:
pass pass
self.assertRaises(TypeError, bytes, ["0"]) self.assertRaises(TypeError, bytes, ["0"])
@ -100,36 +103,233 @@ class BytesTest(unittest.TestCase):
self.failUnless(bytes.__doc__ != None) self.failUnless(bytes.__doc__ != None)
self.failUnless(bytes.__doc__.startswith("bytes(")) self.failUnless(bytes.__doc__.startswith("bytes("))
# XXX More stuff to test and build (TDD): def test_buffer_api(self):
# constructor from str: bytes(<str>) == bytes(map(ord, <str>))? short_sample = "Hello world\n"
# encoding constructor: bytes(<unicode>[, <encoding>[, <errors>]]) sample = short_sample + "x"*(20 - len(short_sample))
# default encoding Latin-1? (Matching ord) tfn = tempfile.mktemp()
# slicing try:
# extended slicing? # Prepare
# item assignment with open(tfn, "wb") as f:
# slice assignment f.write(short_sample)
# extended slice assignment? # Test readinto
# __contains__ with simple int arg with open(tfn, "rb") as f:
# __contains__ with another bytes arg? b = bytes([ord('x')]*20)
# find/index? (int or bytes arg?) n = f.readinto(b)
# count? (int arg) self.assertEqual(n, len(short_sample))
# concatenation (+) self.assertEqual(list(b), map(ord, sample))
# repeat? # Test writing in binary mode
# extend? with open(tfn, "wb") as f:
# append? f.write(b)
# insert? with open(tfn, "rb") as f:
# pop? self.assertEqual(f.read(), sample)
# __reversed__? # Test writing in text mode
# reverse? (inplace) with open(tfn, "w") as f:
# NOT sort! f.write(b)
with open(tfn, "r") as f:
self.assertEqual(f.read(), sample)
# Can't use readinto in text mode
with open(tfn, "r") as f:
self.assertRaises(TypeError, f.readinto, b)
finally:
try:
os.remove(tfn)
except os.error:
pass
def test_reversed(self):
    """reversed() over a bytes object yields its items back to front."""
    values = map(ord, "Hello")
    actual = list(reversed(bytes(values)))
    # Reverse the reference list in place and compare against it.
    values.reverse()
    self.assertEqual(actual, values)
def test_getslice(self):
    """Simple slices with positive, negative and out-of-range bounds."""
    def by(s):
        return bytes(map(ord, s))

    b = by("Hello, world")
    cases = [
        (b[:5], "Hello"),
        (b[1:5], "ello"),
        (b[5:7], ", "),
        (b[7:], "world"),
        (b[7:12], "world"),
        (b[7:100], "world"),
        # Same slices again, addressed from the right-hand end.
        (b[:-7], "Hello"),
        (b[-11:-7], "ello"),
        (b[-7:-5], ", "),
        (b[-5:], "world"),
        (b[-5:12], "world"),
        (b[-5:100], "world"),
        (b[-100:5], "Hello"),
    ]
    for actual, text in cases:
        self.assertEqual(actual, by(text))
def test_regexps(self):
    """re.findall() on a bytes subject returns bytes matches."""
    def by(s):
        return bytes(map(ord, s))

    words = re.findall(r"\w+", by("Hello, world"))
    self.assertEqual(words, [by("Hello"), by("world")])
def test_setitem(self):
    """Item assignment: valid stores, then each rejected index/value."""
    class C:
        def __init__(self, i=0):
            self.i = i
        def __index__(self):
            return self.i

    b = bytes([1, 2, 3])
    b[1] = 100
    self.assertEqual(b, bytes([1, 100, 3]))
    b[-1] = 200
    self.assertEqual(b, bytes([1, 100, 200]))
    # Objects implementing __index__ are accepted as values.
    b[0] = C(10)
    self.assertEqual(b, bytes([10, 100, 200]))

    # (index, value, expected exception, failure message)
    rejected = [
        (3, 0, IndexError, "Didn't raise IndexError"),
        (-10, 0, IndexError, "Didn't raise IndexError"),
        (0, 256, ValueError, "Didn't raise ValueError"),
        (0, C(-1), ValueError, "Didn't raise ValueError"),
        (0, None, TypeError, "Didn't raise TypeError"),
    ]
    for index, value, expected, message in rejected:
        try:
            b[index] = value
            self.fail(message)
        except expected:
            pass
def test_delitem(self):
    """Deleting the first, the last and a middle item."""
    data = bytes(range(10))

    del data[0]
    self.assertEqual(data, bytes(range(1, 10)))

    del data[-1]
    self.assertEqual(data, bytes(range(1, 9)))

    del data[4]
    self.assertEqual(data, bytes([1, 2, 3, 4, 6, 7, 8]))
def test_setslice(self):
    """Slice assignment and deletion: same-size, shrinking and growing."""
    digits = list(range(10))
    b = bytes(range(10))
    self.assertEqual(list(b), digits)

    # Same-length replacement.
    b[0:5] = bytes([1, 1, 1, 1, 1])
    self.assertEqual(b, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))

    # Slice deletion.
    del b[0:-5]
    self.assertEqual(b, bytes([5, 6, 7, 8, 9]))

    # Pure insertion at the front.
    b[0:0] = bytes([0, 1, 2, 3, 4])
    self.assertEqual(b, bytes(range(10)))

    # Shrinking replacement with negative bounds.
    b[-7:-3] = bytes([100, 101])
    self.assertEqual(b, bytes([0, 1, 2, 100, 101, 7, 8, 9]))

    # Growing replacement from a plain list.
    b[3:5] = [3, 4, 5, 6]
    self.assertEqual(b, bytes(range(10)))
def test_setslice_trap(self):
    """Assigning an object to a slice of itself must not corrupt it.

    This is the old Lambert Meertens trap: the source of the assignment
    is the very buffer being resized.
    """
    b = bytes(range(256))
    b[8:] = b
    expected = bytes(list(range(8)) + list(range(256)))
    self.assertEqual(b, expected)
def test_encoding(self):
    """bytes(unicode, encoding[, errors]) encodes via the named codec."""
    sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
    for enc in ("utf8", "utf16"):
        expected = bytes(map(ord, sample.encode(enc)))
        self.assertEqual(bytes(sample, enc), expected)
    # The last four characters are outside Latin-1.
    self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1")
    lossy = bytes(sample, "latin1", "ignore")
    self.assertEqual(lossy, bytes(sample[:-4]))
def test_decode(self):
    """bytes.decode() round-trips and honours the errors argument."""
    # NOTE(review): "\def0" is not an escape sequence, so this is a literal
    # backslash followed by "def0"; "\udef0" may have been intended -- confirm.
    sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
    for enc in ("utf8", "utf16"):
        encoded = bytes(sample, enc)
        self.assertEqual(encoded.decode(enc), sample)

    sample = u"Hello world\n\x80\x81\xfe\xff"
    latin = bytes(sample, "latin1")
    self.assertRaises(UnicodeDecodeError, latin.decode, "utf8")
    self.assertEqual(latin.decode("utf8", "ignore"), "Hello world\n")
def test_from_buffer(self):
    """bytes() accepts an object exposing the read-buffer interface."""
    sample = "Hello world\n\x80\x81\xfe\xff"
    copied = bytes(buffer(sample))
    self.assertEqual(copied, bytes(map(ord, sample)))
def test_to_str(self):
    """str(bytes(s)) gives back the original 8-bit string."""
    sample = "Hello world\n\x80\x81\xfe\xff"
    self.assertEqual(str(bytes(sample)), sample)
def test_from_int(self):
    """bytes(n) builds n zero bytes; bytes(0) is empty."""
    self.assertEqual(bytes(0), bytes())
    for count in (10, 10000):
        self.assertEqual(bytes(count), bytes([0]*count))
def test_concat(self):
    """bytes + bytes concatenates; mixing with str raises TypeError."""
    left = bytes("abc")
    right = bytes("def")
    self.assertEqual(left + right, bytes("abcdef"))

    def bytes_plus_str():
        return left + "def"

    def str_plus_bytes():
        return "abc" + right

    self.assertRaises(TypeError, bytes_plus_str)
    self.assertRaises(TypeError, str_plus_bytes)
def test_repeat(self):
    """bytes * int repeats; non-positive counts give an empty object."""
    b = bytes("abc")
    self.assertEqual(b * 3, bytes("abcabcabc"))
    for count in (0, -1):
        self.assertEqual(b * count, bytes())
    # Floats are rejected on either side of the operator.
    self.assertRaises(TypeError, lambda: b * 3.14)
    self.assertRaises(TypeError, lambda: 3.14 * b)
    # A result too large to represent must fail cleanly.
    self.assertRaises(MemoryError, lambda: b * sys.maxint)
    self.assertEqual(bytes('x')*100, bytes('x'*100))
# Optimizations:
# __iter__? (optimization) # __iter__? (optimization)
# __str__? (could return "".join(map(chr, self)) # __reversed__? (optimization)
# decode
# buffer API # XXX Some list methods?
# check that regexp searches work # extended slicing
# (I suppose re.sub() returns a string) # extended slice assignment
# file.readinto # extend (same as b[len(b):] = src)
# file.write # reverse (in-place)
# remove
# pop
# NOT sort!
# With int arg:
# __contains__
# index
# count
# append
# insert
# XXX Some string methods? (Those that don't use character properties)
# startswith
# endswith
# find, rfind
# __contains__ (bytes arg)
# index, rindex (bytes arg)
# join
# replace
# translate
# split, rsplit
# lstrip, rstrip, strip??
# XXX pickle and marshal support?
def test_main(): def test_main():
@ -137,5 +337,5 @@ def test_main():
if __name__ == "__main__": if __name__ == "__main__":
##test_main() test_main()
unittest.main() ##unittest.main()

View File

@ -67,6 +67,17 @@ n = f.readinto(a)
f.close() f.close()
verify(buf == a.tostring()[:n]) verify(buf == a.tostring()[:n])
# verify readinto refuses text files
a = array('c', 'x'*10)
f = open(TESTFN, 'r')
try:
f.readinto(a)
raise TestFailed("readinto shouldn't work in text mode")
except TypeError:
pass
finally:
f.close()
# verify writelines with integers # verify writelines with integers
f = open(TESTFN, 'wb') f = open(TESTFN, 'wb')
try: try:
@ -261,13 +272,13 @@ methods = [("readline", ()), ("read", ()), ("readlines", ()),
try: try:
# Prepare the testfile # Prepare the testfile
bag = open(TESTFN, "w") bag = open(TESTFN, "wb")
bag.write(filler * nchunks) bag.write(filler * nchunks)
bag.writelines(testlines) bag.writelines(testlines)
bag.close() bag.close()
# Test for appropriate errors mixing read* and iteration # Test for appropriate errors mixing read* and iteration
for methodname, args in methods: for methodname, args in methods:
f = open(TESTFN) f = open(TESTFN, 'rb')
if f.next() != filler: if f.next() != filler:
raise TestFailed, "Broken testfile" raise TestFailed, "Broken testfile"
meth = getattr(f, methodname) meth = getattr(f, methodname)
@ -286,7 +297,7 @@ try:
# Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
# 4096 lines of that should get us exactly on the buffer boundary for # 4096 lines of that should get us exactly on the buffer boundary for
# any power-of-2 buffersize between 4 and 16384 (inclusive). # any power-of-2 buffersize between 4 and 16384 (inclusive).
f = open(TESTFN) f = open(TESTFN, 'rb')
for i in range(nchunks): for i in range(nchunks):
f.next() f.next()
testline = testlines.pop(0) testline = testlines.pop(0)
@ -328,7 +339,7 @@ try:
raise TestFailed("readlines() after next() with empty buffer " raise TestFailed("readlines() after next() with empty buffer "
"failed. Got %r, expected %r" % (line, testline)) "failed. Got %r, expected %r" % (line, testline))
# Reading after iteration hit EOF shouldn't hurt either # Reading after iteration hit EOF shouldn't hurt either
f = open(TESTFN) f = open(TESTFN, 'rb')
try: try:
for line in f: for line in f:
pass pass

View File

@ -8,27 +8,34 @@
/* Direct API functions */ /* Direct API functions */
PyObject * PyObject *
PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size) PyBytes_FromObject(PyObject *input)
{
return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
input, NULL);
}
PyObject *
PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
{ {
PyBytesObject *new; PyBytesObject *new;
if (size != 0) { assert(size >= 0);
assert(sval != NULL);
assert(size > 0);
}
new = PyObject_New(PyBytesObject, &PyBytes_Type); new = PyObject_New(PyBytesObject, &PyBytes_Type);
if (new == NULL) if (new == NULL)
return NULL; return NULL;
if (size > 0) { new->ob_size = size;
new->ob_sval = PyMem_Malloc(size); if (size == 0)
if (new->ob_sval == NULL) { new->ob_bytes = NULL;
Py_DECREF(new); else {
return NULL; new->ob_bytes = PyMem_Malloc(size);
} if (new->ob_bytes == NULL) {
memcpy(new->ob_sval, sval, size); Py_DECREF(new);
new->ob_size = size; return NULL;
}
if (bytes != NULL)
memcpy(new->ob_bytes, bytes, size);
} }
return (PyObject *)new; return (PyObject *)new;
@ -49,7 +56,7 @@ PyBytes_AsString(PyObject *self)
assert(self != NULL); assert(self != NULL);
assert(PyBytes_Check(self)); assert(PyBytes_Check(self));
return ((PyBytesObject *)self)->ob_sval; return ((PyBytesObject *)self)->ob_bytes;
} }
int int
@ -61,13 +68,13 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
assert(PyBytes_Check(self)); assert(PyBytes_Check(self));
assert(size >= 0); assert(size >= 0);
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_sval, size); sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, size);
if (sval == NULL) { if (sval == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
((PyBytesObject *)self)->ob_sval = sval; ((PyBytesObject *)self)->ob_bytes = sval;
((PyBytesObject *)self)->ob_size = size; ((PyBytesObject *)self)->ob_size = size;
return 0; return 0;
@ -81,16 +88,179 @@ bytes_length(PyBytesObject *self)
return self->ob_size; return self->ob_size;
} }
/* sq_concat: return a new bytes object holding self followed by other.
   Sets TypeError and returns NULL unless other is a bytes object; sets
   MemoryError if the combined length does not fit in Py_ssize_t. */
static PyObject *
bytes_concat(PyBytesObject *self, PyObject *other)
{
    PyBytesObject *result;
    Py_ssize_t mysize;
    Py_ssize_t othersize;

    if (!PyBytes_Check(other)) {
        PyErr_Format(PyExc_TypeError,
                     "can't concat bytes to %.100s", other->ob_type->tp_name);
        return NULL;
    }

    mysize = self->ob_size;
    othersize = ((PyBytesObject *)other)->ob_size;
    /* Check before adding: signed overflow is undefined behaviour in C,
       so testing the sum for < 0 afterwards is not reliable. */
    if (mysize > PY_SSIZE_T_MAX - othersize)
        return PyErr_NoMemory();

    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL,
                                                        mysize + othersize);
    if (result != NULL) {
        /* ob_bytes may be NULL for an empty object; skip empty copies. */
        if (mysize > 0)
            memcpy(result->ob_bytes, self->ob_bytes, mysize);
        if (othersize > 0)
            memcpy(result->ob_bytes + mysize,
                   ((PyBytesObject *)other)->ob_bytes, othersize);
    }
    return (PyObject *)result;
}
/* sq_repeat: return a new bytes object containing self repeated count
   times.  A negative count is treated as 0.  Sets MemoryError and returns
   NULL if the result length does not fit in Py_ssize_t. */
static PyObject *
bytes_repeat(PyBytesObject *self, Py_ssize_t count)
{
    PyBytesObject *result;
    Py_ssize_t mysize;
    Py_ssize_t size;

    if (count < 0)
        count = 0;
    mysize = self->ob_size;
    /* Check before multiplying: signed overflow is undefined behaviour,
       so dividing the product back afterwards is not reliable. */
    if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
        return PyErr_NoMemory();
    size = mysize * count;

    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
    if (result != NULL && size != 0) {
        if (mysize == 1)
            memset(result->ob_bytes, self->ob_bytes[0], size);
        else {
            /* Index has the same type as count so it cannot truncate
               on platforms where Py_ssize_t is wider than int. */
            Py_ssize_t i;
            for (i = 0; i < count; i++)
                memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
        }
    }
    return (PyObject *)result;
}
static PyObject * static PyObject *
bytes_getitem(PyBytesObject *self, Py_ssize_t i) bytes_getitem(PyBytesObject *self, Py_ssize_t i)
{ {
if (i < 0) if (i < 0)
i += self->ob_size; i += self->ob_size;
if (i < 0 || i >= self->ob_size) { if (i < 0 || i >= self->ob_size) {
PyErr_SetString(PyExc_IndexError, "bytes index out of range"); PyErr_SetString(PyExc_IndexError, "bytes index out of range");
return NULL; return NULL;
} }
return PyInt_FromLong((unsigned char)(self->ob_sval[i])); return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
}
/* sq_slice: return a new bytes object copying self[lo:hi].  Bounds are
   clamped to the object; an empty or reversed range gives an empty result. */
static PyObject *
bytes_getslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi)
{
    Py_ssize_t start = (lo < 0) ? 0 : lo;
    Py_ssize_t stop = (hi > self->ob_size) ? self->ob_size : hi;

    if (start >= stop)
        return PyBytes_FromStringAndSize(self->ob_bytes, 0);
    return PyBytes_FromStringAndSize(self->ob_bytes + start, stop - start);
}
/* sq_ass_slice: replace self[lo:hi] with the contents of values, or delete
   the slice when values is NULL.  values may be any object accepted by
   PyBytes_FromObject(); self itself is copied first so the source cannot
   be moved underneath us.  Returns 0 on success, -1 with an exception set
   on failure. */
static int
bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
               PyObject *values)
{
    /* Py_ssize_t, not int: sizes may exceed INT_MAX on 64-bit builds. */
    Py_ssize_t avail;
    Py_ssize_t needed;
    char *bytes;

    if (values == NULL) {
        bytes = NULL;
        needed = 0;
    }
    else if (values == (PyObject *)self || !PyBytes_Check(values)) {
        /* Make a copy and call this function recursively */
        int err;
        values = PyBytes_FromObject(values);
        if (values == NULL)
            return -1;
        err = bytes_setslice(self, lo, hi, values);
        Py_DECREF(values);
        return err;
    }
    else {
        assert(PyBytes_Check(values));
        bytes = ((PyBytesObject *)values)->ob_bytes;
        needed = ((PyBytesObject *)values)->ob_size;
    }

    /* Clamp the bounds the way list slice assignment does: a reversed
       range becomes an empty slice at lo (insertion at lo), rather than
       collapsing to position 0. */
    if (lo < 0)
        lo = 0;
    else if (lo > self->ob_size)
        lo = self->ob_size;
    if (hi < lo)
        hi = lo;
    else if (hi > self->ob_size)
        hi = self->ob_size;

    avail = hi - lo;

    if (avail != needed) {
        if (avail > needed) {
            /*
              0   lo               hi               old_size
              |   |<----avail----->|<-----tomove------>|
              |   |<-needed->|<-----tomove------>|
              0   lo      new_hi              new_size
            */
            /* Shrinking: move the tail down before the buffer is cut. */
            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
                    self->ob_size - hi);
        }
        if (PyBytes_Resize((PyObject *)self,
                           self->ob_size + needed - avail) < 0)
            return -1;
        if (avail < needed) {
            /*
              0   lo        hi               old_size
              |   |<-avail->|<-----tomove------>|
              |   |<----needed---->|<-----tomove------>|
              0   lo            new_hi              new_size
            */
            /* Growing: ob_size is already the new size here, so
               ob_size - lo - needed equals the old tail length. */
            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
                    self->ob_size - lo - needed);
        }
    }

    if (needed > 0)
        memcpy(self->ob_bytes + lo, bytes, needed);

    return 0;
}
static int
bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
{
Py_ssize_t ival;
if (i < 0)
i += self->ob_size;
if (i < 0 || i >= self->ob_size) {
PyErr_SetString(PyExc_IndexError, "bytes index out of range");
return -1;
}
if (value == NULL)
return bytes_setslice(self, i, i+1, NULL);
ival = PyNumber_Index(value);
if (ival == -1 && PyErr_Occurred())
return -1;
if (ival < 0 || ival >= 256) {
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
return -1;
}
self->ob_bytes[i] = ival;
return 0;
} }
static long static long
@ -103,69 +273,138 @@ bytes_nohash(PyObject *self)
static int static int
bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
{ {
static char *kwlist[] = {"sequence", 0}; static char *kwlist[] = {"source", "encoding", "errors", 0};
PyObject *arg = NULL; PyObject *arg = NULL;
PyObject *it; /* iter(arg) */ const char *encoding = NULL;
const char *errors = NULL;
Py_ssize_t count;
PyObject *it;
PyObject *(*iternext)(PyObject *); PyObject *(*iternext)(PyObject *);
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg)) /* Empty previous contents (yes, do this first of all!) */
return -1; if (PyBytes_Resize((PyObject *)self, 0) < 0)
return -1;
/* Verify list invariants established by PyType_GenericAlloc() */ /* Parse arguments */
if (self->ob_size != 0) { if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
assert(self->ob_sval != NULL); &arg, &encoding, &errors))
assert(self->ob_size > 0); return -1;
/* Make a quick exit if no first argument */
if (arg == NULL) {
if (encoding != NULL || errors != NULL) {
PyErr_SetString(PyExc_TypeError,
"encoding or errors without sequence argument");
return -1;
}
return 0;
} }
/* Empty previous contents */ if (PyUnicode_Check(arg)) {
if (PyBytes_Resize((PyObject *)self, 0) < 0) /* Encode via the codec registry */
return -1; PyObject *encoded;
char *bytes;
Py_ssize_t size;
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
encoded = PyCodec_Encode(arg, encoding, errors);
if (encoded == NULL)
return -1;
if (!PyString_Check(encoded)) {
PyErr_Format(PyExc_TypeError,
"encoder did not return a string object (type=%.400s)",
encoded->ob_type->tp_name);
Py_DECREF(encoded);
return -1;
}
bytes = PyString_AS_STRING(encoded);
size = PyString_GET_SIZE(encoded);
if (PyBytes_Resize((PyObject *)self, size) < 0) {
Py_DECREF(encoded);
return -1;
}
memcpy(self->ob_bytes, bytes, size);
Py_DECREF(encoded);
return 0;
}
/* Quick check if we're done */ /* If it's not unicode, there can't be encoding or errors */
if (arg == 0) if (encoding != NULL || errors != NULL) {
return 0; PyErr_SetString(PyExc_TypeError,
"encoding or errors without a string argument");
return -1;
}
/* XXX Optimize this if the arguments is a list, tuple, or bytes */ /* Is it an int? */
count = PyNumber_Index(arg);
if (count == -1 && PyErr_Occurred())
PyErr_Clear();
else {
if (count < 0) {
PyErr_SetString(PyExc_ValueError, "negative count");
return -1;
}
if (count > 0) {
if (PyBytes_Resize((PyObject *)self, count))
return -1;
memset(self->ob_bytes, 0, count);
}
return 0;
}
if (PyObject_CheckReadBuffer(arg)) {
const void *bytes;
Py_ssize_t size;
if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
return -1;
if (PyBytes_Resize((PyObject *)self, size) < 0)
return -1;
memcpy(self->ob_bytes, bytes, size);
return 0;
}
/* XXX Optimize this if the arguments is a list, tuple */
/* Get the iterator */ /* Get the iterator */
it = PyObject_GetIter(arg); it = PyObject_GetIter(arg);
if (it == NULL) if (it == NULL)
return -1; return -1;
iternext = *it->ob_type->tp_iternext; iternext = *it->ob_type->tp_iternext;
/* Run the iterator to exhaustion */ /* Run the iterator to exhaustion */
for (;;) { for (;;) {
PyObject *item; PyObject *item;
Py_ssize_t value; Py_ssize_t value;
/* Get the next item */ /* Get the next item */
item = iternext(it); item = iternext(it);
if (item == NULL) { if (item == NULL) {
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
if (!PyErr_ExceptionMatches(PyExc_StopIteration)) if (!PyErr_ExceptionMatches(PyExc_StopIteration))
goto error; goto error;
PyErr_Clear(); PyErr_Clear();
} }
break; break;
} }
/* Interpret it as an int (__index__) */ /* Interpret it as an int (__index__) */
value = PyNumber_Index(item); value = PyNumber_Index(item);
Py_DECREF(item); Py_DECREF(item);
if (value == -1 && PyErr_Occurred()) if (value == -1 && PyErr_Occurred())
goto error; goto error;
/* Range check */ /* Range check */
if (value < 0 || value >= 256) { if (value < 0 || value >= 256) {
PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)"); PyErr_SetString(PyExc_ValueError,
goto error; "bytes must be in range(0, 256)");
} goto error;
}
/* Append the byte */ /* Append the byte */
/* XXX Speed this up */ /* XXX Speed this up */
if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0) if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
goto error; goto error;
self->ob_sval[self->ob_size-1] = value; self->ob_bytes[self->ob_size-1] = value;
} }
/* Clean up and return success */ /* Clean up and return success */
@ -188,45 +427,45 @@ bytes_repr(PyBytesObject *self)
int i; int i;
if (self->ob_size == 0) if (self->ob_size == 0)
return PyString_FromString("bytes()"); return PyString_FromString("bytes()");
list = PyList_New(0); list = PyList_New(0);
if (list == NULL) if (list == NULL)
return NULL; return NULL;
str = PyString_FromString("bytes(["); str = PyString_FromString("bytes([");
if (str == NULL) if (str == NULL)
goto error; goto error;
err = PyList_Append(list, str); err = PyList_Append(list, str);
Py_DECREF(str); Py_DECREF(str);
if (err < 0) if (err < 0)
goto error; goto error;
for (i = 0; i < self->ob_size; i++) { for (i = 0; i < self->ob_size; i++) {
char buffer[20]; char buffer[20];
sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i])); sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_bytes[i]));
str = PyString_FromString((i == 0) ? buffer+2 : buffer); str = PyString_FromString((i == 0) ? buffer+2 : buffer);
if (str == NULL) if (str == NULL)
goto error; goto error;
err = PyList_Append(list, str); err = PyList_Append(list, str);
Py_DECREF(str); Py_DECREF(str);
if (err < 0) if (err < 0)
goto error; goto error;
} }
str = PyString_FromString("])"); str = PyString_FromString("])");
if (str == NULL) if (str == NULL)
goto error; goto error;
err = PyList_Append(list, str); err = PyList_Append(list, str);
Py_DECREF(str); Py_DECREF(str);
if (err < 0) if (err < 0)
goto error; goto error;
str = PyString_FromString(""); str = PyString_FromString("");
if (str == NULL) if (str == NULL)
goto error; goto error;
result = _PyString_Join(str, list); result = _PyString_Join(str, list);
Py_DECREF(str); Py_DECREF(str);
@ -239,6 +478,12 @@ bytes_repr(PyBytesObject *self)
return NULL; return NULL;
} }
/* tp_str: return a new str object holding a copy of the raw bytes. */
static PyObject *
bytes_str(PyBytesObject *self)
{
    const char *data = self->ob_bytes;
    Py_ssize_t length = self->ob_size;

    return PyString_FromStringAndSize(data, length);
}
static PyObject * static PyObject *
bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op) bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
{ {
@ -247,37 +492,37 @@ bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
int cmp; int cmp;
if (!PyBytes_Check(self) || !PyBytes_Check(other)) { if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
Py_INCREF(Py_NotImplemented); Py_INCREF(Py_NotImplemented);
return Py_NotImplemented; return Py_NotImplemented;
} }
if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) { if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
/* Shortcut: if the lengths differ, the objects differ */ /* Shortcut: if the lengths differ, the objects differ */
cmp = (op == Py_NE); cmp = (op == Py_NE);
} }
else { else {
minsize = self->ob_size; minsize = self->ob_size;
if (other->ob_size < minsize) if (other->ob_size < minsize)
minsize = other->ob_size; minsize = other->ob_size;
cmp = memcmp(self->ob_sval, other->ob_sval, minsize); cmp = memcmp(self->ob_bytes, other->ob_bytes, minsize);
/* In ISO C, memcmp() guarantees to use unsigned bytes! */ /* In ISO C, memcmp() guarantees to use unsigned bytes! */
if (cmp == 0) { if (cmp == 0) {
if (self->ob_size < other->ob_size) if (self->ob_size < other->ob_size)
cmp = -1; cmp = -1;
else if (self->ob_size > other->ob_size) else if (self->ob_size > other->ob_size)
cmp = 1; cmp = 1;
} }
switch (op) { switch (op) {
case Py_LT: cmp = cmp < 0; break; case Py_LT: cmp = cmp < 0; break;
case Py_LE: cmp = cmp <= 0; break; case Py_LE: cmp = cmp <= 0; break;
case Py_EQ: cmp = cmp == 0; break; case Py_EQ: cmp = cmp == 0; break;
case Py_NE: cmp = cmp != 0; break; case Py_NE: cmp = cmp != 0; break;
case Py_GT: cmp = cmp > 0; break; case Py_GT: cmp = cmp > 0; break;
case Py_GE: cmp = cmp >= 0; break; case Py_GE: cmp = cmp >= 0; break;
} }
} }
res = cmp ? Py_True : Py_False; res = cmp ? Py_True : Py_False;
@ -288,41 +533,89 @@ bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
static void static void
bytes_dealloc(PyBytesObject *self) bytes_dealloc(PyBytesObject *self)
{ {
if (self->ob_sval != 0) { if (self->ob_bytes != 0) {
PyMem_Free(self->ob_sval); PyMem_Free(self->ob_bytes);
} }
self->ob_type->tp_free((PyObject *)self); self->ob_type->tp_free((PyObject *)self);
} }
/* Buffer-interface accessor: store a pointer to the byte data in *ptr and
   return its length.  Only segment 0 exists; any other index sets
   SystemError and returns -1. */
static Py_ssize_t
bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
{
    if (index != 0) {
        /* Message names the bytes type; the original wording referred
           to a "string segment". */
        PyErr_SetString(PyExc_SystemError,
                        "accessing non-existent bytes segment");
        return -1;
    }
    *ptr = (void *)self->ob_bytes;
    return self->ob_size;
}
static Py_ssize_t
bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
{
if (lenp)
*lenp = self->ob_size;
return 1;
}
/* Docstring for bytes.decode(). */
PyDoc_STRVAR(decode_doc,
"B.decode([encoding[,errors]]) -> unicode object.\n\
\n\
Decodes B using the codec registered for encoding.  encoding defaults\n\
to the default encoding.  errors may be given to set a different error\n\
handling scheme.  Default is 'strict' meaning that encoding errors raise\n\
a UnicodeDecodeError.  Other possible values are 'ignore' and 'replace'\n\
as well as any other name registered with codecs.register_error that is\n\
able to handle UnicodeDecodeErrors.");
/* bytes.decode([encoding[, errors]]): decode self through the codec
   registry.  A missing encoding falls back to the default encoding.
   Returns a new reference, or NULL with an exception set. */
static PyObject *
bytes_decode(PyObject *self, PyObject *args)
{
    const char *encoding = NULL;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
        return NULL;
    return PyCodec_Decode(self,
                          (encoding != NULL)
                              ? encoding
                              : PyUnicode_GetDefaultEncoding(),
                          errors);
}
static PySequenceMethods bytes_as_sequence = { static PySequenceMethods bytes_as_sequence = {
(lenfunc)bytes_length, /*sq_length*/ (lenfunc)bytes_length, /*sq_length*/
(binaryfunc)0, /*sq_concat*/ (binaryfunc)bytes_concat, /*sq_concat*/
(ssizeargfunc)0, /*sq_repeat*/ (ssizeargfunc)bytes_repeat, /*sq_repeat*/
(ssizeargfunc)bytes_getitem, /*sq_item*/ (ssizeargfunc)bytes_getitem, /*sq_item*/
(ssizessizeargfunc)0, /*sq_slice*/ (ssizessizeargfunc)bytes_getslice, /*sq_slice*/
0, /*sq_ass_item*/ (ssizeobjargproc)bytes_setitem, /*sq_ass_item*/
0, /*sq_ass_slice*/ (ssizessizeobjargproc)bytes_setslice, /* sq_ass_slice */
(objobjproc)0, /*sq_contains*/ #if 0
(objobjproc)bytes_contains, /* sq_contains */
(binaryfunc)bytes_inplace_concat, /* sq_inplace_concat */
(ssizeargfunc)bytes_inplace_repeat, /* sq_inplace_repeat */
#endif
}; };
static PyMappingMethods bytes_as_mapping = { static PyMappingMethods bytes_as_mapping = {
(lenfunc)bytes_length, (lenfunc)bytes_length,
(binaryfunc)0, (binaryfunc)0,
0, 0,
}; };
static PyBufferProcs bytes_as_buffer = { static PyBufferProcs bytes_as_buffer = {
/* (readbufferproc)bytes_getbuffer,
(readbufferproc)bytes_buffer_getreadbuf, (writebufferproc)bytes_getbuffer,
(writebufferproc)bytes_buffer_getwritebuf, (segcountproc)bytes_getsegcount,
(segcountproc)bytes_buffer_getsegcount, /* XXX Bytes are not characters! But we need to implement
(charbufferproc)bytes_buffer_getcharbuf, bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
*/ (charbufferproc)bytes_getbuffer,
}; };
static PyMethodDef static PyMethodDef
bytes_methods[] = { bytes_methods[] = {
{NULL, NULL} {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
{NULL, NULL}
}; };
PyDoc_STRVAR(bytes_doc, PyDoc_STRVAR(bytes_doc,
@ -336,39 +629,40 @@ PyTypeObject PyBytes_Type = {
"bytes", "bytes",
sizeof(PyBytesObject), sizeof(PyBytesObject),
0, 0,
(destructor)bytes_dealloc, /* tp_dealloc */ (destructor)bytes_dealloc, /* tp_dealloc */
0, /* tp_print */ 0, /* tp_print */
0, /* tp_getattr */ 0, /* tp_getattr */
0, /* tp_setattr */ 0, /* tp_setattr */
0, /* tp_compare */ 0, /* tp_compare */
(reprfunc)bytes_repr, /* tp_repr */ (reprfunc)bytes_repr, /* tp_repr */
0, /* tp_as_number */ 0, /* tp_as_number */
&bytes_as_sequence, /* tp_as_sequence */ &bytes_as_sequence, /* tp_as_sequence */
&bytes_as_mapping, /* tp_as_mapping */ &bytes_as_mapping, /* tp_as_mapping */
bytes_nohash, /* tp_hash */ bytes_nohash, /* tp_hash */
0, /* tp_call */ 0, /* tp_call */
0, /* tp_str */ (reprfunc)bytes_str, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */ PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */ 0, /* tp_setattro */
&bytes_as_buffer, /* tp_as_buffer */ &bytes_as_buffer, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* bytes is 'final' or 'sealed' */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */
bytes_doc, /* tp_doc */ /* bytes is 'final' or 'sealed' */
0, /* tp_traverse */ bytes_doc, /* tp_doc */
0, /* tp_clear */ 0, /* tp_traverse */
(richcmpfunc)bytes_richcompare, /* tp_richcompare */ 0, /* tp_clear */
0, /* tp_weaklistoffset */ (richcmpfunc)bytes_richcompare, /* tp_richcompare */
0, /* tp_iter */ 0, /* tp_weaklistoffset */
0, /* tp_iternext */ 0, /* tp_iter */
bytes_methods, /* tp_methods */ 0, /* tp_iternext */
0, /* tp_members */ bytes_methods, /* tp_methods */
0, /* tp_getset */ 0, /* tp_members */
0, /* tp_base */ 0, /* tp_getset */
0, /* tp_dict */ 0, /* tp_base */
0, /* tp_descr_get */ 0, /* tp_dict */
0, /* tp_descr_set */ 0, /* tp_descr_get */
0, /* tp_dictoffset */ 0, /* tp_descr_set */
(initproc)bytes_init, /* tp_init */ 0, /* tp_dictoffset */
PyType_GenericAlloc, /* tp_alloc */ (initproc)bytes_init, /* tp_init */
PyType_GenericNew, /* tp_new */ PyType_GenericAlloc, /* tp_alloc */
PyObject_Del, /* tp_free */ PyType_GenericNew, /* tp_new */
PyObject_Del, /* tp_free */
}; };

View File

@ -880,6 +880,11 @@ file_readinto(PyFileObject *f, PyObject *args)
if (f->f_fp == NULL) if (f->f_fp == NULL)
return err_closed(); return err_closed();
if (!f->f_binary) {
PyErr_SetString(PyExc_TypeError,
"readinto() requires binary mode");
return NULL;
}
/* refuse to mix with f.next() */ /* refuse to mix with f.next() */
if (f->f_buf != NULL && if (f->f_buf != NULL &&
(f->f_bufend - f->f_bufptr) > 0 && (f->f_bufend - f->f_bufptr) > 0 &&