Added much functionality to the bytes type.
Change file.readinto() to require binary mode.
This commit is contained in:
parent
e06b6b8ff5
commit
d624f18a21
|
@ -21,7 +21,7 @@ extern "C" {
|
|||
/* Object layout */
|
||||
typedef struct {
|
||||
PyObject_VAR_HEAD
|
||||
char *ob_sval;
|
||||
char *ob_bytes;
|
||||
} PyBytesObject;
|
||||
|
||||
/* Type object */
|
||||
|
@ -32,13 +32,14 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
|
|||
#define PyBytes_CheckExact(self) ((self)->ob_type == &PyBytes_Type)
|
||||
|
||||
/* Direct API functions */
|
||||
PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
|
||||
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
|
||||
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
|
||||
PyAPI_FUNC(int) PyBytes_Resize(PyObject *, Py_ssize_t);
|
||||
|
||||
/* Macros, trading safety for speed */
|
||||
#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_sval)
|
||||
#define PyBytes_AS_STRING(self) (((PyBytesObject *)(self))->ob_bytes)
|
||||
#define PyBytes_GET_SIZE(self) (((PyBytesObject *)(self))->ob_size)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
"""Unit tests for the bytes type."""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
import test.test_support
|
||||
|
||||
|
@ -45,7 +48,7 @@ class BytesTest(unittest.TestCase):
|
|||
self.assertRaises(ValueError, bytes, [C(256)])
|
||||
|
||||
def test_constructor_type_errors(self):
|
||||
self.assertRaises(TypeError, bytes, 0)
|
||||
self.assertRaises(TypeError, bytes, 0.0)
|
||||
class C:
|
||||
pass
|
||||
self.assertRaises(TypeError, bytes, ["0"])
|
||||
|
@ -100,36 +103,233 @@ class BytesTest(unittest.TestCase):
|
|||
self.failUnless(bytes.__doc__ != None)
|
||||
self.failUnless(bytes.__doc__.startswith("bytes("))
|
||||
|
||||
# XXX More stuff to test and build (TDD):
|
||||
# constructor from str: bytes(<str>) == bytes(map(ord, <str>))?
|
||||
# encoding constructor: bytes(<unicode>[, <encoding>[, <errors>]])
|
||||
# default encoding Latin-1? (Matching ord)
|
||||
# slicing
|
||||
# extended slicing?
|
||||
# item assignment
|
||||
# slice assignment
|
||||
# extended slice assignment?
|
||||
# __contains__ with simple int arg
|
||||
# __contains__ with another bytes arg?
|
||||
# find/index? (int or bytes arg?)
|
||||
# count? (int arg)
|
||||
# concatenation (+)
|
||||
# repeat?
|
||||
# extend?
|
||||
# append?
|
||||
# insert?
|
||||
# pop?
|
||||
# __reversed__?
|
||||
# reverse? (inplace)
|
||||
# NOT sort!
|
||||
def test_buffer_api(self):
    """Exercise bytes through the file buffer API (readinto and write)."""
    short_sample = "Hello world\n"
    # Pad to exactly 20 characters so the tail of the buffer is observable.
    sample = short_sample + "x"*(20 - len(short_sample))
    # mkstemp() creates the file atomically; mktemp() only hands back a
    # name that another process could claim before we open it.
    fd, tfn = tempfile.mkstemp()
    os.close(fd)
    try:
        # Prepare
        with open(tfn, "wb") as f:
            f.write(short_sample)
        # Test readinto: only the first len(short_sample) bytes of the
        # pre-filled buffer are overwritten.
        with open(tfn, "rb") as f:
            b = bytes([ord('x')]*20)
            n = f.readinto(b)
        self.assertEqual(n, len(short_sample))
        self.assertEqual(list(b), map(ord, sample))
        # Test writing in binary mode
        with open(tfn, "wb") as f:
            f.write(b)
        with open(tfn, "rb") as f:
            self.assertEqual(f.read(), sample)
        # Test writing in text mode
        with open(tfn, "w") as f:
            f.write(b)
        with open(tfn, "r") as f:
            self.assertEqual(f.read(), sample)
        # Can't use readinto in text mode
        with open(tfn, "r") as f:
            self.assertRaises(TypeError, f.readinto, b)
    finally:
        # Best-effort cleanup: a file that is already gone is not an error.
        try:
            os.remove(tfn)
        except os.error:
            pass
|
||||
|
||||
def test_reversed(self):
    """reversed() over a bytes object yields its items last to first."""
    expected = map(ord, "Hello")
    b = bytes(expected)
    actual = list(reversed(b))
    # Reverse the reference list in place only after b was built from it.
    expected.reverse()
    self.assertEqual(actual, expected)
|
||||
|
||||
def test_getslice(self):
    """Simple slices return new bytes; out-of-range bounds are clipped."""
    def as_bytes(text):
        return bytes(map(ord, text))

    b = as_bytes("Hello, world")
    hello = as_bytes("Hello")
    ello = as_bytes("ello")
    separator = as_bytes(", ")
    world = as_bytes("world")

    # Non-negative bounds, including one past the end.
    self.assertEqual(b[:5], hello)
    self.assertEqual(b[1:5], ello)
    self.assertEqual(b[5:7], separator)
    self.assertEqual(b[7:], world)
    self.assertEqual(b[7:12], world)
    self.assertEqual(b[7:100], world)

    # Negative bounds, including one before the start.
    self.assertEqual(b[:-7], hello)
    self.assertEqual(b[-11:-7], ello)
    self.assertEqual(b[-7:-5], separator)
    self.assertEqual(b[-5:], world)
    self.assertEqual(b[-5:12], world)
    self.assertEqual(b[-5:100], world)
    self.assertEqual(b[-100:5], hello)
|
||||
|
||||
def test_regexps(self):
    """re.findall() accepts a bytes subject and returns bytes matches."""
    def as_bytes(text):
        return bytes(map(ord, text))

    words = re.findall(r"\w+", as_bytes("Hello, world"))
    self.assertEqual(words, [as_bytes("Hello"), as_bytes("world")])
|
||||
|
||||
def test_setitem(self):
    """Item assignment accepts in-range ints and __index__ objects only."""
    class C:
        def __init__(self, i=0):
            self.i = i
        def __index__(self):
            return self.i

    b = bytes([1, 2, 3])
    b[1] = 100
    self.assertEqual(b, bytes([1, 100, 3]))
    b[-1] = 200
    self.assertEqual(b, bytes([1, 100, 200]))
    b[0] = C(10)
    self.assertEqual(b, bytes([10, 100, 200]))

    # (index, value, expected exception), checked in the original order.
    rejected = [
        (3, 0, IndexError),
        (-10, 0, IndexError),
        (0, 256, ValueError),
        (0, C(-1), ValueError),
        (0, None, TypeError),
    ]
    for index, value, exc in rejected:
        try:
            b[index] = value
        except exc:
            pass
        else:
            self.fail("Didn't raise %s" % exc.__name__)
|
||||
|
||||
def test_delitem(self):
    """del b[i] removes one item at the front, the back and the middle."""
    b = bytes(range(10))
    steps = [
        (0, range(1, 10)),
        (-1, range(1, 9)),
        (4, [1, 2, 3, 4, 6, 7, 8]),
    ]
    for index, remaining in steps:
        del b[index]
        self.assertEqual(b, bytes(remaining))
|
||||
|
||||
def test_setslice(self):
    """Slice assignment and deletion: same-size, shrinking and growing."""
    buf = bytes(range(10))
    self.assertEqual(list(buf), list(range(10)))

    # Same-length replacement.
    buf[0:5] = bytes([1, 1, 1, 1, 1])
    self.assertEqual(buf, bytes([1, 1, 1, 1, 1, 5, 6, 7, 8, 9]))

    # Slice deletion.
    del buf[0:-5]
    self.assertEqual(buf, bytes([5, 6, 7, 8, 9]))

    # Pure insertion at the front.
    buf[0:0] = bytes([0, 1, 2, 3, 4])
    self.assertEqual(buf, bytes(range(10)))

    # Replacement shorter than the slice it overwrites.
    buf[-7:-3] = bytes([100, 101])
    self.assertEqual(buf, bytes([0, 1, 2, 100, 101, 7, 8, 9]))

    # Replacement longer than the slice, taken from a plain list.
    buf[3:5] = [3, 4, 5, 6]
    self.assertEqual(buf, bytes(range(10)))
|
||||
|
||||
def test_setslice_trap(self):
    """Assigning an object to a slice of itself must use a snapshot."""
    # This is the old Lambert Meertens trap: the source of the assignment
    # is the very object being resized.
    buf = bytes(range(256))
    buf[8:] = buf
    expected = bytes(list(range(8)) + list(range(256)))
    self.assertEqual(buf, expected)
|
||||
|
||||
def test_encoding(self):
    """bytes(unicode, encoding[, errors]) encodes through the codec."""
    sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
    for codec in ("utf8", "utf16"):
        encoded = bytes(sample, codec)
        self.assertEqual(encoded, bytes(map(ord, sample.encode(codec))))
    # Characters outside Latin-1 are an error unless an errors policy is given.
    self.assertRaises(UnicodeEncodeError, bytes, sample, "latin1")
    lossy = bytes(sample, "latin1", "ignore")
    # The last four characters are the non-Latin-1 ones that get dropped.
    self.assertEqual(lossy, bytes(sample[:-4]))
|
||||
|
||||
def test_decode(self):
    """bytes.decode() round-trips encoded text and honours the errors policy."""
    # NOTE(review): "\def0" is not a \u escape; "\udef0" may have been
    # intended. The literal is left exactly as written.
    text = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
    for codec in ("utf8", "utf16"):
        encoded = bytes(text, codec)
        self.assertEqual(encoded.decode(codec), text)

    # The high Latin-1 bytes below do not form valid UTF-8.
    text = u"Hello world\n\x80\x81\xfe\xff"
    encoded = bytes(text, "latin1")
    self.assertRaises(UnicodeDecodeError, encoded.decode, "utf8")
    self.assertEqual(encoded.decode("utf8", "ignore"), "Hello world\n")
|
||||
|
||||
def test_from_buffer(self):
    """bytes() copies the contents of an object exposing the buffer API."""
    sample = "Hello world\n\x80\x81\xfe\xff"
    copied = bytes(buffer(sample))
    self.assertEqual(copied, bytes(map(ord, sample)))
|
||||
|
||||
def test_to_str(self):
    """str(bytes) gives back the raw 8-bit string, high bytes included."""
    sample = "Hello world\n\x80\x81\xfe\xff"
    self.assertEqual(str(bytes(sample)), sample)
|
||||
|
||||
def test_from_int(self):
    """bytes(n) builds n zero bytes; bytes(0) is empty."""
    self.assertEqual(bytes(0), bytes())
    for count in (10, 10000):
        self.assertEqual(bytes(count), bytes([0]*count))
|
||||
|
||||
def test_concat(self):
    """bytes + bytes concatenates; mixing with str on either side fails."""
    left = bytes("abc")
    right = bytes("def")
    self.assertEqual(left + right, bytes("abcdef"))
    self.assertRaises(TypeError, lambda: left + "def")
    self.assertRaises(TypeError, lambda: "abc" + right)
|
||||
|
||||
def test_repeat(self):
    """bytes * n repeats; n <= 0 is empty; non-int and huge n are errors."""
    abc = bytes("abc")
    self.assertEqual(abc * 3, bytes("abcabcabc"))
    for count in (0, -1):
        self.assertEqual(abc * count, bytes())
    self.assertRaises(TypeError, lambda: abc * 3.14)
    self.assertRaises(TypeError, lambda: 3.14 * abc)
    self.assertRaises(MemoryError, lambda: abc * sys.maxint)
    # Single-byte source.
    self.assertEqual(bytes('x')*100, bytes('x'*100))
|
||||
|
||||
# Optimizations:
|
||||
# __iter__? (optimization)
|
||||
# __str__? (could return "".join(map(chr, self)))
|
||||
# decode
|
||||
# buffer API
|
||||
# check that regexp searches work
|
||||
# (I suppose re.sub() returns a string)
|
||||
# file.readinto
|
||||
# file.write
|
||||
# __reversed__? (optimization)
|
||||
|
||||
# XXX Some list methods?
|
||||
# extended slicing
|
||||
# extended slice assignment
|
||||
# extend (same as b[len(b):] = src)
|
||||
# reverse (in-place)
|
||||
# remove
|
||||
# pop
|
||||
# NOT sort!
|
||||
# With int arg:
|
||||
# __contains__
|
||||
# index
|
||||
# count
|
||||
# append
|
||||
# insert
|
||||
|
||||
# XXX Some string methods? (Those that don't use character properties)
|
||||
# startswith
|
||||
# endswith
|
||||
# find, rfind
|
||||
# __contains__ (bytes arg)
|
||||
# index, rindex (bytes arg)
|
||||
# join
|
||||
# replace
|
||||
# translate
|
||||
# split, rsplit
|
||||
# lstrip, rstrip, strip??
|
||||
|
||||
# XXX pickle and marshal support?
|
||||
|
||||
|
||||
def test_main():
|
||||
|
@ -137,5 +337,5 @@ def test_main():
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
##test_main()
|
||||
unittest.main()
|
||||
test_main()
|
||||
##unittest.main()
|
||||
|
|
|
@ -67,6 +67,17 @@ n = f.readinto(a)
|
|||
f.close()
|
||||
verify(buf == a.tostring()[:n])
|
||||
|
||||
# verify readinto refuses text files
|
||||
a = array('c', 'x'*10)
|
||||
f = open(TESTFN, 'r')
|
||||
try:
|
||||
f.readinto(a)
|
||||
raise TestFailed("readinto shouldn't work in text mode")
|
||||
except TypeError:
|
||||
pass
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
# verify writelines with integers
|
||||
f = open(TESTFN, 'wb')
|
||||
try:
|
||||
|
@ -261,13 +272,13 @@ methods = [("readline", ()), ("read", ()), ("readlines", ()),
|
|||
|
||||
try:
|
||||
# Prepare the testfile
|
||||
bag = open(TESTFN, "w")
|
||||
bag = open(TESTFN, "wb")
|
||||
bag.write(filler * nchunks)
|
||||
bag.writelines(testlines)
|
||||
bag.close()
|
||||
# Test for appropriate errors mixing read* and iteration
|
||||
for methodname, args in methods:
|
||||
f = open(TESTFN)
|
||||
f = open(TESTFN, 'rb')
|
||||
if f.next() != filler:
|
||||
raise TestFailed, "Broken testfile"
|
||||
meth = getattr(f, methodname)
|
||||
|
@ -286,7 +297,7 @@ try:
|
|||
# Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so
|
||||
# 4096 lines of that should get us exactly on the buffer boundary for
|
||||
# any power-of-2 buffersize between 4 and 16384 (inclusive).
|
||||
f = open(TESTFN)
|
||||
f = open(TESTFN, 'rb')
|
||||
for i in range(nchunks):
|
||||
f.next()
|
||||
testline = testlines.pop(0)
|
||||
|
@ -328,7 +339,7 @@ try:
|
|||
raise TestFailed("readlines() after next() with empty buffer "
|
||||
"failed. Got %r, expected %r" % (line, testline))
|
||||
# Reading after iteration hit EOF shouldn't hurt either
|
||||
f = open(TESTFN)
|
||||
f = open(TESTFN, 'rb')
|
||||
try:
|
||||
for line in f:
|
||||
pass
|
||||
|
|
|
@ -8,27 +8,34 @@
|
|||
/* Direct API functions */
|
||||
|
||||
PyObject *
|
||||
PyBytes_FromStringAndSize(const char *sval, Py_ssize_t size)
|
||||
PyBytes_FromObject(PyObject *input)
|
||||
{
|
||||
return PyObject_CallFunctionObjArgs((PyObject *)&PyBytes_Type,
|
||||
input, NULL);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
|
||||
{
|
||||
PyBytesObject *new;
|
||||
|
||||
if (size != 0) {
|
||||
assert(sval != NULL);
|
||||
assert(size > 0);
|
||||
}
|
||||
assert(size >= 0);
|
||||
|
||||
new = PyObject_New(PyBytesObject, &PyBytes_Type);
|
||||
if (new == NULL)
|
||||
return NULL;
|
||||
return NULL;
|
||||
|
||||
if (size > 0) {
|
||||
new->ob_sval = PyMem_Malloc(size);
|
||||
if (new->ob_sval == NULL) {
|
||||
Py_DECREF(new);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(new->ob_sval, sval, size);
|
||||
new->ob_size = size;
|
||||
new->ob_size = size;
|
||||
if (size == 0)
|
||||
new->ob_bytes = NULL;
|
||||
else {
|
||||
new->ob_bytes = PyMem_Malloc(size);
|
||||
if (new->ob_bytes == NULL) {
|
||||
Py_DECREF(new);
|
||||
return NULL;
|
||||
}
|
||||
if (bytes != NULL)
|
||||
memcpy(new->ob_bytes, bytes, size);
|
||||
}
|
||||
|
||||
return (PyObject *)new;
|
||||
|
@ -49,7 +56,7 @@ PyBytes_AsString(PyObject *self)
|
|||
assert(self != NULL);
|
||||
assert(PyBytes_Check(self));
|
||||
|
||||
return ((PyBytesObject *)self)->ob_sval;
|
||||
return ((PyBytesObject *)self)->ob_bytes;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -61,13 +68,13 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
|
|||
assert(PyBytes_Check(self));
|
||||
assert(size >= 0);
|
||||
|
||||
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_sval, size);
|
||||
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, size);
|
||||
if (sval == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
((PyBytesObject *)self)->ob_sval = sval;
|
||||
((PyBytesObject *)self)->ob_bytes = sval;
|
||||
((PyBytesObject *)self)->ob_size = size;
|
||||
|
||||
return 0;
|
||||
|
@ -81,16 +88,179 @@ bytes_length(PyBytesObject *self)
|
|||
return self->ob_size;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
bytes_concat(PyBytesObject *self, PyObject *other)
|
||||
{
|
||||
PyBytesObject *result;
|
||||
Py_ssize_t mysize;
|
||||
Py_ssize_t size;
|
||||
|
||||
if (!PyBytes_Check(other)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"can't concat bytes to %.100s", other->ob_type->tp_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mysize = self->ob_size;
|
||||
size = mysize + ((PyBytesObject *)other)->ob_size;
|
||||
if (size < 0)
|
||||
return PyErr_NoMemory();
|
||||
result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
|
||||
if (result != NULL) {
|
||||
memcpy(result->ob_bytes, self->ob_bytes, self->ob_size);
|
||||
memcpy(result->ob_bytes + self->ob_size,
|
||||
((PyBytesObject *)other)->ob_bytes,
|
||||
((PyBytesObject *)other)->ob_size);
|
||||
}
|
||||
return (PyObject *)result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
bytes_repeat(PyBytesObject *self, Py_ssize_t count)
|
||||
{
|
||||
PyBytesObject *result;
|
||||
Py_ssize_t mysize;
|
||||
Py_ssize_t size;
|
||||
|
||||
if (count < 0)
|
||||
count = 0;
|
||||
mysize = self->ob_size;
|
||||
size = mysize * count;
|
||||
if (count != 0 && size / count != mysize)
|
||||
return PyErr_NoMemory();
|
||||
result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
|
||||
if (result != NULL && size != 0) {
|
||||
if (mysize == 1)
|
||||
memset(result->ob_bytes, self->ob_bytes[0], size);
|
||||
else {
|
||||
int i;
|
||||
for (i = 0; i < count; i++)
|
||||
memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
|
||||
}
|
||||
}
|
||||
return (PyObject *)result;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
bytes_getitem(PyBytesObject *self, Py_ssize_t i)
|
||||
{
|
||||
if (i < 0)
|
||||
i += self->ob_size;
|
||||
i += self->ob_size;
|
||||
if (i < 0 || i >= self->ob_size) {
|
||||
PyErr_SetString(PyExc_IndexError, "bytes index out of range");
|
||||
return NULL;
|
||||
PyErr_SetString(PyExc_IndexError, "bytes index out of range");
|
||||
return NULL;
|
||||
}
|
||||
return PyInt_FromLong((unsigned char)(self->ob_sval[i]));
|
||||
return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
bytes_getslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi)
|
||||
{
|
||||
if (lo < 0)
|
||||
lo = 0;
|
||||
if (hi > self->ob_size)
|
||||
hi = self->ob_size;
|
||||
if (lo >= hi)
|
||||
lo = hi = 0;
|
||||
return PyBytes_FromStringAndSize(self->ob_bytes + lo, hi - lo);
|
||||
}
|
||||
|
||||
static int
|
||||
bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
|
||||
PyObject *values)
|
||||
{
|
||||
int avail;
|
||||
int needed;
|
||||
char *bytes;
|
||||
|
||||
if (values == NULL) {
|
||||
bytes = NULL;
|
||||
needed = 0;
|
||||
}
|
||||
else if (values == (PyObject *)self || !PyBytes_Check(values)) {
|
||||
/* Make a copy an call this function recursively */
|
||||
int err;
|
||||
values = PyBytes_FromObject(values);
|
||||
if (values == NULL)
|
||||
return -1;
|
||||
err = bytes_setslice(self, lo, hi, values);
|
||||
Py_DECREF(values);
|
||||
return err;
|
||||
}
|
||||
else {
|
||||
assert(PyBytes_Check(values));
|
||||
bytes = ((PyBytesObject *)values)->ob_bytes;
|
||||
needed = ((PyBytesObject *)values)->ob_size;
|
||||
}
|
||||
|
||||
if (lo < 0)
|
||||
lo = 0;
|
||||
if (hi > self->ob_size)
|
||||
hi = self->ob_size;
|
||||
|
||||
avail = hi - lo;
|
||||
if (avail < 0)
|
||||
lo = hi = avail = 0;
|
||||
|
||||
if (avail != needed) {
|
||||
if (avail > needed) {
|
||||
/*
|
||||
0 lo hi old_size
|
||||
| |<----avail----->|<-----tomove------>|
|
||||
| |<-needed->|<-----tomove------>|
|
||||
0 lo new_hi new_size
|
||||
*/
|
||||
memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
|
||||
self->ob_size - hi);
|
||||
}
|
||||
if (PyBytes_Resize((PyObject *)self,
|
||||
self->ob_size + needed - avail) < 0)
|
||||
return -1;
|
||||
if (avail < needed) {
|
||||
/*
|
||||
0 lo hi old_size
|
||||
| |<-avail->|<-----tomove------>|
|
||||
| |<----needed---->|<-----tomove------>|
|
||||
0 lo new_hi new_size
|
||||
*/
|
||||
memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
|
||||
self->ob_size - lo - needed);
|
||||
}
|
||||
}
|
||||
|
||||
if (needed > 0)
|
||||
memcpy(self->ob_bytes + lo, bytes, needed);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bytes_setitem(PyBytesObject *self, Py_ssize_t i, PyObject *value)
|
||||
{
|
||||
Py_ssize_t ival;
|
||||
|
||||
if (i < 0)
|
||||
i += self->ob_size;
|
||||
|
||||
if (i < 0 || i >= self->ob_size) {
|
||||
PyErr_SetString(PyExc_IndexError, "bytes index out of range");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (value == NULL)
|
||||
return bytes_setslice(self, i, i+1, NULL);
|
||||
|
||||
ival = PyNumber_Index(value);
|
||||
if (ival == -1 && PyErr_Occurred())
|
||||
return -1;
|
||||
|
||||
if (ival < 0 || ival >= 256) {
|
||||
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
self->ob_bytes[i] = ival;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long
|
||||
|
@ -103,69 +273,138 @@ bytes_nohash(PyObject *self)
|
|||
static int
|
||||
bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *kwlist[] = {"sequence", 0};
|
||||
static char *kwlist[] = {"source", "encoding", "errors", 0};
|
||||
PyObject *arg = NULL;
|
||||
PyObject *it; /* iter(arg) */
|
||||
const char *encoding = NULL;
|
||||
const char *errors = NULL;
|
||||
Py_ssize_t count;
|
||||
PyObject *it;
|
||||
PyObject *(*iternext)(PyObject *);
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bytes", kwlist, &arg))
|
||||
return -1;
|
||||
/* Empty previous contents (yes, do this first of all!) */
|
||||
if (PyBytes_Resize((PyObject *)self, 0) < 0)
|
||||
return -1;
|
||||
|
||||
/* Verify list invariants established by PyType_GenericAlloc() */
|
||||
if (self->ob_size != 0) {
|
||||
assert(self->ob_sval != NULL);
|
||||
assert(self->ob_size > 0);
|
||||
/* Parse arguments */
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
|
||||
&arg, &encoding, &errors))
|
||||
return -1;
|
||||
|
||||
/* Make a quick exit if no first argument */
|
||||
if (arg == NULL) {
|
||||
if (encoding != NULL || errors != NULL) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"encoding or errors without sequence argument");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Empty previous contents */
|
||||
if (PyBytes_Resize((PyObject *)self, 0) < 0)
|
||||
return -1;
|
||||
if (PyUnicode_Check(arg)) {
|
||||
/* Encode via the codec registry */
|
||||
PyObject *encoded;
|
||||
char *bytes;
|
||||
Py_ssize_t size;
|
||||
if (encoding == NULL)
|
||||
encoding = PyUnicode_GetDefaultEncoding();
|
||||
encoded = PyCodec_Encode(arg, encoding, errors);
|
||||
if (encoded == NULL)
|
||||
return -1;
|
||||
if (!PyString_Check(encoded)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"encoder did not return a string object (type=%.400s)",
|
||||
encoded->ob_type->tp_name);
|
||||
Py_DECREF(encoded);
|
||||
return -1;
|
||||
}
|
||||
bytes = PyString_AS_STRING(encoded);
|
||||
size = PyString_GET_SIZE(encoded);
|
||||
if (PyBytes_Resize((PyObject *)self, size) < 0) {
|
||||
Py_DECREF(encoded);
|
||||
return -1;
|
||||
}
|
||||
memcpy(self->ob_bytes, bytes, size);
|
||||
Py_DECREF(encoded);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Quick check if we're done */
|
||||
if (arg == 0)
|
||||
return 0;
|
||||
/* If it's not unicode, there can't be encoding or errors */
|
||||
if (encoding != NULL || errors != NULL) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"encoding or errors without a string argument");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* XXX Optimize this if the arguments is a list, tuple, or bytes */
|
||||
/* Is it an int? */
|
||||
count = PyNumber_Index(arg);
|
||||
if (count == -1 && PyErr_Occurred())
|
||||
PyErr_Clear();
|
||||
else {
|
||||
if (count < 0) {
|
||||
PyErr_SetString(PyExc_ValueError, "negative count");
|
||||
return -1;
|
||||
}
|
||||
if (count > 0) {
|
||||
if (PyBytes_Resize((PyObject *)self, count))
|
||||
return -1;
|
||||
memset(self->ob_bytes, 0, count);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (PyObject_CheckReadBuffer(arg)) {
|
||||
const void *bytes;
|
||||
Py_ssize_t size;
|
||||
if (PyObject_AsReadBuffer(arg, &bytes, &size) < 0)
|
||||
return -1;
|
||||
if (PyBytes_Resize((PyObject *)self, size) < 0)
|
||||
return -1;
|
||||
memcpy(self->ob_bytes, bytes, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* XXX Optimize this if the argument is a list or tuple */
|
||||
|
||||
/* Get the iterator */
|
||||
it = PyObject_GetIter(arg);
|
||||
if (it == NULL)
|
||||
return -1;
|
||||
return -1;
|
||||
iternext = *it->ob_type->tp_iternext;
|
||||
|
||||
/* Run the iterator to exhaustion */
|
||||
for (;;) {
|
||||
PyObject *item;
|
||||
Py_ssize_t value;
|
||||
PyObject *item;
|
||||
Py_ssize_t value;
|
||||
|
||||
/* Get the next item */
|
||||
item = iternext(it);
|
||||
if (item == NULL) {
|
||||
if (PyErr_Occurred()) {
|
||||
if (!PyErr_ExceptionMatches(PyExc_StopIteration))
|
||||
goto error;
|
||||
PyErr_Clear();
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* Get the next item */
|
||||
item = iternext(it);
|
||||
if (item == NULL) {
|
||||
if (PyErr_Occurred()) {
|
||||
if (!PyErr_ExceptionMatches(PyExc_StopIteration))
|
||||
goto error;
|
||||
PyErr_Clear();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Interpret it as an int (__index__) */
|
||||
value = PyNumber_Index(item);
|
||||
Py_DECREF(item);
|
||||
if (value == -1 && PyErr_Occurred())
|
||||
goto error;
|
||||
/* Interpret it as an int (__index__) */
|
||||
value = PyNumber_Index(item);
|
||||
Py_DECREF(item);
|
||||
if (value == -1 && PyErr_Occurred())
|
||||
goto error;
|
||||
|
||||
/* Range check */
|
||||
if (value < 0 || value >= 256) {
|
||||
PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)");
|
||||
goto error;
|
||||
}
|
||||
/* Range check */
|
||||
if (value < 0 || value >= 256) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"bytes must be in range(0, 256)");
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Append the byte */
|
||||
/* XXX Speed this up */
|
||||
if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
|
||||
goto error;
|
||||
self->ob_sval[self->ob_size-1] = value;
|
||||
/* Append the byte */
|
||||
/* XXX Speed this up */
|
||||
if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
|
||||
goto error;
|
||||
self->ob_bytes[self->ob_size-1] = value;
|
||||
}
|
||||
|
||||
/* Clean up and return success */
|
||||
|
@ -188,45 +427,45 @@ bytes_repr(PyBytesObject *self)
|
|||
int i;
|
||||
|
||||
if (self->ob_size == 0)
|
||||
return PyString_FromString("bytes()");
|
||||
return PyString_FromString("bytes()");
|
||||
|
||||
list = PyList_New(0);
|
||||
if (list == NULL)
|
||||
return NULL;
|
||||
return NULL;
|
||||
|
||||
str = PyString_FromString("bytes([");
|
||||
if (str == NULL)
|
||||
goto error;
|
||||
goto error;
|
||||
|
||||
err = PyList_Append(list, str);
|
||||
Py_DECREF(str);
|
||||
if (err < 0)
|
||||
goto error;
|
||||
goto error;
|
||||
|
||||
for (i = 0; i < self->ob_size; i++) {
|
||||
char buffer[20];
|
||||
sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_sval[i]));
|
||||
str = PyString_FromString((i == 0) ? buffer+2 : buffer);
|
||||
if (str == NULL)
|
||||
goto error;
|
||||
err = PyList_Append(list, str);
|
||||
Py_DECREF(str);
|
||||
if (err < 0)
|
||||
goto error;
|
||||
char buffer[20];
|
||||
sprintf(buffer, ", 0x%02x", (unsigned char) (self->ob_bytes[i]));
|
||||
str = PyString_FromString((i == 0) ? buffer+2 : buffer);
|
||||
if (str == NULL)
|
||||
goto error;
|
||||
err = PyList_Append(list, str);
|
||||
Py_DECREF(str);
|
||||
if (err < 0)
|
||||
goto error;
|
||||
}
|
||||
|
||||
str = PyString_FromString("])");
|
||||
if (str == NULL)
|
||||
goto error;
|
||||
goto error;
|
||||
|
||||
err = PyList_Append(list, str);
|
||||
Py_DECREF(str);
|
||||
if (err < 0)
|
||||
goto error;
|
||||
goto error;
|
||||
|
||||
str = PyString_FromString("");
|
||||
if (str == NULL)
|
||||
goto error;
|
||||
goto error;
|
||||
|
||||
result = _PyString_Join(str, list);
|
||||
Py_DECREF(str);
|
||||
|
@ -239,6 +478,12 @@ bytes_repr(PyBytesObject *self)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
bytes_str(PyBytesObject *self)
|
||||
{
|
||||
return PyString_FromStringAndSize(self->ob_bytes, self->ob_size);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
|
||||
{
|
||||
|
@ -247,37 +492,37 @@ bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
|
|||
int cmp;
|
||||
|
||||
if (!PyBytes_Check(self) || !PyBytes_Check(other)) {
|
||||
Py_INCREF(Py_NotImplemented);
|
||||
return Py_NotImplemented;
|
||||
Py_INCREF(Py_NotImplemented);
|
||||
return Py_NotImplemented;
|
||||
}
|
||||
|
||||
if (self->ob_size != other->ob_size && (op == Py_EQ || op == Py_NE)) {
|
||||
/* Shortcut: if the lengths differ, the objects differ */
|
||||
cmp = (op == Py_NE);
|
||||
/* Shortcut: if the lengths differ, the objects differ */
|
||||
cmp = (op == Py_NE);
|
||||
}
|
||||
else {
|
||||
minsize = self->ob_size;
|
||||
if (other->ob_size < minsize)
|
||||
minsize = other->ob_size;
|
||||
minsize = self->ob_size;
|
||||
if (other->ob_size < minsize)
|
||||
minsize = other->ob_size;
|
||||
|
||||
cmp = memcmp(self->ob_sval, other->ob_sval, minsize);
|
||||
/* In ISO C, memcmp() guarantees to use unsigned bytes! */
|
||||
cmp = memcmp(self->ob_bytes, other->ob_bytes, minsize);
|
||||
/* In ISO C, memcmp() guarantees to use unsigned bytes! */
|
||||
|
||||
if (cmp == 0) {
|
||||
if (self->ob_size < other->ob_size)
|
||||
cmp = -1;
|
||||
else if (self->ob_size > other->ob_size)
|
||||
cmp = 1;
|
||||
}
|
||||
if (cmp == 0) {
|
||||
if (self->ob_size < other->ob_size)
|
||||
cmp = -1;
|
||||
else if (self->ob_size > other->ob_size)
|
||||
cmp = 1;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case Py_LT: cmp = cmp < 0; break;
|
||||
case Py_LE: cmp = cmp <= 0; break;
|
||||
case Py_EQ: cmp = cmp == 0; break;
|
||||
case Py_NE: cmp = cmp != 0; break;
|
||||
case Py_GT: cmp = cmp > 0; break;
|
||||
case Py_GE: cmp = cmp >= 0; break;
|
||||
}
|
||||
switch (op) {
|
||||
case Py_LT: cmp = cmp < 0; break;
|
||||
case Py_LE: cmp = cmp <= 0; break;
|
||||
case Py_EQ: cmp = cmp == 0; break;
|
||||
case Py_NE: cmp = cmp != 0; break;
|
||||
case Py_GT: cmp = cmp > 0; break;
|
||||
case Py_GE: cmp = cmp >= 0; break;
|
||||
}
|
||||
}
|
||||
|
||||
res = cmp ? Py_True : Py_False;
|
||||
|
@ -288,41 +533,89 @@ bytes_richcompare(PyBytesObject *self, PyBytesObject *other, int op)
|
|||
static void
|
||||
bytes_dealloc(PyBytesObject *self)
|
||||
{
|
||||
if (self->ob_sval != 0) {
|
||||
PyMem_Free(self->ob_sval);
|
||||
if (self->ob_bytes != 0) {
|
||||
PyMem_Free(self->ob_bytes);
|
||||
}
|
||||
self->ob_type->tp_free((PyObject *)self);
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
bytes_getbuffer(PyBytesObject *self, Py_ssize_t index, const void **ptr)
|
||||
{
|
||||
if (index != 0) {
|
||||
PyErr_SetString(PyExc_SystemError,
|
||||
"accessing non-existent string segment");
|
||||
return -1;
|
||||
}
|
||||
*ptr = (void *)self->ob_bytes;
|
||||
return self->ob_size;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
bytes_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
|
||||
{
|
||||
if (lenp)
|
||||
*lenp = self->ob_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(decode_doc,
|
||||
"B.decode([encoding[,errors]]) -> unicode obect.\n\
|
||||
\n\
|
||||
Decodes B using the codec registered for encoding. encoding defaults\n\
|
||||
to the default encoding. errors may be given to set a different error\n\
|
||||
handling scheme. Default is 'strict' meaning that encoding errors raise\n\
|
||||
a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
|
||||
as well as any other name registerd with codecs.register_error that is\n\
|
||||
able to handle UnicodeDecodeErrors.");
|
||||
|
||||
/* bytes.decode([encoding[, errors]]): decode self through the codec
   registry.

   encoding falls back to the interpreter's default encoding when omitted;
   errors is handed to the codec unchanged (NULL when omitted).  Returns
   whatever PyCodec_Decode() returns, or NULL if the arguments do not
   parse. */
static PyObject *
bytes_decode(PyObject *self, PyObject *args)
{
    const char *encoding = NULL;
    const char *errors = NULL;

    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
        return NULL;
    return PyCodec_Decode(self,
                          encoding != NULL ? encoding
                                           : PyUnicode_GetDefaultEncoding(),
                          errors);
}
|
||||
|
||||
static PySequenceMethods bytes_as_sequence = {
|
||||
(lenfunc)bytes_length, /*sq_length*/
|
||||
(binaryfunc)0, /*sq_concat*/
|
||||
(ssizeargfunc)0, /*sq_repeat*/
|
||||
(ssizeargfunc)bytes_getitem, /*sq_item*/
|
||||
(ssizessizeargfunc)0, /*sq_slice*/
|
||||
0, /*sq_ass_item*/
|
||||
0, /*sq_ass_slice*/
|
||||
(objobjproc)0, /*sq_contains*/
|
||||
(lenfunc)bytes_length, /*sq_length*/
|
||||
(binaryfunc)bytes_concat, /*sq_concat*/
|
||||
(ssizeargfunc)bytes_repeat, /*sq_repeat*/
|
||||
(ssizeargfunc)bytes_getitem, /*sq_item*/
|
||||
(ssizessizeargfunc)bytes_getslice, /*sq_slice*/
|
||||
(ssizeobjargproc)bytes_setitem, /*sq_ass_item*/
|
||||
(ssizessizeobjargproc)bytes_setslice, /* sq_ass_slice */
|
||||
#if 0
|
||||
(objobjproc)bytes_contains, /* sq_contains */
|
||||
(binaryfunc)bytes_inplace_concat, /* sq_inplace_concat */
|
||||
(ssizeargfunc)bytes_inplace_repeat, /* sq_inplace_repeat */
|
||||
#endif
|
||||
};
|
||||
|
||||
static PyMappingMethods bytes_as_mapping = {
|
||||
(lenfunc)bytes_length,
|
||||
(binaryfunc)0,
|
||||
0,
|
||||
(lenfunc)bytes_length,
|
||||
(binaryfunc)0,
|
||||
0,
|
||||
};
|
||||
|
||||
static PyBufferProcs bytes_as_buffer = {
|
||||
/*
|
||||
(readbufferproc)bytes_buffer_getreadbuf,
|
||||
(writebufferproc)bytes_buffer_getwritebuf,
|
||||
(segcountproc)bytes_buffer_getsegcount,
|
||||
(charbufferproc)bytes_buffer_getcharbuf,
|
||||
*/
|
||||
(readbufferproc)bytes_getbuffer,
|
||||
(writebufferproc)bytes_getbuffer,
|
||||
(segcountproc)bytes_getsegcount,
|
||||
/* XXX Bytes are not characters! But we need to implement
|
||||
bf_getcharbuffer() so we can be used as 't#' argument to codecs. */
|
||||
(charbufferproc)bytes_getbuffer,
|
||||
};
|
||||
|
||||
static PyMethodDef
|
||||
bytes_methods[] = {
|
||||
{NULL, NULL}
|
||||
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
PyDoc_STRVAR(bytes_doc,
|
||||
|
@ -336,39 +629,40 @@ PyTypeObject PyBytes_Type = {
|
|||
"bytes",
|
||||
sizeof(PyBytesObject),
|
||||
0,
|
||||
(destructor)bytes_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_compare */
|
||||
(reprfunc)bytes_repr, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
&bytes_as_sequence, /* tp_as_sequence */
|
||||
&bytes_as_mapping, /* tp_as_mapping */
|
||||
bytes_nohash, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
PyObject_GenericGetAttr, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
&bytes_as_buffer, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */ /* bytes is 'final' or 'sealed' */
|
||||
bytes_doc, /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
(richcmpfunc)bytes_richcompare, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
bytes_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
(initproc)bytes_init, /* tp_init */
|
||||
PyType_GenericAlloc, /* tp_alloc */
|
||||
PyType_GenericNew, /* tp_new */
|
||||
PyObject_Del, /* tp_free */
|
||||
(destructor)bytes_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_compare */
|
||||
(reprfunc)bytes_repr, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
&bytes_as_sequence, /* tp_as_sequence */
|
||||
&bytes_as_mapping, /* tp_as_mapping */
|
||||
bytes_nohash, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
(reprfunc)bytes_str, /* tp_str */
|
||||
PyObject_GenericGetAttr, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
&bytes_as_buffer, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags */
|
||||
/* bytes is 'final' or 'sealed' */
|
||||
bytes_doc, /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
(richcmpfunc)bytes_richcompare, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
bytes_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
(initproc)bytes_init, /* tp_init */
|
||||
PyType_GenericAlloc, /* tp_alloc */
|
||||
PyType_GenericNew, /* tp_new */
|
||||
PyObject_Del, /* tp_free */
|
||||
};
|
||||
|
|
|
@ -880,6 +880,11 @@ file_readinto(PyFileObject *f, PyObject *args)
|
|||
|
||||
if (f->f_fp == NULL)
|
||||
return err_closed();
|
||||
if (!f->f_binary) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"readinto() requires binary mode");
|
||||
return NULL;
|
||||
}
|
||||
/* refuse to mix with f.next() */
|
||||
if (f->f_buf != NULL &&
|
||||
(f->f_bufend - f->f_bufptr) > 0 &&
|
||||
|
|
Loading…
Reference in New Issue