Commit strict str/bytes distinction.

From now on, trying to write str to a binary stream
is an error (I'm still working on the reverse case,
i.e. writing bytes to a text stream).
There are still (at least) two failing tests:
- test_asynchat
- test_urllib2_localnet
but I'm sure these will be fixed by someone.
This commit is contained in:
Guido van Rossum 2007-08-29 04:05:57 +00:00
parent 245b42ec4b
commit a74184eb1d
3 changed files with 59 additions and 119 deletions

View File

@ -659,12 +659,14 @@ class BytesIO(BufferedIOBase):
def write(self, b):
if self.closed:
raise ValueError("write to closed file")
if isinstance(b, str):
raise TypeError("can't write str to binary stream")
n = len(b)
newpos = self._pos + n
if newpos > len(self._buffer):
# Inserts null bytes between the current end of the file
# and the new write position.
padding = '\x00' * (newpos - len(self._buffer) - n)
padding = b'\x00' * (newpos - len(self._buffer) - n)
self._buffer[self._pos:newpos - n] = padding
self._buffer[self._pos:newpos] = b
self._pos = newpos
@ -801,11 +803,8 @@ class BufferedWriter(_BufferedIOMixin):
def write(self, b):
if self.closed:
raise ValueError("write to closed file")
if not isinstance(b, bytes):
if hasattr(b, "__index__"):
raise TypeError("Can't write object of type %s" %
type(b).__name__)
b = bytes(b)
if isinstance(b, str):
raise TypeError("can't write str to binary stream")
# XXX we can implement some more tricks to try and avoid partial writes
if len(self._write_buf) > self.buffer_size:
# We're full, so let's pre-flush the buffer
@ -1099,8 +1098,6 @@ class TextIOWrapper(TextIOBase):
s = s.replace("\n", self._writenl)
# XXX What if we were just reading?
b = s.encode(self._encoding)
if isinstance(b, str):
b = bytes(b)
self.buffer.write(b)
if haslf and self.isatty():
self.flush()

View File

@ -82,7 +82,13 @@ _getbuffer(PyObject *obj, PyBuffer *view)
if (buffer == NULL ||
PyUnicode_Check(obj) ||
buffer->bf_getbuffer == NULL) return -1;
buffer->bf_getbuffer == NULL)
{
PyErr_Format(PyExc_TypeError,
"Type %.100s doesn't support the buffer API",
Py_Type(obj)->tp_name);
return -1;
}
if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
return -1;
@ -167,7 +173,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
else if (size < alloc) {
/* Within allocated size; quick exit */
Py_Size(self) = size;
((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
return 0;
}
else if (size <= alloc * 1.125) {
@ -181,10 +187,11 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
if (((PyBytesObject *)self)->ob_exports > 0) {
/*
fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
((PyBytesObject *)self)->ob_bytes);
*/
PyErr_SetString(PyExc_BufferError,
"Existing exports of data: object cannot be re-sized");
"Existing exports of data: object cannot be re-sized");
return -1;
}
@ -262,24 +269,24 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
PyBuffer vo;
if (_getbuffer(other, &vo) < 0) {
PyErr_Format(PyExc_TypeError,
"can't concat bytes to %.100s", Py_Type(self)->tp_name);
return NULL;
PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
Py_Type(self)->tp_name);
return NULL;
}
mysize = Py_Size(self);
size = mysize + vo.len;
if (size < 0) {
PyObject_ReleaseBuffer(other, &vo);
return PyErr_NoMemory();
PyObject_ReleaseBuffer(other, &vo);
return PyErr_NoMemory();
}
if (size < self->ob_alloc) {
Py_Size(self) = size;
self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
Py_Size(self) = size;
self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
}
else if (PyBytes_Resize((PyObject *)self, size) < 0) {
PyObject_ReleaseBuffer(other, &vo);
return NULL;
PyObject_ReleaseBuffer(other, &vo);
return NULL;
}
memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
PyObject_ReleaseBuffer(other, &vo);
@ -327,7 +334,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
return PyErr_NoMemory();
if (size < self->ob_alloc) {
Py_Size(self) = size;
self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
}
else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL;
@ -507,7 +514,7 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
Py_Size(self) - hi);
}
/* XXX(nnorwitz): need to verify this can't overflow! */
/* XXX(nnorwitz): need to verify this can't overflow! */
if (PyBytes_Resize((PyObject *)self,
Py_Size(self) + needed - avail) < 0) {
res = -1;
@ -757,8 +764,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
if (PyUnicode_Check(arg)) {
/* Encode via the codec registry */
PyObject *encoded, *new;
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
if (encoding == NULL) {
PyErr_SetString(PyExc_TypeError,
"string argument without an encoding");
return -1;
}
encoded = PyCodec_Encode(arg, encoding, errors);
if (encoded == NULL)
return -1;
@ -769,12 +779,12 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
Py_DECREF(encoded);
return -1;
}
new = bytes_iconcat(self, encoded);
Py_DECREF(encoded);
if (new == NULL)
return -1;
Py_DECREF(new);
return 0;
new = bytes_iconcat(self, encoded);
Py_DECREF(encoded);
if (new == NULL)
return -1;
Py_DECREF(new);
return 0;
}
/* If it's not unicode, there can't be encoding or errors */
@ -954,12 +964,14 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
self_size = _getbuffer(self, &self_bytes);
if (self_size < 0) {
PyErr_Clear();
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
other_size = _getbuffer(other, &other_bytes);
if (other_size < 0) {
PyErr_Clear();
PyObject_ReleaseBuffer(self, &self_bytes);
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
@ -1061,10 +1073,11 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
sub_len = PyBytes_GET_SIZE(subobj);
}
/* XXX --> use the modern buffer interface */
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
/* XXX - the "expected a character buffer object" is pretty
confusing for a non-expert. remap to something else ? */
return -2;
}
if (dir > 0)
return stringlib_find_slice(
@ -2021,49 +2034,24 @@ bytes_replace(PyBytesObject *self, PyObject *args)
{
Py_ssize_t count = -1;
PyObject *from, *to, *res;
const char *from_s, *to_s;
Py_ssize_t from_len, to_len;
int relfrom=0, relto=0;
PyBuffer vfrom, vto;
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
return NULL;
if (PyBytes_Check(from)) {
from_s = PyBytes_AS_STRING(from);
from_len = PyBytes_GET_SIZE(from);
}
else {
if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
return NULL;
from_s = vfrom.buf;
from_len = vfrom.len;
relfrom = 1;
}
if (PyBytes_Check(to)) {
to_s = PyBytes_AS_STRING(to);
to_len = PyBytes_GET_SIZE(to);
}
else {
if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
if (relfrom)
PyObject_ReleaseBuffer(from, &vfrom);
return NULL;
}
to_s = vto.buf;
to_len = vto.len;
relto = 1;
if (_getbuffer(from, &vfrom) < 0)
return NULL;
if (_getbuffer(to, &vto) < 0) {
PyObject_ReleaseBuffer(from, &vfrom);
return NULL;
}
res = (PyObject *)replace((PyBytesObject *) self,
from_s, from_len,
to_s, to_len, count);
vfrom.buf, vfrom.len,
vto.buf, vto.len, count);
if (relfrom)
PyObject_ReleaseBuffer(from, &vfrom);
if (relto)
PyObject_ReleaseBuffer(to, &vto);
PyObject_ReleaseBuffer(from, &vfrom);
PyObject_ReleaseBuffer(to, &vto);
return res;
}
@ -2799,10 +2787,10 @@ bytes_reduce(PyBytesObject *self)
{
PyObject *latin1;
if (self->ob_bytes)
latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
Py_Size(self), NULL);
latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
Py_Size(self), NULL);
else
latin1 = PyUnicode_FromString("");
latin1 = PyUnicode_FromString("");
return Py_BuildValue("(O(Ns))", Py_Type(self), latin1, "latin-1");
}

View File

@ -965,31 +965,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
return NULL;
}
#if 0
/* For b/w compatibility we also accept Unicode objects provided
that no encodings is given and then redirect to
PyObject_Unicode() which then applies the additional logic for
Unicode subclasses.
NOTE: This API should really only be used for object which
represent *encoded* Unicode !
*/
if (PyUnicode_Check(obj)) {
if (encoding) {
PyErr_SetString(PyExc_TypeError,
"decoding Unicode is not supported");
return NULL;
}
return PyObject_Unicode(obj);
}
#else
if (PyUnicode_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"decoding Unicode is not supported");
return NULL;
}
#endif
/* Coerce object */
if (PyString_Check(obj)) {
@ -6440,26 +6420,7 @@ able to handle UnicodeDecodeErrors.");
static PyObject *
unicode_decode(PyUnicodeObject *self, PyObject *args)
{
char *encoding = NULL;
char *errors = NULL;
PyObject *v;
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
return NULL;
v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
goto onError;
if (!PyString_Check(v) && !PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
"decoder did not return a string/unicode object "
"(type=%.400s)",
Py_Type(v)->tp_name);
Py_DECREF(v);
return NULL;
}
return v;
onError:
PyErr_Format(PyExc_TypeError, "decoding str is not supported");
return NULL;
}
@ -8136,17 +8097,11 @@ unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
{
if (flags & PyBUF_CHARACTER) {
PyObject *str;
str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
if (str == NULL) return -1;
return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
PyString_GET_SIZE(str), 1, flags);
}
else {
return PyBuffer_FillInfo(view, (void *)self->str,
PyUnicode_GET_DATA_SIZE(self), 1, flags);
PyErr_SetString(PyExc_SystemError, "can't use str as char buffer");
return -1;
}
return PyBuffer_FillInfo(view, (void *)self->str,
PyUnicode_GET_DATA_SIZE(self), 1, flags);
}