Change UnicodeDecodeError objects so that the 'object' attribute is a bytes object.

Add 'y' and 'y#' format specifiers that work like 's' and 's#'
but only accept bytes objects.
This commit is contained in:
Walter Dörwald 2007-05-04 19:28:21 +00:00
parent c2b87a6dff
commit 612344f127
5 changed files with 98 additions and 8 deletions

View File

@ -424,6 +424,18 @@ whose address should be passed.
compatible objects pass back a reference to the raw internal data
representation.
\item[\samp{y} (bytes object)
{[const char *]}]
This variant on \samp{s} converts a Python bytes object to a C pointer to a
character string. The bytes object must not contain embedded NUL bytes;
if it does, a \exception{TypeError} exception is raised.
\item[\samp{y\#} (bytes object)
{[const char *, int]}]
This variant on \samp{s\#} stores into two C variables, the first one
a pointer to a character string, the second one its length. This only
accepts bytes objects.
\item[\samp{z} (string or \code{None}) {[const char *]}]
Like \samp{s}, but the Python object may also be \code{None}, in
which case the C pointer is set to \NULL.

View File

@ -802,8 +802,10 @@ Examples (to the left the call, to the right the resulting Python value):
Py_BuildValue("i", 123) 123
Py_BuildValue("iii", 123, 456, 789) (123, 456, 789)
Py_BuildValue("s", "hello") 'hello'
Py_BuildValue("y", "hello") b'hello'
Py_BuildValue("ss", "hello", "world") ('hello', 'world')
Py_BuildValue("s#", "hello", 4) 'hell'
Py_BuildValue("y#", "hello", 4) b'hell'
Py_BuildValue("()") ()
Py_BuildValue("(i)", 123) (123,)
Py_BuildValue("(ii)", 123, 456) (123, 456)

View File

@ -1242,6 +1242,22 @@ set_string(PyObject **attr, const char *value)
}
/*
 * Fetch an exception attribute that is required to be a bytes object.
 *
 *   attr  - the stored attribute value; may be NULL when it was never set.
 *   name  - attribute name, used only to build the error message.
 *
 * On success the reference count of attr is incremented and attr is
 * returned.  If attr is NULL or is not a bytes object, a TypeError is
 * set and NULL is returned.
 */
static PyObject *
get_bytes(PyObject *attr, const char *name)
{
    if (attr == NULL) {
        PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
        return NULL;
    }

    if (PyBytes_Check(attr)) {
        /* Hand the caller its own reference to the attribute. */
        Py_INCREF(attr);
        return attr;
    }

    PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
    return NULL;
}
static PyObject *
get_unicode(PyObject *attr, const char *name)
{
@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
PyObject *
PyUnicodeDecodeError_GetObject(PyObject *exc)
{
return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
}
PyObject *
@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
Py_ssize_t size;
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
size = PyString_GET_SIZE(obj);
size = PyBytes_GET_SIZE(obj);
if (*start<0)
*start = 0;
if (*start>=size)
@ -1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
Py_ssize_t size;
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
size = PyString_GET_SIZE(obj);
size = PyBytes_GET_SIZE(obj);
if (*end<1)
*end = 1;
if (*end>size)
@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
return -1;
return UnicodeError_init((PyUnicodeErrorObject *)self, args,
kwds, &PyString_Type);
kwds, &PyBytes_Type);
}
static PyObject *
@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self)
/* FromFormat does not support %02x, so format that separately */
char byte[4];
PyOS_snprintf(byte, sizeof(byte), "%02x",
((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
return PyString_FromFormat(
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create(
assert(length < INT_MAX);
assert(start < INT_MAX);
assert(end < INT_MAX);
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
encoding, object, length, start, end, reason);
}

View File

@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
break;
}
case 'y': {/* bytes */
if (*format == '#') {
void **p = (void **)va_arg(*p_va, char **);
FETCH_SIZE;
if (PyBytes_Check(arg)) {
*p = PyBytes_AS_STRING(arg);
STORE_SIZE(PyBytes_GET_SIZE(arg));
}
else
return converterr("bytes", arg, msgbuf, bufsize);
format++;
} else {
char **p = va_arg(*p_va, char **);
if (PyBytes_Check(arg))
*p = PyBytes_AS_STRING(arg);
else
return converterr("bytes", arg, msgbuf, bufsize);
if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg))
return converterr("bytes without null bytes",
arg, msgbuf, bufsize);
}
break;
}
case 'z': {/* string, may be NULL (None) */
if (*format == '#') { /* any buffer-like object */
void **p = (void **)va_arg(*p_va, char **);
@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags)
case 's': /* string */
case 'z': /* string or None */
case 'y': /* bytes */
case 'u': /* unicode string */
case 't': /* buffer, read-only */
case 'w': /* buffer, read-write */

View File

@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
return v;
}
case 'y':
{
PyObject *v;
char *str = va_arg(*p_va, char *);
Py_ssize_t n;
if (**p_format == '#') {
++*p_format;
if (flags & FLAG_SIZE_T)
n = va_arg(*p_va, Py_ssize_t);
else
n = va_arg(*p_va, int);
}
else
n = -1;
if (str == NULL) {
v = Py_None;
Py_INCREF(v);
}
else {
if (n < 0) {
size_t m = strlen(str);
if (m > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"string too long for Python bytes");
return NULL;
}
n = (Py_ssize_t)m;
}
v = PyBytes_FromStringAndSize(str, n);
}
return v;
}
case 'N':
case 'S':
case 'O':