From 612344f12774cbbefd735d9fcbfb2001fe187362 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Walter=20D=C3=B6rwald?= Date: Fri, 4 May 2007 19:28:21 +0000 Subject: [PATCH] Change UnicodeDecodeError objects so that the 'object' attribute is a bytes object. Add 'y' and 'y#' format specifiers that work like 's' and 's#' but only accept bytes objects. --- Doc/api/utilities.tex | 12 ++++++++++++ Doc/ext/extending.tex | 2 ++ Objects/exceptions.c | 32 ++++++++++++++++++++++++-------- Python/getargs.c | 27 +++++++++++++++++++++++++++ Python/modsupport.c | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 98 insertions(+), 8 deletions(-) diff --git a/Doc/api/utilities.tex b/Doc/api/utilities.tex index 93e37964556..fb9c9099699 100644 --- a/Doc/api/utilities.tex +++ b/Doc/api/utilities.tex @@ -424,6 +424,18 @@ whose address should be passed. compatible objects pass back a reference to the raw internal data representation. + \item[\samp{y} (bytes object) + {[const char *]}] + This variant on \samp{s} converts a Python bytes object to a C pointer to a + character string. The bytes object must not contain embedded NUL bytes; + if it does, a \exception{TypeError} exception is raised. + + \item[\samp{y\#} (bytes object) + {[const char *, int]}] + This variant on \samp{s\#} stores into two C variables, the first one + a pointer to a character string, the second one its length. This only + accepts bytes objects. + \item[\samp{z} (string or \code{None}) {[const char *]}] Like \samp{s}, but the Python object may also be \code{None}, in which case the C pointer is set to \NULL. 
diff --git a/Doc/ext/extending.tex b/Doc/ext/extending.tex index 2af88b53156..1f3e2d5b4fc 100644 --- a/Doc/ext/extending.tex +++ b/Doc/ext/extending.tex @@ -802,8 +802,10 @@ Examples (to the left the call, to the right the resulting Python value): Py_BuildValue("i", 123) 123 Py_BuildValue("iii", 123, 456, 789) (123, 456, 789) Py_BuildValue("s", "hello") 'hello' + Py_BuildValue("y", "hello") b'hello' Py_BuildValue("ss", "hello", "world") ('hello', 'world') Py_BuildValue("s#", "hello", 4) 'hell' + Py_BuildValue("y#", "hello", 4) b'hell' Py_BuildValue("()") () Py_BuildValue("(i)", 123) (123,) Py_BuildValue("(ii)", 123, 456) (123, 456) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index e30e9df6552..1096bace5a6 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -1242,6 +1242,22 @@ set_string(PyObject **attr, const char *value) } +static PyObject * +get_bytes(PyObject *attr, const char *name) +{ + if (!attr) { + PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); + return NULL; + } + + if (!PyBytes_Check(attr)) { + PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name); + return NULL; + } + Py_INCREF(attr); + return attr; +} + static PyObject * get_unicode(PyObject *attr, const char *name) { @@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc) PyObject * PyUnicodeDecodeError_GetObject(PyObject *exc) { - return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); + return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object"); } PyObject * @@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) { if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) { Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, + PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object, "object"); if (!obj) return -1; - size = PyString_GET_SIZE(obj); + size = PyBytes_GET_SIZE(obj); if (*start<0) *start = 0; if (*start>=size) @@ 
-1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) { if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) { Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, + PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object, "object"); if (!obj) return -1; - size = PyString_GET_SIZE(obj); + size = PyBytes_GET_SIZE(obj); if (*end<1) *end = 1; if (*end>size) @@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) return -1; return UnicodeError_init((PyUnicodeErrorObject *)self, args, - kwds, &PyString_Type); + kwds, &PyBytes_Type); } static PyObject * @@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self) /* FromFormat does not support %02x, so format that separately */ char byte[4]; PyOS_snprintf(byte, sizeof(byte), "%02x", - ((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff); + ((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff); return PyString_FromFormat( "'%.400s' codec can't decode byte 0x%s in position %zd: %.400s", PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding), @@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create( assert(length < INT_MAX); assert(start < INT_MAX); assert(end < INT_MAX); - return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns", + return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns", encoding, object, length, start, end, reason); } diff --git a/Python/getargs.c b/Python/getargs.c index f7a66048fbe..8331a18965a 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, break; } + case 'y': {/* bytes */ + if (*format == '#') { + void **p = (void **)va_arg(*p_va, char **); + FETCH_SIZE; + + if (PyBytes_Check(arg)) { + *p = PyBytes_AS_STRING(arg); + STORE_SIZE(PyBytes_GET_SIZE(arg)); + 
} + else + return converterr("bytes", arg, msgbuf, bufsize); + format++; + } else { + char **p = va_arg(*p_va, char **); + + if (PyBytes_Check(arg)) + *p = PyBytes_AS_STRING(arg); + else + return converterr("bytes", arg, msgbuf, bufsize); + if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg)) + return converterr("bytes without null bytes", + arg, msgbuf, bufsize); + } + break; + } + case 'z': {/* string, may be NULL (None) */ if (*format == '#') { /* any buffer-like object */ void **p = (void **)va_arg(*p_va, char **); @@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags) case 's': /* string */ case 'z': /* string or None */ + case 'y': /* bytes */ case 'u': /* unicode string */ case 't': /* buffer, read-only */ case 'w': /* buffer, read-write */ diff --git a/Python/modsupport.c b/Python/modsupport.c index af774f0b602..8f600dc3459 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) return v; } + case 'y': + { + PyObject *v; + char *str = va_arg(*p_va, char *); + Py_ssize_t n; + if (**p_format == '#') { + ++*p_format; + if (flags & FLAG_SIZE_T) + n = va_arg(*p_va, Py_ssize_t); + else + n = va_arg(*p_va, int); + } + else + n = -1; + if (str == NULL) { + v = Py_None; + Py_INCREF(v); + } + else { + if (n < 0) { + size_t m = strlen(str); + if (m > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "string too long for Python bytes"); + return NULL; + } + n = (Py_ssize_t)m; + } + v = PyBytes_FromStringAndSize(str, n); + } + return v; + } + case 'N': case 'S': case 'O':