Change UnicodeDecodeError objects so that the 'object' attribute
is a bytes object. Add 'y' and 'y#' format specifiers that work like 's' and 's#' but only accept bytes objects.
This commit is contained in:
parent
c2b87a6dff
commit
612344f127
|
@ -424,6 +424,18 @@ whose address should be passed.
|
|||
compatible objects pass back a reference to the raw internal data
|
||||
representation.
|
||||
|
||||
\item[\samp{y} (bytes object)
|
||||
{[const char *]}]
|
||||
This variant on \samp{s} converts a Python bytes object to a C pointer to a
|
||||
character string. The bytes object must not contain embedded NUL bytes;
|
||||
if it does, a \exception{TypeError} exception is raised.
|
||||
|
||||
\item[\samp{y\#} (bytes object)
|
||||
{[const char *, int]}]
|
||||
This variant on \samp{s\#} stores into two C variables, the first one
|
||||
a pointer to a character string, the second one its length. This only
|
||||
accepts bytes objects.
|
||||
|
||||
\item[\samp{z} (string or \code{None}) {[const char *]}]
|
||||
Like \samp{s}, but the Python object may also be \code{None}, in
|
||||
which case the C pointer is set to \NULL.
|
||||
|
|
|
@ -802,8 +802,10 @@ Examples (to the left the call, to the right the resulting Python value):
|
|||
Py_BuildValue("i", 123) 123
|
||||
Py_BuildValue("iii", 123, 456, 789) (123, 456, 789)
|
||||
Py_BuildValue("s", "hello") 'hello'
|
||||
Py_BuildValue("y", "hello") b'hello'
|
||||
Py_BuildValue("ss", "hello", "world") ('hello', 'world')
|
||||
Py_BuildValue("s#", "hello", 4) 'hell'
|
||||
Py_BuildValue("y#", "hello", 4) b'hell'
|
||||
Py_BuildValue("()") ()
|
||||
Py_BuildValue("(i)", 123) (123,)
|
||||
Py_BuildValue("(ii)", 123, 456) (123, 456)
|
||||
|
|
|
@ -1242,6 +1242,22 @@ set_string(PyObject **attr, const char *value)
|
|||
}
|
||||
|
||||
|
||||
/* Validate an exception attribute that is expected to hold a bytes object.
 *
 * attr: the stored attribute value; may be NULL when it was never set.
 * name: attribute name, used only in the error message.
 *
 * On success the reference count of attr is incremented and attr itself is
 * returned.  If attr is NULL or fails PyBytes_Check(), a TypeError is set
 * and NULL is returned.
 */
static PyObject *
get_bytes(PyObject *attr, const char *name)
{
    /* Happy path first: attribute is present and is a bytes object. */
    if (attr != NULL && PyBytes_Check(attr)) {
        Py_INCREF(attr);
        return attr;
    }

    /* Distinguish "never set" from "set to the wrong type". */
    if (attr == NULL) {
        PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
    }
    else {
        PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
    }
    return NULL;
}
|
||||
|
||||
static PyObject *
|
||||
get_unicode(PyObject *attr, const char *name)
|
||||
{
|
||||
|
@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
|
|||
PyObject *
|
||||
PyUnicodeDecodeError_GetObject(PyObject *exc)
|
||||
{
|
||||
return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
|
||||
return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
|
||||
}
|
||||
|
||||
PyObject *
|
||||
|
@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
|
|||
{
|
||||
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
|
||||
Py_ssize_t size;
|
||||
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
|
||||
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
|
||||
"object");
|
||||
if (!obj) return -1;
|
||||
size = PyString_GET_SIZE(obj);
|
||||
size = PyBytes_GET_SIZE(obj);
|
||||
if (*start<0)
|
||||
*start = 0;
|
||||
if (*start>=size)
|
||||
|
@ -1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
|
|||
{
|
||||
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
|
||||
Py_ssize_t size;
|
||||
PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
|
||||
PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
|
||||
"object");
|
||||
if (!obj) return -1;
|
||||
size = PyString_GET_SIZE(obj);
|
||||
size = PyBytes_GET_SIZE(obj);
|
||||
if (*end<1)
|
||||
*end = 1;
|
||||
if (*end>size)
|
||||
|
@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
|
|||
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
|
||||
return -1;
|
||||
return UnicodeError_init((PyUnicodeErrorObject *)self, args,
|
||||
kwds, &PyString_Type);
|
||||
kwds, &PyBytes_Type);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self)
|
|||
/* FromFormat does not support %02x, so format that separately */
|
||||
char byte[4];
|
||||
PyOS_snprintf(byte, sizeof(byte), "%02x",
|
||||
((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
|
||||
((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
|
||||
return PyString_FromFormat(
|
||||
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
|
||||
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
|
||||
|
@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create(
|
|||
assert(length < INT_MAX);
|
||||
assert(start < INT_MAX);
|
||||
assert(end < INT_MAX);
|
||||
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
|
||||
return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
|
||||
encoding, object, length, start, end, reason);
|
||||
}
|
||||
|
||||
|
|
|
@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
|
|||
break;
|
||||
}
|
||||
|
||||
case 'y': {/* bytes */
|
||||
if (*format == '#') {
|
||||
void **p = (void **)va_arg(*p_va, char **);
|
||||
FETCH_SIZE;
|
||||
|
||||
if (PyBytes_Check(arg)) {
|
||||
*p = PyBytes_AS_STRING(arg);
|
||||
STORE_SIZE(PyBytes_GET_SIZE(arg));
|
||||
}
|
||||
else
|
||||
return converterr("bytes", arg, msgbuf, bufsize);
|
||||
format++;
|
||||
} else {
|
||||
char **p = va_arg(*p_va, char **);
|
||||
|
||||
if (PyBytes_Check(arg))
|
||||
*p = PyBytes_AS_STRING(arg);
|
||||
else
|
||||
return converterr("bytes", arg, msgbuf, bufsize);
|
||||
if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg))
|
||||
return converterr("bytes without null bytes",
|
||||
arg, msgbuf, bufsize);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'z': {/* string, may be NULL (None) */
|
||||
if (*format == '#') { /* any buffer-like object */
|
||||
void **p = (void **)va_arg(*p_va, char **);
|
||||
|
@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags)
|
|||
|
||||
case 's': /* string */
|
||||
case 'z': /* string or None */
|
||||
case 'y': /* bytes */
|
||||
case 'u': /* unicode string */
|
||||
case 't': /* buffer, read-only */
|
||||
case 'w': /* buffer, read-write */
|
||||
|
|
|
@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
|
|||
return v;
|
||||
}
|
||||
|
||||
case 'y':
|
||||
{
|
||||
PyObject *v;
|
||||
char *str = va_arg(*p_va, char *);
|
||||
Py_ssize_t n;
|
||||
if (**p_format == '#') {
|
||||
++*p_format;
|
||||
if (flags & FLAG_SIZE_T)
|
||||
n = va_arg(*p_va, Py_ssize_t);
|
||||
else
|
||||
n = va_arg(*p_va, int);
|
||||
}
|
||||
else
|
||||
n = -1;
|
||||
if (str == NULL) {
|
||||
v = Py_None;
|
||||
Py_INCREF(v);
|
||||
}
|
||||
else {
|
||||
if (n < 0) {
|
||||
size_t m = strlen(str);
|
||||
if (m > PY_SSIZE_T_MAX) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"string too long for Python bytes");
|
||||
return NULL;
|
||||
}
|
||||
n = (Py_ssize_t)m;
|
||||
}
|
||||
v = PyBytes_FromStringAndSize(str, n);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
case 'N':
|
||||
case 'S':
|
||||
case 'O':
|
||||
|
|
Loading…
Reference in New Issue