mirror of https://github.com/python/cpython
PyUnicode_CopyCharacters() checks for buffer and character overflow
It now returns the number of written characters on success.
This commit is contained in:
parent
fb5f5f2420
commit
be78eaf2de
|
@ -519,10 +519,22 @@ PyAPI_FUNC(int) _PyUnicode_Ready(
|
|||
#endif
|
||||
|
||||
/* Copy character from one unicode object into another, this function performs
|
||||
character conversion when nessesary and falls back to memcpy if possible.
|
||||
Return -1 and raise an exception on error, return 0 on success. */
|
||||
character conversion when necessary and falls back to memcpy if possible.
|
||||
|
||||
Fail if 'to' is smaller than how_many or smaller than len(from)-from_start,
|
||||
or if kind(from[from_start:from_start+how_many]) > kind(to).
|
||||
|
||||
Return the number of written character, or return -1 and raise an exception
|
||||
on error.
|
||||
|
||||
Pseudo-code:
|
||||
|
||||
how_many = min(how_many, len(from) - from_start)
|
||||
to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
|
||||
return how_many
|
||||
*/
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(int) PyUnicode_CopyCharacters(
|
||||
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
|
||||
PyObject *to,
|
||||
Py_ssize_t to_start,
|
||||
PyObject *from,
|
||||
|
|
|
@ -606,13 +606,13 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
|
|||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
Py_ssize_t
|
||||
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||
PyObject *from, Py_ssize_t from_start,
|
||||
Py_ssize_t how_many)
|
||||
{
|
||||
int from_kind;
|
||||
int to_kind;
|
||||
unsigned int from_kind;
|
||||
unsigned int to_kind;
|
||||
|
||||
assert(PyUnicode_Check(from));
|
||||
assert(PyUnicode_Check(to));
|
||||
|
@ -622,94 +622,89 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
|||
if (PyUnicode_READY(to))
|
||||
return -1;
|
||||
|
||||
how_many = PY_MIN(PyUnicode_GET_LENGTH(from), how_many);
|
||||
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"Cannot write %zi characters at %zi "
|
||||
"in a string of %zi characters",
|
||||
how_many, to_start, PyUnicode_GET_LENGTH(to));
|
||||
return -1;
|
||||
}
|
||||
|
||||
from_kind = PyUnicode_KIND(from);
|
||||
to_kind = PyUnicode_KIND(to);
|
||||
|
||||
if (from_kind == to_kind) {
|
||||
const Py_ssize_t char_size = PyUnicode_CHARACTER_SIZE(to);
|
||||
Py_MEMCPY(PyUnicode_1BYTE_DATA(to) + (to_start * char_size),
|
||||
PyUnicode_1BYTE_DATA(from) + (from_start * char_size),
|
||||
how_many * char_size);
|
||||
return 0;
|
||||
/* fast path */
|
||||
Py_MEMCPY((char*)PyUnicode_DATA(to)
|
||||
+ PyUnicode_KIND_SIZE(to_kind, to_start),
|
||||
(char*)PyUnicode_DATA(from)
|
||||
+ PyUnicode_KIND_SIZE(from_kind, from_start),
|
||||
PyUnicode_KIND_SIZE(to_kind, how_many));
|
||||
return how_many;
|
||||
}
|
||||
|
||||
if (from_kind > to_kind) {
|
||||
/* slow path to check for character overflow */
|
||||
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
||||
void *from_data = PyUnicode_DATA(from);
|
||||
void *to_data = PyUnicode_DATA(to);
|
||||
Py_UCS4 ch, maxchar;
|
||||
Py_ssize_t i;
|
||||
int overflow;
|
||||
|
||||
switch (from_kind) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
switch (to_kind) {
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
unsigned char, Py_UCS2,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_2BYTE_DATA(to) + to_start
|
||||
);
|
||||
maxchar = 0;
|
||||
for (i=0; i < how_many; i++) {
|
||||
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
||||
if (ch > maxchar) {
|
||||
maxchar = ch;
|
||||
if (maxchar > to_maxchar) {
|
||||
overflow = 1;
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
unsigned char, Py_UCS4,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_4BYTE_DATA(to) + to_start
|
||||
);
|
||||
break;
|
||||
default:
|
||||
goto invalid_state;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
switch (to_kind) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS2, unsigned char,
|
||||
PyUnicode_2BYTE_DATA(from) + from_start,
|
||||
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_1BYTE_DATA(to) + to_start
|
||||
);
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS2, Py_UCS4,
|
||||
PyUnicode_2BYTE_DATA(from) + from_start,
|
||||
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_4BYTE_DATA(to) + to_start
|
||||
);
|
||||
break;
|
||||
default:
|
||||
goto invalid_state;
|
||||
}
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
switch (to_kind) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS4, unsigned char,
|
||||
PyUnicode_4BYTE_DATA(from) + from_start,
|
||||
PyUnicode_4BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_1BYTE_DATA(to) + to_start
|
||||
);
|
||||
break;
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS4, Py_UCS2,
|
||||
PyUnicode_4BYTE_DATA(from) + from_start,
|
||||
PyUnicode_4BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_2BYTE_DATA(to) + to_start
|
||||
);
|
||||
break;
|
||||
default:
|
||||
goto invalid_state;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
goto invalid_state;
|
||||
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
|
||||
}
|
||||
if (!overflow)
|
||||
return how_many;
|
||||
}
|
||||
else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_2BYTE_KIND)
|
||||
{
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS1, Py_UCS2,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_2BYTE_DATA(to) + to_start
|
||||
);
|
||||
return how_many;
|
||||
}
|
||||
else if (from_kind == PyUnicode_1BYTE_KIND
|
||||
&& to_kind == PyUnicode_4BYTE_KIND)
|
||||
{
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS1, Py_UCS4,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start,
|
||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_4BYTE_DATA(to) + to_start
|
||||
);
|
||||
return how_many;
|
||||
}
|
||||
else if (from_kind == PyUnicode_2BYTE_KIND
|
||||
&& to_kind == PyUnicode_4BYTE_KIND)
|
||||
{
|
||||
_PyUnicode_CONVERT_BYTES(
|
||||
Py_UCS2, Py_UCS4,
|
||||
PyUnicode_2BYTE_DATA(from) + from_start,
|
||||
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
||||
PyUnicode_4BYTE_DATA(to) + to_start
|
||||
);
|
||||
return how_many;
|
||||
}
|
||||
return 0;
|
||||
|
||||
invalid_state:
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"Impossible kind state (from=%i, to=%i) "
|
||||
"in PyUnicode_CopyCharacters",
|
||||
from_kind, to_kind);
|
||||
"Cannot copy UCS%u characters "
|
||||
"into a string of UCS%u characters",
|
||||
1 << (from_kind - 1),
|
||||
1 << (to_kind -1));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue