PyUnicode_CopyCharacters() fails when copying latin1 into ascii

This commit is contained in:
Victor Stinner 2011-10-02 23:33:16 +02:00
parent c53be96c54
commit f42dc448e0
1 changed files with 56 additions and 7 deletions

View File

@ -455,6 +455,46 @@ _PyUnicode_New(Py_ssize_t length)
return NULL;
}
static const char*
unicode_kind_name(PyObject *unicode)
{
assert(PyUnicode_Check(unicode));
if (!PyUnicode_IS_COMPACT(unicode))
{
if (!PyUnicode_IS_READY(unicode))
return "wstr";
switch(PyUnicode_KIND(unicode))
{
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_COMPACT_ASCII(unicode))
return "legacy ascii";
else
return "legacy latin1";
case PyUnicode_2BYTE_KIND:
return "legacy UCS2";
case PyUnicode_4BYTE_KIND:
return "legacy UCS4";
default:
return "<legacy invalid kind>";
}
}
assert(PyUnicode_IS_READY(unicode));
switch(PyUnicode_KIND(unicode))
{
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_COMPACT_ASCII(unicode))
return "ascii";
else
return "compact latin1";
case PyUnicode_2BYTE_KIND:
return "compact UCS2";
case PyUnicode_4BYTE_KIND:
return "compact UCS4";
default:
return "<invalid compact kind>";
}
}
#ifdef Py_DEBUG
int unicode_new_new_calls = 0;
@ -672,8 +712,10 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
to_kind = PyUnicode_KIND(to);
to_data = PyUnicode_DATA(to);
if (from_kind == to_kind) {
/* fast path */
if (from_kind == to_kind
/* deny latin1 => ascii */
&& PyUnicode_MAX_CHAR_VALUE(to) >= PyUnicode_MAX_CHAR_VALUE(from))
{
Py_MEMCPY((char*)to_data
+ PyUnicode_KIND_SIZE(to_kind, to_start),
(char*)from_data
@ -712,7 +754,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
}
else {
int invalid_kinds;
if (from_kind > to_kind) {
/* check if max_char(from substring) <= max_char(to) */
if (from_kind > to_kind
/* latin1 => ascii */
|| (PyUnicode_IS_COMPACT_ASCII(to)
&& to_kind == PyUnicode_1BYTE_KIND
&& !PyUnicode_IS_COMPACT_ASCII(from)))
{
/* slow path to check for character overflow */
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch, maxchar;
@ -736,10 +785,10 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
invalid_kinds = 1;
if (invalid_kinds) {
PyErr_Format(PyExc_ValueError,
"Cannot copy UCS%u characters "
"into a string of UCS%u characters",
1 << (from_kind - 1),
1 << (to_kind -1));
"Cannot copy %s characters "
"into a string of %s characters",
unicode_kind_name(from),
unicode_kind_name(to));
return -1;
}
}