I want a super fast 'a' * n!

 * Optimize unicode_repeat() with memset() for the special case of repeating
   a single 1-byte character
 * Simplify integer overflow checking; remove the second check because
   PyUnicode_New() already does it and uses a smaller limit (Py_ssize_t vs
   size_t)
This commit is contained in:
Victor Stinner 2011-10-01 02:47:29 +02:00
parent 4584a5ba1a
commit 67ca64ce54
1 changed file with 13 additions and 18 deletions

View File

@ -10583,7 +10583,6 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
{ {
PyUnicodeObject *u; PyUnicodeObject *u;
Py_ssize_t nchars, n; Py_ssize_t nchars, n;
size_t nbytes, char_size;
if (len < 1) { if (len < 1) {
Py_INCREF(unicode_empty); Py_INCREF(unicode_empty);
@ -10599,32 +10598,28 @@ unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
if (PyUnicode_READY(str) == -1) if (PyUnicode_READY(str) == -1)
return NULL; return NULL;
/* ensure # of chars needed doesn't overflow int and # of bytes if (len > PY_SSIZE_T_MAX / PyUnicode_GET_LENGTH(str)) {
* needed doesn't overflow size_t PyErr_SetString(PyExc_OverflowError,
*/ "repeated string is too long");
return NULL;
}
nchars = len * PyUnicode_GET_LENGTH(str); nchars = len * PyUnicode_GET_LENGTH(str);
if (nchars / len != PyUnicode_GET_LENGTH(str)) {
PyErr_SetString(PyExc_OverflowError,
"repeated string is too long");
return NULL;
}
char_size = PyUnicode_CHARACTER_SIZE(str);
nbytes = (nchars + 1) * char_size;
if (nbytes / char_size != (size_t)(nchars + 1)) {
PyErr_SetString(PyExc_OverflowError,
"repeated string is too long");
return NULL;
}
u = (PyUnicodeObject *)PyUnicode_New(nchars, PyUnicode_MAX_CHAR_VALUE(str)); u = (PyUnicodeObject *)PyUnicode_New(nchars, PyUnicode_MAX_CHAR_VALUE(str));
if (!u) if (!u)
return NULL; return NULL;
assert(PyUnicode_KIND(u) == PyUnicode_KIND(str));
if (PyUnicode_GET_LENGTH(str) == 1) { if (PyUnicode_GET_LENGTH(str) == 1) {
const int kind = PyUnicode_KIND(str); const int kind = PyUnicode_KIND(str);
const Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0); const Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0);
void *to = PyUnicode_DATA(u); void *to = PyUnicode_DATA(u);
for (n = 0; n < len; ++n) if (kind == PyUnicode_1BYTE_KIND)
PyUnicode_WRITE(kind, to, n, fill_char); memset(to, (unsigned char)fill_char, len);
else {
for (n = 0; n < len; ++n)
PyUnicode_WRITE(kind, to, n, fill_char);
}
} }
else { else {
/* number of characters copied this far */ /* number of characters copied this far */