Unicode: optimize creation of 1-character strings

This commit is contained in:
Victor Stinner 2012-05-03 02:17:04 +02:00
parent bff7c96834
commit b6cd014d75
1 changed files with 50 additions and 8 deletions

View File

@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
return unicode_empty;
}
assert(size > 0);
if (size == 1 && u[0] < 256)
return get_latin1_char((unsigned char)u[0]);
if (size == 1) {
Py_UCS4 ch = u[0];
if (ch < 256)
return get_latin1_char((unsigned char)ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
}
max_char = ucs2lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
return unicode_empty;
}
assert(size > 0);
if (size == 1 && u[0] < 256)
return get_latin1_char((unsigned char)u[0]);
if (size == 1) {
Py_UCS4 ch = u[0];
if (ch < 256)
return get_latin1_char((unsigned char)ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
}
max_char = ucs4lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char);
@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args)
/* Return the 1-character string holding the code point at self[index].

   Fails with PyErr_BadArgument() and returns NULL when self is not a unicode
   object or PyUnicode_READY() reports an error.  Sets IndexError ("string
   index out of range") and returns NULL when index is outside
   [0, PyUnicode_GET_LENGTH(self)).

   Code points below 256 are delegated to get_latin1_char(); every other
   code point gets its own 1-character object from PyUnicode_New(), which is
   filled in directly instead of going through PyUnicode_FromOrdinal(). */
static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index)
{
    void *data;
    enum PyUnicode_Kind kind;
    Py_UCS4 ch;
    PyObject *res;

    if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
        PyErr_BadArgument();
        return NULL;
    }
    if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return NULL;
    }

    /* Read the requested code point from the source string. */
    kind = PyUnicode_KIND(self);
    data = PyUnicode_DATA(self);
    ch = PyUnicode_READ(kind, data, index);
    if (ch < 256)
        return get_latin1_char((unsigned char)ch);

    /* Allocate a 1-character result sized for ch, then store ch in it.
       kind/data are re-fetched because they now describe res, not self. */
    res = PyUnicode_New(1, ch);
    if (res == NULL)
        return NULL;
    kind = PyUnicode_KIND(res);
    data = PyUnicode_DATA(res);
    PyUnicode_WRITE(kind, data, 0, ch);
    assert(_PyUnicode_CheckConsistency(res, 1));
    return res;
}
/* Believe it or not, this produces the same value for ASCII strings
@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
}
if (PyUnicode_IS_ASCII(self)) {
kind = PyUnicode_KIND(self);
data = PyUnicode_1BYTE_DATA(self);
return unicode_fromascii(data + start, length);
}