Unicode: optimize creating of 1-character strings
This commit is contained in:
parent
bff7c96834
commit
b6cd014d75
|
@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
|
|||
return unicode_empty;
|
||||
}
|
||||
assert(size > 0);
|
||||
if (size == 1 && u[0] < 256)
|
||||
return get_latin1_char((unsigned char)u[0]);
|
||||
if (size == 1) {
|
||||
Py_UCS4 ch = u[0];
|
||||
if (ch < 256)
|
||||
return get_latin1_char((unsigned char)ch);
|
||||
|
||||
res = PyUnicode_New(1, ch);
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
|
||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||
return res;
|
||||
}
|
||||
|
||||
max_char = ucs2lib_find_max_char(u, u + size);
|
||||
res = PyUnicode_New(size, max_char);
|
||||
|
@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
|
|||
return unicode_empty;
|
||||
}
|
||||
assert(size > 0);
|
||||
if (size == 1 && u[0] < 256)
|
||||
return get_latin1_char((unsigned char)u[0]);
|
||||
if (size == 1) {
|
||||
Py_UCS4 ch = u[0];
|
||||
if (ch < 256)
|
||||
return get_latin1_char((unsigned char)ch);
|
||||
|
||||
res = PyUnicode_New(1, ch);
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
|
||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||
return res;
|
||||
}
|
||||
|
||||
max_char = ucs4lib_find_max_char(u, u + size);
|
||||
res = PyUnicode_New(size, max_char);
|
||||
|
@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args)
|
|||
static PyObject *
|
||||
unicode_getitem(PyObject *self, Py_ssize_t index)
|
||||
{
|
||||
Py_UCS4 ch = PyUnicode_ReadChar(self, index);
|
||||
if (ch == (Py_UCS4)-1)
|
||||
void *data;
|
||||
enum PyUnicode_Kind kind;
|
||||
Py_UCS4 ch;
|
||||
PyObject *res;
|
||||
|
||||
if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
|
||||
PyErr_BadArgument();
|
||||
return NULL;
|
||||
return PyUnicode_FromOrdinal(ch);
|
||||
}
|
||||
if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
|
||||
PyErr_SetString(PyExc_IndexError, "string index out of range");
|
||||
return NULL;
|
||||
}
|
||||
kind = PyUnicode_KIND(self);
|
||||
data = PyUnicode_DATA(self);
|
||||
ch = PyUnicode_READ(kind, data, index);
|
||||
if (ch < 256)
|
||||
return get_latin1_char(ch);
|
||||
|
||||
res = PyUnicode_New(1, ch);
|
||||
if (res == NULL)
|
||||
return NULL;
|
||||
kind = PyUnicode_KIND(res);
|
||||
data = PyUnicode_DATA(res);
|
||||
PyUnicode_WRITE(kind, data, 0, ch);
|
||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Believe it or not, this produces the same value for ASCII strings
|
||||
|
@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
|
|||
}
|
||||
|
||||
if (PyUnicode_IS_ASCII(self)) {
|
||||
kind = PyUnicode_KIND(self);
|
||||
data = PyUnicode_1BYTE_DATA(self);
|
||||
return unicode_fromascii(data + start, length);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue