gh-91146: Further reduce the allocation size of the list returned by str.split/rsplit (gh-95493)

Co-authored-by: Inada Naoki <songofacandy@gmail.com>
This commit is contained in:
Dong-hee Na 2022-08-01 22:15:07 +09:00 committed by GitHub
parent 347c783673
commit fb75d015f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 10 deletions

View File

@ -1,2 +1,2 @@
Reduce allocation size of :class:`list` from :meth:`str.split` Reduce allocation size of :class:`list` from :meth:`str.split`
and :meth:`str.rsplit`. Patch by Dong-hee Na. and :meth:`str.rsplit`. Patch by Dong-hee Na and Inada Naoki.

View File

@ -9698,11 +9698,11 @@ split(PyObject *self,
PyObject* out; PyObject* out;
len1 = PyUnicode_GET_LENGTH(self); len1 = PyUnicode_GET_LENGTH(self);
kind1 = PyUnicode_KIND(self); kind1 = PyUnicode_KIND(self);
if (maxcount < 0) {
maxcount = len1;
}
if (substring == NULL) if (substring == NULL) {
if (maxcount < 0) {
maxcount = (len1 - 1) / 2 + 1;
}
switch (kind1) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self)) if (PyUnicode_IS_ASCII(self))
@ -9728,9 +9728,16 @@ split(PyObject *self,
default: default:
Py_UNREACHABLE(); Py_UNREACHABLE();
} }
}
kind2 = PyUnicode_KIND(substring); kind2 = PyUnicode_KIND(substring);
len2 = PyUnicode_GET_LENGTH(substring); len2 = PyUnicode_GET_LENGTH(substring);
if (maxcount < 0) {
// if len2 == 0, it will raise ValueError.
maxcount = len2 == 0 ? 0 : (len1 / len2) + 1;
// handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1
maxcount = maxcount < 0 ? len1 : maxcount;
}
if (kind1 < kind2 || len1 < len2) { if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1); out = PyList_New(1);
if (out == NULL) if (out == NULL)
@ -9785,11 +9792,11 @@ rsplit(PyObject *self,
len1 = PyUnicode_GET_LENGTH(self); len1 = PyUnicode_GET_LENGTH(self);
kind1 = PyUnicode_KIND(self); kind1 = PyUnicode_KIND(self);
if (maxcount < 0) {
maxcount = len1;
}
if (substring == NULL) if (substring == NULL) {
if (maxcount < 0) {
maxcount = (len1 - 1) / 2 + 1;
}
switch (kind1) { switch (kind1) {
case PyUnicode_1BYTE_KIND: case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(self)) if (PyUnicode_IS_ASCII(self))
@ -9815,9 +9822,15 @@ rsplit(PyObject *self,
default: default:
Py_UNREACHABLE(); Py_UNREACHABLE();
} }
}
kind2 = PyUnicode_KIND(substring); kind2 = PyUnicode_KIND(substring);
len2 = PyUnicode_GET_LENGTH(substring); len2 = PyUnicode_GET_LENGTH(substring);
if (maxcount < 0) {
// if len2 == 0, it will raise ValueError.
maxcount = len2 == 0 ? 0 : (len1 / len2) + 1;
// handle expected overflow case: (Py_SSIZE_T_MAX / 1) + 1
maxcount = maxcount < 0 ? len1 : maxcount;
}
if (kind1 < kind2 || len1 < len2) { if (kind1 < kind2 || len1 < len2) {
out = PyList_New(1); out = PyList_New(1);
if (out == NULL) if (out == NULL)