diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst new file mode 100644 index 00000000000..52568dbedd1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst @@ -0,0 +1,2 @@ +Reduce allocation size of :class:`list` from :meth:`str.split` +and :meth:`str.rsplit`. Patch by Dong-hee Na. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ad16ada16fe..355d74fe3bb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9696,40 +9696,40 @@ split(PyObject *self, const void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; - - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; + len1 = PyUnicode_GET_LENGTH(self); + kind1 = PyUnicode_KIND(self); + if (maxcount < 0) { + maxcount = len1; + } if (substring == NULL) - switch (PyUnicode_KIND(self)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) return asciilib_split_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); else return ucs1lib_split_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_2BYTE_KIND: return ucs2lib_split_whitespace( self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_4BYTE_KIND: return ucs4lib_split_whitespace( self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); default: Py_UNREACHABLE(); } - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); if (kind1 < kind2 || len1 < len2) { out = PyList_New(1); @@ -9783,39 +9783,40 @@ rsplit(PyObject *self, Py_ssize_t len1, len2; PyObject* out; - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; + len1 = PyUnicode_GET_LENGTH(self); + kind1 = PyUnicode_KIND(self); + if (maxcount < 0) { + maxcount = len1; + } if (substring == NULL) - switch (PyUnicode_KIND(self)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) return asciilib_rsplit_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); else return ucs1lib_rsplit_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_2BYTE_KIND: return ucs2lib_rsplit_whitespace( self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_4BYTE_KIND: return ucs4lib_rsplit_whitespace( self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); default: Py_UNREACHABLE(); } - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); if (kind1 < kind2 || len1 < len2) { out = PyList_New(1);