From 50b2261bdac98303087287b24eef96abd45a82f9 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 31 Jul 2022 12:14:53 +0900 Subject: [PATCH] gh-91146: Reduce allocation size of list from str.split()/rsplit() (gh-95473) --- ...2-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst | 2 + Objects/unicodeobject.c | 39 ++++++++++--------- 2 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst new file mode 100644 index 00000000000..52568dbedd1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst @@ -0,0 +1,2 @@ +Reduce allocation size of :class:`list` from :meth:`str.split` +and :meth:`str.rsplit`. Patch by Dong-hee Na. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ad16ada16fe..355d74fe3bb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9696,40 +9696,40 @@ split(PyObject *self, const void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; - - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; + len1 = PyUnicode_GET_LENGTH(self); + kind1 = PyUnicode_KIND(self); + if (maxcount < 0) { + maxcount = len1; + } if (substring == NULL) - switch (PyUnicode_KIND(self)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) return asciilib_split_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); else return ucs1lib_split_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_2BYTE_KIND: return ucs2lib_split_whitespace( self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_4BYTE_KIND: return ucs4lib_split_whitespace( self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); default: Py_UNREACHABLE(); } - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); if (kind1 < kind2 || len1 < len2) { out = PyList_New(1); @@ -9783,39 +9783,40 @@ rsplit(PyObject *self, Py_ssize_t len1, len2; PyObject* out; - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; + len1 = PyUnicode_GET_LENGTH(self); + kind1 = PyUnicode_KIND(self); + if (maxcount < 0) { + maxcount = len1; + } if (substring == NULL) - switch (PyUnicode_KIND(self)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) return asciilib_rsplit_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); else return ucs1lib_rsplit_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_2BYTE_KIND: return ucs2lib_rsplit_whitespace( self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_4BYTE_KIND: return ucs4lib_rsplit_whitespace( self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); default: Py_UNREACHABLE(); } - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); if (kind1 < kind2 || len1 < len2) { out = PyList_New(1);