From a81849b0315277bb3937271174aaaa5059c0b445 Mon Sep 17 00:00:00 2001 From: sweeneyde <36520290+sweeneyde@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:05:48 -0400 Subject: [PATCH] bpo-39939: Add str.removeprefix and str.removesuffix (GH-18939) Added str.removeprefix and str.removesuffix methods and corresponding bytes, bytearray, and collections.UserString methods to remove affixes from a string if present. See PEP 616 for a full description. --- Doc/library/stdtypes.rst | 104 +++++++++++++++++- Doc/whatsnew/3.9.rst | 10 ++ Lib/collections/__init__.py | 8 ++ Lib/test/string_tests.py | 36 ++++++ Lib/test/test_doctest.py | 2 +- Misc/ACKS | 1 + .../2020-03-11-19-17-36.bpo-39939.NwCnAM.rst | 5 + Objects/bytearrayobject.c | 67 +++++++++++ Objects/bytesobject.c | 77 +++++++++++++ Objects/clinic/bytearrayobject.c.h | 82 +++++++++++++- Objects/clinic/bytesobject.c.h | 81 +++++++++++++- Objects/clinic/unicodeobject.c.h | 73 +++++++++++- Objects/unicodeobject.c | 57 ++++++++++ 13 files changed, 597 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-03-11-19-17-36.bpo-39939.NwCnAM.rst diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index a1364d472da..4e7729c83f4 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1549,6 +1549,33 @@ expression support in the :mod:`re` module). interpreted as in slice notation. +.. method:: str.removeprefix(prefix, /) + + If the string starts with the *prefix* string, return + ``string[len(prefix):]``. Otherwise, return a copy of the original + string:: + + >>> 'TestHook'.removeprefix('Test') + 'Hook' + >>> 'BaseTestCase'.removeprefix('Test') + 'BaseTestCase' + + .. versionadded:: 3.9 + +.. method:: str.removesuffix(suffix, /) + + If the string ends with the *suffix* string and that *suffix* is not empty, + return ``string[:-len(suffix)]``. Otherwise, return a copy of the + original string:: + + >>> 'MiscTests'.removesuffix('Tests') + 'Misc' + >>> 'TmpDirMixin'.removesuffix('Tests') + 'TmpDirMixin' + + .. versionadded:: 3.9 + + .. method:: str.encode(encoding="utf-8", errors="strict") Return an encoded version of the string as a bytes object. Default encoding @@ -1831,6 +1858,14 @@ expression support in the :mod:`re` module). >>> 'www.example.com'.lstrip('cmowz.') 'example.com' + See :meth:`str.removeprefix` for a method that will remove a single prefix + string rather than all of a set of characters. For example:: + + >>> 'Arthur: three!'.lstrip('Arthur: ') + 'ee!' + >>> 'Arthur: three!'.removeprefix('Arthur: ') + 'three!' + .. staticmethod:: str.maketrans(x[, y[, z]]) @@ -1911,6 +1946,13 @@ expression support in the :mod:`re` module). >>> 'mississippi'.rstrip('ipz') 'mississ' + See :meth:`str.removesuffix` for a method that will remove a single suffix + string rather than all of a set of characters. For example:: + + >>> 'Monty Python'.rstrip(' Python') + 'M' + >>> 'Monty Python'.removesuffix(' Python') + 'Monty' .. method:: str.split(sep=None, maxsplit=-1) @@ -2591,6 +2633,50 @@ arbitrary binary data. Also accept an integer in the range 0 to 255 as the subsequence. +.. method:: bytes.removeprefix(prefix, /) + bytearray.removeprefix(prefix, /) + + If the binary data starts with the *prefix* string, return + ``bytes[len(prefix):]``. Otherwise, return a copy of the original + binary data:: + + >>> b'TestHook'.removeprefix(b'Test') + b'Hook' + >>> b'BaseTestCase'.removeprefix(b'Test') + b'BaseTestCase' + + The *prefix* may be any :term:`bytes-like object`. + + .. note:: + + The bytearray version of this method does *not* operate in place - + it always produces a new object, even if no changes were made. + + .. versionadded:: 3.9 + + +.. method:: bytes.removesuffix(suffix, /) + bytearray.removesuffix(suffix, /) + + If the binary data ends with the *suffix* string and that *suffix* is + not empty, return ``bytes[:-len(suffix)]``. Otherwise, return a copy of + the original binary data:: + + >>> b'MiscTests'.removesuffix(b'Tests') + b'Misc' + >>> b'TmpDirMixin'.removesuffix(b'Tests') + b'TmpDirMixin' + + The *suffix* may be any :term:`bytes-like object`. + + .. note:: + + The bytearray version of this method does *not* operate in place - + it always produces a new object, even if no changes were made. + + .. versionadded:: 3.9 + + .. method:: bytes.decode(encoding="utf-8", errors="strict") bytearray.decode(encoding="utf-8", errors="strict") @@ -2841,7 +2927,14 @@ produce new objects. b'example.com' The binary sequence of byte values to remove may be any - :term:`bytes-like object`. + :term:`bytes-like object`. See :meth:`~bytes.removeprefix` for a method + that will remove a single prefix string rather than all of a set of + characters. For example:: + + >>> b'Arthur: three!'.lstrip(b'Arthur: ') + b'ee!' + >>> b'Arthur: three!'.removeprefix(b'Arthur: ') + b'three!' .. note:: @@ -2890,7 +2983,14 @@ produce new objects. b'mississ' The binary sequence of byte values to remove may be any - :term:`bytes-like object`. + :term:`bytes-like object`. See :meth:`~bytes.removesuffix` for a method + that will remove a single suffix string rather than all of a set of + characters. For example:: + + >>> b'Monty Python'.rstrip(b' Python') + b'M' + >>> b'Monty Python'.removesuffix(b' Python') + b'Monty' .. note:: diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 8064785178c..ee851706055 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -105,6 +105,16 @@ Merge (``|``) and update (``|=``) operators have been added to the built-in :class:`dict` class. See :pep:`584` for a full description. (Contributed by Brandt Bucher in :issue:`36144`.) +PEP 616: New removeprefix() and removesuffix() string methods +------------------------------------------------------------- + +:meth:`str.removeprefix(prefix)` and +:meth:`str.removesuffix(suffix)` have been added +to easily remove an unneeded prefix or a suffix from a string. Corresponding +``bytes``, ``bytearray``, and ``collections.UserString`` methods have also been +added. See :pep:`616` for a full description. (Contributed by Dennis Sweeney in +:issue:`18939`.) + Other Language Changes ====================== diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index e19840650dd..bb9a605a994 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1239,6 +1239,14 @@ class UserString(_collections_abc.Sequence): if isinstance(sub, UserString): sub = sub.data return self.data.count(sub, start, end) + def removeprefix(self, prefix, /): + if isinstance(prefix, UserString): + prefix = prefix.data + return self.__class__(self.data.removeprefix(prefix)) + def removesuffix(self, suffix, /): + if isinstance(suffix, UserString): + suffix = suffix.data + return self.__class__(self.data.removesuffix(suffix)) def encode(self, encoding='utf-8', errors='strict'): encoding = 'utf-8' if encoding is None else encoding errors = 'strict' if errors is None else errors diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 948e2a3c3c5..527f505c016 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -682,6 +682,42 @@ class BaseTest: self.checkraises(OverflowError, A2_16, "replace", "A", A2_16) self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16) + def test_removeprefix(self): + self.checkequal('am', 'spam', 'removeprefix', 'sp') + self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam') + self.checkequal('spam', 'spam', 'removeprefix', 'python') + self.checkequal('spam', 'spam', 'removeprefix', 'spider') + self.checkequal('spam', 'spam', 'removeprefix', 'spam and eggs') + + self.checkequal('', '', 'removeprefix', '') + self.checkequal('', '', 'removeprefix', 'abcde') + self.checkequal('abcde', 'abcde', 'removeprefix', '') + self.checkequal('', 'abcde', 'removeprefix', 'abcde') + + self.checkraises(TypeError, 'hello', 'removeprefix') + self.checkraises(TypeError, 'hello', 'removeprefix', 42) + self.checkraises(TypeError, 'hello', 'removeprefix', 42, 'h') + self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42) + self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l")) + + def test_removesuffix(self): + self.checkequal('sp', 'spam', 'removesuffix', 'am') + self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam') + self.checkequal('spam', 'spam', 'removesuffix', 'python') + self.checkequal('spam', 'spam', 'removesuffix', 'blam') + self.checkequal('spam', 'spam', 'removesuffix', 'eggs and spam') + + self.checkequal('', '', 'removesuffix', '') + self.checkequal('', '', 'removesuffix', 'abcde') + self.checkequal('abcde', 'abcde', 'removesuffix', '') + self.checkequal('', 'abcde', 'removesuffix', 'abcde') + + self.checkraises(TypeError, 'hello', 'removesuffix') + self.checkraises(TypeError, 'hello', 'removesuffix', 42) + self.checkraises(TypeError, 'hello', 'removesuffix', 42, 'h') + self.checkraises(TypeError, 'hello', 'removesuffix', 'h', 42) + self.checkraises(TypeError, 'hello', 'removesuffix', ("lo", "l")) + def test_capitalize(self): self.checkequal(' hello ', ' hello ', 'capitalize') self.checkequal('Hello ', 'Hello ','capitalize') diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 71426277d2d..16196fed392 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -665,7 +665,7 @@ plain ol' Python and is guaranteed to be available. >>> import builtins >>> tests = doctest.DocTestFinder().find(builtins) - >>> 810 < len(tests) < 830 # approximate number of objects with docstrings + >>> 816 < len(tests) < 836 # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests diff --git a/Misc/ACKS b/Misc/ACKS index 8cb95dc0cf8..69865febeea 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1660,6 +1660,7 @@ Hisao Suzuki Kalle Svensson Andrew Svetlov Paul Swartz +Dennis Sweeney Al Sweigart Sviatoslav Sydorenko Thenault Sylvain diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-03-11-19-17-36.bpo-39939.NwCnAM.rst b/Misc/NEWS.d/next/Core and Builtins/2020-03-11-19-17-36.bpo-39939.NwCnAM.rst new file mode 100644 index 00000000000..bf094f1ce9b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-03-11-19-17-36.bpo-39939.NwCnAM.rst @@ -0,0 +1,5 @@ +Added str.removeprefix and str.removesuffix methods and corresponding +bytes, bytearray, and collections.UserString methods to remove affixes +from a string if present. +See :pep:`616` for a full description. +Patch by Dennis Sweeney. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index b271e57abb6..5a803be6277 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1181,6 +1181,71 @@ bytearray_endswith(PyByteArrayObject *self, PyObject *args) return _Py_bytes_endswith(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args); } +/*[clinic input] +bytearray.removeprefix as bytearray_removeprefix + + prefix: Py_buffer + / + +Return a bytearray with the given prefix string removed if present. + +If the bytearray starts with the prefix string, return +bytearray[len(prefix):]. Otherwise, return a copy of the original +bytearray. +[clinic start generated code]*/ + +static PyObject * +bytearray_removeprefix_impl(PyByteArrayObject *self, Py_buffer *prefix) +/*[clinic end generated code: output=6cabc585e7f502e0 input=968aada38aedd262]*/ +{ + const char *self_start = PyByteArray_AS_STRING(self); + Py_ssize_t self_len = PyByteArray_GET_SIZE(self); + const char *prefix_start = prefix->buf; + Py_ssize_t prefix_len = prefix->len; + + if (self_len >= prefix_len + && memcmp(self_start, prefix_start, prefix_len) == 0) + { + return PyByteArray_FromStringAndSize(self_start + prefix_len, + self_len - prefix_len); + } + + return PyByteArray_FromStringAndSize(self_start, self_len); +} + +/*[clinic input] +bytearray.removesuffix as bytearray_removesuffix + + suffix: Py_buffer + / + +Return a bytearray with the given suffix string removed if present. + +If the bytearray ends with the suffix string and that suffix is not +empty, return bytearray[:-len(suffix)]. Otherwise, return a copy of +the original bytearray. +[clinic start generated code]*/ + +static PyObject * +bytearray_removesuffix_impl(PyByteArrayObject *self, Py_buffer *suffix) +/*[clinic end generated code: output=2bc8cfb79de793d3 input=c1827e810b2f6b99]*/ +{ + const char *self_start = PyByteArray_AS_STRING(self); + Py_ssize_t self_len = PyByteArray_GET_SIZE(self); + const char *suffix_start = suffix->buf; + Py_ssize_t suffix_len = suffix->len; + + if (self_len >= suffix_len + && memcmp(self_start + self_len - suffix_len, + suffix_start, suffix_len) == 0) + { + return PyByteArray_FromStringAndSize(self_start, + self_len - suffix_len); + } + + return PyByteArray_FromStringAndSize(self_start, self_len); +} + /*[clinic input] bytearray.translate @@ -2203,6 +2268,8 @@ bytearray_methods[] = { BYTEARRAY_POP_METHODDEF BYTEARRAY_REMOVE_METHODDEF BYTEARRAY_REPLACE_METHODDEF + BYTEARRAY_REMOVEPREFIX_METHODDEF + BYTEARRAY_REMOVESUFFIX_METHODDEF BYTEARRAY_REVERSE_METHODDEF {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, _Py_rfind__doc__}, {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, _Py_rindex__doc__}, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 06ead2b58f9..25d9814dd6d 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2182,6 +2182,81 @@ bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new, /** End DALKE **/ +/*[clinic input] +bytes.removeprefix as bytes_removeprefix + + prefix: Py_buffer + / + +Return a bytes object with the given prefix string removed if present. + +If the bytes starts with the prefix string, return bytes[len(prefix):]. +Otherwise, return a copy of the original bytes. +[clinic start generated code]*/ + +static PyObject * +bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix) +/*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/ +{ + const char *self_start = PyBytes_AS_STRING(self); + Py_ssize_t self_len = PyBytes_GET_SIZE(self); + const char *prefix_start = prefix->buf; + Py_ssize_t prefix_len = prefix->len; + + if (self_len >= prefix_len + && prefix_len > 0 + && memcmp(self_start, prefix_start, prefix_len) == 0) + { + return PyBytes_FromStringAndSize(self_start + prefix_len, + self_len - prefix_len); + } + + if (PyBytes_CheckExact(self)) { + Py_INCREF(self); + return (PyObject *)self; + } + + return PyBytes_FromStringAndSize(self_start, self_len); +} + +/*[clinic input] +bytes.removesuffix as bytes_removesuffix + + suffix: Py_buffer + / + +Return a bytes object with the given suffix string removed if present. + +If the bytes ends with the suffix string and that suffix is not empty, +return bytes[:-len(prefix)]. Otherwise, return a copy of the original +bytes. +[clinic start generated code]*/ + +static PyObject * +bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix) +/*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/ +{ + const char *self_start = PyBytes_AS_STRING(self); + Py_ssize_t self_len = PyBytes_GET_SIZE(self); + const char *suffix_start = suffix->buf; + Py_ssize_t suffix_len = suffix->len; + + if (self_len >= suffix_len + && suffix_len > 0 + && memcmp(self_start + self_len - suffix_len, + suffix_start, suffix_len) == 0) + { + return PyBytes_FromStringAndSize(self_start, + self_len - suffix_len); + } + + if (PyBytes_CheckExact(self)) { + Py_INCREF(self); + return (PyObject *)self; + } + + return PyBytes_FromStringAndSize(self_start, self_len); +} static PyObject * bytes_startswith(PyBytesObject *self, PyObject *args) @@ -2421,6 +2496,8 @@ bytes_methods[] = { BYTES_MAKETRANS_METHODDEF BYTES_PARTITION_METHODDEF BYTES_REPLACE_METHODDEF + BYTES_REMOVEPREFIX_METHODDEF + BYTES_REMOVESUFFIX_METHODDEF {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__}, {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__}, STRINGLIB_RJUST_METHODDEF diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 05577077a5f..35ba1ff3d57 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -38,6 +38,86 @@ bytearray_copy(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) return bytearray_copy_impl(self); } +PyDoc_STRVAR(bytearray_removeprefix__doc__, +"removeprefix($self, prefix, /)\n" +"--\n" +"\n" +"Return a bytearray with the given prefix string removed if present.\n" +"\n" +"If the bytearray starts with the prefix string, return\n" +"bytearray[len(prefix):]. Otherwise, return a copy of the original\n" +"bytearray."); + +#define BYTEARRAY_REMOVEPREFIX_METHODDEF \ + {"removeprefix", (PyCFunction)bytearray_removeprefix, METH_O, bytearray_removeprefix__doc__}, + +static PyObject * +bytearray_removeprefix_impl(PyByteArrayObject *self, Py_buffer *prefix); + +static PyObject * +bytearray_removeprefix(PyByteArrayObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_buffer prefix = {NULL, NULL}; + + if (PyObject_GetBuffer(arg, &prefix, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&prefix, 'C')) { + _PyArg_BadArgument("removeprefix", "argument", "contiguous buffer", arg); + goto exit; + } + return_value = bytearray_removeprefix_impl(self, &prefix); + +exit: + /* Cleanup for prefix */ + if (prefix.obj) { + PyBuffer_Release(&prefix); + } + + return return_value; +} + +PyDoc_STRVAR(bytearray_removesuffix__doc__, +"removesuffix($self, suffix, /)\n" +"--\n" +"\n" +"Return a bytearray with the given suffix string removed if present.\n" +"\n" +"If the bytearray ends with the suffix string and that suffix is not\n" +"empty, return bytearray[:-len(suffix)]. Otherwise, return a copy of\n" +"the original bytearray."); + +#define BYTEARRAY_REMOVESUFFIX_METHODDEF \ + {"removesuffix", (PyCFunction)bytearray_removesuffix, METH_O, bytearray_removesuffix__doc__}, + +static PyObject * +bytearray_removesuffix_impl(PyByteArrayObject *self, Py_buffer *suffix); + +static PyObject * +bytearray_removesuffix(PyByteArrayObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_buffer suffix = {NULL, NULL}; + + if (PyObject_GetBuffer(arg, &suffix, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&suffix, 'C')) { + _PyArg_BadArgument("removesuffix", "argument", "contiguous buffer", arg); + goto exit; + } + return_value = bytearray_removesuffix_impl(self, &suffix); + +exit: + /* Cleanup for suffix */ + if (suffix.obj) { + PyBuffer_Release(&suffix); + } + + return return_value; +} + PyDoc_STRVAR(bytearray_translate__doc__, "translate($self, table, /, delete=b\'\')\n" "--\n" @@ -1011,4 +1091,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=508dce79cf2dffcc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b2919f76709e48dc input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 22024ab155c..063a3777b49 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -526,6 +526,85 @@ exit: return return_value; } +PyDoc_STRVAR(bytes_removeprefix__doc__, +"removeprefix($self, prefix, /)\n" +"--\n" +"\n" +"Return a bytes object with the given prefix string removed if present.\n" +"\n" +"If the bytes starts with the prefix string, return bytes[len(prefix):].\n" +"Otherwise, return a copy of the original bytes."); + +#define BYTES_REMOVEPREFIX_METHODDEF \ + {"removeprefix", (PyCFunction)bytes_removeprefix, METH_O, bytes_removeprefix__doc__}, + +static PyObject * +bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix); + +static PyObject * +bytes_removeprefix(PyBytesObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_buffer prefix = {NULL, NULL}; + + if (PyObject_GetBuffer(arg, &prefix, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&prefix, 'C')) { + _PyArg_BadArgument("removeprefix", "argument", "contiguous buffer", arg); + goto exit; + } + return_value = bytes_removeprefix_impl(self, &prefix); + +exit: + /* Cleanup for prefix */ + if (prefix.obj) { + PyBuffer_Release(&prefix); + } + + return return_value; +} + +PyDoc_STRVAR(bytes_removesuffix__doc__, +"removesuffix($self, suffix, /)\n" +"--\n" +"\n" +"Return a bytes object with the given suffix string removed if present.\n" +"\n" +"If the bytes ends with the suffix string and that suffix is not empty,\n" +"return bytes[:-len(prefix)]. Otherwise, return a copy of the original\n" +"bytes."); + +#define BYTES_REMOVESUFFIX_METHODDEF \ + {"removesuffix", (PyCFunction)bytes_removesuffix, METH_O, bytes_removesuffix__doc__}, + +static PyObject * +bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix); + +static PyObject * +bytes_removesuffix(PyBytesObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + Py_buffer suffix = {NULL, NULL}; + + if (PyObject_GetBuffer(arg, &suffix, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&suffix, 'C')) { + _PyArg_BadArgument("removesuffix", "argument", "contiguous buffer", arg); + goto exit; + } + return_value = bytes_removesuffix_impl(self, &suffix); + +exit: + /* Cleanup for suffix */ + if (suffix.obj) { + PyBuffer_Release(&suffix); + } + + return return_value; +} + PyDoc_STRVAR(bytes_decode__doc__, "decode($self, /, encoding=\'utf-8\', errors=\'strict\')\n" "--\n" @@ -755,4 +834,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=ca60dfccf8d51e88 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=220388917d7bf751 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 0d134064bab..cf81df4af67 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -754,6 +754,77 @@ exit: return return_value; } +PyDoc_STRVAR(unicode_removeprefix__doc__, +"removeprefix($self, prefix, /)\n" +"--\n" +"\n" +"Return a str with the given prefix string removed if present.\n" +"\n" +"If the string starts with the prefix string, return string[len(prefix):].\n" +"Otherwise, return a copy of the original string."); + +#define UNICODE_REMOVEPREFIX_METHODDEF \ + {"removeprefix", (PyCFunction)unicode_removeprefix, METH_O, unicode_removeprefix__doc__}, + +static PyObject * +unicode_removeprefix_impl(PyObject *self, PyObject *prefix); + +static PyObject * +unicode_removeprefix(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + PyObject *prefix; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("removeprefix", "argument", "str", arg); + goto exit; + } + if (PyUnicode_READY(arg) == -1) { + goto exit; + } + prefix = arg; + return_value = unicode_removeprefix_impl(self, prefix); + +exit: + return return_value; +} + +PyDoc_STRVAR(unicode_removesuffix__doc__, +"removesuffix($self, suffix, /)\n" +"--\n" +"\n" +"Return a str with the given suffix string removed if present.\n" +"\n" +"If the string ends with the suffix string and that suffix is not empty,\n" +"return string[:-len(suffix)]. Otherwise, return a copy of the original\n" +"string."); + +#define UNICODE_REMOVESUFFIX_METHODDEF \ + {"removesuffix", (PyCFunction)unicode_removesuffix, METH_O, unicode_removesuffix__doc__}, + +static PyObject * +unicode_removesuffix_impl(PyObject *self, PyObject *suffix); + +static PyObject * +unicode_removesuffix(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + PyObject *suffix; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("removesuffix", "argument", "str", arg); + goto exit; + } + if (PyUnicode_READY(arg) == -1) { + goto exit; + } + suffix = arg; + return_value = unicode_removesuffix_impl(self, suffix); + +exit: + return return_value; +} + PyDoc_STRVAR(unicode_rjust__doc__, "rjust($self, width, fillchar=\' \', /)\n" "--\n" @@ -1232,4 +1303,4 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return unicode_sizeof_impl(self); } -/*[clinic end generated code: output=e4ed33400979c7e8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b91233f3722643be input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 51775df199d..aba7407533c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12789,6 +12789,61 @@ unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new, return replace(self, old, new, count); } +/*[clinic input] +str.removeprefix as unicode_removeprefix + + prefix: unicode + / + +Return a str with the given prefix string removed if present. + +If the string starts with the prefix string, return string[len(prefix):]. +Otherwise, return a copy of the original string. +[clinic start generated code]*/ + +static PyObject * +unicode_removeprefix_impl(PyObject *self, PyObject *prefix) +/*[clinic end generated code: output=f1e5945e9763bcb9 input=27ec40b99a37eb88]*/ +{ + int match = tailmatch(self, prefix, 0, PY_SSIZE_T_MAX, -1); + if (match == -1) { + return NULL; + } + if (match) { + return PyUnicode_Substring(self, PyUnicode_GET_LENGTH(prefix), + PyUnicode_GET_LENGTH(self)); + } + return unicode_result_unchanged(self); +} + +/*[clinic input] +str.removesuffix as unicode_removesuffix + + suffix: unicode + / + +Return a str with the given suffix string removed if present. + +If the string ends with the suffix string and that suffix is not empty, +return string[:-len(suffix)]. Otherwise, return a copy of the original +string. +[clinic start generated code]*/ + +static PyObject * +unicode_removesuffix_impl(PyObject *self, PyObject *suffix) +/*[clinic end generated code: output=d36629e227636822 input=12cc32561e769be4]*/ +{ + int match = tailmatch(self, suffix, 0, PY_SSIZE_T_MAX, +1); + if (match == -1) { + return NULL; + } + if (match) { + return PyUnicode_Substring(self, 0, PyUnicode_GET_LENGTH(self) + - PyUnicode_GET_LENGTH(suffix)); + } + return unicode_result_unchanged(self); +} + static PyObject * unicode_repr(PyObject *unicode) { @@ -14105,6 +14160,8 @@ static PyMethodDef unicode_methods[] = { UNICODE_UPPER_METHODDEF {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__}, {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, + UNICODE_REMOVEPREFIX_METHODDEF + UNICODE_REMOVESUFFIX_METHODDEF UNICODE_ISASCII_METHODDEF UNICODE_ISLOWER_METHODDEF UNICODE_ISUPPER_METHODDEF