From cda6b6d60d96e6f755da92deb5e4066839095791 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Sun, 26 Feb 2012 09:39:55 +0200 Subject: [PATCH] #14081: The sep and maxsplit parameter to str.split, bytes.split, and bytearray.split may now be passed as keyword arguments. --- Doc/library/stdtypes.rst | 4 ++-- Lib/test/string_tests.py | 24 +++++++++++++++++++++++- Lib/test/test_bytes.py | 16 ++++++++++++++++ Misc/NEWS | 3 +++ Objects/bytearrayobject.c | 20 ++++++++++++-------- Objects/bytesobject.c | 20 ++++++++++++-------- Objects/unicodeobject.c | 20 ++++++++++++-------- 7 files changed, 80 insertions(+), 27 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 183b2f78fc1..be065958d49 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1301,7 +1301,7 @@ functions based on regular expressions. two empty strings, followed by the string itself. -.. method:: str.rsplit([sep[, maxsplit]]) +.. method:: str.rsplit(sep=None, maxsplit=-1) Return a list of the words in the string, using *sep* as the delimiter string. If *maxsplit* is given, at most *maxsplit* splits are done, the *rightmost* @@ -1323,7 +1323,7 @@ functions based on regular expressions. 'mississ' -.. method:: str.split([sep[, maxsplit]]) +.. method:: str.split(sep=None, maxsplit=-1) Return a list of the words in the string, using *sep* as the delimiter string. If *maxsplit* is given, at most *maxsplit* splits are done (thus, diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index feeb4ce52d5..b7246ebf39c 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -56,7 +56,7 @@ class BaseTest(unittest.TestCase): result = self.fixtype(result) obj = self.fixtype(obj) args = self.fixtype(args) - kwargs = self.fixtype(kwargs) + kwargs = {k: self.fixtype(v) for k,v in kwargs.items()} realresult = getattr(obj, methodname)(*args, **kwargs) self.assertEqual( result, @@ -389,6 +389,17 @@ class BaseTest(unittest.TestCase): self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4], 'split', 'BLAH', 18) + # with keyword args + self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', sep='|') + self.checkequal(['a', 'b|c|d'], + 'a|b|c|d', 'split', '|', maxsplit=1) + self.checkequal(['a', 'b|c|d'], + 'a|b|c|d', 'split', sep='|', maxsplit=1) + self.checkequal(['a', 'b|c|d'], + 'a|b|c|d', 'split', maxsplit=1, sep='|') + self.checkequal(['a', 'b c d'], + 'a b c d', 'split', maxsplit=1) + # argument type self.checkraises(TypeError, 'hello', 'split', 42, 42, 42) @@ -446,6 +457,17 @@ class BaseTest(unittest.TestCase): self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 18) + # with keyword args + self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|') + self.checkequal(['a|b|c', 'd'], + 'a|b|c|d', 'rsplit', '|', maxsplit=1) + self.checkequal(['a|b|c', 'd'], + 'a|b|c|d', 'rsplit', sep='|', maxsplit=1) + self.checkequal(['a|b|c', 'd'], + 'a|b|c|d', 'rsplit', maxsplit=1, sep='|') + self.checkequal(['a b c', 'd'], + 'a b c d', 'rsplit', maxsplit=1) + # argument type self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index bfb88de3e69..203fc5c49df 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -435,6 +435,14 @@ class BaseBytesTest(unittest.TestCase): self.assertEqual(b.split(b'i'), [b'm', b'ss', b'ss', b'pp', b'']) self.assertEqual(b.split(b'ss'), [b'mi', b'i', b'ippi']) self.assertEqual(b.split(b'w'), [b]) + # with keyword args + b = self.type2test(b'a|b|c|d') + self.assertEqual(b.split(sep=b'|'), [b'a', b'b', b'c', b'd']) + self.assertEqual(b.split(b'|', maxsplit=1), [b'a', b'b|c|d']) + self.assertEqual(b.split(sep=b'|', maxsplit=1), [b'a', b'b|c|d']) + self.assertEqual(b.split(maxsplit=1, sep=b'|'), [b'a', b'b|c|d']) + b = self.type2test(b'a b c d') + self.assertEqual(b.split(maxsplit=1), [b'a', b'b c d']) def test_split_whitespace(self): for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf', @@ -463,6 +471,14 @@ class BaseBytesTest(unittest.TestCase): self.assertEqual(b.rsplit(b'i'), [b'm', b'ss', b'ss', b'pp', b'']) self.assertEqual(b.rsplit(b'ss'), [b'mi', b'i', b'ippi']) self.assertEqual(b.rsplit(b'w'), [b]) + # with keyword args + b = self.type2test(b'a|b|c|d') + self.assertEqual(b.rsplit(sep=b'|'), [b'a', b'b', b'c', b'd']) + self.assertEqual(b.rsplit(b'|', maxsplit=1), [b'a|b|c', b'd']) + self.assertEqual(b.rsplit(sep=b'|', maxsplit=1), [b'a|b|c', b'd']) + self.assertEqual(b.rsplit(maxsplit=1, sep=b'|'), [b'a|b|c', b'd']) + b = self.type2test(b'a b c d') + self.assertEqual(b.rsplit(maxsplit=1), [b'a b c', b'd']) def test_rsplit_whitespace(self): for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf', diff --git a/Misc/NEWS b/Misc/NEWS index 35652037443..bc9fe5d5dbf 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -223,6 +223,9 @@ Core and Builtins - PEP 393: flexible string representation. Thanks to Torsten Becker for the initial implementation, and Victor Stinner for various bug fixes. +- Issue #14081: The 'sep' and 'maxsplit' parameter to str.split, bytes.split, + and bytearray.split may now be passed as keyword arguments. + - Issue #13012: The 'keepends' parameter to str.splitlines may now be passed as a keyword argument: "my_string.splitlines(keepends=True)". The same change also applies to bytes.splitlines and bytearray.splitlines. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 25718582ac4..1b88f121a3f 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2039,7 +2039,7 @@ bytearray_replace(PyByteArrayObject *self, PyObject *args) } PyDoc_STRVAR(split__doc__, -"B.split([sep[, maxsplit]]) -> list of bytearrays\n\ +"B.split(sep=None, maxsplit=-1) -> list of bytearrays\n\ \n\ Return a list of the sections in B, using sep as the delimiter.\n\ If sep is not given, B is split on ASCII whitespace characters\n\ @@ -2047,15 +2047,17 @@ If sep is not given, B is split on ASCII whitespace characters\n\ If maxsplit is given, at most maxsplit splits are done."); static PyObject * -bytearray_split(PyByteArrayObject *self, PyObject *args) +bytearray_split(PyByteArrayObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyByteArray_GET_SIZE(self), n; Py_ssize_t maxsplit = -1; const char *s = PyByteArray_AS_STRING(self), *sub; PyObject *list, *subobj = Py_None; Py_buffer vsub; - if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", + kwlist, &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; @@ -2131,7 +2133,7 @@ bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj) } PyDoc_STRVAR(rsplit__doc__, -"B.rsplit(sep[, maxsplit]) -> list of bytearrays\n\ +"B.rsplit(sep=None, maxsplit=-1) -> list of bytearrays\n\ \n\ Return a list of the sections in B, using sep as the delimiter,\n\ starting at the end of B and working to the front.\n\ @@ -2140,15 +2142,17 @@ If sep is not given, B is split on ASCII whitespace characters\n\ If maxsplit is given, at most maxsplit splits are done."); static PyObject * -bytearray_rsplit(PyByteArrayObject *self, PyObject *args) +bytearray_rsplit(PyByteArrayObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyByteArray_GET_SIZE(self), n; Py_ssize_t maxsplit = -1; const char *s = PyByteArray_AS_STRING(self), *sub; PyObject *list, *subobj = Py_None; Py_buffer vsub; - if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", + kwlist, &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; @@ -2869,9 +2873,9 @@ bytearray_methods[] = { {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__}, {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__}, - {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__}, + {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__}, - {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__}, + {"split", (PyCFunction)bytearray_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS , diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a98cdcff71e..1d2fed7030a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -972,7 +972,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; #define STRIPNAME(i) (stripformat[i]+3) PyDoc_STRVAR(split__doc__, -"B.split([sep[, maxsplit]]) -> list of bytes\n\ +"B.split(sep=None, maxsplit=-1) -> list of bytes\n\ \n\ Return a list of the sections in B, using sep as the delimiter.\n\ If sep is not specified or is None, B is split on ASCII whitespace\n\ @@ -980,15 +980,17 @@ characters (space, tab, return, newline, formfeed, vertical tab).\n\ If maxsplit is given, at most maxsplit splits are done."); static PyObject * -bytes_split(PyBytesObject *self, PyObject *args) +bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; PyObject *list, *subobj = Py_None; - if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", + kwlist, &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; @@ -1060,7 +1062,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj) } PyDoc_STRVAR(rsplit__doc__, -"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\ +"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\ \n\ Return a list of the sections in B, using sep as the delimiter,\n\ starting at the end of B and working to the front.\n\ @@ -1070,15 +1072,17 @@ If maxsplit is given, at most maxsplit splits are done."); static PyObject * -bytes_rsplit(PyBytesObject *self, PyObject *args) +bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; Py_ssize_t len = PyBytes_GET_SIZE(self), n; Py_ssize_t maxsplit = -1; const char *s = PyBytes_AS_STRING(self), *sub; Py_buffer vsub; PyObject *list, *subobj = Py_None; - if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", + kwlist, &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; @@ -2470,9 +2474,9 @@ bytes_methods[] = { {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, {"rpartition", (PyCFunction)bytes_rpartition, METH_O, rpartition__doc__}, - {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__}, + {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__}, - {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__}, + {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7cc6b1bd9df..a4dcdf61c21 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12499,7 +12499,7 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) } PyDoc_STRVAR(split__doc__, - "S.split([sep[, maxsplit]]) -> list of strings\n\ + "S.split(sep=None, maxsplit=-1) -> list of strings\n\ \n\ Return a list of the words in S, using sep as the\n\ delimiter string. If maxsplit is given, at most maxsplit\n\ @@ -12508,12 +12508,14 @@ whitespace string is a separator and empty strings are\n\ removed from the result."); static PyObject* -unicode_split(PyObject *self, PyObject *args) +unicode_split(PyObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; PyObject *substring = Py_None; Py_ssize_t maxcount = -1; - if (!PyArg_ParseTuple(args, "|On:split", &substring, &maxcount)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split", + kwlist, &substring, &maxcount)) return NULL; if (substring == Py_None) @@ -12722,7 +12724,7 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) } PyDoc_STRVAR(rsplit__doc__, - "S.rsplit([sep[, maxsplit]]) -> list of strings\n\ + "S.rsplit(sep=None, maxsplit=-1) -> list of strings\n\ \n\ Return a list of the words in S, using sep as the\n\ delimiter string, starting at the end of the string and\n\ @@ -12731,12 +12733,14 @@ splits are done. If sep is not specified, any whitespace string\n\ is a separator."); static PyObject* -unicode_rsplit(PyObject *self, PyObject *args) +unicode_rsplit(PyObject *self, PyObject *args, PyObject *kwds) { + static char *kwlist[] = {"sep", "maxsplit", 0}; PyObject *substring = Py_None; Py_ssize_t maxcount = -1; - if (!PyArg_ParseTuple(args, "|On:rsplit", &substring, &maxcount)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit", + kwlist, &substring, &maxcount)) return NULL; if (substring == Py_None) @@ -13167,8 +13171,8 @@ static PyMethodDef unicode_methods[] = { {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, - {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, - {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, + {"split", (PyCFunction) unicode_split, METH_VARARGS | METH_KEYWORDS, split__doc__}, + {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__}, {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, {"casefold", (PyCFunction) unicode_casefold, METH_NOARGS, casefold__doc__},