mirror of https://github.com/python/cpython
gh-56166: Deprecate passing confusing positional arguments in re functions (#107778)
Deprecate passing optional arguments maxsplit, count and flags in module-level functions re.split(), re.sub() and re.subn() as positional. They should only be passed by keyword.
This commit is contained in:
parent
fb8fe377c4
commit
882cb79afa
|
@ -898,7 +898,7 @@ Functions
|
|||
['Words', 'words', 'words', '']
|
||||
>>> re.split(r'(\W+)', 'Words, words, words.')
|
||||
['Words', ', ', 'words', ', ', 'words', '.', '']
|
||||
>>> re.split(r'\W+', 'Words, words, words.', 1)
|
||||
>>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
|
||||
['Words', 'words, words.']
|
||||
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
|
||||
['0', '3', '9']
|
||||
|
@ -929,6 +929,11 @@ Functions
|
|||
.. versionchanged:: 3.7
|
||||
Added support of splitting on a pattern that could match an empty string.
|
||||
|
||||
.. deprecated:: 3.13
|
||||
Passing *maxsplit* and *flags* as positional arguments is deprecated.
|
||||
In future Python versions they will be
|
||||
:ref:`keyword-only parameters <keyword-only_parameter>`.
|
||||
|
||||
|
||||
.. function:: findall(pattern, string, flags=0)
|
||||
|
||||
|
@ -1027,8 +1032,6 @@ Functions
|
|||
.. versionchanged:: 3.7
|
||||
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
|
||||
now are errors.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
Empty matches for the pattern are replaced when adjacent to a previous
|
||||
non-empty match.
|
||||
|
||||
|
@ -1037,18 +1040,17 @@ Functions
|
|||
In :class:`bytes` replacement strings, group *name* can only contain bytes
|
||||
in the ASCII range (``b'\x00'``-``b'\x7f'``).
|
||||
|
||||
.. deprecated:: 3.13
|
||||
Passing *count* and *flags* as positional arguments is deprecated.
|
||||
In future Python versions they will be
|
||||
:ref:`keyword-only parameters <keyword-only_parameter>`.
|
||||
|
||||
|
||||
.. function:: subn(pattern, repl, string, count=0, flags=0)
|
||||
|
||||
Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
|
||||
number_of_subs_made)``.
|
||||
|
||||
.. versionchanged:: 3.1
|
||||
Added the optional flags argument.
|
||||
|
||||
.. versionchanged:: 3.5
|
||||
Unmatched groups are replaced with an empty string.
|
||||
|
||||
|
||||
.. function:: escape(pattern)
|
||||
|
||||
|
@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it:
|
|||
.. doctest::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
>>> [re.split(":? ", entry, 3) for entry in entries]
|
||||
>>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
|
||||
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
|
||||
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
|
||||
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
|
||||
|
@ -1669,7 +1671,7 @@ house number from the street name:
|
|||
.. doctest::
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
>>> [re.split(":? ", entry, 4) for entry in entries]
|
||||
>>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
|
||||
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
|
||||
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
|
||||
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],
|
||||
|
|
|
@ -832,6 +832,13 @@ Porting to Python 3.13
|
|||
Deprecated
|
||||
----------
|
||||
|
||||
* Passing optional arguments *maxsplit*, *count* and *flags* in module-level
|
||||
functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional
|
||||
arguments is now deprecated.
|
||||
In future Python versions these parameters will be
|
||||
:ref:`keyword-only <keyword-only_parameter>`.
|
||||
(Contributed by Serhiy Storchaka in :gh:`56166`.)
|
||||
|
||||
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
|
||||
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
|
||||
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.
|
||||
|
|
|
@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
|
|||
a Match object, or None if no match was found."""
|
||||
return _compile(pattern, flags).search(string)
|
||||
|
||||
def sub(pattern, repl, string, count=0, flags=0):
|
||||
class _ZeroSentinel(int):
|
||||
pass
|
||||
_zero_sentinel = _ZeroSentinel()
|
||||
|
||||
def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
|
||||
"""Return the string obtained by replacing the leftmost
|
||||
non-overlapping occurrences of the pattern in string by the
|
||||
replacement repl. repl can be either a string or a callable;
|
||||
if a string, backslash escapes in it are processed. If it is
|
||||
a callable, it's passed the Match object and must return
|
||||
a replacement string to be used."""
|
||||
return _compile(pattern, flags).sub(repl, string, count)
|
||||
if args:
|
||||
if count is not _zero_sentinel:
|
||||
raise TypeError("sub() got multiple values for argument 'count'")
|
||||
count, *args = args
|
||||
if args:
|
||||
if flags is not _zero_sentinel:
|
||||
raise TypeError("sub() got multiple values for argument 'flags'")
|
||||
flags, *args = args
|
||||
if args:
|
||||
raise TypeError("sub() takes from 3 to 5 positional arguments "
|
||||
"but %d were given" % (5 + len(args)))
|
||||
|
||||
def subn(pattern, repl, string, count=0, flags=0):
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"'count' is passed as positional argument",
|
||||
DeprecationWarning, stacklevel=2
|
||||
)
|
||||
|
||||
return _compile(pattern, flags).sub(repl, string, count)
|
||||
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
|
||||
|
||||
def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
|
||||
"""Return a 2-tuple containing (new_string, number).
|
||||
new_string is the string obtained by replacing the leftmost
|
||||
non-overlapping occurrences of the pattern in the source
|
||||
|
@ -193,9 +216,28 @@ def subn(pattern, repl, string, count=0, flags=0):
|
|||
callable; if a string, backslash escapes in it are processed.
|
||||
If it is a callable, it's passed the Match object and must
|
||||
return a replacement string to be used."""
|
||||
return _compile(pattern, flags).subn(repl, string, count)
|
||||
if args:
|
||||
if count is not _zero_sentinel:
|
||||
raise TypeError("subn() got multiple values for argument 'count'")
|
||||
count, *args = args
|
||||
if args:
|
||||
if flags is not _zero_sentinel:
|
||||
raise TypeError("subn() got multiple values for argument 'flags'")
|
||||
flags, *args = args
|
||||
if args:
|
||||
raise TypeError("subn() takes from 3 to 5 positional arguments "
|
||||
"but %d were given" % (5 + len(args)))
|
||||
|
||||
def split(pattern, string, maxsplit=0, flags=0):
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"'count' is passed as positional argument",
|
||||
DeprecationWarning, stacklevel=2
|
||||
)
|
||||
|
||||
return _compile(pattern, flags).subn(repl, string, count)
|
||||
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
|
||||
|
||||
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
|
||||
"""Split the source string by the occurrences of the pattern,
|
||||
returning a list containing the resulting substrings. If
|
||||
capturing parentheses are used in pattern, then the text of all
|
||||
|
@ -203,7 +245,26 @@ def split(pattern, string, maxsplit=0, flags=0):
|
|||
list. If maxsplit is nonzero, at most maxsplit splits occur,
|
||||
and the remainder of the string is returned as the final element
|
||||
of the list."""
|
||||
if args:
|
||||
if maxsplit is not _zero_sentinel:
|
||||
raise TypeError("split() got multiple values for argument 'maxsplit'")
|
||||
maxsplit, *args = args
|
||||
if args:
|
||||
if flags is not _zero_sentinel:
|
||||
raise TypeError("split() got multiple values for argument 'flags'")
|
||||
flags, *args = args
|
||||
if args:
|
||||
raise TypeError("split() takes from 2 to 4 positional arguments "
|
||||
"but %d were given" % (4 + len(args)))
|
||||
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"'maxsplit' is passed as positional argument",
|
||||
DeprecationWarning, stacklevel=2
|
||||
)
|
||||
|
||||
return _compile(pattern, flags).split(string, maxsplit)
|
||||
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
|
||||
|
||||
def findall(pattern, string, flags=0):
|
||||
"""Return a list of all non-overlapping matches in the string.
|
||||
|
|
|
@ -127,8 +127,10 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
|
||||
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
|
||||
'9.3 -3 24x100y')
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
|
||||
'9.3 -3 23x99y')
|
||||
self.assertEqual(w.filename, __file__)
|
||||
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
|
||||
'9.3 -3 23x99y')
|
||||
|
||||
|
@ -235,9 +237,42 @@ class ReTests(unittest.TestCase):
|
|||
|
||||
def test_qualified_re_sub(self):
|
||||
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
|
||||
self.assertEqual(w.filename, __file__)
|
||||
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
|
||||
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"sub\(\) got multiple values for argument 'count'"):
|
||||
re.sub('a', 'b', 'aaaaa', 1, count=1)
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"sub\(\) got multiple values for argument 'flags'"):
|
||||
re.sub('a', 'b', 'aaaaa', 1, 0, flags=0)
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"sub\(\) takes from 3 to 5 positional arguments but 6 "
|
||||
r"were given"):
|
||||
re.sub('a', 'b', 'aaaaa', 1, 0, 0)
|
||||
|
||||
def test_misuse_flags(self):
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
result = re.sub('a', 'b', 'aaaaa', re.I)
|
||||
self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I)))
|
||||
self.assertEqual(str(w.warning),
|
||||
"'count' is passed as positional argument")
|
||||
self.assertEqual(w.filename, __file__)
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
result = re.subn("b*", "x", "xyz", re.I)
|
||||
self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I)))
|
||||
self.assertEqual(str(w.warning),
|
||||
"'count' is passed as positional argument")
|
||||
self.assertEqual(w.filename, __file__)
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
result = re.split(":", ":a:b::c", re.I)
|
||||
self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I)))
|
||||
self.assertEqual(str(w.warning),
|
||||
"'maxsplit' is passed as positional argument")
|
||||
self.assertEqual(w.filename, __file__)
|
||||
|
||||
def test_bug_114660(self):
|
||||
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
|
||||
'hello there')
|
||||
|
@ -344,9 +379,22 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
|
||||
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
|
||||
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
|
||||
self.assertEqual(w.filename, __file__)
|
||||
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
|
||||
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"subn\(\) got multiple values for argument 'count'"):
|
||||
re.subn('a', 'b', 'aaaaa', 1, count=1)
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"subn\(\) got multiple values for argument 'flags'"):
|
||||
re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"subn\(\) takes from 3 to 5 positional arguments but 6 "
|
||||
r"were given"):
|
||||
re.subn('a', 'b', 'aaaaa', 1, 0, 0)
|
||||
|
||||
def test_re_split(self):
|
||||
for string in ":a:b::c", S(":a:b::c"):
|
||||
self.assertTypedEqual(re.split(":", string),
|
||||
|
@ -401,7 +449,9 @@ class ReTests(unittest.TestCase):
|
|||
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
|
||||
|
||||
def test_qualified_re_split(self):
|
||||
with self.assertWarns(DeprecationWarning) as w:
|
||||
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
|
||||
self.assertEqual(w.filename, __file__)
|
||||
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
|
||||
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
|
||||
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
|
||||
|
@ -411,6 +461,17 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
|
||||
['', ':', '', '', 'a:b::c'])
|
||||
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"split\(\) got multiple values for argument 'maxsplit'"):
|
||||
re.split(":", ":a:b::c", 2, maxsplit=2)
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"split\(\) got multiple values for argument 'flags'"):
|
||||
re.split(":", ":a:b::c", 2, 0, flags=0)
|
||||
with self.assertRaisesRegex(TypeError,
|
||||
r"split\(\) takes from 2 to 4 positional arguments but 5 "
|
||||
r"were given"):
|
||||
re.split(":", ":a:b::c", 2, 0, 0)
|
||||
|
||||
def test_re_findall(self):
|
||||
self.assertEqual(re.findall(":+", "abc"), [])
|
||||
for string in "a:b::c:::d", S("a:b::c:::d"):
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Deprecate passing optional arguments *maxsplit*, *count* and *flags* in
|
||||
module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional.
|
||||
They should only be passed by keyword.
|
Loading…
Reference in New Issue