gh-56166: Deprecate passing confusing positional arguments in re functions (#107778)

Deprecate passing optional arguments maxsplit, count and flags in
module-level functions re.split(), re.sub() and re.subn() as positional.
They should only be passed by keyword.
This commit is contained in:
Serhiy Storchaka 2023-08-16 23:35:35 +03:00 committed by GitHub
parent fb8fe377c4
commit 882cb79afa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 155 additions and 21 deletions

View File

@ -898,7 +898,7 @@ Functions
['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
>>> re.split(r'\W+', 'Words, words, words.', 1)
>>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
['0', '3', '9']
@ -929,6 +929,11 @@ Functions
.. versionchanged:: 3.7
Added support of splitting on a pattern that could match an empty string.
.. deprecated:: 3.13
Passing *maxsplit* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.
.. function:: findall(pattern, string, flags=0)
@ -1027,8 +1032,6 @@ Functions
.. versionchanged:: 3.7
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
now are errors.
.. versionchanged:: 3.7
Empty matches for the pattern are replaced when adjacent to a previous
non-empty match.
@ -1037,18 +1040,17 @@ Functions
In :class:`bytes` replacement strings, group *name* can only contain bytes
in the ASCII range (``b'\x00'``-``b'\x7f'``).
.. deprecated:: 3.13
Passing *count* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.
.. function:: subn(pattern, repl, string, count=0, flags=0)
Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``.
.. versionchanged:: 3.1
Added the optional flags argument.
.. versionchanged:: 3.5
Unmatched groups are replaced with an empty string.
.. function:: escape(pattern)
@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> [re.split(":? ", entry, 3) for entry in entries]
>>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
@ -1669,7 +1671,7 @@ house number from the street name:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> [re.split(":? ", entry, 4) for entry in entries]
>>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],

View File

@ -832,6 +832,13 @@ Porting to Python 3.13
Deprecated
----------
* Passing optional arguments *maxsplit*, *count* and *flags* in module-level
functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional
arguments is now deprecated.
In future Python versions these parameters will be
:ref:`keyword-only <keyword-only_parameter>`.
(Contributed by Serhiy Storchaka in :gh:`56166`.)
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.

View File

@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0, flags=0):
class _ZeroSentinel(int):
pass
_zero_sentinel = _ZeroSentinel()
def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is
a callable, it's passed the Match object and must return
a replacement string to be used."""
return _compile(pattern, flags).sub(repl, string, count)
if args:
if count is not _zero_sentinel:
raise TypeError("sub() got multiple values for argument 'count'")
count, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("sub() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("sub() takes from 3 to 5 positional arguments "
"but %d were given" % (5 + len(args)))
def subn(pattern, repl, string, count=0, flags=0):
import warnings
warnings.warn(
"'count' is passed as positional argument",
DeprecationWarning, stacklevel=2
)
return _compile(pattern, flags).sub(repl, string, count)
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return a 2-tuple containing (new_string, number).
new_string is the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in the source
@ -193,9 +216,28 @@ def subn(pattern, repl, string, count=0, flags=0):
callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the Match object and must
return a replacement string to be used."""
return _compile(pattern, flags).subn(repl, string, count)
if args:
if count is not _zero_sentinel:
raise TypeError("subn() got multiple values for argument 'count'")
count, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("subn() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("subn() takes from 3 to 5 positional arguments "
"but %d were given" % (5 + len(args)))
def split(pattern, string, maxsplit=0, flags=0):
import warnings
warnings.warn(
"'count' is passed as positional argument",
DeprecationWarning, stacklevel=2
)
return _compile(pattern, flags).subn(repl, string, count)
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
"""Split the source string by the occurrences of the pattern,
returning a list containing the resulting substrings. If
capturing parentheses are used in pattern, then the text of all
@ -203,7 +245,26 @@ def split(pattern, string, maxsplit=0, flags=0):
list. If maxsplit is nonzero, at most maxsplit splits occur,
and the remainder of the string is returned as the final element
of the list."""
if args:
if maxsplit is not _zero_sentinel:
raise TypeError("split() got multiple values for argument 'maxsplit'")
maxsplit, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("split() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("split() takes from 2 to 4 positional arguments "
"but %d were given" % (4 + len(args)))
import warnings
warnings.warn(
"'maxsplit' is passed as positional argument",
DeprecationWarning, stacklevel=2
)
return _compile(pattern, flags).split(string, maxsplit)
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.

View File

@ -127,8 +127,10 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')
@ -235,9 +237,42 @@ class ReTests(unittest.TestCase):
def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
with self.assertRaisesRegex(TypeError,
r"sub\(\) got multiple values for argument 'count'"):
re.sub('a', 'b', 'aaaaa', 1, count=1)
with self.assertRaisesRegex(TypeError,
r"sub\(\) got multiple values for argument 'flags'"):
re.sub('a', 'b', 'aaaaa', 1, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"sub\(\) takes from 3 to 5 positional arguments but 6 "
r"were given"):
re.sub('a', 'b', 'aaaaa', 1, 0, 0)
def test_misuse_flags(self):
with self.assertWarns(DeprecationWarning) as w:
result = re.sub('a', 'b', 'aaaaa', re.I)
self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I)))
self.assertEqual(str(w.warning),
"'count' is passed as positional argument")
self.assertEqual(w.filename, __file__)
with self.assertWarns(DeprecationWarning) as w:
result = re.subn("b*", "x", "xyz", re.I)
self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I)))
self.assertEqual(str(w.warning),
"'count' is passed as positional argument")
self.assertEqual(w.filename, __file__)
with self.assertWarns(DeprecationWarning) as w:
result = re.split(":", ":a:b::c", re.I)
self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I)))
self.assertEqual(str(w.warning),
"'maxsplit' is passed as positional argument")
self.assertEqual(w.filename, __file__)
def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
'hello there')
@ -344,9 +379,22 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
self.assertEqual(w.filename, __file__)
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
with self.assertRaisesRegex(TypeError,
r"subn\(\) got multiple values for argument 'count'"):
re.subn('a', 'b', 'aaaaa', 1, count=1)
with self.assertRaisesRegex(TypeError,
r"subn\(\) got multiple values for argument 'flags'"):
re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"subn\(\) takes from 3 to 5 positional arguments but 6 "
r"were given"):
re.subn('a', 'b', 'aaaaa', 1, 0, 0)
def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
@ -401,7 +449,9 @@ class ReTests(unittest.TestCase):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
def test_qualified_re_split(self):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
self.assertEqual(w.filename, __file__)
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
@ -411,6 +461,17 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
['', ':', '', '', 'a:b::c'])
with self.assertRaisesRegex(TypeError,
r"split\(\) got multiple values for argument 'maxsplit'"):
re.split(":", ":a:b::c", 2, maxsplit=2)
with self.assertRaisesRegex(TypeError,
r"split\(\) got multiple values for argument 'flags'"):
re.split(":", ":a:b::c", 2, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"split\(\) takes from 2 to 4 positional arguments but 5 "
r"were given"):
re.split(":", ":a:b::c", 2, 0, 0)
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
for string in "a:b::c:::d", S("a:b::c:::d"):

View File

@ -0,0 +1,3 @@
Deprecate passing optional arguments *maxsplit*, *count* and *flags* in
module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional.
They should only be passed by keyword.