Issue #22493: Inline flags should now be used only at the start of the
regular expression. A deprecation warning is emitted if they are used in the middle of the regular expression.
This commit is contained in:
parent
352601ca00
commit
bd48d27944
|
@ -224,12 +224,8 @@ The special characters are:
|
||||||
flags are described in :ref:`contents-of-module-re`.) This
|
flags are described in :ref:`contents-of-module-re`.) This
|
||||||
is useful if you wish to include the flags as part of the regular
|
is useful if you wish to include the flags as part of the regular
|
||||||
expression, instead of passing a *flag* argument to the
|
expression, instead of passing a *flag* argument to the
|
||||||
:func:`re.compile` function.
|
:func:`re.compile` function. Flags should be used first in the
|
||||||
|
expression string.
|
||||||
Note that the ``(?x)`` flag changes how the expression is parsed. It should be
|
|
||||||
used first in the expression string, or after one or more whitespace characters.
|
|
||||||
If there are non-whitespace characters before the flag, the results are
|
|
||||||
undefined.
|
|
||||||
|
|
||||||
``(?:...)``
|
``(?:...)``
|
||||||
A non-capturing version of regular parentheses. Matches whatever regular
|
A non-capturing version of regular parentheses. Matches whatever regular
|
||||||
|
|
|
@ -1124,6 +1124,15 @@ Deprecated features
|
||||||
that will not be for several Python releases. (Contributed by Emanuel Barry
|
that will not be for several Python releases. (Contributed by Emanuel Barry
|
||||||
in :issue:`27364`.)
|
in :issue:`27364`.)
|
||||||
|
|
||||||
|
* Inline flags ``(?letters)`` should now be used only at the start of the
|
||||||
|
regular expression. Inline flags in the middle of the regular expression
|
||||||
|
affect global flags in Python's :mod:`re` module. This differs from
|
||||||
|
other regular expression engines that either apply flags to only part of
|
||||||
|
the regular expression or treat them as an error. To avoid this ambiguity,
|
||||||
|
inline flags in the middle of the regular expression now emit a deprecation
|
||||||
|
warning. This will become an error in a future Python release.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`22493`.)
|
||||||
|
|
||||||
|
|
||||||
Deprecated Python behavior
|
Deprecated Python behavior
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
|
||||||
else:
|
else:
|
||||||
return pattern
|
return pattern
|
||||||
|
|
||||||
|
# ditch start and end characters
|
||||||
|
start, _, end = glob_to_re('_').partition('_')
|
||||||
|
|
||||||
if pattern:
|
if pattern:
|
||||||
pattern_re = glob_to_re(pattern)
|
pattern_re = glob_to_re(pattern)
|
||||||
|
assert pattern_re.startswith(start) and pattern_re.endswith(end)
|
||||||
else:
|
else:
|
||||||
pattern_re = ''
|
pattern_re = ''
|
||||||
|
|
||||||
if prefix is not None:
|
if prefix is not None:
|
||||||
# ditch end of pattern character
|
prefix_re = glob_to_re(prefix)
|
||||||
empty_pattern = glob_to_re('')
|
assert prefix_re.startswith(start) and prefix_re.endswith(end)
|
||||||
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
|
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
|
||||||
sep = os.sep
|
sep = os.sep
|
||||||
if os.sep == '\\':
|
if os.sep == '\\':
|
||||||
sep = r'\\'
|
sep = r'\\'
|
||||||
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
|
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
|
||||||
|
pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
|
||||||
else: # no prefix -- respect anchor flag
|
else: # no prefix -- respect anchor flag
|
||||||
if anchor:
|
if anchor:
|
||||||
pattern_re = "^" + pattern_re
|
pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
|
||||||
|
|
||||||
return re.compile(pattern_re)
|
return re.compile(pattern_re)
|
||||||
|
|
|
@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer,
|
||||||
|
|
||||||
for glob, regex in (
|
for glob, regex in (
|
||||||
# simple cases
|
# simple cases
|
||||||
('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
|
('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
|
||||||
('foo?', r'foo[^%(sep)s]\Z(?ms)'),
|
('foo?', r'(?s:foo[^%(sep)s])\Z'),
|
||||||
('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
|
('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
|
||||||
# special cases
|
# special cases
|
||||||
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
|
(r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
|
||||||
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
|
(r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
|
||||||
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
|
('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
|
||||||
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
|
(r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
|
||||||
regex = regex % {'sep': sep}
|
regex = regex % {'sep': sep}
|
||||||
self.assertEqual(glob_to_re(glob), regex)
|
self.assertEqual(glob_to_re(glob), regex)
|
||||||
|
|
||||||
|
|
|
@ -106,4 +106,4 @@ def translate(pat):
|
||||||
res = '%s[%s]' % (res, stuff)
|
res = '%s[%s]' % (res, stuff)
|
||||||
else:
|
else:
|
||||||
res = res + re.escape(c)
|
res = res + re.escape(c)
|
||||||
return res + r'\Z(?ms)'
|
return r'(?s:%s)\Z' % res
|
||||||
|
|
|
@ -458,7 +458,6 @@ class Morsel(dict):
|
||||||
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
|
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
|
||||||
_LegalValueChars = _LegalKeyChars + r'\[\]'
|
_LegalValueChars = _LegalKeyChars + r'\[\]'
|
||||||
_CookiePattern = re.compile(r"""
|
_CookiePattern = re.compile(r"""
|
||||||
(?x) # This is a verbose pattern
|
|
||||||
\s* # Optional whitespace at start of cookie
|
\s* # Optional whitespace at start of cookie
|
||||||
(?P<key> # Start of group 'key'
|
(?P<key> # Start of group 'key'
|
||||||
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
|
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
|
||||||
|
@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
|
||||||
)? # End of optional value group
|
)? # End of optional value group
|
||||||
\s* # Any number of spaces.
|
\s* # Any number of spaces.
|
||||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||||
""", re.ASCII) # May be removed if safe.
|
""", re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
|
||||||
|
|
||||||
|
|
||||||
# At long last, here is the cookie class. Using this class is almost just like
|
# At long last, here is the cookie class. Using this class is almost just like
|
||||||
|
|
|
@ -279,6 +279,9 @@ class Tokenizer:
|
||||||
break
|
break
|
||||||
result += c
|
result += c
|
||||||
return result
|
return result
|
||||||
|
@property
|
||||||
|
def pos(self):
|
||||||
|
return self.index - len(self.next or '')
|
||||||
def tell(self):
|
def tell(self):
|
||||||
return self.index - len(self.next or '')
|
return self.index - len(self.next or '')
|
||||||
def seek(self, index):
|
def seek(self, index):
|
||||||
|
@ -727,8 +730,13 @@ def _parse(source, state, verbose):
|
||||||
state.checklookbehindgroup(condgroup, source)
|
state.checklookbehindgroup(condgroup, source)
|
||||||
elif char in FLAGS or char == "-":
|
elif char in FLAGS or char == "-":
|
||||||
# flags
|
# flags
|
||||||
|
pos = source.pos
|
||||||
flags = _parse_flags(source, state, char)
|
flags = _parse_flags(source, state, char)
|
||||||
if flags is None: # global flags
|
if flags is None: # global flags
|
||||||
|
if pos != 3: # "(?x"
|
||||||
|
import warnings
|
||||||
|
warnings.warn('Flags not at the start of the expression',
|
||||||
|
DeprecationWarning, stacklevel=7)
|
||||||
continue
|
continue
|
||||||
add_flags, del_flags = flags
|
add_flags, del_flags = flags
|
||||||
group = None
|
group = None
|
||||||
|
|
|
@ -106,8 +106,8 @@ tests = [
|
||||||
('a.*b', 'acc\nccb', FAIL),
|
('a.*b', 'acc\nccb', FAIL),
|
||||||
('a.{4,5}b', 'acc\nccb', FAIL),
|
('a.{4,5}b', 'acc\nccb', FAIL),
|
||||||
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
|
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
|
||||||
('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
||||||
('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
||||||
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
||||||
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
||||||
|
|
||||||
|
@ -563,7 +563,7 @@ tests = [
|
||||||
# Check odd placement of embedded pattern modifiers
|
# Check odd placement of embedded pattern modifiers
|
||||||
|
|
||||||
# not an error under PCRE/PRE:
|
# not an error under PCRE/PRE:
|
||||||
('w(?i)', 'W', SUCCEED, 'found', 'W'),
|
('(?i)w', 'W', SUCCEED, 'found', 'W'),
|
||||||
# ('w(?i)', 'W', SYNTAX_ERROR),
|
# ('w(?i)', 'W', SYNTAX_ERROR),
|
||||||
|
|
||||||
# Comments using the x embedded pattern modifier
|
# Comments using the x embedded pattern modifier
|
||||||
|
@ -627,7 +627,7 @@ xyzabc
|
||||||
# bug 114033: nothing to repeat
|
# bug 114033: nothing to repeat
|
||||||
(r'(x?)?', 'x', SUCCEED, 'found', 'x'),
|
(r'(x?)?', 'x', SUCCEED, 'found', 'x'),
|
||||||
# bug 115040: rescan if flags are modified inside pattern
|
# bug 115040: rescan if flags are modified inside pattern
|
||||||
(r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
|
(r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
|
||||||
# bug 115618: negative lookahead
|
# bug 115618: negative lookahead
|
||||||
(r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
|
(r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
|
||||||
# bug 116251: character class bug
|
# bug 116251: character class bug
|
||||||
|
|
|
@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase):
|
||||||
class TranslateTestCase(unittest.TestCase):
|
class TranslateTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def test_translate(self):
|
def test_translate(self):
|
||||||
self.assertEqual(translate('*'), r'.*\Z(?ms)')
|
self.assertEqual(translate('*'), r'(?s:.*)\Z')
|
||||||
self.assertEqual(translate('?'), r'.\Z(?ms)')
|
self.assertEqual(translate('?'), r'(?s:.)\Z')
|
||||||
self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)')
|
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
|
||||||
self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)')
|
self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
|
||||||
self.assertEqual(translate('[]]'), r'[]]\Z(?ms)')
|
self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
|
||||||
self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)')
|
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
|
||||||
self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)')
|
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
|
||||||
self.assertEqual(translate('[x'), r'\[x\Z(?ms)')
|
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
|
||||||
|
|
||||||
|
|
||||||
class FilterTestCase(unittest.TestCase):
|
class FilterTestCase(unittest.TestCase):
|
||||||
|
|
|
@ -158,7 +158,7 @@ class PyclbrTest(TestCase):
|
||||||
cm('cgi', ignore=('log',)) # set with = in module
|
cm('cgi', ignore=('log',)) # set with = in module
|
||||||
cm('pickle', ignore=('partial',))
|
cm('pickle', ignore=('partial',))
|
||||||
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
|
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
|
||||||
cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property
|
cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
|
||||||
cm('pdb')
|
cm('pdb')
|
||||||
cm('pydoc')
|
cm('pydoc')
|
||||||
|
|
||||||
|
|
|
@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
|
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
|
||||||
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
|
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
|
||||||
|
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertTrue(re.match(upper_char + '(?i)', lower_char))
|
||||||
|
|
||||||
def test_dollar_matches_twice(self):
|
def test_dollar_matches_twice(self):
|
||||||
"$ matches the end of string, and just before the terminating \n"
|
"$ matches the end of string, and just before the terminating \n"
|
||||||
pattern = re.compile('$')
|
pattern = re.compile('$')
|
||||||
|
|
|
@ -143,6 +143,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #22493: Inline flags should now be used only at the start of the
|
||||||
|
regular expression. A deprecation warning is emitted if they are used in the
|
||||||
|
middle of the regular expression.
|
||||||
|
|
||||||
- Issue #26885: xmlrpc now supports unmarshalling additional data types used
|
- Issue #26885: xmlrpc now supports unmarshalling additional data types used
|
||||||
by Apache XML-RPC implementation for numerics and None.
|
by Apache XML-RPC implementation for numerics and None.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue