Issue #22493: Inline flags should now be used only at the start of the
regular expression. A deprecation warning is emitted if they are used in the middle of the regular expression.
This commit is contained in:
parent
352601ca00
commit
bd48d27944
|
@ -224,12 +224,8 @@ The special characters are:
|
|||
flags are described in :ref:`contents-of-module-re`.) This
|
||||
is useful if you wish to include the flags as part of the regular
|
||||
expression, instead of passing a *flag* argument to the
|
||||
:func:`re.compile` function.
|
||||
|
||||
Note that the ``(?x)`` flag changes how the expression is parsed. It should be
|
||||
used first in the expression string, or after one or more whitespace characters.
|
||||
If there are non-whitespace characters before the flag, the results are
|
||||
undefined.
|
||||
:func:`re.compile` function. Flags should be used first in the
|
||||
expression string.
|
||||
|
||||
``(?:...)``
|
||||
A non-capturing version of regular parentheses. Matches whatever regular
|
||||
|
|
|
@ -1124,6 +1124,15 @@ Deprecated features
|
|||
that will not be for several Python releases. (Contributed by Emanuel Barry
|
||||
in :issue:`27364`.)
|
||||
|
||||
* Inline flags ``(?letters)`` now should be used only at the start of the
|
||||
regular expression. Inline flags in the middle of the regular expression
|
||||
affect global flags in the Python :mod:`re` module. This differs from
|
||||
other regular expression engines that either apply flags to only part of
|
||||
the regular expression or treat them as an error. To avoid this ambiguity,
|
||||
inline flags in the middle of the regular expression now emit a deprecation
|
||||
warning. It will be an error in future Python releases.
|
||||
(Contributed by Serhiy Storchaka in :issue:`22493`.)
|
||||
|
||||
|
||||
Deprecated Python behavior
|
||||
--------------------------
|
||||
|
|
|
@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
|
|||
else:
|
||||
return pattern
|
||||
|
||||
# ditch start and end characters
|
||||
start, _, end = glob_to_re('_').partition('_')
|
||||
|
||||
if pattern:
|
||||
pattern_re = glob_to_re(pattern)
|
||||
assert pattern_re.startswith(start) and pattern_re.endswith(end)
|
||||
else:
|
||||
pattern_re = ''
|
||||
|
||||
if prefix is not None:
|
||||
# ditch end of pattern character
|
||||
empty_pattern = glob_to_re('')
|
||||
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
|
||||
prefix_re = glob_to_re(prefix)
|
||||
assert prefix_re.startswith(start) and prefix_re.endswith(end)
|
||||
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
|
||||
sep = os.sep
|
||||
if os.sep == '\\':
|
||||
sep = r'\\'
|
||||
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
|
||||
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
|
||||
pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
|
||||
else: # no prefix -- respect anchor flag
|
||||
if anchor:
|
||||
pattern_re = "^" + pattern_re
|
||||
pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
|
||||
|
||||
return re.compile(pattern_re)
|
||||
|
|
|
@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer,
|
|||
|
||||
for glob, regex in (
|
||||
# simple cases
|
||||
('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
|
||||
('foo?', r'foo[^%(sep)s]\Z(?ms)'),
|
||||
('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
|
||||
('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
|
||||
('foo?', r'(?s:foo[^%(sep)s])\Z'),
|
||||
('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
|
||||
# special cases
|
||||
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
|
||||
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
|
||||
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
|
||||
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
|
||||
(r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
|
||||
(r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
|
||||
('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
|
||||
(r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
|
||||
regex = regex % {'sep': sep}
|
||||
self.assertEqual(glob_to_re(glob), regex)
|
||||
|
||||
|
|
|
@ -106,4 +106,4 @@ def translate(pat):
|
|||
res = '%s[%s]' % (res, stuff)
|
||||
else:
|
||||
res = res + re.escape(c)
|
||||
return res + r'\Z(?ms)'
|
||||
return r'(?s:%s)\Z' % res
|
||||
|
|
|
@ -458,7 +458,6 @@ class Morsel(dict):
|
|||
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
|
||||
_LegalValueChars = _LegalKeyChars + r'\[\]'
|
||||
_CookiePattern = re.compile(r"""
|
||||
(?x) # This is a verbose pattern
|
||||
\s* # Optional whitespace at start of cookie
|
||||
(?P<key> # Start of group 'key'
|
||||
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
|
||||
|
@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
|
|||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
""", re.ASCII) # May be removed if safe.
|
||||
""", re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
|
||||
|
||||
|
||||
# At long last, here is the cookie class. Using this class is almost just like
|
||||
|
|
|
@ -279,6 +279,9 @@ class Tokenizer:
|
|||
break
|
||||
result += c
|
||||
return result
|
||||
@property
|
||||
def pos(self):
|
||||
return self.index - len(self.next or '')
|
||||
def tell(self):
|
||||
return self.index - len(self.next or '')
|
||||
def seek(self, index):
|
||||
|
@ -727,8 +730,13 @@ def _parse(source, state, verbose):
|
|||
state.checklookbehindgroup(condgroup, source)
|
||||
elif char in FLAGS or char == "-":
|
||||
# flags
|
||||
pos = source.pos
|
||||
flags = _parse_flags(source, state, char)
|
||||
if flags is None: # global flags
|
||||
if pos != 3: # "(?x"
|
||||
import warnings
|
||||
warnings.warn('Flags not at the start of the expression',
|
||||
DeprecationWarning, stacklevel=7)
|
||||
continue
|
||||
add_flags, del_flags = flags
|
||||
group = None
|
||||
|
|
|
@ -106,8 +106,8 @@ tests = [
|
|||
('a.*b', 'acc\nccb', FAIL),
|
||||
('a.{4,5}b', 'acc\nccb', FAIL),
|
||||
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
|
||||
('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
||||
('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
||||
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
||||
('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
||||
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
|
||||
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
|
||||
|
||||
|
@ -563,7 +563,7 @@ tests = [
|
|||
# Check odd placement of embedded pattern modifiers
|
||||
|
||||
# not an error under PCRE/PRE:
|
||||
('w(?i)', 'W', SUCCEED, 'found', 'W'),
|
||||
('(?i)w', 'W', SUCCEED, 'found', 'W'),
|
||||
# ('w(?i)', 'W', SYNTAX_ERROR),
|
||||
|
||||
# Comments using the x embedded pattern modifier
|
||||
|
|
|
@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase):
|
|||
class TranslateTestCase(unittest.TestCase):
|
||||
|
||||
def test_translate(self):
|
||||
self.assertEqual(translate('*'), r'.*\Z(?ms)')
|
||||
self.assertEqual(translate('?'), r'.\Z(?ms)')
|
||||
self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)')
|
||||
self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)')
|
||||
self.assertEqual(translate('[]]'), r'[]]\Z(?ms)')
|
||||
self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)')
|
||||
self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)')
|
||||
self.assertEqual(translate('[x'), r'\[x\Z(?ms)')
|
||||
self.assertEqual(translate('*'), r'(?s:.*)\Z')
|
||||
self.assertEqual(translate('?'), r'(?s:.)\Z')
|
||||
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
|
||||
self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
|
||||
self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
|
||||
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
|
||||
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
|
||||
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
|
||||
|
||||
|
||||
class FilterTestCase(unittest.TestCase):
|
||||
|
|
|
@ -158,7 +158,7 @@ class PyclbrTest(TestCase):
|
|||
cm('cgi', ignore=('log',)) # set with = in module
|
||||
cm('pickle', ignore=('partial',))
|
||||
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
|
||||
cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property
|
||||
cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
|
||||
cm('pdb')
|
||||
cm('pydoc')
|
||||
|
||||
|
|
|
@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase):
|
|||
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
|
||||
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
|
||||
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match(upper_char + '(?i)', lower_char))
|
||||
|
||||
def test_dollar_matches_twice(self):
|
||||
"$ matches the end of string, and just before the terminating \n"
|
||||
pattern = re.compile('$')
|
||||
|
|
|
@ -143,6 +143,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #22493: Inline flags now should be used only at the start of the
|
||||
regular expression. A deprecation warning is emitted if they are used in the
|
||||
middle of the regular expression.
|
||||
|
||||
- Issue #26885: xmlrpc now supports unmarshalling additional data types used
|
||||
by Apache XML-RPC implementation for numerics and None.
|
||||
|
||||
|
|
Loading…
Reference in New Issue