Issue #22493: Inline flags should now be used only at the start of the
regular expression.  A deprecation warning is emitted if they are used in
the middle of the regular expression.
Serhiy Storchaka 2016-09-11 12:50:02 +03:00
parent 352601ca00
commit bd48d27944
12 changed files with 58 additions and 34 deletions

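The change in behaviour is easy to demonstrate.  The sketch below is an editor's addition, not part of the diff; it assumes Python 3.6, where misplaced flags are only deprecated, while later releases reject them with re.error.

import re
import warnings

# Flags at the start of the pattern remain fully supported.
print(re.match('(?i)spam', 'SPAM'))            # a match object

# Flags anywhere else now trigger the deprecation added in sre_parse.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    try:
        re.match('spam(?i)', 'SPAM')           # still matches on 3.6, but warns
    except re.error as exc:                    # newer releases reject it outright
        print('error:', exc)
print([str(w.message) for w in caught])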
View File

@@ -224,12 +224,8 @@ The special characters are:
flags are described in :ref:`contents-of-module-re`.) This
is useful if you wish to include the flags as part of the regular
expression, instead of passing a *flag* argument to the
:func:`re.compile` function.
Note that the ``(?x)`` flag changes how the expression is parsed. It should be
used first in the expression string, or after one or more whitespace characters.
If there are non-whitespace characters before the flag, the results are
undefined.
:func:`re.compile` function. Flags should be used first in the
expression string.
``(?:...)``
A non-capturing version of regular parentheses. Matches whatever regular

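A short example (editor's addition, not from the diff) of the documented placement rule: inline flags at the very start of the expression string behave like the corresponding constants passed to re.compile().  The pattern and sample text are made up for illustration.

import re

p1 = re.compile(r'(?im)^total:\s*\d+$')
p2 = re.compile(r'^total:\s*\d+$', re.IGNORECASE | re.MULTILINE)

text = 'items\nTOTAL: 42\n'
print(p1.search(text).group(0))   # TOTAL: 42
print(p2.search(text).group(0))   # TOTAL: 42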
View File

@@ -1124,6 +1124,15 @@ Deprecated features
that will not be for several Python releases. (Contributed by Emanuel Barry
in :issue:`27364`.)
* Inline flags ``(?letters)`` should now be used only at the start of the
regular expression.  Inline flags in the middle of the regular expression
affect the global flags in the Python :mod:`re` module.  This differs from
other regular expression engines, which either apply such flags to only
part of the regular expression or treat them as an error.  To avoid this
ambiguity, inline flags in the middle of the regular expression now emit a
deprecation warning.  It will become an error in future Python releases.
(Contributed by Serhiy Storchaka in :issue:`22493`.)
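The scoping difference described above can be seen interactively.  This is an editor's illustration, not part of the change set; the guarded first call assumes a release where mid-pattern flags are still only deprecated.

import re
import warnings

# A mid-pattern (?i) has historically switched the flag on for the whole
# pattern, which is exactly what the deprecation is meant to flag.
try:
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        print(re.match('spam(?i)egg', 'SPAMEGG'))   # matches: (?i) acted globally
except re.error as exc:
    print('rejected on this Python version:', exc)

# The unambiguous spellings: a flag at the start, or a scoped group.
print(re.match('(?i)spamegg', 'SPAMEGG'))           # matches, whole pattern
print(re.match('spam(?i:egg)', 'spamEGG'))          # matches, only 'egg' affected
print(re.match('spam(?i:egg)', 'SPAMegg'))          # None, 'spam' stays case-sensitive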
Deprecated Python behavior
--------------------------

View File

@@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
else:
return pattern
# ditch start and end characters
start, _, end = glob_to_re('_').partition('_')
if pattern:
pattern_re = glob_to_re(pattern)
assert pattern_re.startswith(start) and pattern_re.endswith(end)
else:
pattern_re = ''
if prefix is not None:
# ditch end of pattern character
empty_pattern = glob_to_re('')
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
prefix_re = glob_to_re(prefix)
assert prefix_re.startswith(start) and prefix_re.endswith(end)
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
sep = os.sep
if os.sep == '\\':
sep = r'\\'
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
else: # no prefix -- respect anchor flag
if anchor:
pattern_re = "^" + pattern_re
pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
return re.compile(pattern_re)

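The new code recovers whatever wrapper glob_to_re() puts around a translated glob by translating a one-character probe and partitioning on it, so it no longer hard-codes the '\Z(?ms)' suffix.  The sketch below is an editor's illustration using fnmatch.translate directly (distutils' glob_to_re is a thin variant of it); the 'src' prefix and '*.py' pattern are made-up inputs and '/' stands in for os.sep.

import re
from fnmatch import translate

# Translate a one-character glob and split on that character to recover the
# fixed prefix and suffix -- '(?s:' and ')\Z' after this commit.
start, _, end = translate('_').partition('_')
print(repr(start), repr(end))

# translate_pattern() strips the wrapper from each piece, then rebuilds one
# wrapped, \A-anchored expression around prefix, separator and pattern.
prefix_re = translate('src')[len(start):-len(end)]
pattern_re = translate('*.py')[len(start):-len(end)]
combined = r'%s\A%s/.*%s%s' % (start, prefix_re, pattern_re, end)
print(combined)
print(bool(re.match(combined, 'src/pkg/module.py')))   # True
print(bool(re.match(combined, 'other/module.py')))     # False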
View File

@@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer,
for glob, regex in (
# simple cases
('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
('foo?', r'foo[^%(sep)s]\Z(?ms)'),
('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
('foo?', r'(?s:foo[^%(sep)s])\Z'),
('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
# special cases
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
(r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
(r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
(r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
regex = regex % {'sep': sep}
self.assertEqual(glob_to_re(glob), regex)

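The updated expectations are behaviourally equivalent to the old ones; only the placement of the DOTALL flag changes.  A small editor's check, with '/' substituted for the separator (the old spelling is itself deprecated, so it is guarded):

import re
import warnings

new = re.compile(r'(?s:foo[^/]*)\Z')
print(bool(new.match('foobar')), bool(new.match('foo/bar')))          # True False

with warnings.catch_warnings():
    warnings.simplefilter('ignore', DeprecationWarning)
    try:
        old = re.compile(r'foo[^/]*\Z(?ms)')                          # old expected form
        print(bool(old.match('foobar')), bool(old.match('foo/bar')))  # same results
    except re.error as exc:
        print('trailing global flags rejected on this version:', exc)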
View File

@@ -106,4 +106,4 @@ def translate(pat):
res = '%s[%s]' % (res, stuff)
else:
res = res + re.escape(c)
return res + r'\Z(?ms)'
return r'(?s:%s)\Z' % res

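For reference, an editor's sketch of what the new return value looks like in practice; the exact string may differ slightly between releases.

import fnmatch
import re

regex = fnmatch.translate('*.txt')
print(regex)                                    # e.g. (?s:.*\.txt)\Z
print(bool(re.match(regex, 'notes.txt')))       # True
print(bool(re.match(regex, 'odd\nname.txt')))   # True: DOTALL is scoped to the glob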
View File

@@ -458,7 +458,6 @@ class Morsel(dict):
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LegalValueChars = _LegalKeyChars + r'\[\]'
_CookiePattern = re.compile(r"""
(?x) # This is a verbose pattern
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
@@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII) # May be removed if safe.
""", re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
# At long last, here is the cookie class. Using this class is almost just like

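Passing re.VERBOSE as a flag argument is the drop-in replacement for the leading (?x).  A stripped-down editor's example; the character classes here are simplified placeholders, not the module's _LegalKeyChars/_LegalValueChars.

import re

_pair = re.compile(r"""
    \s*                     # optional leading whitespace
    (?P<key>[\w-]+)         # cookie name (simplified character class)
    \s* = \s*
    (?P<val>[^;]*)          # everything up to the next semicolon
""", re.ASCII | re.VERBOSE)

m = _pair.match(" sessionid=abc123; Path=/")
print(m.group('key'), m.group('val'))           # sessionid abc123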
View File

@@ -279,6 +279,9 @@ class Tokenizer:
break
result += c
return result
@property
def pos(self):
return self.index - len(self.next or '')
def tell(self):
return self.index - len(self.next or '')
def seek(self, index):
@@ -727,8 +730,13 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
pos = source.pos
flags = _parse_flags(source, state, char)
if flags is None: # global flags
if pos != 3: # "(?x"
import warnings
warnings.warn('Flags not at the start of the expression',
DeprecationWarning, stacklevel=7)
continue
add_flags, del_flags = flags
group = None

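Why pos != 3 means "not at the start": by the time _parse_flags() is reached, '(', '?' and one flag letter have already been consumed, so the tokenizer position is 3 exactly when the flags group opens the pattern.  An editor's probe of the (private) tokenizer, for illustration only:

import sre_parse   # private module; re._parser in recent releases

tok = sre_parse.Tokenizer('(?i)spam')
for _ in '(?i':                # consume '(', '?' and the flag letter
    tok.get()
print(tok.pos)                 # 3 -> flags open the pattern, no warning

tok = sre_parse.Tokenizer('spam(?i)')
for _ in 'spam(?i':            # the same three characters, reached later
    tok.get()
print(tok.pos)                 # 7 -> DeprecationWarning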
View File

@@ -106,8 +106,8 @@ tests = [
('a.*b', 'acc\nccb', FAIL),
('a.{4,5}b', 'acc\nccb', FAIL),
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
@@ -563,7 +563,7 @@ tests = [
# Check odd placement of embedded pattern modifiers
# not an error under PCRE/PRE:
('w(?i)', 'W', SUCCEED, 'found', 'W'),
('(?i)w', 'W', SUCCEED, 'found', 'W'),
# ('w(?i)', 'W', SYNTAX_ERROR),
# Comments using the x embedded pattern modifier

View File

@@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase):
class TranslateTestCase(unittest.TestCase):
def test_translate(self):
self.assertEqual(translate('*'), r'.*\Z(?ms)')
self.assertEqual(translate('?'), r'.\Z(?ms)')
self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)')
self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)')
self.assertEqual(translate('[]]'), r'[]]\Z(?ms)')
self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)')
self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)')
self.assertEqual(translate('[x'), r'\[x\Z(?ms)')
self.assertEqual(translate('*'), r'(?s:.*)\Z')
self.assertEqual(translate('?'), r'(?s:.)\Z')
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
class FilterTestCase(unittest.TestCase):

View File

@@ -158,7 +158,7 @@ class PyclbrTest(TestCase):
cm('cgi', ignore=('log',)) # set with = in module
cm('pickle', ignore=('partial',))
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property
cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
cm('pdb')
cm('pydoc')

View File

@@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(upper_char + '(?i)', lower_char))
def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')

View File

@@ -143,6 +143,10 @@ Core and Builtins
Library
-------
- Issue #22493: Inline flags should now be used only at the start of the
regular expression.  A deprecation warning is emitted if they are used in
the middle of the regular expression.
- Issue #26885: xmlrpc now supports unmarshalling additional data types used
by Apache XML-RPC implementation for numerics and None.