Issue #22493: Inline flags now should be used only at the start of the

regular expression.  A deprecation warning is emitted if they are used in the
middle of the regular expression.
This commit is contained in:
Serhiy Storchaka 2016-09-11 12:50:02 +03:00
parent 352601ca00
commit bd48d27944
12 changed files with 58 additions and 34 deletions

View File

@ -224,12 +224,8 @@ The special characters are:
flags are described in :ref:`contents-of-module-re`.) This flags are described in :ref:`contents-of-module-re`.) This
is useful if you wish to include the flags as part of the regular is useful if you wish to include the flags as part of the regular
expression, instead of passing a *flag* argument to the expression, instead of passing a *flag* argument to the
:func:`re.compile` function. :func:`re.compile` function. Flags should be used first in the
expression string.
Note that the ``(?x)`` flag changes how the expression is parsed. It should be
used first in the expression string, or after one or more whitespace characters.
If there are non-whitespace characters before the flag, the results are
undefined.
``(?:...)`` ``(?:...)``
A non-capturing version of regular parentheses. Matches whatever regular A non-capturing version of regular parentheses. Matches whatever regular

View File

@ -1124,6 +1124,15 @@ Deprecated features
that will not be for several Python releases. (Contributed by Emanuel Barry that will not be for several Python releases. (Contributed by Emanuel Barry
in :issue:`27364`.) in :issue:`27364`.)
* Inline flags ``(?letters)`` now should be used only at the start of the
regular expression. Inline flags in the middle of the regular expression
affect global flags in Python's :mod:`re` module. This differs from
other regular expression engines, which either apply flags to only part of
the regular expression or treat them as an error. To avoid this discrepancy,
inline flags in the middle of the regular expression now emit a deprecation
warning. This will be an error in future Python releases.
(Contributed by Serhiy Storchaka in :issue:`22493`.)
Deprecated Python behavior Deprecated Python behavior
-------------------------- --------------------------

View File

@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
else: else:
return pattern return pattern
# ditch start and end characters
start, _, end = glob_to_re('_').partition('_')
if pattern: if pattern:
pattern_re = glob_to_re(pattern) pattern_re = glob_to_re(pattern)
assert pattern_re.startswith(start) and pattern_re.endswith(end)
else: else:
pattern_re = '' pattern_re = ''
if prefix is not None: if prefix is not None:
# ditch end of pattern character prefix_re = glob_to_re(prefix)
empty_pattern = glob_to_re('') assert prefix_re.startswith(start) and prefix_re.endswith(end)
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)] prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
sep = os.sep sep = os.sep
if os.sep == '\\': if os.sep == '\\':
sep = r'\\' sep = r'\\'
pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re)) pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
else: # no prefix -- respect anchor flag else: # no prefix -- respect anchor flag
if anchor: if anchor:
pattern_re = "^" + pattern_re pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
return re.compile(pattern_re) return re.compile(pattern_re)

View File

@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer,
for glob, regex in ( for glob, regex in (
# simple cases # simple cases
('foo*', r'foo[^%(sep)s]*\Z(?ms)'), ('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
('foo?', r'foo[^%(sep)s]\Z(?ms)'), ('foo?', r'(?s:foo[^%(sep)s])\Z'),
('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'), ('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
# special cases # special cases
(r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'), (r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
(r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'), (r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'), ('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
(r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')): (r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
regex = regex % {'sep': sep} regex = regex % {'sep': sep}
self.assertEqual(glob_to_re(glob), regex) self.assertEqual(glob_to_re(glob), regex)

View File

@ -106,4 +106,4 @@ def translate(pat):
res = '%s[%s]' % (res, stuff) res = '%s[%s]' % (res, stuff)
else: else:
res = res + re.escape(c) res = res + re.escape(c)
return res + r'\Z(?ms)' return r'(?s:%s)\Z' % res

View File

@ -458,7 +458,6 @@ class Morsel(dict):
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=" _LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LegalValueChars = _LegalKeyChars + r'\[\]' _LegalValueChars = _LegalKeyChars + r'\[\]'
_CookiePattern = re.compile(r""" _CookiePattern = re.compile(r"""
(?x) # This is a verbose pattern
\s* # Optional whitespace at start of cookie \s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key' (?P<key> # Start of group 'key'
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter [""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
)? # End of optional value group )? # End of optional value group
\s* # Any number of spaces. \s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS. (\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII) # May be removed if safe. """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
# At long last, here is the cookie class. Using this class is almost just like # At long last, here is the cookie class. Using this class is almost just like

View File

@ -279,6 +279,9 @@ class Tokenizer:
break break
result += c result += c
return result return result
@property
def pos(self):
return self.index - len(self.next or '')
def tell(self): def tell(self):
return self.index - len(self.next or '') return self.index - len(self.next or '')
def seek(self, index): def seek(self, index):
@ -727,8 +730,13 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source) state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-": elif char in FLAGS or char == "-":
# flags # flags
pos = source.pos
flags = _parse_flags(source, state, char) flags = _parse_flags(source, state, char)
if flags is None: # global flags if flags is None: # global flags
if pos != 3: # "(?x"
import warnings
warnings.warn('Flags not at the start of the expression',
DeprecationWarning, stacklevel=7)
continue continue
add_flags, del_flags = flags add_flags, del_flags = flags
group = None group = None

View File

@ -106,8 +106,8 @@ tests = [
('a.*b', 'acc\nccb', FAIL), ('a.*b', 'acc\nccb', FAIL),
('a.{4,5}b', 'acc\nccb', FAIL), ('a.{4,5}b', 'acc\nccb', FAIL),
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'), ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'), ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'), ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'), ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
@ -563,7 +563,7 @@ tests = [
# Check odd placement of embedded pattern modifiers # Check odd placement of embedded pattern modifiers
# not an error under PCRE/PRE: # not an error under PCRE/PRE:
('w(?i)', 'W', SUCCEED, 'found', 'W'), ('(?i)w', 'W', SUCCEED, 'found', 'W'),
# ('w(?i)', 'W', SYNTAX_ERROR), # ('w(?i)', 'W', SYNTAX_ERROR),
# Comments using the x embedded pattern modifier # Comments using the x embedded pattern modifier
@ -627,7 +627,7 @@ xyzabc
# bug 114033: nothing to repeat # bug 114033: nothing to repeat
(r'(x?)?', 'x', SUCCEED, 'found', 'x'), (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
# bug 115040: rescan if flags are modified inside pattern # bug 115040: rescan if flags are modified inside pattern
(r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'), (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
# bug 115618: negative lookahead # bug 115618: negative lookahead
(r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'), (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
# bug 116251: character class bug # bug 116251: character class bug

View File

@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase):
class TranslateTestCase(unittest.TestCase): class TranslateTestCase(unittest.TestCase):
def test_translate(self): def test_translate(self):
self.assertEqual(translate('*'), r'.*\Z(?ms)') self.assertEqual(translate('*'), r'(?s:.*)\Z')
self.assertEqual(translate('?'), r'.\Z(?ms)') self.assertEqual(translate('?'), r'(?s:.)\Z')
self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)') self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)') self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
self.assertEqual(translate('[]]'), r'[]]\Z(?ms)') self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)') self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)') self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
self.assertEqual(translate('[x'), r'\[x\Z(?ms)') self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
class FilterTestCase(unittest.TestCase): class FilterTestCase(unittest.TestCase):

View File

@ -158,7 +158,7 @@ class PyclbrTest(TestCase):
cm('cgi', ignore=('log',)) # set with = in module cm('cgi', ignore=('log',)) # set with = in module
cm('pickle', ignore=('partial',)) cm('pickle', ignore=('partial',))
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
cm('pdb') cm('pdb')
cm('pydoc') cm('pydoc')

View File

@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char)) self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char)) self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(upper_char + '(?i)', lower_char))
def test_dollar_matches_twice(self): def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n" "$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$') pattern = re.compile('$')

View File

@ -143,6 +143,10 @@ Core and Builtins
Library Library
------- -------
- Issue #22493: Inline flags now should be used only at the start of the
regular expression. A deprecation warning is emitted if they are used in the
middle of the regular expression.
- Issue #26885: xmlrpc now supports unmarshalling additional data types used - Issue #26885: xmlrpc now supports unmarshalling additional data types used
by Apache XML-RPC implementation for numerics and None. by Apache XML-RPC implementation for numerics and None.