bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. (#1490)

Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. '(?i)(?s)...').  In verbose mode, whitespace and comments
are now allowed before and between inline modifiers (e.g.
'(?x) (?i) (?s)...').
This commit is contained in:
Serhiy Storchaka 2017-05-10 06:05:20 +03:00 committed by GitHub
parent 211a392cc1
commit 305ccbe27e
3 changed files with 56 additions and 21 deletions

View File

@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
sourcematch = source.match
start = source.tell()
while True:
itemsappend(_parse(source, state, verbose))
itemsappend(_parse(source, state, verbose, not nested and not items))
if not sourcematch("|"):
break
@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
def _parse(source, state, verbose):
def _parse(source, state, verbose, first=False):
# parse a simple pattern
subpattern = SubPattern(state)
@ -730,10 +730,9 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
pos = source.pos
flags = _parse_flags(source, state, char)
if flags is None: # global flags
if pos != 3: # "(?x"
if not first or subpattern:
import warnings
warnings.warn(
'Flags not at the start of the expression %s%s' % (
@ -742,6 +741,8 @@ def _parse(source, state, verbose):
),
DeprecationWarning, stacklevel=7
)
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
raise Verbose
continue
add_flags, del_flags = flags
group = None
@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
msg = "unknown flag" if char.isalpha() else "missing -, : or )"
raise source.error(msg, len(char))
if char == ")":
if ((add_flags & SRE_FLAG_VERBOSE) and
not (state.flags & SRE_FLAG_VERBOSE)):
raise Verbose
state.flags |= add_flags
return None
if add_flags & GLOBAL_FLAGS:

View File

@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase):
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
p = re.compile(upper_char, re.I | re.U)
q = p.match(lower_char)
p = re.compile('.' + upper_char, re.I | re.S)
q = p.match('\n' + lower_char)
self.assertTrue(q)
p = re.compile(lower_char, re.I | re.U)
q = p.match(upper_char)
p = re.compile('.' + lower_char, re.I | re.S)
q = p.match('\n' + upper_char)
self.assertTrue(q)
p = re.compile('(?i)' + upper_char, re.U)
q = p.match(lower_char)
p = re.compile('(?i).' + upper_char, re.S)
q = p.match('\n' + lower_char)
self.assertTrue(q)
p = re.compile('(?i)' + lower_char, re.U)
q = p.match(upper_char)
p = re.compile('(?i).' + lower_char, re.S)
q = p.match('\n' + upper_char)
self.assertTrue(q)
p = re.compile('(?iu)' + upper_char)
q = p.match(lower_char)
p = re.compile('(?is).' + upper_char)
q = p.match('\n' + lower_char)
self.assertTrue(q)
p = re.compile('(?iu)' + lower_char)
q = p.match(upper_char)
p = re.compile('(?is).' + lower_char)
q = p.match('\n' + upper_char)
self.assertTrue(q)
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
p = re.compile('(?s)(?i).' + upper_char)
q = p.match('\n' + lower_char)
self.assertTrue(q)
p = re.compile('(?s)(?i).' + lower_char)
q = p.match('\n' + upper_char)
self.assertTrue(q)
self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
p = upper_char + '(?i)'
with self.assertWarns(DeprecationWarning) as warns:
@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase):
'Flags not at the start of the expression %s (truncated)' % p[:20]
)
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
lower_char))
with self.assertWarns(DeprecationWarning):
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
lower_char))
def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')

View File

@ -320,6 +320,12 @@ Extension Modules
Library
-------
- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments
are now allowed before and between inline modifiers (e.g.
``'(?x) (?i) (?s)...'``).
- bpo-30285: Optimized case-insensitive matching and searching of regular
expressions.