diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index ab37fd3fe2f..d8d1bd552fb 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True): sourcematch = source.match start = source.tell() while True: - itemsappend(_parse(source, state, verbose)) + itemsappend(_parse(source, state, verbose, not nested and not items)) if not sourcematch("|"): break @@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose): subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern -def _parse(source, state, verbose): +def _parse(source, state, verbose, first=False): # parse a simple pattern subpattern = SubPattern(state) @@ -730,10 +730,9 @@ def _parse(source, state, verbose): state.checklookbehindgroup(condgroup, source) elif char in FLAGS or char == "-": # flags - pos = source.pos flags = _parse_flags(source, state, char) if flags is None: # global flags - if pos != 3: # "(?x" + if not first or subpattern: import warnings warnings.warn( 'Flags not at the start of the expression %s%s' % ( @@ -742,6 +741,8 @@ def _parse(source, state, verbose): ), DeprecationWarning, stacklevel=7 ) + if (state.flags & SRE_FLAG_VERBOSE) and not verbose: + raise Verbose continue add_flags, del_flags = flags group = None @@ -795,9 +796,6 @@ def _parse_flags(source, state, char): msg = "unknown flag" if char.isalpha() else "missing -, : or )" raise source.error(msg, len(char)) if char == ")": - if ((add_flags & SRE_FLAG_VERBOSE) and - not (state.flags & SRE_FLAG_VERBOSE)): - raise Verbose state.flags |= add_flags return None if add_flags & GLOBAL_FLAGS: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 3129f7e9888..4d71eea517e 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase): upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below lower_char = '\u1ea1' # Latin Small Letter A with Dot Below - p = 
re.compile(upper_char, re.I | re.U) - q = p.match(lower_char) + p = re.compile('.' + upper_char, re.I | re.S) + q = p.match('\n' + lower_char) self.assertTrue(q) - p = re.compile(lower_char, re.I | re.U) - q = p.match(upper_char) + p = re.compile('.' + lower_char, re.I | re.S) + q = p.match('\n' + upper_char) self.assertTrue(q) - p = re.compile('(?i)' + upper_char, re.U) - q = p.match(lower_char) + p = re.compile('(?i).' + upper_char, re.S) + q = p.match('\n' + lower_char) self.assertTrue(q) - p = re.compile('(?i)' + lower_char, re.U) - q = p.match(upper_char) + p = re.compile('(?i).' + lower_char, re.S) + q = p.match('\n' + upper_char) self.assertTrue(q) - p = re.compile('(?iu)' + upper_char) - q = p.match(lower_char) + p = re.compile('(?is).' + upper_char) + q = p.match('\n' + lower_char) self.assertTrue(q) - p = re.compile('(?iu)' + lower_char) - q = p.match(upper_char) + p = re.compile('(?is).' + lower_char) + q = p.match('\n' + upper_char) self.assertTrue(q) - self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char)) - self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char)) + p = re.compile('(?s)(?i).' + upper_char) + q = p.match('\n' + lower_char) + self.assertTrue(q) + + p = re.compile('(?s)(?i).' 
+ lower_char) + q = p.match('\n' + upper_char) + self.assertTrue(q) + + self.assertTrue(re.match('(?ix) ' + upper_char, lower_char)) + self.assertTrue(re.match('(?ix) ' + lower_char, upper_char)) + self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X)) + self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char)) + self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X)) p = upper_char + '(?i)' with self.assertWarns(DeprecationWarning) as warns: @@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase): 'Flags not at the start of the expression %s (truncated)' % p[:20] ) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('^(?i)' + upper_char, lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('$|(?i)' + upper_char, lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')', + lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')', + lower_char)) + + def test_dollar_matches_twice(self): "$ matches the end of string, and just before the terminating \n" pattern = re.compile('$') diff --git a/Misc/NEWS b/Misc/NEWS index 7a79521efd7..997a03411de 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -320,6 +320,12 @@ Extension Modules Library ------- +- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. 
+ Several consecutive inline modifiers are now allowed at the start of the + pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments + are now allowed before and between inline modifiers (e.g. + ``'(?x) (?i) (?s)...'``). + - bpo-30285: Optimized case-insensitive matching and searching of regular expressions.