bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. (#1490)
Several consecutive inline modifiers are now allowed at the start of the pattern (e.g. '(?i)(?s)...'). In verbose mode, whitespace and comments are now allowed before and between inline modifiers (e.g. '(?x) (?i) (?s)...').
This commit is contained in:
parent
211a392cc1
commit
305ccbe27e
|
@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
|
|||
sourcematch = source.match
|
||||
start = source.tell()
|
||||
while True:
|
||||
itemsappend(_parse(source, state, verbose))
|
||||
itemsappend(_parse(source, state, verbose, not nested and not items))
|
||||
if not sourcematch("|"):
|
||||
break
|
||||
|
||||
|
@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
|
|||
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
|
||||
return subpattern
|
||||
|
||||
def _parse(source, state, verbose):
|
||||
def _parse(source, state, verbose, first=False):
|
||||
# parse a simple pattern
|
||||
subpattern = SubPattern(state)
|
||||
|
||||
|
@ -730,10 +730,9 @@ def _parse(source, state, verbose):
|
|||
state.checklookbehindgroup(condgroup, source)
|
||||
elif char in FLAGS or char == "-":
|
||||
# flags
|
||||
pos = source.pos
|
||||
flags = _parse_flags(source, state, char)
|
||||
if flags is None: # global flags
|
||||
if pos != 3: # "(?x"
|
||||
if not first or subpattern:
|
||||
import warnings
|
||||
warnings.warn(
|
||||
'Flags not at the start of the expression %s%s' % (
|
||||
|
@ -742,6 +741,8 @@ def _parse(source, state, verbose):
|
|||
),
|
||||
DeprecationWarning, stacklevel=7
|
||||
)
|
||||
if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
|
||||
raise Verbose
|
||||
continue
|
||||
add_flags, del_flags = flags
|
||||
group = None
|
||||
|
@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
|
|||
msg = "unknown flag" if char.isalpha() else "missing -, : or )"
|
||||
raise source.error(msg, len(char))
|
||||
if char == ")":
|
||||
if ((add_flags & SRE_FLAG_VERBOSE) and
|
||||
not (state.flags & SRE_FLAG_VERBOSE)):
|
||||
raise Verbose
|
||||
state.flags |= add_flags
|
||||
return None
|
||||
if add_flags & GLOBAL_FLAGS:
|
||||
|
|
|
@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase):
|
|||
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
|
||||
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
|
||||
|
||||
p = re.compile(upper_char, re.I | re.U)
|
||||
q = p.match(lower_char)
|
||||
p = re.compile('.' + upper_char, re.I | re.S)
|
||||
q = p.match('\n' + lower_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
p = re.compile(lower_char, re.I | re.U)
|
||||
q = p.match(upper_char)
|
||||
p = re.compile('.' + lower_char, re.I | re.S)
|
||||
q = p.match('\n' + upper_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
p = re.compile('(?i)' + upper_char, re.U)
|
||||
q = p.match(lower_char)
|
||||
p = re.compile('(?i).' + upper_char, re.S)
|
||||
q = p.match('\n' + lower_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
p = re.compile('(?i)' + lower_char, re.U)
|
||||
q = p.match(upper_char)
|
||||
p = re.compile('(?i).' + lower_char, re.S)
|
||||
q = p.match('\n' + upper_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
p = re.compile('(?iu)' + upper_char)
|
||||
q = p.match(lower_char)
|
||||
p = re.compile('(?is).' + upper_char)
|
||||
q = p.match('\n' + lower_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
p = re.compile('(?iu)' + lower_char)
|
||||
q = p.match(upper_char)
|
||||
p = re.compile('(?is).' + lower_char)
|
||||
q = p.match('\n' + upper_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
|
||||
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
|
||||
p = re.compile('(?s)(?i).' + upper_char)
|
||||
q = p.match('\n' + lower_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
p = re.compile('(?s)(?i).' + lower_char)
|
||||
q = p.match('\n' + upper_char)
|
||||
self.assertTrue(q)
|
||||
|
||||
self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
|
||||
self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
|
||||
self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
|
||||
self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
|
||||
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
|
||||
|
||||
p = upper_char + '(?i)'
|
||||
with self.assertWarns(DeprecationWarning) as warns:
|
||||
|
@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase):
|
|||
'Flags not at the start of the expression %s (truncated)' % p[:20]
|
||||
)
|
||||
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
|
||||
lower_char))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
|
||||
lower_char))
|
||||
|
||||
|
||||
def test_dollar_matches_twice(self):
|
||||
"$ matches the end of string, and just before the terminating \n"
|
||||
pattern = re.compile('$')
|
||||
|
|
|
@ -320,6 +320,12 @@ Extension Modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
|
||||
Several consecutive inline modifiers are now allowed at the start of the
|
||||
pattern (e.g. ``'(?i)(?s)...'``). In verbose mode, whitespace and comments
|
||||
are now allowed before and between inline modifiers (e.g.
|
||||
``'(?x) (?i) (?s)...'``).
|
||||
|
||||
- bpo-30285: Optimized case-insensitive matching and searching of regular
|
||||
expressions.
|
||||
|
||||
|
|
Loading…
Reference in New Issue