Issues #814253, #9179: Warnings are now raised when group references and
conditional group references are used in lookbehind assertions in regular
expressions.
This commit is contained in:
Serhiy Storchaka 2015-02-21 12:08:52 +02:00
parent a1543cdcd6
commit a3369a524c
4 changed files with 61 additions and 3 deletions

View File

@ -281,7 +281,9 @@ The special characters are:
assertion`. ``(?<=abc)def`` will find a match in ``abcdef``, since the
lookbehind will back up 3 characters and check if the contained pattern matches.
The contained pattern must only match strings of some fixed length, meaning that
``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not. Note that
``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not. Group
references are not supported even if they match strings of some fixed length.
Note that
patterns which start with positive lookbehind assertions will not match at the
beginning of the string being searched; you will most likely want to use the
:func:`search` function rather than the :func:`match` function:
@ -301,7 +303,8 @@ The special characters are:
Matches if the current position in the string is not preceded by a match for
``...``. This is called a :dfn:`negative lookbehind assertion`. Similar to
positive lookbehind assertions, the contained pattern must only match strings of
some fixed length. Patterns which start with negative lookbehind assertions may
some fixed length and shouldn't contain group references.
Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.
``(?(id/name)yes-pattern|no-pattern)``

View File

@ -69,6 +69,8 @@ class Pattern:
self.open = []
self.groups = 1
self.groupdict = {}
self.lookbehind = 0
def opengroup(self, name=None):
gid = self.groups
self.groups = gid + 1
@ -352,6 +354,11 @@ def _escape(source, escape, state):
if group < state.groups:
if not state.checkgroup(group):
raise error("cannot refer to open group")
if state.lookbehind:
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
return GROUPREF, group
raise ValueError
if len(escape) == 2:
@ -630,6 +637,11 @@ def _parse(source, state):
if gid is None:
msg = "unknown group name: {0!r}".format(name)
raise error(msg)
if state.lookbehind:
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
subpatternappend((GROUPREF, gid))
continue
else:
@ -658,7 +670,10 @@ def _parse(source, state):
raise error("syntax error")
dir = -1 # lookbehind
char = sourceget()
state.lookbehind += 1
p = _parse_sub(source, state)
if dir < 0:
state.lookbehind -= 1
if not sourcematch(")"):
raise error("unbalanced parenthesis")
if char == "=":
@ -689,6 +704,11 @@ def _parse(source, state):
condgroup = int(condname)
except ValueError:
raise error("bad character in group name")
if state.lookbehind:
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
else:
# flags
if not source.next in FLAGS:

View File

@ -557,7 +557,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
"a\n\nb")
def test_non_consuming(self):
def test_lookahead(self):
self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
@ -571,6 +571,37 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
# Group reference.
self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
# Named group reference.
self.assertTrue(re.match(r'(?P<g>a)b(?=(?P=g))a', 'aba'))
self.assertIsNone(re.match(r'(?P<g>a)b(?=(?P=g))c', 'abac'))
# Conditional group reference.
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
# Group used before defined.
self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
def test_lookbehind(self):
# Plain lookbehind assertions: (?<=...) requires the preceding text to
# match, (?<!...) requires that it does not.  In 'abc' the character
# before 'c' is 'b', so only the assertions consistent with that succeed.
self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
# The remaining checks only compile the patterns: every kind of group
# reference placed inside a lookbehind is expected to emit a
# RuntimeWarning from re.compile.
# Group reference.
self.assertWarns(RuntimeWarning, re.compile, r'(a)a(?<=\1)c')
# Named group reference.
self.assertWarns(RuntimeWarning, re.compile, r'(?P<g>a)a(?<=(?P=g))c')
# Conditional group reference.
self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(1)b|x))c')
# Group used before defined.
# (the condition tests group 2, which is only opened after the lookbehind)
self.assertWarns(RuntimeWarning, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
def test_ignore_case(self):
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")

View File

@ -13,6 +13,10 @@ Core and Builtins
Library
-------
- Issues #814253, #9179: Warnings are now raised when group references and
conditional group references are used in lookbehind assertions in regular
expressions.
- Issue #23215: Multibyte codecs with custom error handlers that ignore errors
consumed too much memory and raised SystemError or MemoryError.
Original patch by Aleksi Torhamo.