mirror of https://github.com/python/cpython
bpo-34294: re module, fix wrong capturing groups in rare cases. (GH-11546)
Capturing groups need to be reset between two SRE(match) calls in loops; this fixes wrong capturing groups in rare cases. Also adds a missing index entry in re.rst.
This commit is contained in:
parent
02c04f26df
commit
4a7f44a2ed
|
@ -371,6 +371,8 @@ The special characters are:
|
|||
``(?#...)``
|
||||
A comment; the contents of the parentheses are simply ignored.
|
||||
|
||||
.. index:: single: (?=; in regular expressions
|
||||
|
||||
``(?=...)``
|
||||
Matches if ``...`` matches next, but doesn't consume any of the string. This is
|
||||
called a :dfn:`lookahead assertion`. For example, ``Isaac (?=Asimov)`` will match
|
||||
|
|
|
@ -2067,6 +2067,40 @@ ELSE
|
|||
self.assertEqual(m.group(), b'xyz')
|
||||
self.assertEqual(m2.group(), b'')
|
||||
|
||||
def test_bug_34294(self):
|
||||
# Issue 34294: wrong capturing groups
|
||||
|
||||
# has existed since Python 2
|
||||
s = "a\tx"
|
||||
p = r"\b(?=(\t)|(x))x"
|
||||
self.assertEqual(re.search(p, s).groups(), (None, 'x'))
|
||||
|
||||
# introduced in Python 3.7.0
|
||||
s = "ab"
|
||||
p = r"(?=(.)(.)?)"
|
||||
self.assertEqual(re.findall(p, s),
|
||||
[('a', 'b'), ('b', '')])
|
||||
self.assertEqual([m.groups() for m in re.finditer(p, s)],
|
||||
[('a', 'b'), ('b', None)])
|
||||
|
||||
# test-cases provided by issue34294, introduced in Python 3.7.0
|
||||
p = r"(?=<(?P<tag>\w+)/?>(?:(?P<text>.+?)</(?P=tag)>)?)"
|
||||
s = "<test><foo2/></test>"
|
||||
self.assertEqual(re.findall(p, s),
|
||||
[('test', '<foo2/>'), ('foo2', '')])
|
||||
self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
|
||||
[{'tag': 'test', 'text': '<foo2/>'},
|
||||
{'tag': 'foo2', 'text': None}])
|
||||
s = "<test>Hello</test><foo/>"
|
||||
self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
|
||||
[{'tag': 'test', 'text': 'Hello'},
|
||||
{'tag': 'foo', 'text': None}])
|
||||
s = "<test>Hello</test><foo/><foo/>"
|
||||
self.assertEqual([m.groupdict() for m in re.finditer(p, s)],
|
||||
[{'tag': 'test', 'text': 'Hello'},
|
||||
{'tag': 'foo', 'text': None},
|
||||
{'tag': 'foo', 'text': None}])
|
||||
|
||||
|
||||
class PatternReprTests(unittest.TestCase):
|
||||
def check(self, pattern, expected):
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
re module, fix wrong capturing groups in rare cases. :func:`re.search`,
|
||||
:func:`re.findall`, :func:`re.sub` and other functions that scan through a
|
||||
string looking for a match should reset capturing groups between two match
|
||||
attempts. Patch by Ma Lin.
|
|
@ -340,7 +340,7 @@ _sre_unicode_tolower_impl(PyObject *module, int character)
|
|||
LOCAL(void)
|
||||
state_reset(SRE_STATE* state)
|
||||
{
|
||||
/* FIXME: dynamic! */
|
||||
/* state->mark will be set to 0 in SRE_OP_MARK dynamically. */
|
||||
/*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
|
||||
|
||||
state->lastmark = -1;
|
||||
|
|
|
@ -1363,6 +1363,10 @@ exit:
|
|||
return ret; /* should never get here */
|
||||
}
|
||||
|
||||
/* need to reset capturing groups between two SRE(match) calls in loops */
|
||||
#define RESET_CAPTURE_GROUP() \
|
||||
do { state->lastmark = state->lastindex = -1; } while (0)
|
||||
|
||||
LOCAL(Py_ssize_t)
|
||||
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
||||
{
|
||||
|
@ -1440,6 +1444,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (status != 0)
|
||||
return status;
|
||||
++ptr;
|
||||
RESET_CAPTURE_GROUP();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1487,6 +1492,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
/* close but no cigar -- try again */
|
||||
if (++ptr >= end)
|
||||
return 0;
|
||||
RESET_CAPTURE_GROUP();
|
||||
}
|
||||
i = overlap[i];
|
||||
} while (i != 0);
|
||||
|
@ -1510,6 +1516,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (status != 0)
|
||||
break;
|
||||
ptr++;
|
||||
RESET_CAPTURE_GROUP();
|
||||
}
|
||||
} else {
|
||||
/* general case */
|
||||
|
@ -1520,6 +1527,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
state->must_advance = 0;
|
||||
while (status == 0 && ptr < end) {
|
||||
ptr++;
|
||||
RESET_CAPTURE_GROUP();
|
||||
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
||||
state->start = state->ptr = ptr;
|
||||
status = SRE(match)(state, pattern, 0);
|
||||
|
|
Loading…
Reference in New Issue