Issue #16688: Fix backreferences that made case-insensitive regexes fail on non-ASCII strings.

Patch by Matthew Barnett.
This commit is contained in:
Serhiy Storchaka 2012-12-29 23:38:48 +02:00
parent 2d8298dcd2
commit c1b59d4552
4 changed files with 15 additions and 5 deletions

View File

@ -968,6 +968,11 @@ class ReTests(unittest.TestCase):
self.assertEqual(r, s)
self.assertEqual(n, size + 1)
def test_bug_16688(self):
# Issue 16688: Backreferences make case-insensitive regex fail on
# non-ASCII strings.
# The subject contains U+0100 (a non-ASCII character); the
# case-insensitive backreference must still find the "aa" pair.
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
# A bounded repeat over a string of two non-ASCII characters must
# consume both of them, i.e. the match spans characters 0 to 2.
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

View File

@ -70,6 +70,7 @@ Anton Barkovsky
Nick Barnes
Quentin Barnes
David Barnett
Matthew Barnett
Richard Barran
Cesar Eduardo Barros
Des Barry

View File

@ -124,6 +124,9 @@ Core and Builtins
Library
-------
- Issue #16688: Fix backreferences that made case-insensitive regexes fail on
non-ASCII strings. Patch by Matthew Barnett.
- Issue #16485: Fix file descriptor not being closed if file header patching
fails on closing of aifc file.

View File

@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Py_ssize_t i;
/* adjust end */
if (maxcount < end - ptr && maxcount != 65535)
if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
end = ptr + maxcount*state->charsize;
switch (pattern[0]) {
@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
Py_ssize_t i;
/* check minimal length */
if (pattern[3] && (end - ptr) < pattern[3])
if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
return 0;
/* check known prefix */
@ -801,7 +801,7 @@ entrance:
/* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n",
(end - ctx->ptr), ctx->pattern[3]));
(end - ctx->ptr)/state->charsize, ctx->pattern[3]));
RETURN_FAILURE;
}
ctx->pattern += ctx->pattern[1] + 1;
@ -1329,9 +1329,10 @@ entrance:
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
state->lower(SRE_CHARGET(state, p, 0)))
RETURN_FAILURE;
p++;
p += state->charsize;
ctx->ptr += state->charsize;
}
}