Issue #16688: Fix backreferences that made case-insensitive regexes fail on non-ASCII strings.
Patch by Matthew Barnett.
This commit is contained in:
parent
2d8298dcd2
commit
c1b59d4552
|
@ -968,6 +968,11 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(r, s)
|
||||
self.assertEqual(n, size + 1)
|
||||
|
||||
def test_bug_16688(self):
|
||||
# Issue 16688: Backreferences make case-insensitive regex fail on
|
||||
# non-ASCII strings.
|
||||
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
|
||||
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
|
||||
|
||||
def run_re_tests():
|
||||
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
||||
|
|
|
@ -70,6 +70,7 @@ Anton Barkovsky
|
|||
Nick Barnes
|
||||
Quentin Barnes
|
||||
David Barnett
|
||||
Matthew Barnett
|
||||
Richard Barran
|
||||
Cesar Eduardo Barros
|
||||
Des Barry
|
||||
|
|
|
@ -124,6 +124,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #16688: Fix backreferences that made case-insensitive regexes fail on
|
||||
non-ASCII strings. Patch by Matthew Barnett.
|
||||
|
||||
- Issue #16485: Fix file descriptor not being closed if file header patching
|
||||
fails on closing of aifc file.
|
||||
|
||||
|
|
|
@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
|||
Py_ssize_t i;
|
||||
|
||||
/* adjust end */
|
||||
if (maxcount < end - ptr && maxcount != 65535)
|
||||
if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
|
||||
end = ptr + maxcount*state->charsize;
|
||||
|
||||
switch (pattern[0]) {
|
||||
|
@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
|
|||
Py_ssize_t i;
|
||||
|
||||
/* check minimal length */
|
||||
if (pattern[3] && (end - ptr) < pattern[3])
|
||||
if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
|
||||
return 0;
|
||||
|
||||
/* check known prefix */
|
||||
|
@ -801,7 +801,7 @@ entrance:
|
|||
/* <INFO> <1=skip> <2=flags> <3=min> ... */
|
||||
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
|
||||
TRACE(("reject (got %d chars, need %d)\n",
|
||||
(end - ctx->ptr), ctx->pattern[3]));
|
||||
(end - ctx->ptr)/state->charsize, ctx->pattern[3]));
|
||||
RETURN_FAILURE;
|
||||
}
|
||||
ctx->pattern += ctx->pattern[1] + 1;
|
||||
|
@ -1329,9 +1329,10 @@ entrance:
|
|||
RETURN_FAILURE;
|
||||
while (p < e) {
|
||||
if (ctx->ptr >= end ||
|
||||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
|
||||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
|
||||
state->lower(SRE_CHARGET(state, p, 0)))
|
||||
RETURN_FAILURE;
|
||||
p++;
|
||||
p += state->charsize;
|
||||
ctx->ptr += state->charsize;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue