From 429b59ec6990dab19d5a0e0a91f55b5f0e850cf6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 14 May 2014 21:48:17 +0300 Subject: [PATCH] Issue #20998: Fixed re.fullmatch() of repeated single character pattern with ignore case. Original patch by Matthew Barnett. --- Lib/test/test_re.py | 5 +++++ Misc/NEWS | 3 +++ Modules/_sre.c | 15 +++++++-------- Modules/sre.h | 1 - Modules/sre_lib.h | 20 ++++++++++---------- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 33ccd153983..0c8a52f23a5 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1223,6 +1223,11 @@ class ReTests(unittest.TestCase): pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(), (7, 9)) + def test_bug_20998(self): + # Issue #20998: Fullmatch of repeated single character pattern + # with ignore case. + self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) + class PatternReprTests(unittest.TestCase): def check(self, pattern, expected): diff --git a/Misc/NEWS b/Misc/NEWS index 161016ab402..e2836a1cea4 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -23,6 +23,9 @@ Core and Builtins Library ------- +- Issue #20998: Fixed re.fullmatch() of repeated single character pattern + with ignore case. Original patch by Matthew Barnett. + - Issue #21075: fileinput.FileInput now reads bytes from standard stream if binary mode is specified. Patch by Sam Kimbrel. diff --git a/Modules/_sre.c b/Modules/_sre.c index eb1106ad805..300d883cf61 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -505,14 +505,14 @@ pattern_dealloc(PatternObject* self) } LOCAL(Py_ssize_t) -sre_match(SRE_STATE* state, SRE_CODE* pattern) +sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all) { if (state->charsize == 1) - return sre_ucs1_match(state, pattern); + return sre_ucs1_match(state, pattern, match_all); if (state->charsize == 2) - return sre_ucs2_match(state, pattern); + return sre_ucs2_match(state, pattern, match_all); assert(state->charsize == 4); - return sre_ucs4_match(state, pattern); + return sre_ucs4_match(state, pattern, match_all); } LOCAL(Py_ssize_t) @@ -576,7 +576,7 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs) TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); - status = sre_match(&state, PatternObject_GetCode(self)); + status = sre_match(&state, PatternObject_GetCode(self), 0); TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) @@ -609,12 +609,11 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw) if (!string) return NULL; - state.match_all = 1; state.ptr = state.start; TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); - status = sre_match(&state, PatternObject_GetCode(self)); + status = sre_match(&state, PatternObject_GetCode(self), 1); TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) @@ -2572,7 +2571,7 @@ scanner_match(ScannerObject* self, PyObject *unused) state->ptr = state->start; - status = sre_match(state, PatternObject_GetCode(self->pattern)); + status = sre_match(state, PatternObject_GetCode(self->pattern), 0); if (PyErr_Occurred()) return NULL; diff --git a/Modules/sre.h b/Modules/sre.h index 621e2d88d53..42fe28d554c 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -86,7 +86,6 @@ typedef struct { SRE_REPEAT *repeat; /* hooks */ SRE_TOLOWER_HOOK lower; - int match_all; } SRE_STATE; typedef struct { diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index df86697690b..5c6c5a559e6 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -173,7 +173,7 @@ SRE(charset)(SRE_CODE* set, SRE_CODE ch) } } -LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern); +LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all); LOCAL(Py_ssize_t) SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) @@ -259,7 +259,7 @@ SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) /* repeated single character pattern */ TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); while ((SRE_CHAR*) state->ptr < end) { - i = SRE(match)(state, pattern); + i = SRE(match)(state, pattern, 0); if (i < 0) return i; if (!i) @@ -490,7 +490,7 @@ typedef struct { /* check if string matches the given pattern. returns <0 for error, 0 for failure, and 1 for success */ LOCAL(Py_ssize_t) -SRE(match)(SRE_STATE* state, SRE_CODE* pattern) +SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all) { SRE_CHAR* end = (SRE_CHAR *)state->end; Py_ssize_t alloc_pos, ctx_pos = -1; @@ -507,7 +507,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern) ctx->last_ctx_pos = -1; ctx->jump = JUMP_NONE; ctx->pattern = pattern; - ctx->match_all = state->match_all; + ctx->match_all = match_all; ctx_pos = alloc_pos; entrance: @@ -739,7 +739,7 @@ entrance: RETURN_FAILURE; if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && - (!ctx->match_all || ctx->ptr == state->end)) { + ctx->ptr == state->end) { /* tail is empty. we're finished */ state->ptr = ctx->ptr; RETURN_SUCCESS; @@ -824,7 +824,7 @@ entrance: } if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && - (!ctx->match_all || ctx->ptr == state->end)) { + (!match_all || ctx->ptr == state->end)) { /* tail is empty. we're finished */ state->ptr = ctx->ptr; RETURN_SUCCESS; @@ -1269,7 +1269,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) state->ptr = ptr - (prefix_len - prefix_skip - 1); if (flags & SRE_INFO_LITERAL) return 1; /* we got all of it */ - status = SRE(match)(state, pattern + 2*prefix_skip); + status = SRE(match)(state, pattern + 2*prefix_skip, 0); if (status != 0) return status; /* close but no cigar -- try again */ @@ -1302,7 +1302,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) state->ptr = ++ptr; if (flags & SRE_INFO_LITERAL) return 1; /* we got all of it */ - status = SRE(match)(state, pattern + 2); + status = SRE(match)(state, pattern + 2, 0); if (status != 0) break; } @@ -1317,7 +1317,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); state->start = ptr; state->ptr = ptr; - status = SRE(match)(state, pattern); + status = SRE(match)(state, pattern, 0); if (status != 0) break; ptr++; @@ -1327,7 +1327,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) while (ptr <= end) { TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); state->start = state->ptr = ptr++; - status = SRE(match)(state, pattern); + status = SRE(match)(state, pattern, 0); if (status != 0) break; }