From 03d6ee382340dc1db57d7bbc269de0e102ed1a7a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 6 Jul 2015 13:58:33 +0300 Subject: [PATCH] Issue #18684: Fixed reading out of the buffer in the re module. --- Misc/NEWS | 2 ++ Modules/_sre.c | 26 +++++++++++++++++++++----- Modules/sre_lib.h | 29 ++++++++++++++++++++--------- 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 8713f3135fe..f064e0ae9b8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -66,6 +66,8 @@ Core and Builtins Library ------- +- Issue #18684: Fixed reading out of the buffer in the re module. + - Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment. diff --git a/Modules/_sre.c b/Modules/_sre.c index 300d883cf61..d6fcda18b69 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -883,7 +883,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) } if (state.start == state.ptr) { - if (last == state.end) + if (last == state.end || state.ptr == state.end) break; /* skip one character */ state.start = (void*) ((char*) state.ptr + state.charsize); @@ -1081,6 +1081,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, next: /* move on */ + if (state.ptr == state.end) + break; if (state.ptr == state.start) state.start = (void*) ((char*) state.ptr + state.charsize); else @@ -2567,6 +2569,9 @@ scanner_match(ScannerObject* self, PyObject *unused) PyObject* match; Py_ssize_t status; + if (state->start == NULL) + Py_RETURN_NONE; + state_reset(state); state->ptr = state->start; @@ -2578,10 +2583,14 @@ scanner_match(ScannerObject* self, PyObject *unused) match = pattern_new_match((PatternObject*) self->pattern, state, status); - if (status == 0 || state->ptr == state->start) + if (status == 0) + state->start = NULL; + else if (state->ptr != state->start) + state->start = state->ptr; + else if (state->ptr != state->end) state->start = (void*) ((char*) state->ptr + state->charsize); else - state->start = state->ptr; + state->start = NULL; return match; } @@ -2594,6 +2603,9 @@ scanner_search(ScannerObject* self, PyObject *unused) PyObject* match; Py_ssize_t status; + if (state->start == NULL) + Py_RETURN_NONE; + state_reset(state); state->ptr = state->start; @@ -2605,10 +2617,14 @@ scanner_search(ScannerObject* self, PyObject *unused) match = pattern_new_match((PatternObject*) self->pattern, state, status); - if (status == 0 || state->ptr == state->start) + if (status == 0) + state->start = NULL; + else if (state->ptr != state->start) + state->start = state->ptr; + else if (state->ptr != state->end) state->start = (void*) ((char*) state->ptr + state->charsize); else - state->start = state->ptr; + state->start = NULL; return match; } diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 5c6c5a559e6..1049de4226c 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) SRE_IS_LINEBREAK((int) ptr[-1])); case SRE_AT_END: - return (((void*) (ptr+1) == state->end && + return (((SRE_CHAR *)state->end - ptr == 1 && SRE_IS_LINEBREAK((int) ptr[0])) || ((void*) ptr == state->end)); @@ -1093,9 +1093,9 @@ entrance: /* */ TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, ctx->ptr, ctx->pattern[1])); - state->ptr = ctx->ptr - ctx->pattern[1]; - if (state->ptr < state->beginning) + if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) RETURN_FAILURE; + state->ptr = ctx->ptr - ctx->pattern[1]; DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); RETURN_ON_FAILURE(ret); ctx->pattern += ctx->pattern[0]; @@ -1106,8 +1106,8 @@ entrance: /* */ TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, ctx->ptr, ctx->pattern[1])); - state->ptr = ctx->ptr - ctx->pattern[1]; - if (state->ptr >= state->beginning) { + if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { + state->ptr = ctx->ptr - ctx->pattern[1]; DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); if (ret) { RETURN_ON_ERROR(ret); @@ -1199,12 +1199,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) SRE_CODE* overlap = NULL; int flags = 0; + if (ptr > end) + return 0; + if (pattern[0] == SRE_OP_INFO) { /* optimization info block */ /* <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ flags = pattern[2]; + if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { + TRACE(("reject (got %u chars, need %u)\n", + (unsigned int)(end - ptr), pattern[3])); + return 0; + } if (pattern[3] > 1) { /* adjust end point (but make sure we leave at least one character in there, so literal search will work) */ @@ -1322,15 +1330,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) break; ptr++; } - } else + } else { /* general case */ - while (ptr <= end) { + assert(ptr <= end); + while (1) { TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); - state->start = state->ptr = ptr++; + state->start = state->ptr = ptr; status = SRE(match)(state, pattern, 0); - if (status != 0) + if (status != 0 || ptr >= end) break; + ptr++; } + } return status; }