mirror of https://github.com/python/cpython
gh-67877: Fix memory leaks in terminated RE matching (GH-126840)
If the SRE(match) function terminates abruptly, either because of a signal or because memory allocation fails, allocated SRE_REPEAT blocks might never be released. Co-authored-by: <wjssz@users.noreply.github.com>
This commit is contained in:
parent
3938fd60c0
commit
7538e7f569
|
@ -2681,6 +2681,29 @@ class ReTests(unittest.TestCase):
|
|||
self.assertIsNone(re.search(p, s))
|
||||
self.assertIsNone(re.search('(?s:.)' + p, s))
|
||||
|
||||
def check_interrupt(self, pattern, string, maxcount):
    """Match *string* against *pattern* with a simulated interruption.

    For n = 0, 1, ..., maxcount - 1 the compiled pattern is armed with
    ``_fail_after(n, Interrupt)`` and ``match()`` is attempted.  The
    first n for which the match finishes without raising Interrupt is
    returned.  If every attempt is interrupted (or maxcount is not
    positive), None is returned.
    """
    class Interrupt(Exception):
        pass

    compiled = re.compile(pattern)
    for attempt in range(maxcount):
        try:
            compiled._fail_after(attempt, Interrupt)
            compiled.match(string)
        except Interrupt:
            # Interrupted part-way through; retry with a larger budget.
            continue
        else:
            return attempt
        finally:
            # Always reset the pattern's failure-injection settings so
            # the next attempt starts from a known configuration.
            compiled._fail_after(-1, None)
    return None
|
||||
|
||||
@unittest.skipUnless(hasattr(re.Pattern, '_fail_after'), 'requires debug build')
def test_memory_leaks(self):
    # Interrupt matching at every possible step for the greedy, lazy
    # and possessive forms of an unbounded repeat and of a bounded
    # ({2,4}) repeat around a capturing group.
    patterns = (
        r'(.)*:',
        r'([^:])*?:',
        r'([^:])*+:',
        r'(.){2,4}:',
        r'([^:]){2,4}?:',
        r'([^:]){2,4}+:',
    )
    for pattern in patterns:
        self.check_interrupt(pattern, 'abc:', 100)
|
||||
|
||||
|
||||
def get_debug_out(pat):
|
||||
with captured_stdout() as out:
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fix memory leaks when :mod:`regular expression <re>` matching terminates
|
||||
abruptly, either because of a signal or because memory allocation fails.
|
|
@ -985,6 +985,44 @@ PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__,
|
|||
#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \
|
||||
{"__deepcopy__", (PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__},
|
||||
|
||||
#if defined(Py_DEBUG)
|
||||
|
||||
PyDoc_STRVAR(_sre_SRE_Pattern__fail_after__doc__,
|
||||
"_fail_after($self, count, exception, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"For debugging.");
|
||||
|
||||
#define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF \
|
||||
{"_fail_after", _PyCFunction_CAST(_sre_SRE_Pattern__fail_after), METH_FASTCALL, _sre_SRE_Pattern__fail_after__doc__},
|
||||
|
||||
static PyObject *
|
||||
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
|
||||
PyObject *exception);
|
||||
|
||||
static PyObject *
|
||||
_sre_SRE_Pattern__fail_after(PatternObject *self, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
int count;
|
||||
PyObject *exception;
|
||||
|
||||
if (!_PyArg_CheckPositional("_fail_after", nargs, 2, 2)) {
|
||||
goto exit;
|
||||
}
|
||||
count = PyLong_AsInt(args[0]);
|
||||
if (count == -1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
exception = args[1];
|
||||
return_value = _sre_SRE_Pattern__fail_after_impl(self, count, exception);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#endif /* defined(Py_DEBUG) */
|
||||
|
||||
PyDoc_STRVAR(_sre_compile__doc__,
|
||||
"compile($module, /, pattern, flags, code, groups, groupindex,\n"
|
||||
" indexgroup)\n"
|
||||
|
@ -1474,4 +1512,8 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const
|
|||
}
|
||||
return _sre_SRE_Scanner_search_impl(self, cls);
|
||||
}
|
||||
/*[clinic end generated code: output=afaa301d55957cb0 input=a9049054013a1b77]*/
|
||||
|
||||
#ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
|
||||
#define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
|
||||
#endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */
|
||||
/*[clinic end generated code: output=f8cb77f2261f0b2e input=a9049054013a1b77]*/
|
||||
|
|
|
@ -267,6 +267,85 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Memory pool functions for SRE_REPEAT.  Keeping every SRE_REPEAT in a
   pool avoids a memory leak when the SRE(match) function terminates
   abruptly.
   state->repeat_pool_used is a doubly-linked list, so that we
   can remove an SRE_REPEAT node from it.
   state->repeat_pool_unused is a singly-linked list; we put/get
   nodes at the head. */
|
||||
static SRE_REPEAT *
|
||||
repeat_pool_malloc(SRE_STATE *state)
|
||||
{
|
||||
SRE_REPEAT *repeat;
|
||||
|
||||
if (state->repeat_pool_unused) {
|
||||
/* remove from unused pool (singly-linked list) */
|
||||
repeat = state->repeat_pool_unused;
|
||||
state->repeat_pool_unused = repeat->pool_next;
|
||||
}
|
||||
else {
|
||||
repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
|
||||
if (!repeat) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* add to used pool (doubly-linked list) */
|
||||
SRE_REPEAT *temp = state->repeat_pool_used;
|
||||
if (temp) {
|
||||
temp->pool_prev = repeat;
|
||||
}
|
||||
repeat->pool_prev = NULL;
|
||||
repeat->pool_next = temp;
|
||||
state->repeat_pool_used = repeat;
|
||||
|
||||
return repeat;
|
||||
}
|
||||
|
||||
static void
|
||||
repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
|
||||
{
|
||||
SRE_REPEAT *prev = repeat->pool_prev;
|
||||
SRE_REPEAT *next = repeat->pool_next;
|
||||
|
||||
/* remove from used pool (doubly-linked list) */
|
||||
if (prev) {
|
||||
prev->pool_next = next;
|
||||
}
|
||||
else {
|
||||
state->repeat_pool_used = next;
|
||||
}
|
||||
if (next) {
|
||||
next->pool_prev = prev;
|
||||
}
|
||||
|
||||
/* add to unused pool (singly-linked list) */
|
||||
repeat->pool_next = state->repeat_pool_unused;
|
||||
state->repeat_pool_unused = repeat;
|
||||
}
|
||||
|
||||
static void
|
||||
repeat_pool_clear(SRE_STATE *state)
|
||||
{
|
||||
/* clear used pool */
|
||||
SRE_REPEAT *next = state->repeat_pool_used;
|
||||
state->repeat_pool_used = NULL;
|
||||
while (next) {
|
||||
SRE_REPEAT *temp = next;
|
||||
next = temp->pool_next;
|
||||
PyMem_Free(temp);
|
||||
}
|
||||
|
||||
/* clear unused pool */
|
||||
next = state->repeat_pool_unused;
|
||||
state->repeat_pool_unused = NULL;
|
||||
while (next) {
|
||||
SRE_REPEAT *temp = next;
|
||||
next = temp->pool_next;
|
||||
PyMem_Free(temp);
|
||||
}
|
||||
}
|
||||
|
||||
/* generate 8-bit version */
|
||||
|
||||
#define SRE_CHAR Py_UCS1
|
||||
|
@ -511,6 +590,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
state->pos = start;
|
||||
state->endpos = end;
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
state->fail_after_count = pattern->fail_after_count;
|
||||
state->fail_after_exc = pattern->fail_after_exc; // borrowed ref
|
||||
#endif
|
||||
|
||||
return string;
|
||||
err:
|
||||
/* We add an explicit cast here because MSVC has a bug when
|
||||
|
@ -533,6 +617,8 @@ state_fini(SRE_STATE* state)
|
|||
/* See above PyMem_Free() for why we explicitly cast here. */
|
||||
PyMem_Free((void*) state->mark);
|
||||
state->mark = NULL;
|
||||
/* SRE_REPEAT pool */
|
||||
repeat_pool_clear(state);
|
||||
}
|
||||
|
||||
/* calculate offset from start of string */
|
||||
|
@ -619,6 +705,9 @@ pattern_traverse(PatternObject *self, visitproc visit, void *arg)
|
|||
Py_VISIT(self->groupindex);
|
||||
Py_VISIT(self->indexgroup);
|
||||
Py_VISIT(self->pattern);
|
||||
#ifdef Py_DEBUG
|
||||
Py_VISIT(self->fail_after_exc);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -628,6 +717,9 @@ pattern_clear(PatternObject *self)
|
|||
Py_CLEAR(self->groupindex);
|
||||
Py_CLEAR(self->indexgroup);
|
||||
Py_CLEAR(self->pattern);
|
||||
#ifdef Py_DEBUG
|
||||
Py_CLEAR(self->fail_after_exc);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -690,7 +782,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls,
|
|||
Py_ssize_t status;
|
||||
PyObject *match;
|
||||
|
||||
if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
|
||||
if (!state_init(&state, self, string, pos, endpos))
|
||||
return NULL;
|
||||
|
||||
INIT_TRACE(&state);
|
||||
|
@ -1381,6 +1473,29 @@ _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo)
|
|||
return Py_NewRef(self);
|
||||
}
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
/*[clinic input]
|
||||
_sre.SRE_Pattern._fail_after
|
||||
|
||||
count: int
|
||||
exception: object
|
||||
/
|
||||
|
||||
For debugging.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count,
|
||||
PyObject *exception)
|
||||
/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/
|
||||
{
|
||||
self->fail_after_count = count;
|
||||
Py_INCREF(exception);
|
||||
Py_XSETREF(self->fail_after_exc, exception);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
#endif /* Py_DEBUG */
|
||||
|
||||
static PyObject *
|
||||
pattern_repr(PatternObject *obj)
|
||||
{
|
||||
|
@ -1506,6 +1621,10 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
|
|||
self->pattern = NULL;
|
||||
self->groupindex = NULL;
|
||||
self->indexgroup = NULL;
|
||||
#ifdef Py_DEBUG
|
||||
self->fail_after_count = -1;
|
||||
self->fail_after_exc = NULL;
|
||||
#endif
|
||||
|
||||
self->codesize = n;
|
||||
|
||||
|
@ -2604,7 +2723,8 @@ pattern_new_match(_sremodulestate* module_state,
|
|||
if (!match)
|
||||
return NULL;
|
||||
|
||||
match->pattern = (PatternObject*)Py_NewRef(pattern);
|
||||
Py_INCREF(pattern);
|
||||
match->pattern = pattern;
|
||||
|
||||
match->string = Py_NewRef(state->string);
|
||||
|
||||
|
@ -2740,7 +2860,7 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
match = pattern_new_match(module_state, (PatternObject*) self->pattern,
|
||||
match = pattern_new_match(module_state, self->pattern,
|
||||
state, status);
|
||||
|
||||
if (status == 0)
|
||||
|
@ -2790,7 +2910,7 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
match = pattern_new_match(module_state, (PatternObject*) self->pattern,
|
||||
match = pattern_new_match(module_state, self->pattern,
|
||||
state, status);
|
||||
|
||||
if (status == 0)
|
||||
|
@ -2826,7 +2946,8 @@ pattern_scanner(_sremodulestate *module_state,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
scanner->pattern = Py_NewRef(self);
|
||||
Py_INCREF(self);
|
||||
scanner->pattern = self;
|
||||
|
||||
PyObject_GC_Track(scanner);
|
||||
return (PyObject*) scanner;
|
||||
|
@ -3020,6 +3141,7 @@ static PyMethodDef pattern_methods[] = {
|
|||
_SRE_SRE_PATTERN_SCANNER_METHODDEF
|
||||
_SRE_SRE_PATTERN___COPY___METHODDEF
|
||||
_SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
|
||||
_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF
|
||||
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS,
|
||||
PyDoc_STR("See PEP 585")},
|
||||
{NULL, NULL}
|
||||
|
|
|
@ -34,6 +34,11 @@ typedef struct {
|
|||
int flags; /* flags used when compiling pattern source */
|
||||
PyObject *weakreflist; /* List of weak references */
|
||||
int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
|
||||
#ifdef Py_DEBUG
|
||||
/* for simulation of user interruption */
|
||||
int fail_after_count;
|
||||
PyObject *fail_after_exc;
|
||||
#endif
|
||||
/* pattern code */
|
||||
Py_ssize_t codesize;
|
||||
SRE_CODE code[1];
|
||||
|
@ -68,6 +73,9 @@ typedef struct SRE_REPEAT_T {
|
|||
const SRE_CODE* pattern; /* points to REPEAT operator arguments */
|
||||
const void* last_ptr; /* helper to check for infinite loops */
|
||||
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
|
||||
/* for SRE_REPEAT pool */
|
||||
struct SRE_REPEAT_T *pool_prev;
|
||||
struct SRE_REPEAT_T *pool_next;
|
||||
} SRE_REPEAT;
|
||||
|
||||
typedef struct {
|
||||
|
@ -95,12 +103,19 @@ typedef struct {
|
|||
size_t data_stack_base;
|
||||
/* current repeat context */
|
||||
SRE_REPEAT *repeat;
|
||||
/* SRE_REPEAT pool */
|
||||
SRE_REPEAT *repeat_pool_used;
|
||||
SRE_REPEAT *repeat_pool_unused;
|
||||
unsigned int sigcount;
|
||||
#ifdef Py_DEBUG
|
||||
int fail_after_count;
|
||||
PyObject *fail_after_exc;
|
||||
#endif
|
||||
} SRE_STATE;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
PyObject* pattern;
|
||||
PatternObject* pattern;
|
||||
SRE_STATE state;
|
||||
int executing;
|
||||
} ScannerObject;
|
||||
|
|
|
@ -560,13 +560,28 @@ typedef struct {
|
|||
Py_ssize_t last_ctx_pos;
|
||||
} SRE(match_context);
|
||||
|
||||
#define MAYBE_CHECK_SIGNALS \
|
||||
#define _MAYBE_CHECK_SIGNALS \
|
||||
do { \
|
||||
if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
|
||||
RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
# define MAYBE_CHECK_SIGNALS \
|
||||
do { \
|
||||
_MAYBE_CHECK_SIGNALS; \
|
||||
if (state->fail_after_count >= 0) { \
|
||||
if (state->fail_after_count-- == 0) { \
|
||||
PyErr_SetNone(state->fail_after_exc); \
|
||||
RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
|
||||
#endif /* Py_DEBUG */
|
||||
|
||||
#ifdef HAVE_COMPUTED_GOTOS
|
||||
#ifndef USE_COMPUTED_GOTOS
|
||||
#define USE_COMPUTED_GOTOS 1
|
||||
|
@ -1120,12 +1135,9 @@ dispatch:
|
|||
pattern[1], pattern[2]));
|
||||
|
||||
/* install new repeat context */
|
||||
/* TODO(https://github.com/python/cpython/issues/67877): Fix this
|
||||
* potential memory leak. */
|
||||
ctx->u.rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(*ctx->u.rep));
|
||||
ctx->u.rep = repeat_pool_malloc(state);
|
||||
if (!ctx->u.rep) {
|
||||
PyErr_NoMemory();
|
||||
RETURN_FAILURE;
|
||||
RETURN_ERROR(SRE_ERROR_MEMORY);
|
||||
}
|
||||
ctx->u.rep->count = -1;
|
||||
ctx->u.rep->pattern = pattern;
|
||||
|
@ -1136,7 +1148,7 @@ dispatch:
|
|||
state->ptr = ptr;
|
||||
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
|
||||
state->repeat = ctx->u.rep->prev;
|
||||
PyMem_Free(ctx->u.rep);
|
||||
repeat_pool_free(state, ctx->u.rep);
|
||||
|
||||
if (ret) {
|
||||
RETURN_ON_ERROR(ret);
|
||||
|
|
Loading…
Reference in New Issue