Issue #22437: The number of capturing groups in a regular expression is no longer
limited to 100.
This commit is contained in:
parent
c31e6227f9
commit
9baa5b2de2
|
@ -217,6 +217,12 @@ os
|
|||
* :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes`
|
||||
attribute on Windows (contributed by Ben Hoyt in :issue:`21719`).
|
||||
|
||||
re
|
||||
--
|
||||
|
||||
* The number of capturing groups in a regular expression is no longer limited to 100.
|
||||
(Contributed by Serhiy Storchaka in :issue:`22437`.)
|
||||
|
||||
shutil
|
||||
------
|
||||
|
||||
|
|
|
@ -470,12 +470,6 @@ def compile(p, flags=0):
|
|||
|
||||
# print code
|
||||
|
||||
# XXX: <fl> get rid of this limitation!
|
||||
if p.pattern.groups > 100:
|
||||
raise AssertionError(
|
||||
"sorry, but this version only supports 100 named groups"
|
||||
)
|
||||
|
||||
# map in either direction
|
||||
groupindex = p.pattern.groupdict
|
||||
indexgroup = [None] * p.pattern.groups
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
MAGIC = 20031017
|
||||
|
||||
from _sre import MAXREPEAT
|
||||
from _sre import MAXREPEAT, MAXGROUPS
|
||||
|
||||
# SRE standard exception (access as sre.error)
|
||||
# should this really be here?
|
||||
|
|
|
@ -72,6 +72,8 @@ class Pattern:
|
|||
def opengroup(self, name=None):
|
||||
gid = self.groups
|
||||
self.groups = gid + 1
|
||||
if self.groups > MAXGROUPS:
|
||||
raise error("groups number is too large")
|
||||
if name is not None:
|
||||
ogid = self.groupdict.get(name, None)
|
||||
if ogid is not None:
|
||||
|
@ -695,8 +697,14 @@ def _parse(source, state):
|
|||
else:
|
||||
try:
|
||||
condgroup = int(condname)
|
||||
if condgroup < 0:
|
||||
raise ValueError
|
||||
except ValueError:
|
||||
raise error("bad character in group name")
|
||||
if not condgroup:
|
||||
raise error("bad group number")
|
||||
if condgroup >= MAXGROUPS:
|
||||
raise error("the group number is too large")
|
||||
else:
|
||||
# flags
|
||||
if not source.next in FLAGS:
|
||||
|
@ -822,6 +830,8 @@ def parse_template(source, pattern):
|
|||
index = int(name)
|
||||
if index < 0:
|
||||
raise error("negative group number")
|
||||
if index >= MAXGROUPS:
|
||||
raise error("the group number is too large")
|
||||
except ValueError:
|
||||
if not name.isidentifier():
|
||||
raise error("bad character in group name")
|
||||
|
|
|
@ -193,6 +193,7 @@ class ReTests(unittest.TestCase):
|
|||
def test_symbolic_groups(self):
|
||||
re.compile('(?P<a>x)(?P=a)(?(a)y)')
|
||||
re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
|
||||
re.compile('(?P<a1>x)\1(?(1)y)')
|
||||
self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
|
||||
self.assertRaises(re.error, re.compile, '(?Px)')
|
||||
self.assertRaises(re.error, re.compile, '(?P=)')
|
||||
|
@ -212,6 +213,10 @@ class ReTests(unittest.TestCase):
|
|||
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
||||
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
||||
self.assertRaises(re.error, re.compile, '(?P<©>x)')
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
pat = '(?:%s)(?(200)z|t)' % pat
|
||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||
|
||||
def test_symbolic_refs(self):
|
||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
|
||||
|
@ -228,6 +233,9 @@ class ReTests(unittest.TestCase):
|
|||
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
||||
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
|
||||
|
||||
def test_re_subn(self):
|
||||
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
|
||||
|
@ -404,6 +412,10 @@ class ReTests(unittest.TestCase):
|
|||
self.assertIsNone(p.match('abd'))
|
||||
self.assertIsNone(p.match('ac'))
|
||||
|
||||
# Support > 100 groups.
|
||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||
pat = '(?:%s)(?(200)z)' % pat
|
||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||
|
||||
def test_re_groupref(self):
|
||||
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
|
||||
|
@ -1070,8 +1082,10 @@ class ReTests(unittest.TestCase):
|
|||
# a RuntimeError is raised instead of OverflowError.
|
||||
long_overflow = 2**128
|
||||
self.assertRaises(TypeError, re.finditer, "a", {})
|
||||
self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
|
||||
self.assertRaises(TypeError, _sre.compile, {}, 0, [])
|
||||
with self.assertRaises(OverflowError):
|
||||
_sre.compile("abc", 0, [long_overflow], 0, [], [])
|
||||
with self.assertRaises(TypeError):
|
||||
_sre.compile({}, 0, [], 0, [], [])
|
||||
|
||||
def test_search_dot_unicode(self):
|
||||
self.assertTrue(re.search("123.*-", '123abc-'))
|
||||
|
|
|
@ -145,6 +145,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #22437: The number of capturing groups in a regular expression is no longer
|
||||
limited to 100.
|
||||
|
||||
- Issue #17442: InteractiveInterpreter now displays the full chained traceback
|
||||
in its showtraceback method, to match the built in interactive interpreter.
|
||||
|
||||
|
|
|
@ -357,6 +357,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
|
||||
memset(state, 0, sizeof(SRE_STATE));
|
||||
|
||||
state->mark = PyMem_New(void *, pattern->groups * 2);
|
||||
if (!state->mark) {
|
||||
PyErr_NoMemory();
|
||||
goto err;
|
||||
}
|
||||
state->lastmark = -1;
|
||||
state->lastindex = -1;
|
||||
|
||||
|
@ -409,6 +414,8 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
|
||||
return string;
|
||||
err:
|
||||
PyMem_Del(state->mark);
|
||||
state->mark = NULL;
|
||||
if (state->buffer.buf)
|
||||
PyBuffer_Release(&state->buffer);
|
||||
return NULL;
|
||||
|
@ -421,6 +428,8 @@ state_fini(SRE_STATE* state)
|
|||
PyBuffer_Release(&state->buffer);
|
||||
Py_XDECREF(state->string);
|
||||
data_stack_dealloc(state);
|
||||
PyMem_Del(state->mark);
|
||||
state->mark = NULL;
|
||||
}
|
||||
|
||||
/* calculate offset from start of string */
|
||||
|
@ -560,6 +569,7 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
|
|||
PyObject *pattern = NULL;
|
||||
SRE_STATE state;
|
||||
Py_ssize_t status;
|
||||
PyObject *match;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
|
||||
"|Onn$O:match", _keywords,
|
||||
|
@ -579,12 +589,14 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
|
|||
status = sre_match(&state, PatternObject_GetCode(self), 0);
|
||||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
|
||||
if (PyErr_Occurred()) {
|
||||
state_fini(&state);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (PyObject *)pattern_new_match(self, &state, status);
|
||||
match = pattern_new_match(self, &state, status);
|
||||
state_fini(&state);
|
||||
return match;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
|
@ -592,6 +604,7 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
{
|
||||
SRE_STATE state;
|
||||
Py_ssize_t status;
|
||||
PyObject *match;
|
||||
|
||||
PyObject *string = NULL, *string2 = NULL;
|
||||
Py_ssize_t start = 0;
|
||||
|
@ -616,12 +629,14 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
status = sre_match(&state, PatternObject_GetCode(self), 1);
|
||||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
|
||||
if (PyErr_Occurred()) {
|
||||
state_fini(&state);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pattern_new_match(self, &state, status);
|
||||
match = pattern_new_match(self, &state, status);
|
||||
state_fini(&state);
|
||||
return match;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
|
@ -629,6 +644,7 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
{
|
||||
SRE_STATE state;
|
||||
Py_ssize_t status;
|
||||
PyObject *match;
|
||||
|
||||
PyObject *string = NULL, *string2 = NULL;
|
||||
Py_ssize_t start = 0;
|
||||
|
@ -652,12 +668,14 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
|
|||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
||||
if (PyErr_Occurred()) {
|
||||
state_fini(&state);
|
||||
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pattern_new_match(self, &state, status);
|
||||
match = pattern_new_match(self, &state, status);
|
||||
state_fini(&state);
|
||||
return match;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
|
@ -1417,7 +1435,7 @@ _compile(PyObject* self_, PyObject* args)
|
|||
PyObject* groupindex = NULL;
|
||||
PyObject* indexgroup = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
|
||||
if (!PyArg_ParseTuple(args, "OiO!nOO", &pattern, &flags,
|
||||
&PyList_Type, &code, &groups,
|
||||
&groupindex, &indexgroup))
|
||||
return NULL;
|
||||
|
@ -1933,10 +1951,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
|
|||
static int
|
||||
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
|
||||
{
|
||||
if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
|
||||
if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
|
||||
code >= end || end[-1] != SRE_OP_SUCCESS)
|
||||
FAIL;
|
||||
if (groups == 0) /* fix for simplejson */
|
||||
groups = 100; /* 100 groups should always be safe */
|
||||
return _validate_inner(code, end-1, groups);
|
||||
}
|
||||
|
||||
|
@ -2747,6 +2764,12 @@ PyMODINIT_FUNC PyInit__sre(void)
|
|||
Py_DECREF(x);
|
||||
}
|
||||
|
||||
x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
|
||||
if (x) {
|
||||
PyDict_SetItemString(d, "MAXGROUPS", x);
|
||||
Py_DECREF(x);
|
||||
}
|
||||
|
||||
x = PyUnicode_FromString(copyright);
|
||||
if (x) {
|
||||
PyDict_SetItemString(d, "copyright", x);
|
||||
|
|
|
@ -18,8 +18,10 @@
|
|||
#define SRE_CODE Py_UCS4
|
||||
#if SIZEOF_SIZE_T > 4
|
||||
# define SRE_MAXREPEAT (~(SRE_CODE)0)
|
||||
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
|
||||
#else
|
||||
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
|
||||
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
|
@ -52,9 +54,6 @@ typedef struct {
|
|||
|
||||
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
|
||||
|
||||
/* FIXME: <fl> shouldn't be a constant, really... */
|
||||
#define SRE_MARK_SIZE 200
|
||||
|
||||
typedef struct SRE_REPEAT_T {
|
||||
Py_ssize_t count;
|
||||
SRE_CODE* pattern; /* points to REPEAT operator arguments */
|
||||
|
@ -76,7 +75,7 @@ typedef struct {
|
|||
/* registers */
|
||||
Py_ssize_t lastindex;
|
||||
Py_ssize_t lastmark;
|
||||
void* mark[SRE_MARK_SIZE];
|
||||
void** mark;
|
||||
/* dynamically allocated stuff */
|
||||
char* data_stack;
|
||||
size_t data_stack_size;
|
||||
|
|
Loading…
Reference in New Issue