Issue #1160: Fix compiling large regular expressions on UCS2 builds.

Patch by Serhiy Storchaka.
This commit is contained in:
Antoine Pitrou 2012-11-20 22:30:42 +01:00
parent 4a1fdcf07d
commit 39bdad813a
4 changed files with 17 additions and 9 deletions

View File

@ -419,6 +419,12 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match("([\u2222\u2223])",
"\u2222", re.UNICODE).group(1), "\u2222")
def test_big_codesize(self):
# Regression test for issue #1160 (compiling large regular expressions).
# Builds one big alternation by joining the decimal strings for
# 0..9999 with '|', compiles it, and checks that match() returns a
# non-None result for the inputs '1000' and '9999'.
r = re.compile('|'.join(('%d'%x for x in range(10000))))
self.assertIsNotNone(r.match('1000'))
self.assertIsNotNone(r.match('9999'))
def test_anyall(self):
self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
"a\nb")

View File

@ -167,6 +167,9 @@ Core and Builtins
Library
-------
- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
Patch by Serhiy Storchaka.
- Issue #14313: zipfile now raises NotImplementedError when the compression
type is unknown.

View File

@ -2695,6 +2695,13 @@ _compile(PyObject* self_, PyObject* args)
for (i = 0; i < n; i++) {
PyObject *o = PyList_GET_ITEM(code, i);
unsigned long value = PyLong_AsUnsignedLong(o);
if (value == (unsigned long)-1 && PyErr_Occurred()) {
if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
PyErr_SetString(PyExc_OverflowError,
"regular expression code size limit exceeded");
}
break;
}
self->code[i] = (SRE_CODE) value;
if ((unsigned long) self->code[i] != value) {
PyErr_SetString(PyExc_OverflowError,
@ -3065,10 +3072,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; max = arg;
if (min > max)
FAIL;
#ifdef Py_UNICODE_WIDE
if (max > 65535)
FAIL;
#endif
if (!_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
@ -3086,10 +3091,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; max = arg;
if (min > max)
FAIL;
#ifdef Py_UNICODE_WIDE
if (max > 65535)
FAIL;
#endif
if (!_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;

View File

@ -14,12 +14,8 @@
#include "sre_constants.h"
/* size of a code word (must be unsigned short or larger, and
large enough to hold a Py_UNICODE character) */
#ifdef Py_UNICODE_WIDE
large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4
#else
#define SRE_CODE unsigned short
#endif
typedef struct {
PyObject_VAR_HEAD