diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0bceaa292fb..ff2c9535178 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -425,6 +425,12 @@ class ReTests(unittest.TestCase): self.assertEqual(re.match(u"([\u2222\u2223])", u"\u2222", re.UNICODE).group(1), u"\u2222") + def test_big_codesize(self): + # Issue #1160 + r = re.compile('|'.join(('%d'%x for x in range(10000)))) + self.assertIsNotNone(r.match('1000')) + self.assertIsNotNone(r.match('9999')) + def test_anyall(self): self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0), "a\nb") diff --git a/Misc/NEWS b/Misc/NEWS index b5b7a5042ce..f69dd3c0976 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -151,6 +151,9 @@ Core and Builtins Library ------- +- Issue #1160: Fix compiling large regular expressions on UCS2 builds. + Patch by Serhiy Storchaka. + - Issue #14313: zipfile now raises NotImplementedError when the compression type is unknown. diff --git a/Modules/_sre.c b/Modules/_sre.c index cd959179628..ab4f269be58 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -2675,6 +2675,13 @@ _compile(PyObject* self_, PyObject* args) PyObject *o = PyList_GET_ITEM(code, i); unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) : PyLong_AsUnsignedLong(o); + if (value == (unsigned long)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_SetString(PyExc_OverflowError, + "regular expression code size limit exceeded"); + } + break; + } self->code[i] = (SRE_CODE) value; if ((unsigned long) self->code[i] != value) { PyErr_SetString(PyExc_OverflowError, @@ -3035,10 +3042,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) GET_ARG; max = arg; if (min > max) FAIL; -#ifdef Py_UNICODE_WIDE if (max > 65535) FAIL; -#endif if (!_validate_inner(code, code+skip-4, groups)) FAIL; code += skip-4; @@ -3056,10 +3061,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) GET_ARG; max = arg; if (min > max) FAIL; -#ifdef Py_UNICODE_WIDE if (max > 65535) FAIL; -#endif if (!_validate_inner(code, code+skip-3, groups)) FAIL; code += skip-3; diff --git a/Modules/sre.h b/Modules/sre.h index d4af05c045e..9bfdf7fdfcd 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -14,12 +14,8 @@ #include "sre_constants.h" /* size of a code word (must be unsigned short or larger, and - large enough to hold a Py_UNICODE character) */ -#ifdef Py_UNICODE_WIDE + large enough to hold a UCS4 character) */ #define SRE_CODE Py_UCS4 -#else -#define SRE_CODE unsigned short -#endif typedef struct { PyObject_VAR_HEAD