From 39bdad813a2f07bd8c373e2c88b76467ef12832c Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Tue, 20 Nov 2012 22:30:42 +0100
Subject: [PATCH] Issue #1160: Fix compiling large regular expressions on UCS2 builds. Patch by Serhiy Storchaka.

---
 Lib/test/test_re.py |  6 ++++++
 Misc/NEWS           |  3 +++
 Modules/_sre.c      | 11 +++++++----
 Modules/sre.h       |  6 +-----
 4 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 5162d4e6d83..39972d51e89 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -419,6 +419,12 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.match("([\u2222\u2223])",
                                   "\u2222", re.UNICODE).group(1), "\u2222")
 
+    def test_big_codesize(self):
+        # Issue #1160
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
+
     def test_anyall(self):
         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                          "a\nb")
diff --git a/Misc/NEWS b/Misc/NEWS
index c8580e33cb0..7aa34d9671b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -167,6 +167,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
+  Patch by Serhiy Storchaka.
+
 - Issue #14313: zipfile now raises NotImplementedError when the compression
   type is unknown.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 472b5a3797d..9600a080ecd 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2695,6 +2695,13 @@ _compile(PyObject* self_, PyObject* args)
     for (i = 0; i < n; i++) {
         PyObject *o = PyList_GET_ITEM(code, i);
         unsigned long value = PyLong_AsUnsignedLong(o);
+        if (value == (unsigned long)-1 && PyErr_Occurred()) {
+            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "regular expression code size limit exceeded");
+            }
+            break;
+        }
         self->code[i] = (SRE_CODE) value;
         if ((unsigned long) self->code[i] != value) {
             PyErr_SetString(PyExc_OverflowError,
@@ -3065,10 +3072,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
                 code += skip-4;
@@ -3086,10 +3091,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
                 code += skip-3;
diff --git a/Modules/sre.h b/Modules/sre.h
index aec9b541dd0..d389b46dd13 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -14,12 +14,8 @@
 #include "sre_constants.h"
 
 /* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+   large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD