diff --git a/Include/errcode.h b/Include/errcode.h index 8d44e9ae559..dac5cf068c9 100644 --- a/Include/errcode.h +++ b/Include/errcode.h @@ -19,24 +19,25 @@ extern "C" { #endif -#define E_OK 10 /* No error */ -#define E_EOF 11 /* End Of File */ -#define E_INTR 12 /* Interrupted */ -#define E_TOKEN 13 /* Bad token */ -#define E_SYNTAX 14 /* Syntax error */ -#define E_NOMEM 15 /* Ran out of memory */ -#define E_DONE 16 /* Parsing complete */ -#define E_ERROR 17 /* Execution error */ -#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ -#define E_OVERFLOW 19 /* Node had too many children */ -#define E_TOODEEP 20 /* Too many indentation levels */ -#define E_DEDENT 21 /* No matching outer block for dedent */ -#define E_DECODE 22 /* Error in decoding into Unicode */ -#define E_EOFS 23 /* EOF in triple-quoted string */ -#define E_EOLS 24 /* EOL in single-quoted string */ -#define E_LINECONT 25 /* Unexpected characters after a line continuation */ -#define E_BADSINGLE 27 /* Ill-formed single statement input */ -#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */ +#define E_OK 10 /* No error */ +#define E_EOF 11 /* End Of File */ +#define E_INTR 12 /* Interrupted */ +#define E_TOKEN 13 /* Bad token */ +#define E_SYNTAX 14 /* Syntax error */ +#define E_NOMEM 15 /* Ran out of memory */ +#define E_DONE 16 /* Parsing complete */ +#define E_ERROR 17 /* Execution error */ +#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ +#define E_OVERFLOW 19 /* Node had too many children */ +#define E_TOODEEP 20 /* Too many indentation levels */ +#define E_DEDENT 21 /* No matching outer block for dedent */ +#define E_DECODE 22 /* Error in decoding into Unicode */ +#define E_EOFS 23 /* EOF in triple-quoted string */ +#define E_EOLS 24 /* EOL in single-quoted string */ +#define E_LINECONT 25 /* Unexpected characters after a line continuation */ +#define E_BADSINGLE 27 /* Ill-formed single statement input */ +#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */ +#define E_COLUMNOVERFLOW 29 /* Column offset overflow */ #ifdef __cplusplus } diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index eafa7d84638..4031c5ca76c 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -18,6 +18,12 @@ from test.support.os_helper import TESTFN, unlink from test.support.warnings_helper import check_warnings from test import support +try: + from _testcapi import INT_MAX +except ImportError: + INT_MAX = 2**31 - 1 + + class NaiveException(Exception): def __init__(self, x): @@ -318,11 +324,13 @@ class ExceptionTests(unittest.TestCase): check('(yield i) = 2', 1, 2) check('def f(*):\n pass', 1, 7) + @unittest.skipIf(INT_MAX >= sys.maxsize, "Downcasting to int is safe for col_offset") @support.requires_resource('cpu') - @support.bigmemtest(support._2G, memuse=1.5) - def testMemoryErrorBigSource(self, _size): - with self.assertRaises(OverflowError): - exec(f"if True:\n {' ' * 2**31}print('hello world')") + @support.bigmemtest(INT_MAX, memuse=2, dry_run=False) + def testMemoryErrorBigSource(self, size): + src = b"if True:\n%*s" % (size, b"pass") + with self.assertRaisesRegex(OverflowError, "Parser column offset overflow"): + compile(src, '', 'exec') @cpython_only def testSettingException(self): diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index c7134ab868b..1a01bb0352a 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -59,6 +59,10 @@ tok_nextc(struct tok_state *tok) int rc; for (;;) { if (tok->cur != tok->inp) { + if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) { + tok->done = E_COLUMNOVERFLOW; + return EOF; + } tok->col_offset++; return Py_CHARMASK(*tok->cur++); /* Fast path */ } diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index 15e99e23d84..057bf551519 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -68,6 +68,7 @@ _Pypegen_tokenizer_error(Parser *p) const char *msg = NULL; PyObject* errtype = PyExc_SyntaxError; Py_ssize_t col_offset = -1; + p->error_indicator = 1; switch (p->tok->done) { case E_TOKEN: msg = "invalid token"; @@ -103,6 +104,10 @@ _Pypegen_tokenizer_error(Parser *p) msg = "unexpected character after line continuation character"; break; } + case E_COLUMNOVERFLOW: + PyErr_SetString(PyExc_OverflowError, + "Parser column offset overflow - source line is too big"); + return -1; default: msg = "unknown parsing error"; }