From 70f315c2d6de87b0514ce16cc00a91a5b60a6098 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 30 May 2023 23:52:52 +0200 Subject: [PATCH] gh-105042: Disable unmatched parens syntax error in python tokenize (#105061) --- Lib/test/inspect_fodder.py | 5 +++ Lib/test/test_inspect.py | 4 ++- Lib/test/test_tokenize.py | 7 ++++ Parser/tokenizer.c | 65 +++++++++++++++++++------------------- Python/Python-tokenize.c | 2 +- 5 files changed, 49 insertions(+), 34 deletions(-) diff --git a/Lib/test/inspect_fodder.py b/Lib/test/inspect_fodder.py index 567dfbab804..60ba7aa7839 100644 --- a/Lib/test/inspect_fodder.py +++ b/Lib/test/inspect_fodder.py @@ -113,3 +113,8 @@ class WhichComments: # after asyncf - line 113 # end of WhichComments - line 114 # after WhichComments - line 115 + +# Test that getsource works on a line that includes +# a closing parenthesis with the opening paren being in another line +( +); after_closing = lambda: 1 diff --git a/Lib/test/test_inspect.py b/Lib/test/test_inspect.py index a7bd680d0f5..6a49e3b5530 100644 --- a/Lib/test/test_inspect.py +++ b/Lib/test/test_inspect.py @@ -557,7 +557,8 @@ class TestRetrievingSourceCode(GetSourceBase): def test_getfunctions(self): functions = inspect.getmembers(mod, inspect.isfunction) - self.assertEqual(functions, [('eggs', mod.eggs), + self.assertEqual(functions, [('after_closing', mod.after_closing), + ('eggs', mod.eggs), ('lobbest', mod.lobbest), ('spam', mod.spam)]) @@ -641,6 +642,7 @@ class TestRetrievingSourceCode(GetSourceBase): self.assertSourceEqual(git.abuse, 29, 39) self.assertSourceEqual(mod.StupidGit, 21, 51) self.assertSourceEqual(mod.lobbest, 75, 76) + self.assertSourceEqual(mod.after_closing, 120, 120) def test_getsourcefile(self): self.assertEqual(normcase(inspect.getsourcefile(mod.spam)), modfile) diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 44e3e26243b..a9a2b767388 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1119,6 +1119,13 @@ async def f(): NEWLINE '\\n' (4, 1) (4, 2) """) + def test_closing_parenthesis_from_different_line(self): + self.check_tokenize("); x", """\ + OP ')' (1, 0) (1, 1) + OP ';' (1, 1) (1, 2) + NAME 'x' (1, 3) (1, 4) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index e7004453135..db128284b61 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2626,41 +2626,42 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t case ')': case ']': case '}': - if (!tok->level) { - if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { - return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed")); - } + if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') { + return MAKE_TOKEN(syntaxerror(tok, "f-string: single '}' is not allowed")); + } + if (!tok->tok_extra_tokens && !tok->level) { return MAKE_TOKEN(syntaxerror(tok, "unmatched '%c'", c)); } - tok->level--; - int opening = tok->parenstack[tok->level]; - if (!((opening == '(' && c == ')') || - (opening == '[' && c == ']') || - (opening == '{' && c == '}'))) - { - /* If the opening bracket belongs to an f-string's expression - part (e.g. f"{)}") and the closing bracket is an arbitrary - nested expression, then instead of matching a different - syntactical construct with it; we'll throw an unmatched - parentheses error. */ - if (INSIDE_FSTRING(tok) && opening == '{') { - assert(current_tok->curly_bracket_depth >= 0); - int previous_bracket = current_tok->curly_bracket_depth - 1; - if (previous_bracket == current_tok->curly_bracket_expr_start_depth) { - return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c)); + if (tok->level > 0) { + tok->level--; + int opening = tok->parenstack[tok->level]; + if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') || + (opening == '[' && c == ']') || + (opening == '{' && c == '}'))) { + /* If the opening bracket belongs to an f-string's expression + part (e.g. f"{)}") and the closing bracket is an arbitrary + nested expression, then instead of matching a different + syntactical construct with it; we'll throw an unmatched + parentheses error. */ + if (INSIDE_FSTRING(tok) && opening == '{') { + assert(current_tok->curly_bracket_depth >= 0); + int previous_bracket = current_tok->curly_bracket_depth - 1; + if (previous_bracket == current_tok->curly_bracket_expr_start_depth) { + return MAKE_TOKEN(syntaxerror(tok, "f-string: unmatched '%c'", c)); + } + } + if (tok->parenlinenostack[tok->level] != tok->lineno) { + return MAKE_TOKEN(syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c' on line %d", + c, opening, tok->parenlinenostack[tok->level])); + } + else { + return MAKE_TOKEN(syntaxerror(tok, + "closing parenthesis '%c' does not match " + "opening parenthesis '%c'", + c, opening)); } - } - if (tok->parenlinenostack[tok->level] != tok->lineno) { - return MAKE_TOKEN(syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c' on line %d", - c, opening, tok->parenlinenostack[tok->level])); - } - else { - return MAKE_TOKEN(syntaxerror(tok, - "closing parenthesis '%c' does not match " - "opening parenthesis '%c'", - c, opening)); } } diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 8bf8f5435cf..a7933b2d6b0 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -84,7 +84,7 @@ _tokenizer_error(struct tok_state *tok) msg = "invalid token"; break; case E_EOF: - if (tok->level) { + if (tok->level > 0) { PyErr_Format(PyExc_SyntaxError, "parenthesis '%c' was never closed", tok->parenstack[tok->level-1]);