From 3156d193b81f7fefbafa1a5299bc9588a6768956 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Wed, 18 Oct 2023 05:58:51 -0700 Subject: [PATCH] gh-100445: Improve error message for unterminated strings with escapes (#100446) --- Lib/test/test_syntax.py | 10 ++++++++-- ...2-12-27-02-51-45.gh-issue-100445.C8f6ph.rst | 1 + Parser/lexer/lexer.c | 18 ++++++++++++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 00c5f624ceb..7ebf9ca1707 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -2298,8 +2298,14 @@ func( def test_error_string_literal(self): - self._check_error("'blech", "unterminated string literal") - self._check_error('"blech', "unterminated string literal") + self._check_error("'blech", r"unterminated string literal \(.*\)$") + self._check_error('"blech', r"unterminated string literal \(.*\)$") + self._check_error( + r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote" + ) + self._check_error( + r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote" + ) self._check_error("'''blech", "unterminated triple-quoted string literal") self._check_error('"""blech', "unterminated triple-quoted string literal") diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst new file mode 100644 index 00000000000..72f38849df9 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst @@ -0,0 +1 @@ +Improve error message for unterminated strings with escapes. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 1a01bb0352a..2ba24a2c240 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t int quote = c; int quote_size = 1; /* 1 or 3 */ int end_quote_size = 0; + int has_escaped_quote = 0; /* Nodes of type STRING, especially multi line strings must be handled differently in order to get both @@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(ERRORTOKEN); } else { - _PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at" - " line %d)", start); + if (has_escaped_quote) { + _PyTokenizer_syntaxerror( + tok, + "unterminated string literal (detected at line %d); " + "perhaps you escaped the end quote?", + start + ); + } else { + _PyTokenizer_syntaxerror( + tok, "unterminated string literal (detected at line %d)", start + ); + } if (c != '\n') { tok->done = E_EOLS; } @@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t end_quote_size = 0; if (c == '\\') { c = tok_nextc(tok); /* skip escaped char */ + if (c == quote) { /* but record whether the escaped char was a quote */ + has_escaped_quote = 1; + } if (c == '\r') { c = tok_nextc(tok); }