gh-105017: Fix including additional NL token when using CRLF (#105022)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
2023-05-27 17:50:43 +01:00 · 2023-05-27 17:50:43 +01:00 · 86d8f48935
parent 6e62eb2e70
commit 86d8f48935
3 changed files with 10 additions and 1 deletions
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -84,6 +84,14 @@ class TokenizeTest(TestCase):
    NEWLINE    '\\n'          (4, 26) (4, 27)
    DEDENT     ''            (5, 0) (5, 0)
    """)
+
+        self.check_tokenize("foo='bar'\r\n", """\
+    NAME       'foo'         (1, 0) (1, 3)
+    OP         '='           (1, 3) (1, 4)
+    STRING     "'bar'"       (1, 4) (1, 9)
+    NEWLINE    '\\n'          (1, 9) (1, 10)
+            """)
+
        indent_error_file = b"""\
 def k(x):
    x += 2
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst
@@ -0,0 +1 @@
+Do not include an additional final ``NL`` token when parsing files having CRLF lines. Patch by Marta Gómez.
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -800,7 +800,7 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
    }
    /* If this is exec input, add a newline to the end of the string if
       there isn't one already. */
-    if (exec_input && c != '\n') {
+    if (exec_input && c != '\n' && c != '\0') {
        *current = '\n';
        current++;
    }
				`@ -0,0 +1 @@`
				Do not include an additional final ``NL`` token when parsing files having CRLF lines. Patch by Marta Gómez.