bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130)

This commit is contained in:
Pablo Galindo Salgado 2022-01-25 22:12:14 +00:00 committed by GitHub
parent b1cb843050
commit a0efc0c196
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 165 additions and 16 deletions

View File

@ -1078,8 +1078,7 @@ Module(
ast.literal_eval(node)
def test_literal_eval_syntax_errors(self):
msg = "unexpected character after line continuation character"
with self.assertRaisesRegex(SyntaxError, msg):
with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
ast.literal_eval(r'''
\
(\

View File

@ -1613,6 +1613,36 @@ pass
except SyntaxError:
self.fail("Empty line after a line continuation character is valid.")
# See issue-46091
s1 = r"""\
def fib(n):
\
'''Print a Fibonacci series up to n.'''
\
a, b = 0, 1
"""
s2 = r"""\
def fib(n):
'''Print a Fibonacci series up to n.'''
a, b = 0, 1
"""
try:
self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
except SyntaxError:
self.fail("Indented statement over multiple lines is valid")
def test_continuation_bad_indentation(self):
# Check that code that breaks indentation across multiple lines raises a
# syntax error: a line holding only a continuation backslash sits between
# `y = 1` and `foo = 1`, and exec() of the snippet must raise
# IndentationError (a subclass of SyntaxError).
# NOTE(review): the leading whitespace inside the literal below is what
# this test depends on -- confirm it against the upstream file before
# touching the string.
code = r"""\
if x:
y = 1
\
foo = 1
"""
self.assertRaises(IndentationError, exec, code)
@support.cpython_only
def test_nested_named_except_blocks(self):
code = ""

View File

@ -6,6 +6,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
NEWLINE, _generate_tokens_from_c_tokenizer)
from io import BytesIO, StringIO
import unittest
from textwrap import dedent
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
INVALID_UNDERSCORE_LITERALS)
@ -44,7 +45,6 @@ class TokenizeTest(TestCase):
# The ENDMARKER and final NEWLINE are omitted.
f = BytesIO(s.encode('utf-8'))
result = stringify_tokens_from_source(tokenize(f.readline), s)
self.assertEqual(result,
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
expected.rstrip().splitlines())
@ -2512,6 +2512,104 @@ async def f():
self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
self.assertRaises(SyntaxError, get_tokens, "]")
def test_continuation_lines_indentation(self):
# Check how lines that hold only a line continuation character interact
# with INDENT/DEDENT generation.  Each case pins the expected token stream
# with check_tokenize() and then verifies that the C tokenizer yields the
# same (kind, string) pairs as equivalent code written without the
# continuation lines.
# NOTE(review): whitespace inside the code literals is significant here --
# confirm it against the upstream file before editing them.
def get_tokens(string):
# Reduce every token from the C tokenizer to a (kind, string) pair,
# dropping the remaining fields of each token.
return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
# Case 1: continuation-only lines between the statements of a function
# body; exactly one INDENT/DEDENT pair is expected.
code = dedent("""
def fib(n):
\\
'''Print a Fibonacci series up to n.'''
\\
a, b = 0, 1
""")
self.check_tokenize(code, """\
NAME 'def' (2, 0) (2, 3)
NAME 'fib' (2, 4) (2, 7)
LPAR '(' (2, 7) (2, 8)
NAME 'n' (2, 8) (2, 9)
RPAR ')' (2, 9) (2, 10)
COLON ':' (2, 10) (2, 11)
NEWLINE '' (2, 11) (2, 11)
INDENT '' (4, -1) (4, -1)
STRING "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
NEWLINE '' (4, 39) (4, 39)
NAME 'a' (6, 0) (6, 1)
COMMA ',' (6, 1) (6, 2)
NAME 'b' (6, 3) (6, 4)
EQUAL '=' (6, 5) (6, 6)
NUMBER '0' (6, 7) (6, 8)
COMMA ',' (6, 8) (6, 9)
NUMBER '1' (6, 10) (6, 11)
NEWLINE '' (6, 11) (6, 11)
DEDENT '' (6, -1) (6, -1)
""")
# Same function without the continuation lines: the (kind, string)
# sequence must be identical.
code_no_cont = dedent("""
def fib(n):
'''Print a Fibonacci series up to n.'''
a, b = 0, 1
""")
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
# Case 2: a continuation-only line between two `pass` statements; the
# expected stream contains no INDENT or DEDENT token.
code = dedent("""
pass
\\
pass
""")
self.check_tokenize(code, """\
NAME 'pass' (2, 0) (2, 4)
NEWLINE '' (2, 4) (2, 4)
NAME 'pass' (5, 0) (5, 4)
NEWLINE '' (5, 4) (5, 4)
""")
code_no_cont = dedent("""
pass
pass
""")
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
# Case 3: several consecutive continuation-only lines inside an `if`
# block; `foo = 1` is still expected in the same block as `y = 1`
# (no DEDENT is emitted between the two statements).
code = dedent("""
if x:
y = 1
\\
\\
\\
\\
foo = 1
""")
self.check_tokenize(code, """\
NAME 'if' (2, 0) (2, 2)
NAME 'x' (2, 3) (2, 4)
COLON ':' (2, 4) (2, 5)
NEWLINE '' (2, 5) (2, 5)
INDENT '' (3, -1) (3, -1)
NAME 'y' (3, 4) (3, 5)
EQUAL '=' (3, 6) (3, 7)
NUMBER '1' (3, 8) (3, 9)
NEWLINE '' (3, 9) (3, 9)
NAME 'foo' (8, 4) (8, 7)
EQUAL '=' (8, 8) (8, 9)
NUMBER '1' (8, 10) (8, 11)
NEWLINE '' (8, 11) (8, 11)
DEDENT '' (8, -1) (8, -1)
""")
code_no_cont = dedent("""
if x:
y = 1
foo = 1
""")
self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,2 @@
Correctly calculate indentation levels for lines with whitespace characters
that are ended by line continuation characters. Patch by Pablo Galindo.

View File

@ -1347,6 +1347,24 @@ tok_decimal_tail(struct tok_state *tok)
/* Get next token, after space stripping etc. */
/* Finish reading a backslash line continuation.
 *
 * Callers invoke this right after consuming a '\\' character.  The next
 * character must be a newline; the character that follows the newline is
 * then read and pushed back so the caller sees it again on its next read.
 *
 * Returns that first character of the following line on success.
 * Returns -1 on failure, with tok->done set to:
 *   E_LINECONT  if the backslash is not immediately followed by '\n';
 *   E_EOF       if the input ends right after the continuation (tok->cur
 *               is also set to tok->inp in this case).
 */
static inline int
tok_continuation_line(struct tok_state *tok)
{
    /* Anything other than a newline after the backslash is an error. */
    if (tok_nextc(tok) != '\n') {
        tok->done = E_LINECONT;
        return -1;
    }

    /* Peek at the first character of the continued line. */
    int first = tok_nextc(tok);
    if (first == EOF) {
        tok->done = E_EOF;
        tok->cur = tok->inp;
        return -1;
    }

    /* Not at end of input: un-read the character and report it. */
    tok_backup(tok, first);
    return first;
}
static int
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
{
@ -1363,6 +1381,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
int col = 0;
int altcol = 0;
tok->atbol = 0;
int cont_line_col = 0;
for (;;) {
c = tok_nextc(tok);
if (c == ' ') {
@ -1375,14 +1394,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (c == '\014') {/* Control-L (formfeed) */
col = altcol = 0; /* For Emacs users */
}
else if (c == '\\') {
// Indentation cannot be split over multiple physical lines
// using backslashes. This means that if we found a backslash
// preceded by whitespace, **the first one we find** determines
// the level of indentation of whatever comes next.
cont_line_col = cont_line_col ? cont_line_col : col;
if ((c = tok_continuation_line(tok)) == -1) {
return ERRORTOKEN;
}
}
else {
break;
}
}
tok_backup(tok, c);
if (c == '#' || c == '\n' || c == '\\') {
if (c == '#' || c == '\n') {
/* Lines with only whitespace and/or comments
and/or a line continuation character
shouldn't affect the indentation and are
not passed to the parser as NEWLINE tokens,
except *totally* empty lines in interactive
@ -1403,6 +1431,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
may need to skip to the end of a comment */
}
if (!blankline && tok->level == 0) {
col = cont_line_col ? cont_line_col : col;
altcol = cont_line_col ? cont_line_col : altcol;
if (col == tok->indstack[tok->indent]) {
/* No change */
if (altcol != tok->altindstack[tok->indent]) {
@ -1964,19 +1994,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Line continuation */
if (c == '\\') {
c = tok_nextc(tok);
if (c != '\n') {
tok->done = E_LINECONT;
if ((c = tok_continuation_line(tok)) == -1) {
return ERRORTOKEN;
}
c = tok_nextc(tok);
if (c == EOF) {
tok->done = E_EOF;
tok->cur = tok->inp;
return ERRORTOKEN;
} else {
tok_backup(tok, c);
}
tok->cont_line = 1;
goto again; /* Read next line */
}