mirror of https://github.com/python/cpython
bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130)
parent b1cb843050
commit a0efc0c196
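Before the file-by-file hunks, an illustrative sketch of the behaviour being fixed (editorial, not part of the patch; it assumes a CPython that already contains this change, i.e. 3.11 or later). A physical line that holds only whitespace and a trailing backslash now pins the indentation of the logical line it starts at the column of that first backslash, so the two sources below compile to equal code objects, mirroring the compile(s1) == compile(s2) check added in the tests:

    # s1 uses whitespace-plus-backslash lines; s2 is the plain spelling.
    s1 = (
        "def fib(n):\n"
        "    \\\n"                                   # only spaces + backslash: indent = 4
        "'''Print a Fibonacci series up to n.'''\n"  # continued line, column 0
        "    \\\n"
        "a, b = 0, 1\n"
    )
    s2 = (
        "def fib(n):\n"
        "    '''Print a Fibonacci series up to n.'''\n"
        "    a, b = 0, 1\n"
    )
    print(compile(s1, "<string>", "exec") == compile(s2, "<string>", "exec"))  # True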
@@ -1078,8 +1078,7 @@ Module(
                 ast.literal_eval(node)
 
     def test_literal_eval_syntax_errors(self):
-        msg = "unexpected character after line continuation character"
-        with self.assertRaisesRegex(SyntaxError, msg):
+        with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
             ast.literal_eval(r'''
                 \
                 (\
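The updated assertion reflects which error now wins for this malformed input: the spurious indentation introduced by the continuation is reported, rather than the continuation character itself. A quick editorial illustration of the same shape of input (again assuming 3.11+):

    import ast

    snippet = (
        "\n"
        "    \\\n"    # whitespace-only line ending in a continuation
        "    (\\\n"   # continued, indented content
        "    "
    )
    try:
        ast.literal_eval(snippet)
    except SyntaxError as exc:       # IndentationError is a SyntaxError subclass
        print(type(exc).__name__, exc.msg)   # expected: IndentationError unexpected indent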
@@ -1613,6 +1613,36 @@ pass
         except SyntaxError:
             self.fail("Empty line after a line continuation character is valid.")
 
+        # See issue-46091
+        s1 = r"""\
+def fib(n):
+    \
+'''Print a Fibonacci series up to n.'''
+    \
+a, b = 0, 1
+"""
+        s2 = r"""\
+def fib(n):
+    '''Print a Fibonacci series up to n.'''
+    a, b = 0, 1
+"""
+        try:
+            self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
+        except SyntaxError:
+            self.fail("Indented statement over multiple lines is valid")
+
+    def test_continuation_bad_indentation(self):
+        # Check that code that breaks indentation across multiple lines raises a syntax error
+
+        code = r"""\
+if x:
+    y = 1
+  \
+  foo = 1
+"""
+
+        self.assertRaises(IndentationError, exec, code)
+
     @support.cpython_only
     def test_nested_named_except_blocks(self):
         code = ""
@@ -6,6 +6,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                       NEWLINE, _generate_tokens_from_c_tokenizer)
 from io import BytesIO, StringIO
 import unittest
+from textwrap import dedent
 from unittest import TestCase, mock
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                                INVALID_UNDERSCORE_LITERALS)
@@ -44,7 +45,6 @@ class TokenizeTest(TestCase):
         # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
         result = stringify_tokens_from_source(tokenize(f.readline), s)
-
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
@@ -2511,7 +2511,105 @@ async def f():
 
         self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
         self.assertRaises(SyntaxError, get_tokens, "]")
 
+    def test_continuation_lines_indentation(self):
+        def get_tokens(string):
+            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
+
+        code = dedent("""
+            def fib(n):
+                \\
+            '''Print a Fibonacci series up to n.'''
+                \\
+            a, b = 0, 1
+        """)
+
+        self.check_tokenize(code, """\
+    NAME       'def'         (2, 0) (2, 3)
+    NAME       'fib'         (2, 4) (2, 7)
+    LPAR       '('           (2, 7) (2, 8)
+    NAME       'n'           (2, 8) (2, 9)
+    RPAR       ')'           (2, 9) (2, 10)
+    COLON      ':'           (2, 10) (2, 11)
+    NEWLINE    ''            (2, 11) (2, 11)
+    INDENT     ''            (4, -1) (4, -1)
+    STRING     "'''Print a Fibonacci series up to n.'''" (4, 0) (4, 39)
+    NEWLINE    ''            (4, 39) (4, 39)
+    NAME       'a'           (6, 0) (6, 1)
+    COMMA      ','           (6, 1) (6, 2)
+    NAME       'b'           (6, 3) (6, 4)
+    EQUAL      '='           (6, 5) (6, 6)
+    NUMBER     '0'           (6, 7) (6, 8)
+    COMMA      ','           (6, 8) (6, 9)
+    NUMBER     '1'           (6, 10) (6, 11)
+    NEWLINE    ''            (6, 11) (6, 11)
+    DEDENT     ''            (6, -1) (6, -1)
+    """)
+
+        code_no_cont = dedent("""
+            def fib(n):
+                '''Print a Fibonacci series up to n.'''
+                a, b = 0, 1
+        """)
+
+        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
+
+        code = dedent("""
+            pass
+                \\
+
+            pass
+        """)
+
+        self.check_tokenize(code, """\
+    NAME       'pass'        (2, 0) (2, 4)
+    NEWLINE    ''            (2, 4) (2, 4)
+    NAME       'pass'        (5, 0) (5, 4)
+    NEWLINE    ''            (5, 4) (5, 4)
+    """)
+
+        code_no_cont = dedent("""
+            pass
+            pass
+        """)
+
+        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
+
+        code = dedent("""
+            if x:
+                y = 1
+                \\
+                        \\
+                      \\
+                    \\
+                foo = 1
+        """)
+
+        self.check_tokenize(code, """\
+    NAME       'if'          (2, 0) (2, 2)
+    NAME       'x'           (2, 3) (2, 4)
+    COLON      ':'           (2, 4) (2, 5)
+    NEWLINE    ''            (2, 5) (2, 5)
+    INDENT     ''            (3, -1) (3, -1)
+    NAME       'y'           (3, 4) (3, 5)
+    EQUAL      '='           (3, 6) (3, 7)
+    NUMBER     '1'           (3, 8) (3, 9)
+    NEWLINE    ''            (3, 9) (3, 9)
+    NAME       'foo'         (8, 4) (8, 7)
+    EQUAL      '='           (8, 8) (8, 9)
+    NUMBER     '1'           (8, 10) (8, 11)
+    NEWLINE    ''            (8, 11) (8, 11)
+    DEDENT     ''            (8, -1) (8, -1)
+    """)
+
+        code_no_cont = dedent("""
+            if x:
+                y = 1
+                foo = 1
+        """)
+
+        self.assertEqual(get_tokens(code), get_tokens(code_no_cont))
+
 
 if __name__ == "__main__":
     unittest.main()
@@ -0,0 +1,2 @@
+Correctly calculate indentation levels for lines with whitespace characters
+that are ended by line continuation characters. Patch by Pablo Galindo
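In practical terms, the entry above means a backslash continuation can no longer introduce an inconsistent indentation level. A short editorial illustration (assuming 3.11+), matching the shape used by the new grammar test earlier in this diff:

    # The whitespace-only line "  \" fixes the indentation of the continued
    # statement at column 2, which matches no enclosing block.
    bad = (
        "if x:\n"
        "    y = 1\n"
        "  \\\n"
        "  foo = 1\n"
    )
    try:
        compile(bad, "<string>", "exec")
    except IndentationError as exc:
        print(exc)   # e.g. "unindent does not match any outer indentation level"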
@@ -1347,6 +1347,24 @@ tok_decimal_tail(struct tok_state *tok)
 
 /* Get next token, after space stripping etc. */
 
+static inline int
+tok_continuation_line(struct tok_state *tok) {
+    int c = tok_nextc(tok);
+    if (c != '\n') {
+        tok->done = E_LINECONT;
+        return -1;
+    }
+    c = tok_nextc(tok);
+    if (c == EOF) {
+        tok->done = E_EOF;
+        tok->cur = tok->inp;
+        return -1;
+    } else {
+        tok_backup(tok, c);
+    }
+    return c;
+}
+
 static int
 tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 {
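The new helper centralizes the two ways a continuation can go wrong: the character right after the backslash must be a newline (otherwise tok->done becomes E_LINECONT), and end of input immediately after the continuation becomes E_EOF. A small editorial illustration of the E_LINECONT path as it surfaces in Python:

    # A backslash that is not the last character on its physical line is
    # rejected; this is the E_LINECONT case in tok_continuation_line.
    try:
        compile("x = 1 \\ + 2\n", "<string>", "exec")
    except SyntaxError as exc:
        print(exc.msg)   # unexpected character after line continuation character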
@@ -1363,6 +1381,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         int col = 0;
         int altcol = 0;
         tok->atbol = 0;
+        int cont_line_col = 0;
         for (;;) {
             c = tok_nextc(tok);
             if (c == ' ') {
@@ -1375,14 +1394,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
             else if (c == '\014') {/* Control-L (formfeed) */
                 col = altcol = 0; /* For Emacs users */
             }
+            else if (c == '\\') {
+                // Indentation cannot be split over multiple physical lines
+                // using backslashes. This means that if we found a backslash
+                // preceded by whitespace, **the first one we find** determines
+                // the level of indentation of whatever comes next.
+                cont_line_col = cont_line_col ? cont_line_col : col;
+                if ((c = tok_continuation_line(tok)) == -1) {
+                    return ERRORTOKEN;
+                }
+            }
             else {
                 break;
             }
         }
         tok_backup(tok, c);
-        if (c == '#' || c == '\n' || c == '\\') {
+        if (c == '#' || c == '\n') {
             /* Lines with only whitespace and/or comments
-               and/or a line continuation character
                shouldn't affect the indentation and are
                not passed to the parser as NEWLINE tokens,
                except *totally* empty lines in interactive
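The comment in the new branch above is the core rule of the change: while scanning a line's leading whitespace, the column of the first backslash that follows only whitespace becomes the indentation of whatever the continuation eventually produces, and any later backslashes in the same run are ignored. A rough editorial Python rendition of just that rule (it ignores tabs, form feeds, comments and error handling, and is not the real tokenizer):

    def effective_indent(physical_lines):
        """Column used as the indentation level of the next logical line."""
        cont_line_col = 0
        for line in physical_lines:
            col = 0
            for ch in line:
                if ch == ' ':
                    col += 1
                elif ch == '\\':
                    # The first backslash preceded only by whitespace wins;
                    # later ones do not move the indentation level.
                    cont_line_col = cont_line_col or col
                    break   # continue scanning on the next physical line
                else:
                    return cont_line_col or col
            # whitespace-only lines without a backslash simply do not count
        return cont_line_col

    print(effective_indent(["    \\", "'''doc'''"]))           # 4
    print(effective_indent(["  \\", "       \\", "  x = 1"]))  # 2: the first backslash decides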
@@ -1403,6 +1431,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
                may need to skip to the end of a comment */
         }
         if (!blankline && tok->level == 0) {
+            col = cont_line_col ? cont_line_col : col;
+            altcol = cont_line_col ? cont_line_col : altcol;
             if (col == tok->indstack[tok->indent]) {
                 /* No change */
                 if (altcol != tok->altindstack[tok->indent]) {
@@ -1964,19 +1994,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 
     /* Line continuation */
     if (c == '\\') {
-        c = tok_nextc(tok);
-        if (c != '\n') {
-            tok->done = E_LINECONT;
+        if ((c = tok_continuation_line(tok)) == -1) {
             return ERRORTOKEN;
         }
-        c = tok_nextc(tok);
-        if (c == EOF) {
-            tok->done = E_EOF;
-            tok->cur = tok->inp;
-            return ERRORTOKEN;
-        } else {
-            tok_backup(tok, c);
-        }
         tok->cont_line = 1;
         goto again; /* Read next line */
     }