bpo-43822: Improve syntax errors for missing commas (GH-25377)

commit b280248be8 (parent e692f55979)
Pablo Galindo, 2021-04-15 21:38:45 +01:00, committed by GitHub
13 changed files with 1235 additions and 1034 deletions


@@ -211,6 +211,8 @@
 .. data:: TYPE_COMMENT
+.. data:: SOFT_KEYWORD
 .. data:: ERRORTOKEN
 .. data:: N_TOKENS


@@ -59,6 +59,7 @@ AWAIT
 ASYNC
 TYPE_IGNORE
 TYPE_COMMENT
+SOFT_KEYWORD
 ERRORTOKEN
 # These aren't used by the C tokenizer but are needed for tokenize.py


@@ -7,6 +7,7 @@ _PyPegen_parse(Parser *p)
     // Initialize keywords
     p->keywords = reserved_keywords;
     p->n_keyword_lists = n_keyword_lists;
+    p->soft_keywords = soft_keywords;

     // Run parser
     void *result = NULL;
@@ -459,6 +460,7 @@ expressions[expr_ty]:
     | a=expression ',' { _PyAST_Tuple(CHECK(asdl_expr_seq*, _PyPegen_singleton_seq(p, a)), Load, EXTRA) }
     | expression
 expression[expr_ty] (memo):
+    | invalid_expression
     | a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
     | disjunction
     | lambdef
@@ -778,6 +780,13 @@ invalid_kwarg:
     | expression a='=' {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
             a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
+invalid_expression:
+   # !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
+   # Soft keywords need to also be ignored because they can be parsed as NAME NAME
+   | !(NAME STRING | SOFT_KEYWORD) a=disjunction expression {
+        RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "invalid syntax. Perhaps you forgot a comma?") }
 invalid_named_expression:
     | a=expression ':=' expression {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
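For context, a sketch of the rule's user-visible effect (Python 3.10 REPL assumed): two adjacent expressions now get the comma hint, while the negative lookahead keeps invalid string prefixes, which tokenize as NAME STRING, on the generic message:

>>> print(1 2)
Traceback (most recent call last):
SyntaxError: invalid syntax. Perhaps you forgot a comma?
>>> kf"dsfsdf"
Traceback (most recent call last):
SyntaxError: invalid syntax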

Include/token.h generated

@@ -69,8 +69,9 @@ extern "C" {
 #define ASYNC 56
 #define TYPE_IGNORE 57
 #define TYPE_COMMENT 58
-#define ERRORTOKEN 59
-#define N_TOKENS 63
+#define SOFT_KEYWORD 59
+#define ERRORTOKEN 60
+#define N_TOKENS 64
 #define NT_OFFSET 256

 /* Special definitions for cooperation with parser */


@@ -103,7 +103,7 @@ Verify that parenthesis are required when used as a keyword argument value
 >>> dict(a = i for i in range(10))
 Traceback (most recent call last):
    ...
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?

 Verify that parenthesis are required when used as a keyword argument value


@@ -248,22 +248,36 @@ SyntaxError: did you forget parentheses around the comprehension target?
 # Missing commas in literals collections should not
 # produce special error messages regarding missing
-# parentheses
+# parentheses, but about missing commas instead

 >>> [1, 2 3]
 Traceback (most recent call last):
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Perhaps you forgot a comma?

 >>> {1, 2 3}
 Traceback (most recent call last):
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Perhaps you forgot a comma?

 >>> {1:2, 2:5 3:12}
 Traceback (most recent call last):
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Perhaps you forgot a comma?

 >>> (1, 2 3)
 Traceback (most recent call last):
+SyntaxError: invalid syntax. Perhaps you forgot a comma?
+
+# Make sure soft keywords constructs don't raise specialized
+# errors regarding missing commas
+
+>>> match x:
+...    y = 3
+Traceback (most recent call last):
+SyntaxError: invalid syntax
+
+>>> match x:
+...    case y:
+...        3 $ 3
+Traceback (most recent call last):
 SyntaxError: invalid syntax
From compiler_complex_args(): From compiler_complex_args():
@@ -864,7 +878,7 @@ leading to spurious errors.
 SyntaxError: cannot assign to attribute here. Maybe you meant '==' instead of '='?

 Ensure that early = are not matched by the parser as invalid comparisons
->>> f(2, 4, x=34); {1,2 a}
+>>> f(2, 4, x=34); 1 $ 2
 Traceback (most recent call last):
 SyntaxError: invalid syntax

Lib/token.py generated

@@ -62,12 +62,13 @@ AWAIT = 55
 ASYNC = 56
 TYPE_IGNORE = 57
 TYPE_COMMENT = 58
+SOFT_KEYWORD = 59
 # These aren't used by the C tokenizer but are needed for tokenize.py
-ERRORTOKEN = 59
-COMMENT = 60
-NL = 61
-ENCODING = 62
-N_TOKENS = 63
+ERRORTOKEN = 60
+COMMENT = 61
+NL = 62
+ENCODING = 63
+N_TOKENS = 64
 # Special definitions for cooperation with parser
 NT_OFFSET = 256
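A quick sanity check of the renumbering (sketch; an interpreter built from this commit assumed):

>>> import token
>>> token.SOFT_KEYWORD
59
>>> token.tok_name[59]
'SOFT_KEYWORD'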


@@ -0,0 +1,2 @@
+Improve syntax errors in the parser for missing commas between expressions.
+Patch by Pablo Galindo.
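For illustration, the user-visible change in a sketch (Python 3.10 REPL assumed; earlier versions printed only "invalid syntax" here):

>>> x = 1 2
Traceback (most recent call last):
SyntaxError: invalid syntax. Perhaps you forgot a comma?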

File diff suppressed because it is too large


@@ -943,6 +943,23 @@ _PyPegen_string_token(Parser *p)
     return _PyPegen_expect_token(p, STRING);
 }

+expr_ty _PyPegen_soft_keyword_token(Parser *p) {
+    Token *t = _PyPegen_expect_token(p, NAME);
+    if (t == NULL) {
+        return NULL;
+    }
+    char *the_token;
+    Py_ssize_t size;
+    PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
+    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
+        if (strncmp(*keyword, the_token, size) == 0) {
+            return _PyPegen_name_token(p);
+        }
+    }
+    return NULL;
+}
+
 static PyObject *
 parsenumber_raw(const char *s)
 {
@@ -1151,6 +1168,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
     p->tok = tok;
     p->keywords = NULL;
     p->n_keyword_lists = -1;
+    p->soft_keywords = NULL;
     p->tokens = PyMem_Malloc(sizeof(Token *));
     if (!p->tokens) {
         PyMem_Free(p);
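In short, _PyPegen_soft_keyword_token succeeds only when the next NAME token's text matches one of the grammar's soft keywords, and fails (returns NULL) otherwise, which is what lets the same word still be used as a plain identifier. (Note that strncmp compares only the first `size` bytes, so a NAME that is a strict prefix of a soft keyword would also match.) A sketch of the observable behavior (Python 3.10 assumed, where `match` and `case` are soft keywords):

    # Soft keywords are keywords only in the right grammar position,
    # so they remain valid as ordinary variable names:
    match = {"key": "value"}
    case = "still fine"
    print(match["key"], case)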


@@ -59,6 +59,7 @@ typedef struct {
     int fill, size;
     PyArena *arena;
     KeywordToken **keywords;
+    char **soft_keywords;
     int n_keyword_lists;
     int start_rule;
     int *errcode;
@@ -125,6 +126,7 @@ int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
 Token *_PyPegen_expect_token(Parser *p, int type);
 Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
 expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
+expr_ty _PyPegen_soft_keyword_token(Parser *p);
 Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
 int _PyPegen_fill_token(Parser *p);
 expr_ty _PyPegen_name_token(Parser *p);

Parser/token.c generated

@@ -65,6 +65,7 @@ const char * const _PyParser_TokenNames[] = {
     "ASYNC",
     "TYPE_IGNORE",
     "TYPE_COMMENT",
+    "SOFT_KEYWORD",
     "<ERRORTOKEN>",
     "<COMMENT>",
     "<NL>",


@@ -46,6 +46,7 @@ _PyPegen_parse(Parser *p)
     // Initialize keywords
     p->keywords = reserved_keywords;
     p->n_keyword_lists = n_keyword_lists;
+    p->soft_keywords = soft_keywords;

     return start_rule(p);
 }
@@ -66,6 +67,7 @@ BASE_NODETYPES = {
     "NAME": NodeTypes.NAME_TOKEN,
     "NUMBER": NodeTypes.NUMBER_TOKEN,
     "STRING": NodeTypes.STRING_TOKEN,
+    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
 }
@@ -411,6 +413,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
         if subheader:
             self.print(subheader)
         self._setup_keywords()
+        self._setup_soft_keywords()
         for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
             comment = "  // Left-recursive" if rule.left_recursive else ""
             self.print(f"#define {rulename}_type {i}{comment}")
@@ -474,6 +477,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
             self.print("},")
         self.print("};")

+    def _setup_soft_keywords(self) -> None:
+        soft_keywords = sorted(self.callmakervisitor.soft_keywords)
+        self.print("static char *soft_keywords[] = {")
+        with self.indent():
+            for keyword in soft_keywords:
+                self.print(f'"{keyword}",')
+            self.print("NULL,")
+        self.print("};")
+
     def _set_up_token_start_metadata_extraction(self) -> None:
         self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
         with self.indent():
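For illustration, a minimal standalone sketch of the C array this method emits. The keyword set is an assumption (at the time of this commit the grammar's soft keywords were match, case, and _), and the snippet mimics the generator's output without the real CParserGenerator class:

    # Hypothetical stand-in for self.callmakervisitor.soft_keywords
    soft_keywords = sorted({"match", "case", "_"})

    # Emit a NULL-terminated C string array, as _setup_soft_keywords does
    print("static char *soft_keywords[] = {")
    for keyword in soft_keywords:
        print(f'    "{keyword}",')
    print("    NULL,")
    print("};")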