mirror of https://github.com/python/cpython
bpo-43822: Improve syntax errors for missing commas (GH-25377)
This commit is contained in:
parent
e692f55979
commit
b280248be8
|
@ -211,6 +211,8 @@
|
||||||
|
|
||||||
.. data:: TYPE_COMMENT
|
.. data:: TYPE_COMMENT
|
||||||
|
|
||||||
|
.. data:: SOFT_KEYWORD
|
||||||
|
|
||||||
.. data:: ERRORTOKEN
|
.. data:: ERRORTOKEN
|
||||||
|
|
||||||
.. data:: N_TOKENS
|
.. data:: N_TOKENS
|
||||||
|
|
|
@ -59,6 +59,7 @@ AWAIT
|
||||||
ASYNC
|
ASYNC
|
||||||
TYPE_IGNORE
|
TYPE_IGNORE
|
||||||
TYPE_COMMENT
|
TYPE_COMMENT
|
||||||
|
SOFT_KEYWORD
|
||||||
ERRORTOKEN
|
ERRORTOKEN
|
||||||
|
|
||||||
# These aren't used by the C tokenizer but are needed for tokenize.py
|
# These aren't used by the C tokenizer but are needed for tokenize.py
|
||||||
|
|
|
@ -7,6 +7,7 @@ _PyPegen_parse(Parser *p)
|
||||||
// Initialize keywords
|
// Initialize keywords
|
||||||
p->keywords = reserved_keywords;
|
p->keywords = reserved_keywords;
|
||||||
p->n_keyword_lists = n_keyword_lists;
|
p->n_keyword_lists = n_keyword_lists;
|
||||||
|
p->soft_keywords = soft_keywords;
|
||||||
|
|
||||||
// Run parser
|
// Run parser
|
||||||
void *result = NULL;
|
void *result = NULL;
|
||||||
|
@ -459,6 +460,7 @@ expressions[expr_ty]:
|
||||||
| a=expression ',' { _PyAST_Tuple(CHECK(asdl_expr_seq*, _PyPegen_singleton_seq(p, a)), Load, EXTRA) }
|
| a=expression ',' { _PyAST_Tuple(CHECK(asdl_expr_seq*, _PyPegen_singleton_seq(p, a)), Load, EXTRA) }
|
||||||
| expression
|
| expression
|
||||||
expression[expr_ty] (memo):
|
expression[expr_ty] (memo):
|
||||||
|
| invalid_expression
|
||||||
| a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
|
| a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
|
||||||
| disjunction
|
| disjunction
|
||||||
| lambdef
|
| lambdef
|
||||||
|
@ -778,6 +780,13 @@ invalid_kwarg:
|
||||||
| expression a='=' {
|
| expression a='=' {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
||||||
a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
|
a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
|
||||||
|
|
||||||
|
invalid_expression:
|
||||||
|
# The !(NAME STRING) lookahead excludes a NAME directly followed by a STRING, so this error is not shown for invalid string prefixes such as: kf"dsfsdf"
|
||||||
|
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
|
||||||
|
| !(NAME STRING | SOFT_KEYWORD) a=disjunction expression {
|
||||||
|
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "invalid syntax. Perhaps you forgot a comma?") }
|
||||||
|
|
||||||
invalid_named_expression:
|
invalid_named_expression:
|
||||||
| a=expression ':=' expression {
|
| a=expression ':=' expression {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
||||||
|
|
|
@ -69,8 +69,9 @@ extern "C" {
|
||||||
#define ASYNC 56
|
#define ASYNC 56
|
||||||
#define TYPE_IGNORE 57
|
#define TYPE_IGNORE 57
|
||||||
#define TYPE_COMMENT 58
|
#define TYPE_COMMENT 58
|
||||||
#define ERRORTOKEN 59
|
#define SOFT_KEYWORD 59
|
||||||
#define N_TOKENS 63
|
#define ERRORTOKEN 60
|
||||||
|
#define N_TOKENS 64
|
||||||
#define NT_OFFSET 256
|
#define NT_OFFSET 256
|
||||||
|
|
||||||
/* Special definitions for cooperation with parser */
|
/* Special definitions for cooperation with parser */
|
||||||
|
|
|
@ -103,7 +103,7 @@ Verify that parenthesis are required when used as a keyword argument value
|
||||||
>>> dict(a = i for i in range(10))
|
>>> dict(a = i for i in range(10))
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
...
|
...
|
||||||
SyntaxError: invalid syntax
|
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||||
|
|
||||||
Verify that parentheses are required when used as a keyword argument value
|
Verify that parentheses are required when used as a keyword argument value
|
||||||
|
|
||||||
|
|
|
@ -248,22 +248,36 @@ SyntaxError: did you forget parentheses around the comprehension target?
|
||||||
|
|
||||||
# Missing commas in literal collections should not
|
# Missing commas in literal collections should not
|
||||||
# produce special error messages regarding missing
|
# produce special error messages regarding missing
|
||||||
# parentheses
|
# parentheses, but about missing commas instead
|
||||||
|
|
||||||
>>> [1, 2 3]
|
>>> [1, 2 3]
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: invalid syntax
|
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||||
|
|
||||||
>>> {1, 2 3}
|
>>> {1, 2 3}
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: invalid syntax
|
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||||
|
|
||||||
>>> {1:2, 2:5 3:12}
|
>>> {1:2, 2:5 3:12}
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: invalid syntax
|
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||||
|
|
||||||
>>> (1, 2 3)
|
>>> (1, 2 3)
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||||
|
|
||||||
|
# Make sure soft keyword constructs don't raise specialized
|
||||||
|
# errors regarding missing commas
|
||||||
|
|
||||||
|
>>> match x:
|
||||||
|
... y = 3
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax
|
||||||
|
|
||||||
|
>>> match x:
|
||||||
|
... case y:
|
||||||
|
... 3 $ 3
|
||||||
|
Traceback (most recent call last):
|
||||||
SyntaxError: invalid syntax
|
SyntaxError: invalid syntax
|
||||||
|
|
||||||
From compiler_complex_args():
|
From compiler_complex_args():
|
||||||
|
@ -864,7 +878,7 @@ leading to spurious errors.
|
||||||
SyntaxError: cannot assign to attribute here. Maybe you meant '==' instead of '='?
|
SyntaxError: cannot assign to attribute here. Maybe you meant '==' instead of '='?
|
||||||
|
|
||||||
Ensure that early = are not matched by the parser as invalid comparisons
|
Ensure that early = are not matched by the parser as invalid comparisons
|
||||||
>>> f(2, 4, x=34); {1,2 a}
|
>>> f(2, 4, x=34); 1 $ 2
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: invalid syntax
|
SyntaxError: invalid syntax
|
||||||
|
|
||||||
|
|
|
@ -62,12 +62,13 @@ AWAIT = 55
|
||||||
ASYNC = 56
|
ASYNC = 56
|
||||||
TYPE_IGNORE = 57
|
TYPE_IGNORE = 57
|
||||||
TYPE_COMMENT = 58
|
TYPE_COMMENT = 58
|
||||||
|
SOFT_KEYWORD = 59
|
||||||
# These aren't used by the C tokenizer but are needed for tokenize.py
|
# These aren't used by the C tokenizer but are needed for tokenize.py
|
||||||
ERRORTOKEN = 59
|
ERRORTOKEN = 60
|
||||||
COMMENT = 60
|
COMMENT = 61
|
||||||
NL = 61
|
NL = 62
|
||||||
ENCODING = 62
|
ENCODING = 63
|
||||||
N_TOKENS = 63
|
N_TOKENS = 64
|
||||||
# Special definitions for cooperation with parser
|
# Special definitions for cooperation with parser
|
||||||
NT_OFFSET = 256
|
NT_OFFSET = 256
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Improve syntax errors in the parser for missing commas between expressions.
|
||||||
|
Patch by Pablo Galindo.
|
2180
Parser/parser.c
2180
Parser/parser.c
File diff suppressed because it is too large
Load Diff
|
@ -943,6 +943,23 @@ _PyPegen_string_token(Parser *p)
|
||||||
return _PyPegen_expect_token(p, STRING);
|
return _PyPegen_expect_token(p, STRING);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Try to match a soft keyword at the current parser position.
 *
 * A NAME token is requested from the parser and its text is compared against
 * every entry of the NULL-terminated p->soft_keywords array.  On an exact
 * match the result of _PyPegen_name_token(p) is returned.  NULL is returned
 * when no NAME token is available, when the token text is not a soft keyword,
 * or when the token bytes cannot be read.
 *
 * NOTE(review): _PyPegen_expect_token() presumably advances past the NAME it
 * returns, in which case _PyPegen_name_token() below would look at the
 * *following* token -- confirm against the rest of pegen.c.
 */
expr_ty _PyPegen_soft_keyword_token(Parser *p) {
    Token *t = _PyPegen_expect_token(p, NAME);
    if (t == NULL) {
        return NULL;
    }
    char *the_token;
    Py_ssize_t size;
    /* On failure the_token/size are left unset; never read them then. */
    if (PyBytes_AsStringAndSize(t->bytes, &the_token, &size) < 0) {
        return NULL;
    }
    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
        /* The lengths must agree too: strncmp() bounded by the token length
           alone would accept any prefix of a soft keyword ("m" for "match"). */
        if (strlen(*keyword) == (size_t)size
            && strncmp(*keyword, the_token, (size_t)size) == 0) {
            return _PyPegen_name_token(p);
        }
    }
    return NULL;
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
parsenumber_raw(const char *s)
|
parsenumber_raw(const char *s)
|
||||||
{
|
{
|
||||||
|
@ -1151,6 +1168,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
|
||||||
p->tok = tok;
|
p->tok = tok;
|
||||||
p->keywords = NULL;
|
p->keywords = NULL;
|
||||||
p->n_keyword_lists = -1;
|
p->n_keyword_lists = -1;
|
||||||
|
p->soft_keywords = NULL;
|
||||||
p->tokens = PyMem_Malloc(sizeof(Token *));
|
p->tokens = PyMem_Malloc(sizeof(Token *));
|
||||||
if (!p->tokens) {
|
if (!p->tokens) {
|
||||||
PyMem_Free(p);
|
PyMem_Free(p);
|
||||||
|
|
|
@ -59,6 +59,7 @@ typedef struct {
|
||||||
int fill, size;
|
int fill, size;
|
||||||
PyArena *arena;
|
PyArena *arena;
|
||||||
KeywordToken **keywords;
|
KeywordToken **keywords;
|
||||||
|
char **soft_keywords;
|
||||||
int n_keyword_lists;
|
int n_keyword_lists;
|
||||||
int start_rule;
|
int start_rule;
|
||||||
int *errcode;
|
int *errcode;
|
||||||
|
@ -125,6 +126,7 @@ int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||||
Token *_PyPegen_expect_token(Parser *p, int type);
|
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||||
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
|
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
|
||||||
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
|
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
|
||||||
|
expr_ty _PyPegen_soft_keyword_token(Parser *p);
|
||||||
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
||||||
int _PyPegen_fill_token(Parser *p);
|
int _PyPegen_fill_token(Parser *p);
|
||||||
expr_ty _PyPegen_name_token(Parser *p);
|
expr_ty _PyPegen_name_token(Parser *p);
|
||||||
|
|
|
@ -65,6 +65,7 @@ const char * const _PyParser_TokenNames[] = {
|
||||||
"ASYNC",
|
"ASYNC",
|
||||||
"TYPE_IGNORE",
|
"TYPE_IGNORE",
|
||||||
"TYPE_COMMENT",
|
"TYPE_COMMENT",
|
||||||
|
"SOFT_KEYWORD",
|
||||||
"<ERRORTOKEN>",
|
"<ERRORTOKEN>",
|
||||||
"<COMMENT>",
|
"<COMMENT>",
|
||||||
"<NL>",
|
"<NL>",
|
||||||
|
|
|
@ -46,6 +46,7 @@ _PyPegen_parse(Parser *p)
|
||||||
// Initialize keywords
|
// Initialize keywords
|
||||||
p->keywords = reserved_keywords;
|
p->keywords = reserved_keywords;
|
||||||
p->n_keyword_lists = n_keyword_lists;
|
p->n_keyword_lists = n_keyword_lists;
|
||||||
|
p->soft_keywords = soft_keywords;
|
||||||
|
|
||||||
return start_rule(p);
|
return start_rule(p);
|
||||||
}
|
}
|
||||||
|
@ -66,6 +67,7 @@ BASE_NODETYPES = {
|
||||||
"NAME": NodeTypes.NAME_TOKEN,
|
"NAME": NodeTypes.NAME_TOKEN,
|
||||||
"NUMBER": NodeTypes.NUMBER_TOKEN,
|
"NUMBER": NodeTypes.NUMBER_TOKEN,
|
||||||
"STRING": NodeTypes.STRING_TOKEN,
|
"STRING": NodeTypes.STRING_TOKEN,
|
||||||
|
"SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -411,6 +413,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
if subheader:
|
if subheader:
|
||||||
self.print(subheader)
|
self.print(subheader)
|
||||||
self._setup_keywords()
|
self._setup_keywords()
|
||||||
|
self._setup_soft_keywords()
|
||||||
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
|
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
|
||||||
comment = " // Left-recursive" if rule.left_recursive else ""
|
comment = " // Left-recursive" if rule.left_recursive else ""
|
||||||
self.print(f"#define {rulename}_type {i}{comment}")
|
self.print(f"#define {rulename}_type {i}{comment}")
|
||||||
|
@ -474,6 +477,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.print("},")
|
self.print("},")
|
||||||
self.print("};")
|
self.print("};")
|
||||||
|
|
||||||
|
def _setup_soft_keywords(self) -> None:
    """Print the C definition of the static ``soft_keywords`` array.

    The array holds one quoted entry per soft keyword taken from
    ``self.callmakervisitor.soft_keywords``, in sorted order, followed by
    a ``NULL`` entry.
    """
    # Build every array element up front, terminator included.
    entries = [f'"{name}",' for name in sorted(self.callmakervisitor.soft_keywords)]
    entries.append("NULL,")

    self.print("static char *soft_keywords[] = {")
    with self.indent():
        for entry in entries:
            self.print(entry)
    self.print("};")
|
||||||
|
|
||||||
def _set_up_token_start_metadata_extraction(self) -> None:
|
def _set_up_token_start_metadata_extraction(self) -> None:
|
||||||
self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
|
self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
|
||||||
with self.indent():
|
with self.indent():
|
||||||
|
|
Loading…
Reference in New Issue