mirror of https://github.com/python/cpython
bpo-43822: Improve syntax errors for missing commas (GH-25377)
This commit is contained in:
parent
e692f55979
commit
b280248be8
|
@ -211,6 +211,8 @@
|
|||
|
||||
.. data:: TYPE_COMMENT
|
||||
|
||||
.. data:: SOFT_KEYWORD
|
||||
|
||||
.. data:: ERRORTOKEN
|
||||
|
||||
.. data:: N_TOKENS
|
||||
|
|
|
@ -59,6 +59,7 @@ AWAIT
|
|||
ASYNC
|
||||
TYPE_IGNORE
|
||||
TYPE_COMMENT
|
||||
SOFT_KEYWORD
|
||||
ERRORTOKEN
|
||||
|
||||
# These aren't used by the C tokenizer but are needed for tokenize.py
|
||||
|
|
|
@ -7,6 +7,7 @@ _PyPegen_parse(Parser *p)
|
|||
// Initialize keywords
|
||||
p->keywords = reserved_keywords;
|
||||
p->n_keyword_lists = n_keyword_lists;
|
||||
p->soft_keywords = soft_keywords;
|
||||
|
||||
// Run parser
|
||||
void *result = NULL;
|
||||
|
@ -459,6 +460,7 @@ expressions[expr_ty]:
|
|||
| a=expression ',' { _PyAST_Tuple(CHECK(asdl_expr_seq*, _PyPegen_singleton_seq(p, a)), Load, EXTRA) }
|
||||
| expression
|
||||
expression[expr_ty] (memo):
|
||||
| invalid_expression
|
||||
| a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
|
||||
| disjunction
|
||||
| lambdef
|
||||
|
@ -778,6 +780,13 @@ invalid_kwarg:
|
|||
| expression a='=' {
|
||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
||||
a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
|
||||
|
||||
invalid_expression:
|
||||
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
|
||||
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
|
||||
| !(NAME STRING | SOFT_KEYWORD) a=disjunction expression {
|
||||
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "invalid syntax. Perhaps you forgot a comma?") }
|
||||
|
||||
invalid_named_expression:
|
||||
| a=expression ':=' expression {
|
||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
||||
|
|
|
@ -69,8 +69,9 @@ extern "C" {
|
|||
#define ASYNC 56
|
||||
#define TYPE_IGNORE 57
|
||||
#define TYPE_COMMENT 58
|
||||
#define ERRORTOKEN 59
|
||||
#define N_TOKENS 63
|
||||
#define SOFT_KEYWORD 59
|
||||
#define ERRORTOKEN 60
|
||||
#define N_TOKENS 64
|
||||
#define NT_OFFSET 256
|
||||
|
||||
/* Special definitions for cooperation with parser */
|
||||
|
|
|
@ -103,7 +103,7 @@ Verify that parenthesis are required when used as a keyword argument value
|
|||
>>> dict(a = i for i in range(10))
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
SyntaxError: invalid syntax
|
||||
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||
|
||||
Verify that parentheses are required when used as a keyword argument value
|
||||
|
||||
|
|
|
@ -248,22 +248,36 @@ SyntaxError: did you forget parentheses around the comprehension target?
|
|||
|
||||
# Missing commas in literal collections should not
|
||||
# produce special error messages regarding missing
|
||||
# parentheses
|
||||
# parentheses, but about missing commas instead
|
||||
|
||||
>>> [1, 2 3]
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||
|
||||
>>> {1, 2 3}
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||
|
||||
>>> {1:2, 2:5 3:12}
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||
|
||||
>>> (1, 2 3)
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax. Perhaps you forgot a comma?
|
||||
|
||||
# Make sure soft keywords constructs don't raise specialized
|
||||
# errors regarding missing commas
|
||||
|
||||
>>> match x:
|
||||
... y = 3
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
|
||||
>>> match x:
|
||||
... case y:
|
||||
... 3 $ 3
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
|
||||
From compiler_complex_args():
|
||||
|
@ -864,7 +878,7 @@ leading to spurious errors.
|
|||
SyntaxError: cannot assign to attribute here. Maybe you meant '==' instead of '='?
|
||||
|
||||
Ensure that early = are not matched by the parser as invalid comparisons
|
||||
>>> f(2, 4, x=34); {1,2 a}
|
||||
>>> f(2, 4, x=34); 1 $ 2
|
||||
Traceback (most recent call last):
|
||||
SyntaxError: invalid syntax
|
||||
|
||||
|
|
|
@ -62,12 +62,13 @@ AWAIT = 55
|
|||
ASYNC = 56
|
||||
TYPE_IGNORE = 57
|
||||
TYPE_COMMENT = 58
|
||||
SOFT_KEYWORD = 59
|
||||
# These aren't used by the C tokenizer but are needed for tokenize.py
|
||||
ERRORTOKEN = 59
|
||||
COMMENT = 60
|
||||
NL = 61
|
||||
ENCODING = 62
|
||||
N_TOKENS = 63
|
||||
ERRORTOKEN = 60
|
||||
COMMENT = 61
|
||||
NL = 62
|
||||
ENCODING = 63
|
||||
N_TOKENS = 64
|
||||
# Special definitions for cooperation with parser
|
||||
NT_OFFSET = 256
|
||||
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Improve syntax errors in the parser for missing commas between expressions.
|
||||
Patch by Pablo Galindo.
|
2180
Parser/parser.c
2180
Parser/parser.c
File diff suppressed because it is too large
Load Diff
|
@ -943,6 +943,23 @@ _PyPegen_string_token(Parser *p)
|
|||
return _PyPegen_expect_token(p, STRING);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Match a single soft keyword.
 *
 * Succeeds when the next token is a NAME whose text is exactly one of the
 * NULL-terminated entries in p->soft_keywords, and returns the Name node
 * built from that token.  Returns NULL when the next token is not a NAME,
 * when its text is not a soft keyword, or when the token bytes cannot be
 * read.
 */
expr_ty _PyPegen_soft_keyword_token(Parser *p) {
    // Remember the position of the candidate token: it has to be re-read
    // below in order to build the Name node from it.
    int start_mark = p->mark;

    Token *t = _PyPegen_expect_token(p, NAME);
    if (t == NULL) {
        return NULL;
    }

    char *the_token;
    Py_ssize_t size;
    // Do not look at the_token/size unless the call succeeded; they are
    // left uninitialised on failure.
    if (PyBytes_AsStringAndSize(t->bytes, &the_token, &size) < 0) {
        return NULL;
    }

    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
        // Require equal lengths as well as equal bytes: comparing only the
        // first `size` bytes would accept any prefix of a soft keyword
        // (for example "ma" for "match").
        if (strlen(*keyword) == (size_t)size &&
            strncmp(*keyword, the_token, (size_t)size) == 0) {
            // _PyPegen_expect_token() above already consumed the NAME, so
            // rewind to it; otherwise _PyPegen_name_token() would try to
            // consume a *second* NAME and the result would depend on the
            // token that follows the soft keyword.
            p->mark = start_mark;
            return _PyPegen_name_token(p);
        }
    }
    return NULL;
}
|
||||
|
||||
static PyObject *
|
||||
parsenumber_raw(const char *s)
|
||||
{
|
||||
|
@ -1151,6 +1168,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
|
|||
p->tok = tok;
|
||||
p->keywords = NULL;
|
||||
p->n_keyword_lists = -1;
|
||||
p->soft_keywords = NULL;
|
||||
p->tokens = PyMem_Malloc(sizeof(Token *));
|
||||
if (!p->tokens) {
|
||||
PyMem_Free(p);
|
||||
|
|
|
@ -59,6 +59,7 @@ typedef struct {
|
|||
int fill, size;
|
||||
PyArena *arena;
|
||||
KeywordToken **keywords;
|
||||
char **soft_keywords;
|
||||
int n_keyword_lists;
|
||||
int start_rule;
|
||||
int *errcode;
|
||||
|
@ -125,6 +126,7 @@ int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
|||
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
|
||||
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
|
||||
expr_ty _PyPegen_soft_keyword_token(Parser *p);
|
||||
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
||||
int _PyPegen_fill_token(Parser *p);
|
||||
expr_ty _PyPegen_name_token(Parser *p);
|
||||
|
|
|
@ -65,6 +65,7 @@ const char * const _PyParser_TokenNames[] = {
|
|||
"ASYNC",
|
||||
"TYPE_IGNORE",
|
||||
"TYPE_COMMENT",
|
||||
"SOFT_KEYWORD",
|
||||
"<ERRORTOKEN>",
|
||||
"<COMMENT>",
|
||||
"<NL>",
|
||||
|
|
|
@ -46,6 +46,7 @@ _PyPegen_parse(Parser *p)
|
|||
// Initialize keywords
|
||||
p->keywords = reserved_keywords;
|
||||
p->n_keyword_lists = n_keyword_lists;
|
||||
p->soft_keywords = soft_keywords;
|
||||
|
||||
return start_rule(p);
|
||||
}
|
||||
|
@ -66,6 +67,7 @@ BASE_NODETYPES = {
|
|||
"NAME": NodeTypes.NAME_TOKEN,
|
||||
"NUMBER": NodeTypes.NUMBER_TOKEN,
|
||||
"STRING": NodeTypes.STRING_TOKEN,
|
||||
"SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
|
||||
}
|
||||
|
||||
|
||||
|
@ -411,6 +413,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
if subheader:
|
||||
self.print(subheader)
|
||||
self._setup_keywords()
|
||||
self._setup_soft_keywords()
|
||||
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
|
||||
comment = " // Left-recursive" if rule.left_recursive else ""
|
||||
self.print(f"#define {rulename}_type {i}{comment}")
|
||||
|
@ -474,6 +477,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print("},")
|
||||
self.print("};")
|
||||
|
||||
def _setup_soft_keywords(self) -> None:
    """Print the C ``soft_keywords`` array for the generated parser.

    The soft keywords collected by ``self.callmakervisitor`` are written in
    sorted order, one quoted string per line, followed by a ``NULL`` entry
    that terminates the array.
    """
    names = sorted(self.callmakervisitor.soft_keywords)
    # One C string literal per keyword, then the terminating NULL entry.
    entries = [f'"{name}",' for name in names]
    entries.append("NULL,")

    self.print("static char *soft_keywords[] = {")
    with self.indent():
        for entry in entries:
            self.print(entry)
    self.print("};")
|
||||
|
||||
def _set_up_token_start_metadata_extraction(self) -> None:
|
||||
self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
|
||||
with self.indent():
|
||||
|
|
Loading…
Reference in New Issue