mirror of https://github.com/python/cpython
bpo-30529: Fix errors for invalid whitespaces in f-string subexpressions. (#1888)
'invalid character in identifier' is now raised instead of 'f-string: empty expression not allowed' if a subexpression contains only whitespace characters that are not accepted as whitespace by the Python parser.
This commit is contained in:
parent
29adc13bd7
commit
2e9cd5825c
|
@ -280,6 +280,10 @@ f'{a * x()}'"""
|
||||||
"f'{10:{ }}'",
|
"f'{10:{ }}'",
|
||||||
"f' { } '",
|
"f' { } '",
|
||||||
|
|
||||||
|
# The Python parser also ignores the following
|
||||||
|
# whitespace characters in addition to a space.
|
||||||
|
"f'''{\t\f\r\n}'''",
|
||||||
|
|
||||||
# Catch the empty expression before the
|
# Catch the empty expression before the
|
||||||
# invalid conversion.
|
# invalid conversion.
|
||||||
"f'{!x}'",
|
"f'{!x}'",
|
||||||
|
@ -300,6 +304,12 @@ f'{a * x()}'"""
|
||||||
"f'{:x'",
|
"f'{:x'",
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# A different error message is raised for other whitespace characters.
|
||||||
|
self.assertAllRaise(SyntaxError, 'invalid character in identifier',
|
||||||
|
["f'''{\xa0}'''",
|
||||||
|
"\xa0",
|
||||||
|
])
|
||||||
|
|
||||||
def test_parens_in_expressions(self):
|
def test_parens_in_expressions(self):
|
||||||
self.assertEqual(f'{3,}', '(3,)')
|
self.assertEqual(f'{3,}', '(3,)')
|
||||||
|
|
||||||
|
|
31
Python/ast.c
31
Python/ast.c
|
@ -4274,49 +4274,32 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
|
||||||
struct compiling *c, const node *n)
|
struct compiling *c, const node *n)
|
||||||
|
|
||||||
{
|
{
|
||||||
int all_whitespace = 1;
|
|
||||||
int kind;
|
|
||||||
void *data;
|
|
||||||
PyCompilerFlags cf;
|
PyCompilerFlags cf;
|
||||||
mod_ty mod;
|
mod_ty mod;
|
||||||
char *str;
|
char *str;
|
||||||
PyObject *o;
|
|
||||||
Py_ssize_t len;
|
Py_ssize_t len;
|
||||||
Py_ssize_t i;
|
const char *s;
|
||||||
|
|
||||||
assert(expr_end >= expr_start);
|
assert(expr_end >= expr_start);
|
||||||
assert(*(expr_start-1) == '{');
|
assert(*(expr_start-1) == '{');
|
||||||
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
|
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
|
||||||
|
|
||||||
/* We know there are no escapes here, because backslashes are not allowed,
|
|
||||||
and we know it's utf-8 encoded (per PEP 263). But, in order to check
|
|
||||||
that each char is not whitespace, we need to decode it to unicode.
|
|
||||||
Which is unfortunate, but such is life. */
|
|
||||||
|
|
||||||
/* If the substring is all whitespace, it's an error. We need to catch
|
/* If the substring is all whitespace, it's an error. We need to catch
|
||||||
this here, and not when we call PyParser_ASTFromString, because turning
|
this here, and not when we call PyParser_ASTFromString, because turning
|
||||||
the expression '' in to '()' would go from being invalid to valid. */
|
the expression '' in to '()' would go from being invalid to valid. */
|
||||||
/* Note that this code says an empty string is all whitespace. That's
|
for (s = expr_start; s != expr_end; s++) {
|
||||||
important. There's a test for it: f'{}'. */
|
char c = *s;
|
||||||
o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
|
/* The Python parser ignores only the following whitespace
|
||||||
if (o == NULL)
|
characters (\r already is converted to \n). */
|
||||||
return NULL;
|
if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
|
||||||
len = PyUnicode_GET_LENGTH(o);
|
|
||||||
kind = PyUnicode_KIND(o);
|
|
||||||
data = PyUnicode_DATA(o);
|
|
||||||
for (i = 0; i < len; i++) {
|
|
||||||
if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
|
|
||||||
all_whitespace = 0;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Py_DECREF(o);
|
if (s == expr_end) {
|
||||||
if (all_whitespace) {
|
|
||||||
ast_error(c, n, "f-string: empty expression not allowed");
|
ast_error(c, n, "f-string: empty expression not allowed");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reuse len to be the length of the utf-8 input string. */
|
|
||||||
len = expr_end - expr_start;
|
len = expr_end - expr_start;
|
||||||
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
|
/* Allocate 3 extra bytes: open paren, close paren, null byte. */
|
||||||
str = PyMem_RawMalloc(len + 3);
|
str = PyMem_RawMalloc(len + 3);
|
||||||
|
|
Loading…
Reference in New Issue