mirror of https://github.com/python/cpython
gh-121130: Fix f-string format specifiers with debug expressions (#121150)
This commit is contained in:
parent
69c68de43a
commit
c46d64e0ef
|
@ -316,9 +316,7 @@ Literals
|
||||||
args=[
|
args=[
|
||||||
Name(id='a', ctx=Load())]),
|
Name(id='a', ctx=Load())]),
|
||||||
conversion=-1,
|
conversion=-1,
|
||||||
format_spec=JoinedStr(
|
format_spec=Constant(value='.3'))]))
|
||||||
values=[
|
|
||||||
Constant(value='.3')]))]))
|
|
||||||
|
|
||||||
|
|
||||||
.. class:: List(elts, ctx)
|
.. class:: List(elts, ctx)
|
||||||
|
|
|
@ -3638,7 +3638,7 @@ eval_results = [
|
||||||
('Expression', ('Subscript', (1, 0, 1, 10), ('List', (1, 0, 1, 3), [('Constant', (1, 1, 1, 2), 5, None)], ('Load',)), ('Slice', (1, 4, 1, 9), ('Constant', (1, 4, 1, 5), 1, None), ('Constant', (1, 6, 1, 7), 1, None), ('Constant', (1, 8, 1, 9), 1, None)), ('Load',))),
|
('Expression', ('Subscript', (1, 0, 1, 10), ('List', (1, 0, 1, 3), [('Constant', (1, 1, 1, 2), 5, None)], ('Load',)), ('Slice', (1, 4, 1, 9), ('Constant', (1, 4, 1, 5), 1, None), ('Constant', (1, 6, 1, 7), 1, None), ('Constant', (1, 8, 1, 9), 1, None)), ('Load',))),
|
||||||
('Expression', ('IfExp', (1, 0, 1, 21), ('Name', (1, 9, 1, 10), 'x', ('Load',)), ('Call', (1, 0, 1, 5), ('Name', (1, 0, 1, 3), 'foo', ('Load',)), [], []), ('Call', (1, 16, 1, 21), ('Name', (1, 16, 1, 19), 'bar', ('Load',)), [], []))),
|
('Expression', ('IfExp', (1, 0, 1, 21), ('Name', (1, 9, 1, 10), 'x', ('Load',)), ('Call', (1, 0, 1, 5), ('Name', (1, 0, 1, 3), 'foo', ('Load',)), [], []), ('Call', (1, 16, 1, 21), ('Name', (1, 16, 1, 19), 'bar', ('Load',)), [], []))),
|
||||||
('Expression', ('JoinedStr', (1, 0, 1, 6), [('FormattedValue', (1, 2, 1, 5), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, None)])),
|
('Expression', ('JoinedStr', (1, 0, 1, 6), [('FormattedValue', (1, 2, 1, 5), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, None)])),
|
||||||
('Expression', ('JoinedStr', (1, 0, 1, 10), [('FormattedValue', (1, 2, 1, 9), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, ('JoinedStr', (1, 4, 1, 8), [('Constant', (1, 5, 1, 8), '.2f', None)]))])),
|
('Expression', ('JoinedStr', (1, 0, 1, 10), [('FormattedValue', (1, 2, 1, 9), ('Name', (1, 3, 1, 4), 'a', ('Load',)), -1, ('Constant', (1, 5, 1, 8), '.2f', None))])),
|
||||||
('Expression', ('JoinedStr', (1, 0, 1, 8), [('FormattedValue', (1, 2, 1, 7), ('Name', (1, 3, 1, 4), 'a', ('Load',)), 114, None)])),
|
('Expression', ('JoinedStr', (1, 0, 1, 8), [('FormattedValue', (1, 2, 1, 7), ('Name', (1, 3, 1, 4), 'a', ('Load',)), 114, None)])),
|
||||||
('Expression', ('JoinedStr', (1, 0, 1, 11), [('Constant', (1, 2, 1, 6), 'foo(', None), ('FormattedValue', (1, 6, 1, 9), ('Name', (1, 7, 1, 8), 'a', ('Load',)), -1, None), ('Constant', (1, 9, 1, 10), ')', None)])),
|
('Expression', ('JoinedStr', (1, 0, 1, 11), [('Constant', (1, 2, 1, 6), 'foo(', None), ('FormattedValue', (1, 6, 1, 9), ('Name', (1, 7, 1, 8), 'a', ('Load',)), -1, None), ('Constant', (1, 9, 1, 10), ')', None)])),
|
||||||
]
|
]
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
# Unicode identifiers in tests is allowed by PEP 3131.
|
# Unicode identifiers in tests is allowed by PEP 3131.
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
|
import datetime
|
||||||
import dis
|
import dis
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -1601,6 +1602,12 @@ x = (
|
||||||
self.assertEqual(f'{f(a=4)}', '3=')
|
self.assertEqual(f'{f(a=4)}', '3=')
|
||||||
self.assertEqual(x, 4)
|
self.assertEqual(x, 4)
|
||||||
|
|
||||||
|
# Check debug expressions in format spec
|
||||||
|
y = 20
|
||||||
|
self.assertEqual(f"{2:{y=}}", "yyyyyyyyyyyyyyyyyyy2")
|
||||||
|
self.assertEqual(f"{datetime.datetime.now():h1{y=}h2{y=}h3{y=}}",
|
||||||
|
'h1y=20h2y=20h3y=20')
|
||||||
|
|
||||||
# Make sure __format__ is being called.
|
# Make sure __format__ is being called.
|
||||||
class C:
|
class C:
|
||||||
def __format__(self, s):
|
def __format__(self, s):
|
||||||
|
@ -1614,9 +1621,11 @@ x = (
|
||||||
self.assertEqual(f'{C()=: }', 'C()=FORMAT- ')
|
self.assertEqual(f'{C()=: }', 'C()=FORMAT- ')
|
||||||
self.assertEqual(f'{C()=:x}', 'C()=FORMAT-x')
|
self.assertEqual(f'{C()=:x}', 'C()=FORMAT-x')
|
||||||
self.assertEqual(f'{C()=!r:*^20}', 'C()=********REPR********')
|
self.assertEqual(f'{C()=!r:*^20}', 'C()=********REPR********')
|
||||||
|
self.assertEqual(f"{C():{20=}}", 'FORMAT-20=20')
|
||||||
|
|
||||||
self.assertRaises(SyntaxError, eval, "f'{C=]'")
|
self.assertRaises(SyntaxError, eval, "f'{C=]'")
|
||||||
|
|
||||||
|
|
||||||
# Make sure leading and following text works.
|
# Make sure leading and following text works.
|
||||||
x = 'foo'
|
x = 'foo'
|
||||||
self.assertEqual(f'X{x=}Y', 'Xx='+repr(x)+'Y')
|
self.assertEqual(f'X{x=}Y', 'Xx='+repr(x)+'Y')
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix f-strings with debug expressions in format specifiers. Patch by Pablo
|
||||||
|
Galindo.
|
|
@ -969,6 +969,8 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
|
||||||
return result_token_with_metadata(p, conv, conv_token->metadata);
|
return result_token_with_metadata(p, conv, conv_token->metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static asdl_expr_seq *
|
||||||
|
unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions);
|
||||||
ResultTokenWithMetadata *
|
ResultTokenWithMetadata *
|
||||||
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
|
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
|
||||||
int end_lineno, int end_col_offset, PyArena *arena)
|
int end_lineno, int end_col_offset, PyArena *arena)
|
||||||
|
@ -1007,8 +1009,15 @@ _PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, in
|
||||||
assert(j == non_empty_count);
|
assert(j == non_empty_count);
|
||||||
spec = resized_spec;
|
spec = resized_spec;
|
||||||
}
|
}
|
||||||
expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
|
expr_ty res;
|
||||||
end_col_offset, p->arena);
|
if (asdl_seq_LEN(spec) == 0) {
|
||||||
|
res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno,
|
||||||
|
end_col_offset, p->arena);
|
||||||
|
} else {
|
||||||
|
res = _PyPegen_concatenate_strings(p, spec,
|
||||||
|
lineno, col_offset, end_lineno,
|
||||||
|
end_col_offset, arena);
|
||||||
|
}
|
||||||
if (!res) {
|
if (!res) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1308,6 +1317,7 @@ unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
|
||||||
|
|
||||||
expr_ty
|
expr_ty
|
||||||
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
|
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
|
||||||
|
|
||||||
asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
|
asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
|
||||||
Py_ssize_t n_items = asdl_seq_LEN(expr);
|
Py_ssize_t n_items = asdl_seq_LEN(expr);
|
||||||
|
|
||||||
|
@ -1472,7 +1482,6 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re
|
||||||
debug_end_offset = end_col_offset;
|
debug_end_offset = end_col_offset;
|
||||||
debug_metadata = closing_brace->metadata;
|
debug_metadata = closing_brace->metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
|
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
|
||||||
debug_end_offset - 1, p->arena);
|
debug_end_offset - 1, p->arena);
|
||||||
if (!debug_text) {
|
if (!debug_text) {
|
||||||
|
@ -1505,16 +1514,23 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
||||||
Py_ssize_t n_flattened_elements = 0;
|
Py_ssize_t n_flattened_elements = 0;
|
||||||
for (i = 0; i < len; i++) {
|
for (i = 0; i < len; i++) {
|
||||||
expr_ty elem = asdl_seq_GET(strings, i);
|
expr_ty elem = asdl_seq_GET(strings, i);
|
||||||
if (elem->kind == Constant_kind) {
|
switch(elem->kind) {
|
||||||
if (PyBytes_CheckExact(elem->v.Constant.value)) {
|
case Constant_kind:
|
||||||
bytes_found = 1;
|
if (PyBytes_CheckExact(elem->v.Constant.value)) {
|
||||||
} else {
|
bytes_found = 1;
|
||||||
unicode_string_found = 1;
|
} else {
|
||||||
}
|
unicode_string_found = 1;
|
||||||
n_flattened_elements++;
|
}
|
||||||
} else {
|
n_flattened_elements++;
|
||||||
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
|
break;
|
||||||
f_string_found = 1;
|
case JoinedStr_kind:
|
||||||
|
n_flattened_elements += asdl_seq_LEN(elem->v.JoinedStr.values);
|
||||||
|
f_string_found = 1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
n_flattened_elements++;
|
||||||
|
f_string_found = 1;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1556,16 +1572,19 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
|
||||||
Py_ssize_t j = 0;
|
Py_ssize_t j = 0;
|
||||||
for (i = 0; i < len; i++) {
|
for (i = 0; i < len; i++) {
|
||||||
expr_ty elem = asdl_seq_GET(strings, i);
|
expr_ty elem = asdl_seq_GET(strings, i);
|
||||||
if (elem->kind == Constant_kind) {
|
switch(elem->kind) {
|
||||||
asdl_seq_SET(flattened, current_pos++, elem);
|
case JoinedStr_kind:
|
||||||
} else {
|
for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
|
||||||
for (j = 0; j < asdl_seq_LEN(elem->v.JoinedStr.values); j++) {
|
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
|
||||||
expr_ty subvalue = asdl_seq_GET(elem->v.JoinedStr.values, j);
|
if (subvalue == NULL) {
|
||||||
if (subvalue == NULL) {
|
return NULL;
|
||||||
return NULL;
|
}
|
||||||
|
asdl_seq_SET(flattened, current_pos++, subvalue);
|
||||||
}
|
}
|
||||||
asdl_seq_SET(flattened, current_pos++, subvalue);
|
break;
|
||||||
}
|
default:
|
||||||
|
asdl_seq_SET(flattened, current_pos++, elem);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -989,6 +989,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
the_current_tok->last_expr_buffer = NULL;
|
the_current_tok->last_expr_buffer = NULL;
|
||||||
the_current_tok->last_expr_size = 0;
|
the_current_tok->last_expr_size = 0;
|
||||||
the_current_tok->last_expr_end = -1;
|
the_current_tok->last_expr_end = -1;
|
||||||
|
the_current_tok->in_format_spec = 0;
|
||||||
the_current_tok->f_string_debug = 0;
|
the_current_tok->f_string_debug = 0;
|
||||||
|
|
||||||
switch (*tok->start) {
|
switch (*tok->start) {
|
||||||
|
@ -1137,15 +1138,20 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
* by the `{` case, so for ensuring that we are on the 0th level, we need
|
* by the `{` case, so for ensuring that we are on the 0th level, we need
|
||||||
* to adjust it manually */
|
* to adjust it manually */
|
||||||
int cursor = current_tok->curly_bracket_depth - (c != '{');
|
int cursor = current_tok->curly_bracket_depth - (c != '{');
|
||||||
if (cursor == 0 && !_PyLexer_update_fstring_expr(tok, c)) {
|
int in_format_spec = current_tok->in_format_spec;
|
||||||
|
int cursor_in_format_with_debug =
|
||||||
|
cursor == 1 && (current_tok->f_string_debug || in_format_spec);
|
||||||
|
int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
|
||||||
|
if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) {
|
||||||
return MAKE_TOKEN(ENDMARKER);
|
return MAKE_TOKEN(ENDMARKER);
|
||||||
}
|
}
|
||||||
if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) {
|
if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) {
|
||||||
return MAKE_TOKEN(ERRORTOKEN);
|
return MAKE_TOKEN(ERRORTOKEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
|
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
|
||||||
current_tok->kind = TOK_FSTRING_MODE;
|
current_tok->kind = TOK_FSTRING_MODE;
|
||||||
|
current_tok->in_format_spec = 1;
|
||||||
p_start = tok->start;
|
p_start = tok->start;
|
||||||
p_end = tok->cur;
|
p_end = tok->cur;
|
||||||
return MAKE_TOKEN(_PyToken_OneChar(c));
|
return MAKE_TOKEN(_PyToken_OneChar(c));
|
||||||
|
@ -1235,6 +1241,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
|
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
|
||||||
current_tok->curly_bracket_expr_start_depth--;
|
current_tok->curly_bracket_expr_start_depth--;
|
||||||
current_tok->kind = TOK_FSTRING_MODE;
|
current_tok->kind = TOK_FSTRING_MODE;
|
||||||
|
current_tok->in_format_spec = 0;
|
||||||
current_tok->f_string_debug = 0;
|
current_tok->f_string_debug = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1317,11 +1324,11 @@ f_string_middle:
|
||||||
tok->multi_line_start = tok->line_start;
|
tok->multi_line_start = tok->line_start;
|
||||||
while (end_quote_size != current_tok->f_string_quote_size) {
|
while (end_quote_size != current_tok->f_string_quote_size) {
|
||||||
int c = tok_nextc(tok);
|
int c = tok_nextc(tok);
|
||||||
if (tok->done == E_ERROR) {
|
if (tok->done == E_ERROR || tok->done == E_DECODE) {
|
||||||
return MAKE_TOKEN(ERRORTOKEN);
|
return MAKE_TOKEN(ERRORTOKEN);
|
||||||
}
|
}
|
||||||
int in_format_spec = (
|
int in_format_spec = (
|
||||||
current_tok->last_expr_end != -1
|
current_tok->in_format_spec
|
||||||
&&
|
&&
|
||||||
INSIDE_FSTRING_EXPR(current_tok)
|
INSIDE_FSTRING_EXPR(current_tok)
|
||||||
);
|
);
|
||||||
|
@ -1337,6 +1344,7 @@ f_string_middle:
|
||||||
if (in_format_spec && c == '\n') {
|
if (in_format_spec && c == '\n') {
|
||||||
tok_backup(tok, c);
|
tok_backup(tok, c);
|
||||||
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
|
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
|
||||||
|
current_tok->in_format_spec = 0;
|
||||||
p_start = tok->start;
|
p_start = tok->start;
|
||||||
p_end = tok->cur;
|
p_end = tok->cur;
|
||||||
return MAKE_TOKEN(FSTRING_MIDDLE);
|
return MAKE_TOKEN(FSTRING_MIDDLE);
|
||||||
|
@ -1378,6 +1386,9 @@ f_string_middle:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == '{') {
|
if (c == '{') {
|
||||||
|
if (!_PyLexer_update_fstring_expr(tok, c)) {
|
||||||
|
return MAKE_TOKEN(ENDMARKER);
|
||||||
|
}
|
||||||
int peek = tok_nextc(tok);
|
int peek = tok_nextc(tok);
|
||||||
if (peek != '{' || in_format_spec) {
|
if (peek != '{' || in_format_spec) {
|
||||||
tok_backup(tok, peek);
|
tok_backup(tok, peek);
|
||||||
|
@ -1387,6 +1398,7 @@ f_string_middle:
|
||||||
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
|
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
|
||||||
}
|
}
|
||||||
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
|
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
|
||||||
|
current_tok->in_format_spec = 0;
|
||||||
p_start = tok->start;
|
p_start = tok->start;
|
||||||
p_end = tok->cur;
|
p_end = tok->cur;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1406,13 +1418,15 @@ f_string_middle:
|
||||||
// scanning (indicated by the end of the expression being set) and we are not at the top level
|
// scanning (indicated by the end of the expression being set) and we are not at the top level
|
||||||
// of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
|
// of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
|
||||||
// brackets, we can bypass it here.
|
// brackets, we can bypass it here.
|
||||||
if (peek == '}' && !in_format_spec) {
|
int cursor = current_tok->curly_bracket_depth;
|
||||||
|
if (peek == '}' && !in_format_spec && cursor == 0) {
|
||||||
p_start = tok->start;
|
p_start = tok->start;
|
||||||
p_end = tok->cur - 1;
|
p_end = tok->cur - 1;
|
||||||
} else {
|
} else {
|
||||||
tok_backup(tok, peek);
|
tok_backup(tok, peek);
|
||||||
tok_backup(tok, c);
|
tok_backup(tok, c);
|
||||||
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
|
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
|
||||||
|
current_tok->in_format_spec = 0;
|
||||||
p_start = tok->start;
|
p_start = tok->start;
|
||||||
p_end = tok->cur;
|
p_end = tok->cur;
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,6 +74,7 @@ free_fstring_expressions(struct tok_state *tok)
|
||||||
mode->last_expr_buffer = NULL;
|
mode->last_expr_buffer = NULL;
|
||||||
mode->last_expr_size = 0;
|
mode->last_expr_size = 0;
|
||||||
mode->last_expr_end = -1;
|
mode->last_expr_end = -1;
|
||||||
|
mode->in_format_spec = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,7 @@ typedef struct _tokenizer_mode {
|
||||||
Py_ssize_t last_expr_end;
|
Py_ssize_t last_expr_end;
|
||||||
char* last_expr_buffer;
|
char* last_expr_buffer;
|
||||||
int f_string_debug;
|
int f_string_debug;
|
||||||
|
int in_format_spec;
|
||||||
} tokenizer_mode;
|
} tokenizer_mode;
|
||||||
|
|
||||||
/* Tokenizer state */
|
/* Tokenizer state */
|
||||||
|
|
Loading…
Reference in New Issue