mirror of https://github.com/python/cpython
gh-103656: Transfer f-string buffers to parser to avoid use-after-free (GH-103896)
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
parent
76632b836c
commit
9169a56fad
|
@ -881,14 +881,13 @@ fstring_middle[expr_ty]:
|
||||||
| fstring_replacement_field
|
| fstring_replacement_field
|
||||||
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
||||||
fstring_replacement_field[expr_ty]:
|
fstring_replacement_field[expr_ty]:
|
||||||
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] '}' {
|
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
|
||||||
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, EXTRA)
|
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
|
||||||
}
|
|
||||||
| invalid_replacement_field
|
| invalid_replacement_field
|
||||||
fstring_conversion[expr_ty]:
|
fstring_conversion[ResultTokenWithMetadata*]:
|
||||||
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
|
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
|
||||||
fstring_full_format_spec[expr_ty]:
|
fstring_full_format_spec[ResultTokenWithMetadata*]:
|
||||||
| ':' spec=fstring_format_spec* { spec ? _PyAST_JoinedStr((asdl_expr_seq*)spec, EXTRA) : NULL }
|
| colon=':' spec=fstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) }
|
||||||
fstring_format_spec[expr_ty]:
|
fstring_format_spec[expr_ty]:
|
||||||
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
||||||
| fstring_replacement_field
|
| fstring_replacement_field
|
||||||
|
|
|
@ -1535,5 +1535,19 @@ x = (
|
||||||
self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal",
|
self.assertAllRaise(SyntaxError, "unterminated triple-quoted f-string literal",
|
||||||
['f"""', "f'''"])
|
['f"""', "f'''"])
|
||||||
|
|
||||||
|
def test_syntax_error_after_debug(self):
    # Each source string starts with a debug field ({1=}) and then contains a
    # malformed replacement field; the expected SyntaxError message depends on
    # whether the malformed field has an expression before the ';'.
    no_expression_after_brace = [
        "f'{1=}{;'",
        "f'{1=}{+;'",
        "f'{1=}{2}{;'",
        "f'{1=}{3}{;'",
    ]
    self.assertAllRaise(
        SyntaxError,
        "f-string: expecting a valid expression after '{'",
        no_expression_after_brace,
    )

    unexpected_token_after_expression = [
        "f'{1=}{1;'",
        "f'{1=}{1;}'",
    ]
    self.assertAllRaise(
        SyntaxError,
        "f-string: expecting '=', or '!', or ':', or '}'",
        unexpected_token_after_expression,
    )
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -965,17 +965,43 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
expr_ty
|
static ResultTokenWithMetadata *
|
||||||
_PyPegen_check_fstring_conversion(Parser *p, Token* symbol, expr_ty conv) {
|
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
|
||||||
if (symbol->lineno != conv->lineno || symbol->end_col_offset != conv->col_offset) {
|
{
|
||||||
|
ResultTokenWithMetadata *res = _PyArena_Malloc(p->arena, sizeof(ResultTokenWithMetadata));
|
||||||
|
if (res == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
res->metadata = metadata;
|
||||||
|
res->result = result;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
ResultTokenWithMetadata *
|
||||||
|
_PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
|
||||||
|
{
|
||||||
|
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
|
||||||
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
|
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
|
||||||
symbol, conv,
|
conv_token, conv,
|
||||||
"f-string: conversion type must come right after the exclamanation mark"
|
"f-string: conversion type must come right after the exclamanation mark"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return conv;
|
return result_token_with_metadata(p, conv, conv_token->metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ResultTokenWithMetadata *
|
||||||
|
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec, int lineno, int col_offset,
|
||||||
|
int end_lineno, int end_col_offset, PyArena *arena)
|
||||||
|
{
|
||||||
|
if (!spec) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
expr_ty res = _PyAST_JoinedStr(spec, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
||||||
|
if (!res) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return result_token_with_metadata(p, res, colon->metadata);
|
||||||
|
}
|
||||||
|
|
||||||
const char *
|
const char *
|
||||||
_PyPegen_get_expr_name(expr_ty e)
|
_PyPegen_get_expr_name(expr_ty e)
|
||||||
|
@ -1197,27 +1223,6 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
|
||||||
|
|
||||||
// Fstring stuff
|
// Fstring stuff
|
||||||
|
|
||||||
static expr_ty
|
|
||||||
decode_fstring_buffer(Parser *p, int lineno, int col_offset, int end_lineno,
|
|
||||||
int end_col_offset)
|
|
||||||
{
|
|
||||||
tokenizer_mode *tok_mode = &(p->tok->tok_mode_stack[p->tok->tok_mode_stack_index]);
|
|
||||||
assert(tok_mode->last_expr_buffer != NULL);
|
|
||||||
assert(tok_mode->last_expr_size >= 0 && tok_mode->last_expr_end >= 0);
|
|
||||||
|
|
||||||
PyObject *res = PyUnicode_DecodeUTF8(
|
|
||||||
tok_mode->last_expr_buffer,
|
|
||||||
tok_mode->last_expr_size - tok_mode->last_expr_end,
|
|
||||||
NULL
|
|
||||||
);
|
|
||||||
if (!res || _PyArena_AddPyObject(p->arena, res) < 0) {
|
|
||||||
Py_XDECREF(res);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return _PyAST_Constant(res, NULL, lineno, col_offset, end_lineno, end_col_offset, p->arena);
|
|
||||||
}
|
|
||||||
|
|
||||||
static expr_ty
|
static expr_ty
|
||||||
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
|
_PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant) {
|
||||||
assert(PyUnicode_CheckExact(constant->v.Constant.value));
|
assert(PyUnicode_CheckExact(constant->v.Constant.value));
|
||||||
|
@ -1386,19 +1391,20 @@ expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok) {
|
||||||
return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
|
return _PyAST_Constant(s, kind, tok->lineno, tok->col_offset, tok->end_lineno, tok->end_col_offset, p->arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, expr_ty conversion,
|
expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ResultTokenWithMetadata *conversion,
|
||||||
expr_ty format, int lineno, int col_offset, int end_lineno, int end_col_offset,
|
ResultTokenWithMetadata *format, Token *closing_brace, int lineno, int col_offset,
|
||||||
PyArena *arena) {
|
int end_lineno, int end_col_offset, PyArena *arena) {
|
||||||
int conversion_val = -1;
|
int conversion_val = -1;
|
||||||
if (conversion != NULL) {
|
if (conversion != NULL) {
|
||||||
assert(conversion->kind == Name_kind);
|
expr_ty conversion_expr = (expr_ty) conversion->result;
|
||||||
Py_UCS4 first = PyUnicode_READ_CHAR(conversion->v.Name.id, 0);
|
assert(conversion_expr->kind == Name_kind);
|
||||||
|
Py_UCS4 first = PyUnicode_READ_CHAR(conversion_expr->v.Name.id, 0);
|
||||||
|
|
||||||
if (PyUnicode_GET_LENGTH(conversion->v.Name.id) > 1 ||
|
if (PyUnicode_GET_LENGTH(conversion_expr->v.Name.id) > 1 ||
|
||||||
!(first == 's' || first == 'r' || first == 'a')) {
|
!(first == 's' || first == 'r' || first == 'a')) {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion,
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conversion_expr,
|
||||||
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
|
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
|
||||||
conversion->v.Name.id);
|
conversion_expr->v.Name.id);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1410,7 +1416,7 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
|
||||||
}
|
}
|
||||||
|
|
||||||
expr_ty formatted_value = _PyAST_FormattedValue(
|
expr_ty formatted_value = _PyAST_FormattedValue(
|
||||||
expression, conversion_val, format,
|
expression, conversion_val, format ? (expr_ty) format->result : NULL,
|
||||||
lineno, col_offset, end_lineno,
|
lineno, col_offset, end_lineno,
|
||||||
end_col_offset, arena
|
end_col_offset, arena
|
||||||
);
|
);
|
||||||
|
@ -1418,22 +1424,26 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, ex
|
||||||
if (debug) {
|
if (debug) {
|
||||||
/* Find the non whitespace token after the "=" */
|
/* Find the non whitespace token after the "=" */
|
||||||
int debug_end_line, debug_end_offset;
|
int debug_end_line, debug_end_offset;
|
||||||
|
PyObject *debug_metadata;
|
||||||
|
|
||||||
if (conversion) {
|
if (conversion) {
|
||||||
debug_end_line = conversion->lineno;
|
debug_end_line = ((expr_ty) conversion->result)->lineno;
|
||||||
debug_end_offset = conversion->col_offset;
|
debug_end_offset = ((expr_ty) conversion->result)->col_offset;
|
||||||
|
debug_metadata = conversion->metadata;
|
||||||
}
|
}
|
||||||
else if (format) {
|
else if (format) {
|
||||||
debug_end_line = format->lineno;
|
debug_end_line = ((expr_ty) format->result)->lineno;
|
||||||
debug_end_offset = format->col_offset + 1; // HACK: ??
|
debug_end_offset = ((expr_ty) format->result)->col_offset + 1;
|
||||||
|
debug_metadata = format->metadata;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
debug_end_line = end_lineno;
|
debug_end_line = end_lineno;
|
||||||
debug_end_offset = end_col_offset;
|
debug_end_offset = end_col_offset;
|
||||||
|
debug_metadata = closing_brace->metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
expr_ty debug_text = decode_fstring_buffer(p, lineno, col_offset + 1,
|
expr_ty debug_text = _PyAST_Constant(debug_metadata, NULL, lineno, col_offset + 1, debug_end_line,
|
||||||
debug_end_line, debug_end_offset - 1);
|
debug_end_offset - 1, p->arena);
|
||||||
if (!debug_text) {
|
if (!debug_text) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -738,8 +738,8 @@ static NameDefaultPair* lambda_param_maybe_default_rule(Parser *p);
|
||||||
static arg_ty lambda_param_rule(Parser *p);
|
static arg_ty lambda_param_rule(Parser *p);
|
||||||
static expr_ty fstring_middle_rule(Parser *p);
|
static expr_ty fstring_middle_rule(Parser *p);
|
||||||
static expr_ty fstring_replacement_field_rule(Parser *p);
|
static expr_ty fstring_replacement_field_rule(Parser *p);
|
||||||
static expr_ty fstring_conversion_rule(Parser *p);
|
static ResultTokenWithMetadata* fstring_conversion_rule(Parser *p);
|
||||||
static expr_ty fstring_full_format_spec_rule(Parser *p);
|
static ResultTokenWithMetadata* fstring_full_format_spec_rule(Parser *p);
|
||||||
static expr_ty fstring_format_spec_rule(Parser *p);
|
static expr_ty fstring_format_spec_rule(Parser *p);
|
||||||
static expr_ty string_rule(Parser *p);
|
static expr_ty string_rule(Parser *p);
|
||||||
static expr_ty strings_rule(Parser *p);
|
static expr_ty strings_rule(Parser *p);
|
||||||
|
@ -15639,11 +15639,11 @@ fstring_replacement_field_rule(Parser *p)
|
||||||
}
|
}
|
||||||
D(fprintf(stderr, "%*c> fstring_replacement_field[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{' (yield_expr | star_expressions) \"=\"? fstring_conversion? fstring_full_format_spec? '}'"));
|
D(fprintf(stderr, "%*c> fstring_replacement_field[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'{' (yield_expr | star_expressions) \"=\"? fstring_conversion? fstring_full_format_spec? '}'"));
|
||||||
Token * _literal;
|
Token * _literal;
|
||||||
Token * _literal_1;
|
|
||||||
void *a;
|
void *a;
|
||||||
void *conversion;
|
void *conversion;
|
||||||
void *debug_expr;
|
void *debug_expr;
|
||||||
void *format;
|
void *format;
|
||||||
|
Token * rbrace;
|
||||||
if (
|
if (
|
||||||
(_literal = _PyPegen_expect_token(p, 25)) // token='{'
|
(_literal = _PyPegen_expect_token(p, 25)) // token='{'
|
||||||
&&
|
&&
|
||||||
|
@ -15655,7 +15655,7 @@ fstring_replacement_field_rule(Parser *p)
|
||||||
&&
|
&&
|
||||||
(format = fstring_full_format_spec_rule(p), !p->error_indicator) // fstring_full_format_spec?
|
(format = fstring_full_format_spec_rule(p), !p->error_indicator) // fstring_full_format_spec?
|
||||||
&&
|
&&
|
||||||
(_literal_1 = _PyPegen_expect_token(p, 26)) // token='}'
|
(rbrace = _PyPegen_expect_token(p, 26)) // token='}'
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
D(fprintf(stderr, "%*c+ fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' (yield_expr | star_expressions) \"=\"? fstring_conversion? fstring_full_format_spec? '}'"));
|
D(fprintf(stderr, "%*c+ fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' (yield_expr | star_expressions) \"=\"? fstring_conversion? fstring_full_format_spec? '}'"));
|
||||||
|
@ -15668,7 +15668,7 @@ fstring_replacement_field_rule(Parser *p)
|
||||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||||
int _end_col_offset = _token->end_col_offset;
|
int _end_col_offset = _token->end_col_offset;
|
||||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||||
_res = _PyPegen_formatted_value ( p , a , debug_expr , conversion , format , EXTRA );
|
_res = _PyPegen_formatted_value ( p , a , debug_expr , conversion , format , rbrace , EXTRA );
|
||||||
if (_res == NULL && PyErr_Occurred()) {
|
if (_res == NULL && PyErr_Occurred()) {
|
||||||
p->error_indicator = 1;
|
p->error_indicator = 1;
|
||||||
p->level--;
|
p->level--;
|
||||||
|
@ -15706,7 +15706,7 @@ fstring_replacement_field_rule(Parser *p)
|
||||||
}
|
}
|
||||||
|
|
||||||
// fstring_conversion: "!" NAME
|
// fstring_conversion: "!" NAME
|
||||||
static expr_ty
|
static ResultTokenWithMetadata*
|
||||||
fstring_conversion_rule(Parser *p)
|
fstring_conversion_rule(Parser *p)
|
||||||
{
|
{
|
||||||
if (p->level++ == MAXSTACK) {
|
if (p->level++ == MAXSTACK) {
|
||||||
|
@ -15717,7 +15717,7 @@ fstring_conversion_rule(Parser *p)
|
||||||
p->level--;
|
p->level--;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
expr_ty _res = NULL;
|
ResultTokenWithMetadata* _res = NULL;
|
||||||
int _mark = p->mark;
|
int _mark = p->mark;
|
||||||
{ // "!" NAME
|
{ // "!" NAME
|
||||||
if (p->error_indicator) {
|
if (p->error_indicator) {
|
||||||
|
@ -15753,7 +15753,7 @@ fstring_conversion_rule(Parser *p)
|
||||||
}
|
}
|
||||||
|
|
||||||
// fstring_full_format_spec: ':' fstring_format_spec*
|
// fstring_full_format_spec: ':' fstring_format_spec*
|
||||||
static expr_ty
|
static ResultTokenWithMetadata*
|
||||||
fstring_full_format_spec_rule(Parser *p)
|
fstring_full_format_spec_rule(Parser *p)
|
||||||
{
|
{
|
||||||
if (p->level++ == MAXSTACK) {
|
if (p->level++ == MAXSTACK) {
|
||||||
|
@ -15764,7 +15764,7 @@ fstring_full_format_spec_rule(Parser *p)
|
||||||
p->level--;
|
p->level--;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
expr_ty _res = NULL;
|
ResultTokenWithMetadata* _res = NULL;
|
||||||
int _mark = p->mark;
|
int _mark = p->mark;
|
||||||
if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
|
if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
|
||||||
p->error_indicator = 1;
|
p->error_indicator = 1;
|
||||||
|
@ -15781,10 +15781,10 @@ fstring_full_format_spec_rule(Parser *p)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
D(fprintf(stderr, "%*c> fstring_full_format_spec[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':' fstring_format_spec*"));
|
D(fprintf(stderr, "%*c> fstring_full_format_spec[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "':' fstring_format_spec*"));
|
||||||
Token * _literal;
|
Token * colon;
|
||||||
asdl_seq * spec;
|
asdl_seq * spec;
|
||||||
if (
|
if (
|
||||||
(_literal = _PyPegen_expect_token(p, 11)) // token=':'
|
(colon = _PyPegen_expect_token(p, 11)) // token=':'
|
||||||
&&
|
&&
|
||||||
(spec = _loop0_112_rule(p)) // fstring_format_spec*
|
(spec = _loop0_112_rule(p)) // fstring_format_spec*
|
||||||
)
|
)
|
||||||
|
@ -15799,7 +15799,7 @@ fstring_full_format_spec_rule(Parser *p)
|
||||||
UNUSED(_end_lineno); // Only used by EXTRA macro
|
UNUSED(_end_lineno); // Only used by EXTRA macro
|
||||||
int _end_col_offset = _token->end_col_offset;
|
int _end_col_offset = _token->end_col_offset;
|
||||||
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
UNUSED(_end_col_offset); // Only used by EXTRA macro
|
||||||
_res = spec ? _PyAST_JoinedStr ( ( asdl_expr_seq* ) spec , EXTRA ) : NULL;
|
_res = _PyPegen_setup_full_format_spec ( p , colon , ( asdl_expr_seq* ) spec , EXTRA );
|
||||||
if (_res == NULL && PyErr_Occurred()) {
|
if (_res == NULL && PyErr_Occurred()) {
|
||||||
p->error_indicator = 1;
|
p->error_indicator = 1;
|
||||||
p->level--;
|
p->level--;
|
||||||
|
|
|
@ -155,6 +155,16 @@ initialize_token(Parser *p, Token *parser_token, struct token *new_token, int to
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parser_token->metadata = NULL;
|
||||||
|
if (new_token->metadata != NULL) {
|
||||||
|
if (_PyArena_AddPyObject(p->arena, new_token->metadata) < 0) {
|
||||||
|
Py_DECREF(parser_token->metadata);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
parser_token->metadata = new_token->metadata;
|
||||||
|
new_token->metadata = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
parser_token->level = new_token->level;
|
parser_token->level = new_token->level;
|
||||||
parser_token->lineno = new_token->lineno;
|
parser_token->lineno = new_token->lineno;
|
||||||
parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
|
parser_token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + new_token->col_offset
|
||||||
|
@ -198,6 +208,7 @@ int
|
||||||
_PyPegen_fill_token(Parser *p)
|
_PyPegen_fill_token(Parser *p)
|
||||||
{
|
{
|
||||||
struct token new_token;
|
struct token new_token;
|
||||||
|
new_token.metadata = NULL;
|
||||||
int type = _PyTokenizer_Get(p->tok, &new_token);
|
int type = _PyTokenizer_Get(p->tok, &new_token);
|
||||||
|
|
||||||
// Record and skip '# type: ignore' comments
|
// Record and skip '# type: ignore' comments
|
||||||
|
@ -206,14 +217,14 @@ _PyPegen_fill_token(Parser *p)
|
||||||
char *tag = PyMem_Malloc(len + 1);
|
char *tag = PyMem_Malloc(len + 1);
|
||||||
if (tag == NULL) {
|
if (tag == NULL) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return -1;
|
goto error;
|
||||||
}
|
}
|
||||||
strncpy(tag, new_token.start, len);
|
strncpy(tag, new_token.start, len);
|
||||||
tag[len] = '\0';
|
tag[len] = '\0';
|
||||||
// Ownership of tag passes to the growable array
|
// Ownership of tag passes to the growable array
|
||||||
if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
|
if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return -1;
|
goto error;
|
||||||
}
|
}
|
||||||
type = _PyTokenizer_Get(p->tok, &new_token);
|
type = _PyTokenizer_Get(p->tok, &new_token);
|
||||||
}
|
}
|
||||||
|
@ -234,11 +245,14 @@ _PyPegen_fill_token(Parser *p)
|
||||||
|
|
||||||
// Check if we are at the limit of the token array capacity and resize if needed
|
// Check if we are at the limit of the token array capacity and resize if needed
|
||||||
if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
|
if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
|
||||||
return -1;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
Token *t = p->tokens[p->fill];
|
Token *t = p->tokens[p->fill];
|
||||||
return initialize_token(p, t, &new_token, type);
|
return initialize_token(p, t, &new_token, type);
|
||||||
|
error:
|
||||||
|
Py_XDECREF(new_token.metadata);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(Py_DEBUG)
|
#if defined(Py_DEBUG)
|
||||||
|
|
|
@ -39,6 +39,7 @@ typedef struct {
|
||||||
int level;
|
int level;
|
||||||
int lineno, col_offset, end_lineno, end_col_offset;
|
int lineno, col_offset, end_lineno, end_col_offset;
|
||||||
Memo *memo;
|
Memo *memo;
|
||||||
|
PyObject *metadata;
|
||||||
} Token;
|
} Token;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -118,6 +119,11 @@ typedef struct {
|
||||||
int is_keyword;
|
int is_keyword;
|
||||||
} KeywordOrStarred;
|
} KeywordOrStarred;
|
||||||
|
|
||||||
|
/* Pairs a grammar-action result with the metadata object taken from one of
 * the tokens that produced it, so both can be passed on together to a later
 * action. */
typedef struct {
|
||||||
|
void *result;  /* action result; f-string actions store an expr_ty here and cast it back */
|
||||||
|
PyObject *metadata;  /* copied from the originating Token's metadata; may be NULL */
|
||||||
|
} ResultTokenWithMetadata;
|
||||||
|
|
||||||
// Internal parser functions
|
// Internal parser functions
|
||||||
#if defined(Py_DEBUG)
|
#if defined(Py_DEBUG)
|
||||||
void _PyPegen_clear_memo_statistics(void);
|
void _PyPegen_clear_memo_statistics(void);
|
||||||
|
@ -310,7 +316,8 @@ StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
|
||||||
arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
|
arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
|
||||||
asdl_arg_seq *, asdl_seq *, StarEtc *);
|
asdl_arg_seq *, asdl_seq *, StarEtc *);
|
||||||
arguments_ty _PyPegen_empty_arguments(Parser *);
|
arguments_ty _PyPegen_empty_arguments(Parser *);
|
||||||
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, expr_ty, expr_ty, int, int, int, int, PyArena *);
|
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
|
||||||
|
int, int, int, int, PyArena *);
|
||||||
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
|
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
|
||||||
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
|
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
|
||||||
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
|
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
|
||||||
|
@ -329,7 +336,9 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
|
||||||
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
|
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
|
||||||
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
|
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
|
||||||
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
|
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
|
||||||
expr_ty _PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
|
ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
|
||||||
|
ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
|
||||||
|
int, int, PyArena *);
|
||||||
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
|
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
|
||||||
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
|
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
|
||||||
expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
|
expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
|
||||||
|
|
|
@ -165,6 +165,7 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
|
||||||
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct token new_token;
|
struct token new_token;
|
||||||
|
new_token.metadata = NULL;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
switch (_PyTokenizer_Get(p->tok, &new_token)) {
|
switch (_PyTokenizer_Get(p->tok, &new_token)) {
|
||||||
|
@ -192,6 +193,7 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
|
||||||
|
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
|
Py_XDECREF(new_token.metadata);
|
||||||
// If we're in an f-string, we want the syntax error in the expression part
|
// If we're in an f-string, we want the syntax error in the expression part
|
||||||
// to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
|
// to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
|
||||||
// do not swallow it.
|
// do not swallow it.
|
||||||
|
|
|
@ -111,7 +111,7 @@ tok_new(void)
|
||||||
tok->interactive_underflow = IUNDERFLOW_NORMAL;
|
tok->interactive_underflow = IUNDERFLOW_NORMAL;
|
||||||
tok->str = NULL;
|
tok->str = NULL;
|
||||||
tok->report_warnings = 1;
|
tok->report_warnings = 1;
|
||||||
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0};
|
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
|
||||||
tok->tok_mode_stack_index = 0;
|
tok->tok_mode_stack_index = 0;
|
||||||
tok->tok_report_warnings = 1;
|
tok->tok_report_warnings = 1;
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
|
@ -390,6 +390,28 @@ restore_fstring_buffers(struct tok_state *tok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
|
||||||
|
assert(token != NULL);
|
||||||
|
assert(c == '}' || c == ':' || c == '!');
|
||||||
|
tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
|
||||||
|
|
||||||
|
if (!tok_mode->f_string_debug || token->metadata) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *res = PyUnicode_DecodeUTF8(
|
||||||
|
tok_mode->last_expr_buffer,
|
||||||
|
tok_mode->last_expr_size - tok_mode->last_expr_end,
|
||||||
|
NULL
|
||||||
|
);
|
||||||
|
if (!res) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
token->metadata = res;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
update_fstring_expr(struct tok_state *tok, char cur)
|
update_fstring_expr(struct tok_state *tok, char cur)
|
||||||
{
|
{
|
||||||
|
@ -2224,6 +2246,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
the_current_tok->last_expr_buffer = NULL;
|
the_current_tok->last_expr_buffer = NULL;
|
||||||
the_current_tok->last_expr_size = 0;
|
the_current_tok->last_expr_size = 0;
|
||||||
the_current_tok->last_expr_end = -1;
|
the_current_tok->last_expr_end = -1;
|
||||||
|
the_current_tok->f_string_debug = 0;
|
||||||
|
|
||||||
switch (*tok->start) {
|
switch (*tok->start) {
|
||||||
case 'F':
|
case 'F':
|
||||||
|
@ -2350,10 +2373,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
* by the `{` case, so for ensuring that we are on the 0th level, we need
|
* by the `{` case, so for ensuring that we are on the 0th level, we need
|
||||||
* to adjust it manually */
|
* to adjust it manually */
|
||||||
int cursor = current_tok->curly_bracket_depth - (c != '{');
|
int cursor = current_tok->curly_bracket_depth - (c != '{');
|
||||||
|
|
||||||
if (cursor == 0 && !update_fstring_expr(tok, c)) {
|
if (cursor == 0 && !update_fstring_expr(tok, c)) {
|
||||||
return MAKE_TOKEN(ENDMARKER);
|
return MAKE_TOKEN(ENDMARKER);
|
||||||
}
|
}
|
||||||
|
if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) {
|
||||||
|
return MAKE_TOKEN(ERRORTOKEN);
|
||||||
|
}
|
||||||
|
|
||||||
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
|
if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
|
||||||
current_tok->kind = TOK_FSTRING_MODE;
|
current_tok->kind = TOK_FSTRING_MODE;
|
||||||
|
@ -2445,6 +2470,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
|
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
|
||||||
current_tok->curly_bracket_expr_start_depth--;
|
current_tok->curly_bracket_expr_start_depth--;
|
||||||
current_tok->kind = TOK_FSTRING_MODE;
|
current_tok->kind = TOK_FSTRING_MODE;
|
||||||
|
current_tok->f_string_debug = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -2458,6 +2484,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
|
||||||
return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%s", hex));
|
return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%s", hex));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
|
||||||
|
current_tok->f_string_debug = 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* Punctuation character */
|
/* Punctuation character */
|
||||||
p_start = tok->start;
|
p_start = tok->start;
|
||||||
p_end = tok->cur;
|
p_end = tok->cur;
|
||||||
|
|
|
@ -31,6 +31,7 @@ struct token {
|
||||||
int level;
|
int level;
|
||||||
int lineno, col_offset, end_lineno, end_col_offset;
|
int lineno, col_offset, end_lineno, end_col_offset;
|
||||||
const char *start, *end;
|
const char *start, *end;
|
||||||
|
PyObject *metadata;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum tokenizer_mode_kind_t {
|
enum tokenizer_mode_kind_t {
|
||||||
|
@ -58,6 +59,7 @@ typedef struct _tokenizer_mode {
|
||||||
Py_ssize_t last_expr_size;
|
Py_ssize_t last_expr_size;
|
||||||
Py_ssize_t last_expr_end;
|
Py_ssize_t last_expr_end;
|
||||||
char* last_expr_buffer;
|
char* last_expr_buffer;
|
||||||
|
int f_string_debug;
|
||||||
} tokenizer_mode;
|
} tokenizer_mode;
|
||||||
|
|
||||||
/* Tokenizer state */
|
/* Tokenizer state */
|
||||||
|
|
Loading…
Reference in New Issue