mirror of https://github.com/python/cpython
gh-105259: Ensure we don't show newline characters for trailing NEWLINE tokens (#105364)
This commit is contained in:
parent
0202aa002e
commit
c0a6ed3934
|
@ -1870,7 +1870,7 @@ class CTokenizeTest(TestCase):
|
||||||
TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
|
TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
|
||||||
TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
|
TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
|
||||||
TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
|
TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
|
||||||
TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
|
TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1\n'),
|
||||||
TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
|
TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
|
||||||
]
|
]
|
||||||
for encoding in ["utf-8", "latin-1", "utf-16"]:
|
for encoding in ["utf-8", "latin-1", "utf-16"]:
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Don't include the newline character for trailing ``NEWLINE`` tokens emitted in
|
||||||
|
the :mod:`tokenize` module. Patch by Pablo Galindo.
|
|
@ -114,6 +114,7 @@ tok_new(void)
|
||||||
tok->report_warnings = 1;
|
tok->report_warnings = 1;
|
||||||
tok->tok_extra_tokens = 0;
|
tok->tok_extra_tokens = 0;
|
||||||
tok->comment_newline = 0;
|
tok->comment_newline = 0;
|
||||||
|
tok->implicit_newline = 0;
|
||||||
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
|
tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
|
||||||
tok->tok_mode_stack_index = 0;
|
tok->tok_mode_stack_index = 0;
|
||||||
tok->tok_report_warnings = 1;
|
tok->tok_report_warnings = 1;
|
||||||
|
@ -355,10 +356,12 @@ tok_concatenate_interactive_new_line(struct tok_state *tok, const char *line) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
strcpy(new_str + current_size, line);
|
strcpy(new_str + current_size, line);
|
||||||
|
tok->implicit_newline = 0;
|
||||||
if (last_char != '\n') {
|
if (last_char != '\n') {
|
||||||
/* Last line does not end in \n, fake one */
|
/* Last line does not end in \n, fake one */
|
||||||
new_str[current_size + line_size - 1] = '\n';
|
new_str[current_size + line_size - 1] = '\n';
|
||||||
new_str[current_size + line_size] = '\0';
|
new_str[current_size + line_size] = '\0';
|
||||||
|
tok->implicit_newline = 1;
|
||||||
}
|
}
|
||||||
tok->interactive_src_start = new_str;
|
tok->interactive_src_start = new_str;
|
||||||
tok->interactive_src_end = new_str + current_size + line_size;
|
tok->interactive_src_end = new_str + current_size + line_size;
|
||||||
|
@ -1262,11 +1265,13 @@ tok_underflow_file(struct tok_state *tok) {
|
||||||
tok->done = E_EOF;
|
tok->done = E_EOF;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
tok->implicit_newline = 0;
|
||||||
if (tok->inp[-1] != '\n') {
|
if (tok->inp[-1] != '\n') {
|
||||||
assert(tok->inp + 1 < tok->end);
|
assert(tok->inp + 1 < tok->end);
|
||||||
/* Last line does not end in \n, fake one */
|
/* Last line does not end in \n, fake one */
|
||||||
*tok->inp++ = '\n';
|
*tok->inp++ = '\n';
|
||||||
*tok->inp = '\0';
|
*tok->inp = '\0';
|
||||||
|
tok->implicit_newline = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ADVANCE_LINENO();
|
ADVANCE_LINENO();
|
||||||
|
@ -1304,11 +1309,13 @@ tok_underflow_readline(struct tok_state* tok) {
|
||||||
tok->done = E_EOF;
|
tok->done = E_EOF;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
tok->implicit_newline = 0;
|
||||||
if (tok->inp[-1] != '\n') {
|
if (tok->inp[-1] != '\n') {
|
||||||
assert(tok->inp + 1 < tok->end);
|
assert(tok->inp + 1 < tok->end);
|
||||||
/* Last line does not end in \n, fake one */
|
/* Last line does not end in \n, fake one */
|
||||||
*tok->inp++ = '\n';
|
*tok->inp++ = '\n';
|
||||||
*tok->inp = '\0';
|
*tok->inp = '\0';
|
||||||
|
tok->implicit_newline = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ADVANCE_LINENO();
|
ADVANCE_LINENO();
|
||||||
|
|
|
@ -131,6 +131,7 @@ struct tok_state {
|
||||||
int tok_report_warnings;
|
int tok_report_warnings;
|
||||||
int tok_extra_tokens;
|
int tok_extra_tokens;
|
||||||
int comment_newline;
|
int comment_newline;
|
||||||
|
int implicit_newline;
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
int debug;
|
int debug;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -243,10 +243,12 @@ tokenizeriter_next(tokenizeriterobject *it)
|
||||||
}
|
}
|
||||||
else if (type == NEWLINE) {
|
else if (type == NEWLINE) {
|
||||||
Py_DECREF(str);
|
Py_DECREF(str);
|
||||||
if (it->tok->start[0] == '\r') {
|
if (!it->tok->implicit_newline) {
|
||||||
str = PyUnicode_FromString("\r\n");
|
if (it->tok->start[0] == '\r') {
|
||||||
} else {
|
str = PyUnicode_FromString("\r\n");
|
||||||
str = PyUnicode_FromString("\n");
|
} else {
|
||||||
|
str = PyUnicode_FromString("\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
end_col_offset++;
|
end_col_offset++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue