bpo-43833: Emit warnings for numeric literals followed by keyword (GH-25466)

Emit a deprecation warning if the numeric literal is immediately followed by
one of keywords: and, else, for, if, in, is, or. Raise a syntax error with
more informative message if it is immediately followed by other keyword or
identifier.

Automerge-Triggered-By: GH:pablogsal
This commit is contained in:
Serhiy Storchaka 2021-06-09 02:31:10 +03:00 committed by GitHub
parent 3e1c7167d8
commit 2ea6d89028
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 246 additions and 3 deletions

View File

@ -1444,6 +1444,17 @@ Optimizations
Deprecated
==========
* Currently Python accepts numeric literals immediately followed by keywords,
for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing
and ambiguous expressions like ``[0x1for x in y]`` (which can be
interpreted as ``[0x1 for x in y]`` or ``[0x1f or x in y]``). Starting in
this release, a deprecation warning is raised if the numeric literal is
immediately followed by one of keywords :keyword:`and`, :keyword:`else`,
:keyword:`for`, :keyword:`if`, :keyword:`in`, :keyword:`is` and :keyword:`or`.
In future releases it will be changed to a syntax warning, and finally to a
syntax error.
(Contributed by Serhiy Storchaka in :issue:`43833`).
* Starting in this release, there will be a concerted effort to begin
cleaning up old import semantics that were kept for Python 2.7
compatibility. Specifically,
@ -1670,6 +1681,18 @@ This section lists previously described changes and other bugfixes
that may require changes to your code.
Changes in the Python syntax
----------------------------
* Deprecation warning is now emitted when compiling previously valid syntax
if the numeric literal is immediately followed by a keyword (like in ``0in x``).
In future releases it will be changed to a syntax warning, and finally to a
syntax error. To get rid of the warning and make the code compatible with
future releases just add a space between the numeric literal and the
following keyword.
(Contributed by Serhiy Storchaka in :issue:`43833`).
Changes in the Python API
-------------------------

View File

@ -162,7 +162,7 @@ if 1:
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
"080000000000000", "000000000000009", "000000000000008",
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
"0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777",
"0b101j", "0o153j", "0b100e1", "0o777e1", "0777",
"000777", "000000000000007"]:
self.assertRaises(SyntaxError, eval, arg)

View File

@ -177,8 +177,10 @@ class TokenTests(unittest.TestCase):
def test_float_exponent_tokenization(self):
# See issue 21642.
self.assertEqual(1 if 1else 0, 1)
self.assertEqual(1 if 0else 0, 0)
with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
self.assertEqual(eval("1 if 1else 0"), 1)
self.assertEqual(eval("1 if 0else 0"), 0)
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")
def test_underscore_literals(self):
@ -211,6 +213,92 @@ class TokenTests(unittest.TestCase):
check("1e2_", "invalid decimal literal")
check("1e+", "invalid decimal literal")
def test_end_of_numerical_literals(self):
# Numeric literals written with no space before a following keyword or
# identifier: the keywords and/else/for/if/in/is/or are expected to give a
# DeprecationWarning, anything else a SyntaxError without any warning.
# check(): compiling the expression must emit a DeprecationWarning.
def check(test):
with self.assertWarns(DeprecationWarning):
compile(test, "<testcase>", "eval")
# check_error(): compiling must raise SyntaxError and record no warnings.
def check_error(test):
with warnings.catch_warnings(record=True) as w:
with self.assertRaises(SyntaxError):
compile(test, "<testcase>", "eval")
self.assertEqual(w, [])
# "and".  In the hex case the 'a' is itself a hex digit, so the literal is
# "0xfa" and what follows ("nd") is a plain identifier -> error.
check_error("0xfand x")
check("0o7and x")
check("0b1and x")
check("9and x")
check("0and x")
check("1.and x")
check("1e3and x")
check("1jand x")
# "or".  "0xfor" is "0xf" followed by "or".
check("0xfor x")
check("0o7or x")
check("0b1or x")
check("9or x")
# "0o" is taken as the octal prefix, so this is an error, not `0 or x`.
check_error("0or x")
check("1.or x")
check("1e3or x")
check("1jor x")
# "in"
check("0xfin x")
check("0o7in x")
check("0b1in x")
check("9in x")
check("0in x")
check("1.in x")
check("1e3in x")
check("1jin x")
# "is".  NOTE(review): the catch_warnings block presumably wraps the `is`
# checks that follow, silencing the separate SyntaxWarning for `is` with a
# literal; the nesting is not visible in this flattened view -- confirm.
with warnings.catch_warnings():
warnings.simplefilter('ignore', SyntaxWarning)
check("0xfis x")
check("0o7is x")
check("0b1is x")
check("9is x")
check("0is x")
check("1.is x")
check("1e3is x")
check("1jis x")
# "if"
check("0xfif x else y")
check("0o7if x else y")
check("0b1if x else y")
check("9if x else y")
check("0if x else y")
check("1.if x else y")
check("1e3if x else y")
check("1jif x else y")
# "else".  In the hex case the 'e' is a hex digit, so the literal is "0xfe"
# and what follows ("lse") is a plain identifier -> error.
check_error("x if 0xfelse y")
check("x if 0o7else y")
check("x if 0b1else y")
check("x if 9else y")
check("x if 0else y")
check("x if 1.else y")
check("x if 1e3else y")
check("x if 1jelse y")
# "for".  In the three hex cases the 'f' is consumed as a hex digit, so the
# keyword that actually follows the literal is "or".
check("[0x1ffor x in ()]")
check("[0x1for x in ()]")
check("[0xfor x in ()]")
check("[0o7for x in ()]")
check("[0b1for x in ()]")
check("[9for x in ()]")
check("[1.for x in ()]")
check("[1e3for x in ()]")
check("[1jfor x in ()]")
# Followed by an ordinary identifier: syntax error and no warning.
check_error("0xfspam")
check_error("0o7spam")
check_error("0b1spam")
check_error("9spam")
check_error("0spam")
check_error("1.spam")
check_error("1e3spam")
check_error("1jspam")
def test_string_literals(self):
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)

View File

@ -0,0 +1,4 @@
Emit a deprecation warning if the numeric literal is immediately followed by
one of keywords: and, else, for, if, in, is, or. Raise a syntax error with
more informative message if it is immediately followed by other keyword or
identifier.

View File

@ -1121,6 +1121,113 @@ indenterror(struct tok_state *tok)
return ERRORTOKEN;
}
/* Format a message and issue it as a DeprecationWarning attributed to
   tok->filename / tok->lineno.

   Return 0 if the warning was issued.  Return -1 and set tok->done to
   E_ERROR if the message could not be built or issuing the warning left an
   exception set. */
static int
parser_warn(struct tok_state *tok, const char *format, ...)
{
    PyObject *msg;
    int status = -1;
    va_list vargs;

#ifdef HAVE_STDARG_PROTOTYPES
    va_start(vargs, format);
#else
    va_start(vargs);
#endif
    msg = PyUnicode_FromFormatV(format, vargs);
    va_end(vargs);

    if (msg != NULL) {
        if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
                                     tok->filename, tok->lineno,
                                     NULL, NULL) >= 0) {
            status = 0;
        }
        else if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
            /* The warning itself was raised as an exception: swap it for a
               SyntaxError, which gives a more accurate error report. */
            PyErr_Clear();
            syntaxerror(tok, "%U", msg);
        }
        /* Any other exception is left set as it is. */
    }

    Py_XDECREF(msg);
    if (status != 0) {
        tok->done = E_ERROR;
    }
    return status;
}
/* Peek at the upcoming input without consuming it.

   Return 1 if the next characters are exactly `test` and the character
   right after them cannot be part of an identifier; return 0 otherwise.
   Every character read is pushed back before returning. */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *pos = test;
    int matched = 0;
    int ch = tok_nextc(tok);

    /* Advance for as long as the input agrees with `test`. */
    while (*pos != 0 && ch == *pos) {
        pos++;
        ch = tok_nextc(tok);
    }

    if (*pos == 0) {
        /* All of `test` was seen; `ch` is the character that follows it. */
        matched = !is_potential_identifier_char(ch);
    }

    /* Undo the reads: the last character first, then the matched prefix
       in reverse order. */
    tok_backup(tok, ch);
    while (pos != test) {
        pos--;
        tok_backup(tok, *pos);
    }
    return matched;
}
static int
verify_end_of_number(struct tok_state *tok, int c, const char *kind)
{
    /* Check `c`, the character read right after a numeric literal.
     *
     * Emit a deprecation warning only if the numeric literal is immediately
     * followed by one of the keywords which can occur after a numeric
     * literal in valid code: "and", "else", "for", "if", "in", "is" and "or".
     * This allows existing valid code to be deprecated gradually without
     * adding a warning before the error in most cases of an invalid numeric
     * literal (which would be confusing and break existing tests).
     * Raise a syntax error with a slightly better message than plain
     * "invalid syntax" if the numeric literal is immediately followed by
     * any other keyword or identifier.
     *
     * `kind` is interpolated into the message "invalid %s literal".
     *
     * Return 1 if tokenizing can go on (`c` is still consumed), or 0 if an
     * error was reported (`c` has been pushed back).
     */
    int r = 0;
    if (c == 'a') {
        r = lookahead(tok, "nd");
    }
    else if (c == 'e') {
        r = lookahead(tok, "lse");
    }
    else if (c == 'f') {
        r = lookahead(tok, "or");
    }
    else if (c == 'i') {
        /* "if", "in" or "is".  Go through lookahead() like the other
         * branches so that the keyword has to end here: in "1inx" the
         * literal is followed by an identifier, which must get the error
         * below without a warning first. */
        r = lookahead(tok, "f") || lookahead(tok, "n") || lookahead(tok, "s");
    }
    else if (c == 'o') {
        r = lookahead(tok, "r");
    }

    if (r) {
        /* `c` is pushed back while the warning is issued and read again
         * once it succeeded. */
        tok_backup(tok, c);
        if (parser_warn(tok, "invalid %s literal", kind)) {
            return 0;
        }
        tok_nextc(tok);
    }
    /* In future releases, only error will remain. */
    else if (is_potential_identifier_char(c)) {
        tok_backup(tok, c);
        syntaxerror(tok, "invalid %s literal", kind);
        return 0;
    }
    return 1;
}
/* Verify that the identifier follows PEP 3131.
All identifier strings are guaranteed to be "ready" unicode objects.
*/
@ -1569,6 +1676,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
} while (isxdigit(c));
} while (c == '_');
if (!verify_end_of_number(tok, c, "hexadecimal")) {
return ERRORTOKEN;
}
}
else if (c == 'o' || c == 'O') {
/* Octal */
@ -1595,6 +1705,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
return syntaxerror(tok,
"invalid digit '%c' in octal literal", c);
}
if (!verify_end_of_number(tok, c, "octal")) {
return ERRORTOKEN;
}
}
else if (c == 'b' || c == 'B') {
/* Binary */
@ -1621,6 +1734,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
return syntaxerror(tok,
"invalid digit '%c' in binary literal", c);
}
if (!verify_end_of_number(tok, c, "binary")) {
return ERRORTOKEN;
}
}
else {
int nonzero = 0;
@ -1664,6 +1780,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
"literals are not permitted; "
"use an 0o prefix for octal integers");
}
if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
}
}
}
else {
@ -1699,6 +1818,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
} else if (!isdigit(c)) {
tok_backup(tok, c);
if (!verify_end_of_number(tok, e, "decimal")) {
return ERRORTOKEN;
}
tok_backup(tok, e);
*p_start = tok->start;
*p_end = tok->cur;
@ -1713,6 +1835,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
if (!verify_end_of_number(tok, c, "imaginary")) {
return ERRORTOKEN;
}
}
else if (!verify_end_of_number(tok, c, "decimal")) {
return ERRORTOKEN;
}
}
}