diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index b3b71aff51d..48f20434f01 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the escape sequences only recognized in string literals fall into the category of unrecognized escapes for bytes literals. + .. versionchanged:: 3.6 + Unrecognized escape sequences produce a DeprecationWarning. In + some future version of Python they will be a SyntaxError. + Even in a raw literal, quotes can be escaped with a backslash, but the backslash remains in the result; for example, ``r"\""`` is a valid string literal consisting of two characters: a backslash and a double quote; ``r"\"`` diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index e53d48e7fa7..a76ac9d38e6 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -952,6 +952,11 @@ Deprecated features parameter will be dropped in a future Python release and likely earlier through third party tools. See :issue:`27919` for details. +* A backslash-character pair that is not a valid escape sequence now generates + a DeprecationWarning. Although this will eventually become a SyntaxError, + that will not be for several Python releases. (Contributed by Emanuel Barry + in :issue:`27364`.) 
+ Deprecated Python behavior -------------------------- diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 1af552405c2..4d91a07868f 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase): check(b"[\\\n]", b"[]") check(br'[\"]', b'["]') check(br"[\']", b"[']") - check(br"[\\]", br"[\]") + check(br"[\\]", b"[\\]") check(br"[\a]", b"[\x07]") check(br"[\b]", b"[\x08]") check(br"[\t]", b"[\x09]") @@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase): check(br"[\f]", b"[\x0c]") check(br"[\r]", b"[\x0d]") check(br"[\7]", b"[\x07]") - check(br"[\8]", br"[\8]") check(br"[\78]", b"[\x078]") check(br"[\41]", b"[!]") check(br"[\418]", b"[!8]") @@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase): check(br"[\1010]", b"[A0]") check(br"[\501]", b"[A]") check(br"[\x41]", b"[A]") - check(br"[\X41]", br"[\X41]") check(br"[\x410]", b"[A0]") - for b in range(256): - if b not in b'\n"\'\\abtnvfr01234567x': - b = bytes([b]) - check(b'\\' + b, b'\\' + b) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtvx': + with self.assertWarns(DeprecationWarning): + check(b"\\" + b, b"\\" + b) + with self.assertWarns(DeprecationWarning): + check(b"\\" + b.upper(), b"\\" + b.upper()) + with self.assertWarns(DeprecationWarning): + check(br"\8", b"\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", b"\\9") def test_errors(self): decode = codecs.escape_decode @@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase): check(br"[\f]", "[\x0c]") check(br"[\r]", "[\x0d]") check(br"[\7]", "[\x07]") - check(br"[\8]", r"[\8]") check(br"[\78]", "[\x078]") check(br"[\41]", "[!]") check(br"[\418]", "[!8]") @@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase): check(br"[\x410]", "[A0]") check(br"\u20ac", "\u20ac") check(br"\U0001d120", "\U0001d120") - for b in range(256): - if b not in b'\n"\'\\abtnvfr01234567xuUN': - check(b'\\' + bytes([b]), '\\' 
+ chr(b)) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtuvx': + with self.assertWarns(DeprecationWarning): + check(b"\\" + b, "\\" + chr(i)) + if b.upper() not in b'UN': + with self.assertWarns(DeprecationWarning): + check(b"\\" + b.upper(), "\\" + chr(i-32)) + with self.assertWarns(DeprecationWarning): + check(br"\8", "\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", "\\9") def test_decode_errors(self): decode = codecs.unicode_escape_decode diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9ab624e6fc5..2684b940ef5 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -10,6 +10,7 @@ import codecs import itertools import operator import struct +import string import sys import unittest import warnings @@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest, support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) + def test_invalid_sequences(self): + for letter in string.ascii_letters + "89": # 0-7 are octal escapes + if letter in "abfnrtuvxNU": + continue + with self.assertWarns(DeprecationWarning): + eval(r"'\%s'" % letter) class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Misc/NEWS b/Misc/NEWS index a55400ff5dc..8f1b724f674 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1 Core and Builtins ----------------- +- Issue #27364: A backslash-character pair that is not a valid escape sequence + now generates a DeprecationWarning. + - Issue #27350: `dict` implementation is changed like PyPy. It is more compact and preserves insertion order. (Concept developed by Raymond Hettinger and patch by Inada Naoki.) 
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b0d9b398252..6e7c4fa1886 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s, break; default: + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", (unsigned char)*(--s)) < 0) + goto failed; *p++ = '\\'; - s--; goto non_esc; /* an arbitrary number of unescaped UTF-8 bytes may follow. */ } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7979eec8456..e0c3bfecdd8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s, goto error; default: + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", c) < 0) + goto onError; WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); continue;