#27364: Deprecate invalid escape strings in str/bytes.

Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.
This commit is contained in:
R David Murray 2016-09-08 15:34:08 -04:00
parent 186122ead2
commit 110b6fecbb
7 changed files with 48 additions and 12 deletions

View File

@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the
escape sequences only recognized in string literals fall into the category of escape sequences only recognized in string literals fall into the category of
unrecognized escapes for bytes literals. unrecognized escapes for bytes literals.
.. versionchanged:: 3.6
Unrecognized escape sequences produce a DeprecationWarning. In
some future version of Python they will be a SyntaxError.
Even in a raw literal, quotes can be escaped with a backslash, but the Even in a raw literal, quotes can be escaped with a backslash, but the
backslash remains in the result; for example, ``r"\""`` is a valid string backslash remains in the result; for example, ``r"\""`` is a valid string
literal consisting of two characters: a backslash and a double quote; ``r"\"`` literal consisting of two characters: a backslash and a double quote; ``r"\"``

View File

@ -952,6 +952,11 @@ Deprecated features
parameter will be dropped in a future Python release and likely earlier parameter will be dropped in a future Python release and likely earlier
through third party tools. See :issue:`27919` for details. through third party tools. See :issue:`27919` for details.
* A backslash-character pair that is not a valid escape sequence now generates
a DeprecationWarning. Although this will eventually become a SyntaxError,
that will not be for several Python releases. (Contributed by Emanuel Barry
in :issue:`27364`.)
Deprecated Python behavior Deprecated Python behavior
-------------------------- --------------------------

View File

@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase):
check(b"[\\\n]", b"[]") check(b"[\\\n]", b"[]")
check(br'[\"]', b'["]') check(br'[\"]', b'["]')
check(br"[\']", b"[']") check(br"[\']", b"[']")
check(br"[\\]", br"[\]") check(br"[\\]", b"[\\]")
check(br"[\a]", b"[\x07]") check(br"[\a]", b"[\x07]")
check(br"[\b]", b"[\x08]") check(br"[\b]", b"[\x08]")
check(br"[\t]", b"[\x09]") check(br"[\t]", b"[\x09]")
@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase):
check(br"[\f]", b"[\x0c]") check(br"[\f]", b"[\x0c]")
check(br"[\r]", b"[\x0d]") check(br"[\r]", b"[\x0d]")
check(br"[\7]", b"[\x07]") check(br"[\7]", b"[\x07]")
check(br"[\8]", br"[\8]")
check(br"[\78]", b"[\x078]") check(br"[\78]", b"[\x078]")
check(br"[\41]", b"[!]") check(br"[\41]", b"[!]")
check(br"[\418]", b"[!8]") check(br"[\418]", b"[!8]")
@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase):
check(br"[\1010]", b"[A0]") check(br"[\1010]", b"[A0]")
check(br"[\501]", b"[A]") check(br"[\501]", b"[A]")
check(br"[\x41]", b"[A]") check(br"[\x41]", b"[A]")
check(br"[\X41]", br"[\X41]")
check(br"[\x410]", b"[A0]") check(br"[\x410]", b"[A0]")
for b in range(256): for i in range(97, 123):
if b not in b'\n"\'\\abtnvfr01234567x': b = bytes([i])
b = bytes([b]) if b not in b'abfnrtvx':
check(b'\\' + b, b'\\' + b) with self.assertWarns(DeprecationWarning):
check(b"\\" + b, b"\\" + b)
with self.assertWarns(DeprecationWarning):
check(b"\\" + b.upper(), b"\\" + b.upper())
with self.assertWarns(DeprecationWarning):
check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", b"\\9")
def test_errors(self): def test_errors(self):
decode = codecs.escape_decode decode = codecs.escape_decode
@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase):
check(br"[\f]", "[\x0c]") check(br"[\f]", "[\x0c]")
check(br"[\r]", "[\x0d]") check(br"[\r]", "[\x0d]")
check(br"[\7]", "[\x07]") check(br"[\7]", "[\x07]")
check(br"[\8]", r"[\8]")
check(br"[\78]", "[\x078]") check(br"[\78]", "[\x078]")
check(br"[\41]", "[!]") check(br"[\41]", "[!]")
check(br"[\418]", "[!8]") check(br"[\418]", "[!8]")
@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase):
check(br"[\x410]", "[A0]") check(br"[\x410]", "[A0]")
check(br"\u20ac", "\u20ac") check(br"\u20ac", "\u20ac")
check(br"\U0001d120", "\U0001d120") check(br"\U0001d120", "\U0001d120")
for b in range(256): for i in range(97, 123):
if b not in b'\n"\'\\abtnvfr01234567xuUN': b = bytes([i])
check(b'\\' + bytes([b]), '\\' + chr(b)) if b not in b'abfnrtuvx':
with self.assertWarns(DeprecationWarning):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
with self.assertWarns(DeprecationWarning):
check(b"\\" + b.upper(), "\\" + chr(i-32))
with self.assertWarns(DeprecationWarning):
check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", "\\9")
def test_decode_errors(self): def test_decode_errors(self):
decode = codecs.unicode_escape_decode decode = codecs.unicode_escape_decode

View File

@ -10,6 +10,7 @@ import codecs
import itertools import itertools
import operator import operator
import struct import struct
import string
import sys import sys
import unittest import unittest
import warnings import warnings
@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest,
support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str) support.check_free_after_iterating(self, reversed, str)
def test_invalid_sequences(self):
# Place each ASCII letter, plus the digits 8 and 9, after a backslash in a
# str literal. Unless the character is in the skip list below (characters
# that begin a recognized escape), evaluating the literal must emit a
# DeprecationWarning.
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
if letter in "abfnrtuvxNU":
continue
with self.assertWarns(DeprecationWarning):
eval(r"'\%s'" % letter)
class StringModuleTest(unittest.TestCase): class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self): def test_formatter_parser(self):

View File

@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #27364: A backslash-character pair that is not a valid escape sequence
now generates a DeprecationWarning.
- Issue #27350: `dict` implementation is changed like PyPy. It is more compact - Issue #27350: `dict` implementation is changed like PyPy. It is more compact
and preserves insertion order. and preserves insertion order.
(Concept developed by Raymond Hettinger and patch by Inada Naoki.) (Concept developed by Raymond Hettinger and patch by Inada Naoki.)

View File

@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
break; break;
default: default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
goto failed;
*p++ = '\\'; *p++ = '\\';
s--;
goto non_esc; /* an arbitrary number of unescaped goto non_esc; /* an arbitrary number of unescaped
UTF-8 bytes may follow. */ UTF-8 bytes may follow. */
} }

View File

@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
goto error; goto error;
default: default:
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'", c) < 0)
goto onError;
WRITE_ASCII_CHAR('\\'); WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c); WRITE_CHAR(c);
continue; continue;