mirror of https://github.com/python/cpython
#27364: Deprecate invalid escape strings in str/bytes.
Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.
This commit is contained in:
parent
186122ead2
commit
110b6fecbb
|
@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the
|
|||
escape sequences only recognized in string literals fall into the category of
|
||||
unrecognized escapes for bytes literals.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Unrecognized escape sequences produce a DeprecationWarning. In
|
||||
some future version of Python they will be a SyntaxError.
|
||||
|
||||
Even in a raw literal, quotes can be escaped with a backslash, but the
|
||||
backslash remains in the result; for example, ``r"\""`` is a valid string
|
||||
literal consisting of two characters: a backslash and a double quote; ``r"\"``
|
||||
|
|
|
@ -952,6 +952,11 @@ Deprecated features
|
|||
parameter will be dropped in a future Python release and likely earlier
|
||||
through third party tools. See :issue:`27919` for details.
|
||||
|
||||
* A backslash-character pair that is not a valid escape sequence now generates
|
||||
a DeprecationWarning. Although this will eventually become a SyntaxError,
|
||||
that will not be for several Python releases. (Contributed by Emanuel Barry
|
||||
in :issue:`27364`.)
|
||||
|
||||
|
||||
Deprecated Python behavior
|
||||
--------------------------
|
||||
|
|
|
@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase):
|
|||
check(b"[\\\n]", b"[]")
|
||||
check(br'[\"]', b'["]')
|
||||
check(br"[\']", b"[']")
|
||||
check(br"[\\]", br"[\]")
|
||||
check(br"[\\]", b"[\\]")
|
||||
check(br"[\a]", b"[\x07]")
|
||||
check(br"[\b]", b"[\x08]")
|
||||
check(br"[\t]", b"[\x09]")
|
||||
|
@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase):
|
|||
check(br"[\f]", b"[\x0c]")
|
||||
check(br"[\r]", b"[\x0d]")
|
||||
check(br"[\7]", b"[\x07]")
|
||||
check(br"[\8]", br"[\8]")
|
||||
check(br"[\78]", b"[\x078]")
|
||||
check(br"[\41]", b"[!]")
|
||||
check(br"[\418]", b"[!8]")
|
||||
|
@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase):
|
|||
check(br"[\1010]", b"[A0]")
|
||||
check(br"[\501]", b"[A]")
|
||||
check(br"[\x41]", b"[A]")
|
||||
check(br"[\X41]", br"[\X41]")
|
||||
check(br"[\x410]", b"[A0]")
|
||||
for b in range(256):
|
||||
if b not in b'\n"\'\\abtnvfr01234567x':
|
||||
b = bytes([b])
|
||||
check(b'\\' + b, b'\\' + b)
|
||||
for i in range(97, 123):
|
||||
b = bytes([i])
|
||||
if b not in b'abfnrtvx':
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(b"\\" + b, b"\\" + b)
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(b"\\" + b.upper(), b"\\" + b.upper())
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(br"\8", b"\\8")
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(br"\9", b"\\9")
|
||||
|
||||
def test_errors(self):
|
||||
decode = codecs.escape_decode
|
||||
|
@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase):
|
|||
check(br"[\f]", "[\x0c]")
|
||||
check(br"[\r]", "[\x0d]")
|
||||
check(br"[\7]", "[\x07]")
|
||||
check(br"[\8]", r"[\8]")
|
||||
check(br"[\78]", "[\x078]")
|
||||
check(br"[\41]", "[!]")
|
||||
check(br"[\418]", "[!8]")
|
||||
|
@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase):
|
|||
check(br"[\x410]", "[A0]")
|
||||
check(br"\u20ac", "\u20ac")
|
||||
check(br"\U0001d120", "\U0001d120")
|
||||
for b in range(256):
|
||||
if b not in b'\n"\'\\abtnvfr01234567xuUN':
|
||||
check(b'\\' + bytes([b]), '\\' + chr(b))
|
||||
for i in range(97, 123):
|
||||
b = bytes([i])
|
||||
if b not in b'abfnrtuvx':
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(b"\\" + b, "\\" + chr(i))
|
||||
if b.upper() not in b'UN':
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(b"\\" + b.upper(), "\\" + chr(i-32))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(br"\8", "\\8")
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
check(br"\9", "\\9")
|
||||
|
||||
def test_decode_errors(self):
|
||||
decode = codecs.unicode_escape_decode
|
||||
|
|
|
@ -10,6 +10,7 @@ import codecs
|
|||
import itertools
|
||||
import operator
|
||||
import struct
|
||||
import string
|
||||
import sys
|
||||
import unittest
|
||||
import warnings
|
||||
|
@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
support.check_free_after_iterating(self, iter, str)
|
||||
support.check_free_after_iterating(self, reversed, str)
|
||||
|
||||
def test_invalid_sequences(self):
|
||||
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
|
||||
if letter in "abfnrtuvxNU":
|
||||
continue
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
eval(r"'\%s'" % letter)
|
||||
|
||||
class StringModuleTest(unittest.TestCase):
|
||||
def test_formatter_parser(self):
|
||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #27364: A backslash-character pair that is not a valid escape sequence
|
||||
now generates a DeprecationWarning.
|
||||
|
||||
- Issue #27350: The `dict` implementation is changed to be like PyPy's. It is more compact
|
||||
and preserves insertion order.
|
||||
(Concept developed by Raymond Hettinger and patch by Inada Naoki.)
|
||||
|
|
|
@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
|
|||
break;
|
||||
|
||||
default:
|
||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
|
||||
goto failed;
|
||||
*p++ = '\\';
|
||||
s--;
|
||||
goto non_esc; /* an arbitrary number of unescaped
|
||||
UTF-8 bytes may follow. */
|
||||
}
|
||||
|
|
|
@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
|
|||
goto error;
|
||||
|
||||
default:
|
||||
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||
"invalid escape sequence '\\%c'", c) < 0)
|
||||
goto onError;
|
||||
WRITE_ASCII_CHAR('\\');
|
||||
WRITE_CHAR(c);
|
||||
continue;
|
||||
|
|
Loading…
Reference in New Issue