mirror of https://github.com/python/cpython
#27364: Deprecate invalid escape strings in str/bytes.
Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.
This commit is contained in:
parent
186122ead2
commit
110b6fecbb
|
@ -560,6 +560,10 @@ is more easily recognized as broken.) It is also important to note that the
|
||||||
escape sequences only recognized in string literals fall into the category of
|
escape sequences only recognized in string literals fall into the category of
|
||||||
unrecognized escapes for bytes literals.
|
unrecognized escapes for bytes literals.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.6
|
||||||
|
Unrecognized escape sequences produce a DeprecationWarning. In
|
||||||
|
some future version of Python they will be a SyntaxError.
|
||||||
|
|
||||||
Even in a raw literal, quotes can be escaped with a backslash, but the
|
Even in a raw literal, quotes can be escaped with a backslash, but the
|
||||||
backslash remains in the result; for example, ``r"\""`` is a valid string
|
backslash remains in the result; for example, ``r"\""`` is a valid string
|
||||||
literal consisting of two characters: a backslash and a double quote; ``r"\"``
|
literal consisting of two characters: a backslash and a double quote; ``r"\"``
|
||||||
|
|
|
@ -952,6 +952,11 @@ Deprecated features
|
||||||
parameter will be dropped in a future Python release and likely earlier
|
parameter will be dropped in a future Python release and likely earlier
|
||||||
through third party tools. See :issue:`27919` for details.
|
through third party tools. See :issue:`27919` for details.
|
||||||
|
|
||||||
|
* A backslash-character pair that is not a valid escape sequence now generates
|
||||||
|
a DeprecationWarning. Although this will eventually become a SyntaxError,
|
||||||
|
that will not be for several Python releases. (Contributed by Emanuel Barry
|
||||||
|
in :issue:`27364`.)
|
||||||
|
|
||||||
|
|
||||||
Deprecated Python behavior
|
Deprecated Python behavior
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase):
|
||||||
check(b"[\\\n]", b"[]")
|
check(b"[\\\n]", b"[]")
|
||||||
check(br'[\"]', b'["]')
|
check(br'[\"]', b'["]')
|
||||||
check(br"[\']", b"[']")
|
check(br"[\']", b"[']")
|
||||||
check(br"[\\]", br"[\]")
|
check(br"[\\]", b"[\\]")
|
||||||
check(br"[\a]", b"[\x07]")
|
check(br"[\a]", b"[\x07]")
|
||||||
check(br"[\b]", b"[\x08]")
|
check(br"[\b]", b"[\x08]")
|
||||||
check(br"[\t]", b"[\x09]")
|
check(br"[\t]", b"[\x09]")
|
||||||
|
@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase):
|
||||||
check(br"[\f]", b"[\x0c]")
|
check(br"[\f]", b"[\x0c]")
|
||||||
check(br"[\r]", b"[\x0d]")
|
check(br"[\r]", b"[\x0d]")
|
||||||
check(br"[\7]", b"[\x07]")
|
check(br"[\7]", b"[\x07]")
|
||||||
check(br"[\8]", br"[\8]")
|
|
||||||
check(br"[\78]", b"[\x078]")
|
check(br"[\78]", b"[\x078]")
|
||||||
check(br"[\41]", b"[!]")
|
check(br"[\41]", b"[!]")
|
||||||
check(br"[\418]", b"[!8]")
|
check(br"[\418]", b"[!8]")
|
||||||
|
@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase):
|
||||||
check(br"[\1010]", b"[A0]")
|
check(br"[\1010]", b"[A0]")
|
||||||
check(br"[\501]", b"[A]")
|
check(br"[\501]", b"[A]")
|
||||||
check(br"[\x41]", b"[A]")
|
check(br"[\x41]", b"[A]")
|
||||||
check(br"[\X41]", br"[\X41]")
|
|
||||||
check(br"[\x410]", b"[A0]")
|
check(br"[\x410]", b"[A0]")
|
||||||
for b in range(256):
|
for i in range(97, 123):
|
||||||
if b not in b'\n"\'\\abtnvfr01234567x':
|
b = bytes([i])
|
||||||
b = bytes([b])
|
if b not in b'abfnrtvx':
|
||||||
check(b'\\' + b, b'\\' + b)
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(b"\\" + b, b"\\" + b)
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(b"\\" + b.upper(), b"\\" + b.upper())
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(br"\8", b"\\8")
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(br"\9", b"\\9")
|
||||||
|
|
||||||
def test_errors(self):
|
def test_errors(self):
|
||||||
decode = codecs.escape_decode
|
decode = codecs.escape_decode
|
||||||
|
@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase):
|
||||||
check(br"[\f]", "[\x0c]")
|
check(br"[\f]", "[\x0c]")
|
||||||
check(br"[\r]", "[\x0d]")
|
check(br"[\r]", "[\x0d]")
|
||||||
check(br"[\7]", "[\x07]")
|
check(br"[\7]", "[\x07]")
|
||||||
check(br"[\8]", r"[\8]")
|
|
||||||
check(br"[\78]", "[\x078]")
|
check(br"[\78]", "[\x078]")
|
||||||
check(br"[\41]", "[!]")
|
check(br"[\41]", "[!]")
|
||||||
check(br"[\418]", "[!8]")
|
check(br"[\418]", "[!8]")
|
||||||
|
@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase):
|
||||||
check(br"[\x410]", "[A0]")
|
check(br"[\x410]", "[A0]")
|
||||||
check(br"\u20ac", "\u20ac")
|
check(br"\u20ac", "\u20ac")
|
||||||
check(br"\U0001d120", "\U0001d120")
|
check(br"\U0001d120", "\U0001d120")
|
||||||
for b in range(256):
|
for i in range(97, 123):
|
||||||
if b not in b'\n"\'\\abtnvfr01234567xuUN':
|
b = bytes([i])
|
||||||
check(b'\\' + bytes([b]), '\\' + chr(b))
|
if b not in b'abfnrtuvx':
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(b"\\" + b, "\\" + chr(i))
|
||||||
|
if b.upper() not in b'UN':
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(b"\\" + b.upper(), "\\" + chr(i-32))
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(br"\8", "\\8")
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
check(br"\9", "\\9")
|
||||||
|
|
||||||
def test_decode_errors(self):
|
def test_decode_errors(self):
|
||||||
decode = codecs.unicode_escape_decode
|
decode = codecs.unicode_escape_decode
|
||||||
|
|
|
@ -10,6 +10,7 @@ import codecs
|
||||||
import itertools
|
import itertools
|
||||||
import operator
|
import operator
|
||||||
import struct
|
import struct
|
||||||
|
import string
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
import warnings
|
import warnings
|
||||||
|
@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
support.check_free_after_iterating(self, iter, str)
|
support.check_free_after_iterating(self, iter, str)
|
||||||
support.check_free_after_iterating(self, reversed, str)
|
support.check_free_after_iterating(self, reversed, str)
|
||||||
|
|
||||||
|
def test_invalid_sequences(self):
|
||||||
|
for letter in string.ascii_letters + "89": # 0-7 are octal escapes
|
||||||
|
if letter in "abfnrtuvxNU":
|
||||||
|
continue
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
eval(r"'\%s'" % letter)
|
||||||
|
|
||||||
class StringModuleTest(unittest.TestCase):
|
class StringModuleTest(unittest.TestCase):
|
||||||
def test_formatter_parser(self):
|
def test_formatter_parser(self):
|
||||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #27364: A backslash-character pair that is not a valid escape sequence
|
||||||
|
now generates a DeprecationWarning.
|
||||||
|
|
||||||
- Issue #27350: `dict` implementation is changed like PyPy. It is more compact
|
- Issue #27350: `dict` implementation is changed like PyPy. It is more compact
|
||||||
and preserves insertion order.
|
and preserves insertion order.
|
||||||
(Concept developed by Raymond Hettinger and patch by Inada Naoki.)
|
(Concept developed by Raymond Hettinger and patch by Inada Naoki.)
|
||||||
|
|
|
@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
|
||||||
|
goto failed;
|
||||||
*p++ = '\\';
|
*p++ = '\\';
|
||||||
s--;
|
|
||||||
goto non_esc; /* an arbitrary number of unescaped
|
goto non_esc; /* an arbitrary number of unescaped
|
||||||
UTF-8 bytes may follow. */
|
UTF-8 bytes may follow. */
|
||||||
}
|
}
|
||||||
|
|
|
@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
||||||
|
"invalid escape sequence '\\%c'", c) < 0)
|
||||||
|
goto onError;
|
||||||
WRITE_ASCII_CHAR('\\');
|
WRITE_ASCII_CHAR('\\');
|
||||||
WRITE_CHAR(c);
|
WRITE_CHAR(c);
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue