Issue #15096: Drop support for the ur string prefix
This commit is contained in:
parent
10c8791978
commit
0b3847de6d
|
@ -401,7 +401,7 @@ String literals are described by the following lexical definitions:
|
||||||
|
|
||||||
.. productionlist::
|
.. productionlist::
|
||||||
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
|
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
|
||||||
stringprefix: "r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR"
|
stringprefix: "r" | "u" | "R" | "U"
|
||||||
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
|
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
|
||||||
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
|
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
|
||||||
shortstringitem: `shortstringchar` | `stringescapeseq`
|
shortstringitem: `shortstringchar` | `stringescapeseq`
|
||||||
|
@ -444,19 +444,21 @@ must be expressed with escapes.
|
||||||
As of Python 3.3 it is possible again to prefix unicode strings with a
|
As of Python 3.3 it is possible again to prefix unicode strings with a
|
||||||
``u`` prefix to simplify maintenance of dual 2.x and 3.x codebases.
|
``u`` prefix to simplify maintenance of dual 2.x and 3.x codebases.
|
||||||
|
|
||||||
Both string and bytes literals may optionally be prefixed with a letter ``'r'``
|
Both string and bytes literals may optionally be prefixed with a letter ``'r'``
|
||||||
or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as
|
or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as
|
||||||
literal characters. As a result, in string literals, ``'\U'`` and ``'\u'``
|
literal characters. As a result, in string literals, ``'\U'`` and ``'\u'``
|
||||||
escapes in raw strings are not treated specially.
|
escapes in raw strings are not treated specially. Given that Python 2.x's raw
|
||||||
|
unicode literals behave differently than Python 3.x's, the ``'ur'`` syntax
|
||||||
|
is not supported.
|
||||||
|
|
||||||
.. versionadded:: 3.3
|
.. versionadded:: 3.3
|
||||||
The ``'rb'`` prefix of raw bytes literals has been added as a synonym
|
The ``'rb'`` prefix of raw bytes literals has been added as a synonym
|
||||||
of ``'br'``.
|
of ``'br'``.
|
||||||
|
|
||||||
.. versionadded:: 3.3
|
.. versionadded:: 3.3
|
||||||
Support for the unicode legacy literal (``u'value'``) and other
|
Support for the unicode legacy literal (``u'value'``) was reintroduced
|
||||||
versions were reintroduced to simplify the maintenance of dual
|
to simplify the maintenance of dual Python 2.x and 3.x codebases.
|
||||||
Python 2.x and 3.x codebases. See :pep:`414` for more information.
|
See :pep:`414` for more information.
|
||||||
|
|
||||||
In triple-quoted strings, unescaped newlines and quotes are allowed (and are
|
In triple-quoted strings, unescaped newlines and quotes are allowed (and are
|
||||||
retained), except that three unescaped quotes in a row terminate the string. (A
|
retained), except that three unescaped quotes in a row terminate the string. (A
|
||||||
|
|
|
@ -123,6 +123,15 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertRaises(SyntaxError, eval, """ rrb'' """)
|
self.assertRaises(SyntaxError, eval, """ rrb'' """)
|
||||||
self.assertRaises(SyntaxError, eval, """ rbb'' """)
|
self.assertRaises(SyntaxError, eval, """ rbb'' """)
|
||||||
|
|
||||||
|
def test_eval_str_u(self):
|
||||||
|
self.assertEqual(eval(""" u'x' """), 'x')
|
||||||
|
self.assertEqual(eval(""" U'\u00e4' """), 'ä')
|
||||||
|
self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
|
||||||
|
self.assertRaises(SyntaxError, eval, """ ur'' """)
|
||||||
|
self.assertRaises(SyntaxError, eval, """ ru'' """)
|
||||||
|
self.assertRaises(SyntaxError, eval, """ bu'' """)
|
||||||
|
self.assertRaises(SyntaxError, eval, """ ub'' """)
|
||||||
|
|
||||||
def check_encoding(self, encoding, extra=""):
|
def check_encoding(self, encoding, extra=""):
|
||||||
modname = "xx_" + encoding.replace("-", "_")
|
modname = "xx_" + encoding.replace("-", "_")
|
||||||
fn = os.path.join(self.tmpdir, modname + ".py")
|
fn = os.path.join(self.tmpdir, modname + ".py")
|
||||||
|
|
|
@ -299,24 +299,6 @@ String literals
|
||||||
STRING 'u"abc"' (1, 0) (1, 6)
|
STRING 'u"abc"' (1, 0) (1, 6)
|
||||||
OP '+' (1, 7) (1, 8)
|
OP '+' (1, 7) (1, 8)
|
||||||
STRING 'U"abc"' (1, 9) (1, 15)
|
STRING 'U"abc"' (1, 9) (1, 15)
|
||||||
>>> dump_tokens("ur'abc' + uR'abc' + Ur'abc' + UR'abc'")
|
|
||||||
ENCODING 'utf-8' (0, 0) (0, 0)
|
|
||||||
STRING "ur'abc'" (1, 0) (1, 7)
|
|
||||||
OP '+' (1, 8) (1, 9)
|
|
||||||
STRING "uR'abc'" (1, 10) (1, 17)
|
|
||||||
OP '+' (1, 18) (1, 19)
|
|
||||||
STRING "Ur'abc'" (1, 20) (1, 27)
|
|
||||||
OP '+' (1, 28) (1, 29)
|
|
||||||
STRING "UR'abc'" (1, 30) (1, 37)
|
|
||||||
>>> dump_tokens('ur"abc" + uR"abc" + Ur"abc" + UR"abc"')
|
|
||||||
ENCODING 'utf-8' (0, 0) (0, 0)
|
|
||||||
STRING 'ur"abc"' (1, 0) (1, 7)
|
|
||||||
OP '+' (1, 8) (1, 9)
|
|
||||||
STRING 'uR"abc"' (1, 10) (1, 17)
|
|
||||||
OP '+' (1, 18) (1, 19)
|
|
||||||
STRING 'Ur"abc"' (1, 20) (1, 27)
|
|
||||||
OP '+' (1, 28) (1, 29)
|
|
||||||
STRING 'UR"abc"' (1, 30) (1, 37)
|
|
||||||
|
|
||||||
>>> dump_tokens("b'abc' + B'abc'")
|
>>> dump_tokens("b'abc' + B'abc'")
|
||||||
ENCODING 'utf-8' (0, 0) (0, 0)
|
ENCODING 'utf-8' (0, 0) (0, 0)
|
||||||
|
@ -642,7 +624,7 @@ Non-ascii identifiers
|
||||||
|
|
||||||
Legacy unicode literals:
|
Legacy unicode literals:
|
||||||
|
|
||||||
>>> dump_tokens("Örter = u'places'\\ngrün = UR'green'")
|
>>> dump_tokens("Örter = u'places'\\ngrün = U'green'")
|
||||||
ENCODING 'utf-8' (0, 0) (0, 0)
|
ENCODING 'utf-8' (0, 0) (0, 0)
|
||||||
NAME 'Örter' (1, 0) (1, 5)
|
NAME 'Örter' (1, 0) (1, 5)
|
||||||
OP '=' (1, 6) (1, 7)
|
OP '=' (1, 6) (1, 7)
|
||||||
|
@ -650,7 +632,7 @@ Legacy unicode literals:
|
||||||
NEWLINE '\\n' (1, 17) (1, 18)
|
NEWLINE '\\n' (1, 17) (1, 18)
|
||||||
NAME 'grün' (2, 0) (2, 4)
|
NAME 'grün' (2, 0) (2, 4)
|
||||||
OP '=' (2, 5) (2, 6)
|
OP '=' (2, 5) (2, 6)
|
||||||
STRING "UR'green'" (2, 7) (2, 16)
|
STRING "U'green'" (2, 7) (2, 15)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from test import support
|
from test import support
|
||||||
|
|
|
@ -127,7 +127,7 @@ Floatnumber = group(Pointfloat, Expfloat)
|
||||||
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
|
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
|
||||||
Number = group(Imagnumber, Floatnumber, Intnumber)
|
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||||
|
|
||||||
StringPrefix = r'(?:[uUbB][rR]?|[rR][bB]?)?'
|
StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
|
||||||
|
|
||||||
# Tail end of ' string.
|
# Tail end of ' string.
|
||||||
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||||
|
@ -183,12 +183,8 @@ endpats = {"'": Single, '"': Double,
|
||||||
"rB'''": Single3, 'rB"""': Double3,
|
"rB'''": Single3, 'rB"""': Double3,
|
||||||
"RB'''": Single3, 'RB"""': Double3,
|
"RB'''": Single3, 'RB"""': Double3,
|
||||||
"u'''": Single3, 'u"""': Double3,
|
"u'''": Single3, 'u"""': Double3,
|
||||||
"ur'''": Single3, 'ur"""': Double3,
|
|
||||||
"R'''": Single3, 'R"""': Double3,
|
"R'''": Single3, 'R"""': Double3,
|
||||||
"U'''": Single3, 'U"""': Double3,
|
"U'''": Single3, 'U"""': Double3,
|
||||||
"uR'''": Single3, 'uR"""': Double3,
|
|
||||||
"Ur'''": Single3, 'Ur"""': Double3,
|
|
||||||
"UR'''": Single3, 'UR"""': Double3,
|
|
||||||
'r': None, 'R': None, 'b': None, 'B': None,
|
'r': None, 'R': None, 'b': None, 'B': None,
|
||||||
'u': None, 'U': None}
|
'u': None, 'U': None}
|
||||||
|
|
||||||
|
@ -201,8 +197,7 @@ for t in ("'''", '"""',
|
||||||
"rb'''", 'rb"""', "rB'''", 'rB"""',
|
"rb'''", 'rb"""', "rB'''", 'rB"""',
|
||||||
"Rb'''", 'Rb"""', "RB'''", 'RB"""',
|
"Rb'''", 'Rb"""', "RB'''", 'RB"""',
|
||||||
"u'''", 'u"""', "U'''", 'U"""',
|
"u'''", 'u"""', "U'''", 'U"""',
|
||||||
"ur'''", 'ur"""', "Ur'''", 'Ur"""',
|
):
|
||||||
"uR'''", 'uR"""', "UR'''", 'UR"""'):
|
|
||||||
triple_quoted[t] = t
|
triple_quoted[t] = t
|
||||||
single_quoted = {}
|
single_quoted = {}
|
||||||
for t in ("'", '"',
|
for t in ("'", '"',
|
||||||
|
@ -213,8 +208,7 @@ for t in ("'", '"',
|
||||||
"rb'", 'rb"', "rB'", 'rB"',
|
"rb'", 'rb"', "rB'", 'rB"',
|
||||||
"Rb'", 'Rb"', "RB'", 'RB"' ,
|
"Rb'", 'Rb"', "RB'", 'RB"' ,
|
||||||
"u'", 'u"', "U'", 'U"',
|
"u'", 'u"', "U'", 'U"',
|
||||||
"ur'", 'ur"', "Ur'", 'Ur"',
|
):
|
||||||
"uR'", 'uR"', "UR'", 'UR"' ):
|
|
||||||
single_quoted[t] = t
|
single_quoted[t] = t
|
||||||
|
|
||||||
tabsize = 8
|
tabsize = 8
|
||||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.3.0 Beta 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #15096: Removed support for ur'' as the raw notation isn't
|
||||||
|
compatible with Python 2.x's raw unicode strings.
|
||||||
|
|
||||||
- Issue #13783: Generator objects now use the identifier APIs internally
|
- Issue #13783: Generator objects now use the identifier APIs internally
|
||||||
|
|
||||||
- Issue #14874: Restore charmap decoding speed to pre-PEP 393 levels.
|
- Issue #14874: Restore charmap decoding speed to pre-PEP 393 levels.
|
||||||
|
|
|
@ -1412,7 +1412,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
||||||
/* Identifier (most frequent token!) */
|
/* Identifier (most frequent token!) */
|
||||||
nonascii = 0;
|
nonascii = 0;
|
||||||
if (is_potential_identifier_start(c)) {
|
if (is_potential_identifier_start(c)) {
|
||||||
/* Process b"", r"", u"", br"", rb"" and ur"" */
|
/* Process b"", r"", u"", br"" and rb"" */
|
||||||
int saw_b = 0, saw_r = 0, saw_u = 0;
|
int saw_b = 0, saw_r = 0, saw_u = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
|
if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
|
||||||
|
@ -1421,7 +1421,8 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
||||||
want to support it in arbitrary order like byte literals. */
|
want to support it in arbitrary order like byte literals. */
|
||||||
else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
|
else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
|
||||||
saw_u = 1;
|
saw_u = 1;
|
||||||
else if (!saw_r && (c == 'r' || c == 'R'))
|
/* ur"" and ru"" are not supported */
|
||||||
|
else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
|
||||||
saw_r = 1;
|
saw_r = 1;
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue