* The source encoding declaration on the second line is now ignored if the first line contains anything other than a comment. This affects compile(), eval() and exec() too. * IDLE now ignores the source encoding declaration on the second line if the first line contains anything except a comment. * 2to3 and the findnocoding.py script now ignore the source encoding declaration on the second line if the first line contains anything except a comment.
This commit is contained in:
parent
423f1282b3
commit
3eb554fc82
|
@@ -72,6 +72,7 @@ else:
|
|||
encoding = encoding.lower()
|
||||
|
||||
coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
|
||||
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
|
||||
|
||||
class EncodingMessage(SimpleDialog):
|
||||
"Inform user that an encoding declaration is needed."
|
||||
|
@@ -130,6 +131,8 @@ def coding_spec(str):
|
|||
match = coding_re.match(line)
|
||||
if match is not None:
|
||||
break
|
||||
if not blank_re.match(line):
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
name = match.group(1)
|
||||
|
|
|
@@ -237,6 +237,7 @@ class Untokenizer:
|
|||
toks_append(tokval)
|
||||
|
||||
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
|
||||
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
|
||||
|
||||
def _get_normal_name(orig_enc):
|
||||
"""Imitates get_normal_name in tokenizer.c."""
|
||||
|
@@ -309,6 +310,8 @@ def detect_encoding(readline):
|
|||
encoding = find_cookie(first)
|
||||
if encoding:
|
||||
return encoding, [first]
|
||||
if not blank_re.match(first):
|
||||
return default, [first]
|
||||
|
||||
second = read_or_stop()
|
||||
if not second:
|
||||
|
|
|
@@ -412,9 +412,24 @@ if 1:
|
|||
l = lambda: "foo"
|
||||
self.assertIsNone(l.__doc__)
|
||||
|
||||
def test_unicode_encoding(self):
|
||||
@test_support.requires_unicode
|
||||
def test_encoding(self):
|
||||
code = b'# -*- coding: badencoding -*-\npass\n'
|
||||
self.assertRaises(SyntaxError, compile, code, 'tmp', 'exec')
|
||||
code = u"# -*- coding: utf-8 -*-\npass\n"
|
||||
self.assertRaises(SyntaxError, compile, code, "tmp", "exec")
|
||||
code = 'u"\xc2\xa4"\n'
|
||||
self.assertEqual(eval(code), u'\xc2\xa4')
|
||||
code = u'u"\xc2\xa4"\n'
|
||||
self.assertEqual(eval(code), u'\xc2\xa4')
|
||||
code = '# -*- coding: latin1 -*-\nu"\xc2\xa4"\n'
|
||||
self.assertEqual(eval(code), u'\xc2\xa4')
|
||||
code = '# -*- coding: utf-8 -*-\nu"\xc2\xa4"\n'
|
||||
self.assertEqual(eval(code), u'\xa4')
|
||||
code = '# -*- coding: iso8859-15 -*-\nu"\xc2\xa4"\n'
|
||||
self.assertEqual(eval(code), test_support.u(r'\xc2\u20ac'))
|
||||
code = 'u"""\\\n# -*- coding: utf-8 -*-\n\xc2\xa4"""\n'
|
||||
self.assertEqual(eval(code), u'# -*- coding: utf-8 -*-\n\xc2\xa4')
|
||||
|
||||
def test_subscripts(self):
|
||||
# SF bug 1448804
|
||||
|
|
10
Misc/NEWS
10
Misc/NEWS
|
@@ -10,6 +10,9 @@ What's New in Python 2.7.9?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #22221: The source encoding declaration on the second line is now
|
||||
ignored if the first line contains anything other than a comment.
|
||||
|
||||
- Issue #22023: Fix ``%S``, ``%R`` and ``%V`` formats of
|
||||
:c:func:`PyUnicode_FromFormat`.
|
||||
|
||||
|
@@ -124,6 +127,9 @@ - Issue #21597: The separator between the turtledemo text pane and the drawing
|
|||
IDLE
|
||||
----
|
||||
|
||||
- Issue #22221: IDLE now ignores the source encoding declaration on the second
|
||||
line if the first line contains anything except a comment.
|
||||
|
||||
- Issue #17390: Adjust Editor window title; remove 'Python',
|
||||
move version to end.
|
||||
|
||||
|
@@ -140,6 +146,10 @@ Extension Modules
|
|||
Tools/Demos
|
||||
-----------
|
||||
|
||||
- Issue #22221: 2to3 and the findnocoding.py script now ignore the source
|
||||
encoding declaration on the second line if the first line contains anything
|
||||
except a comment.
|
||||
|
||||
- Issue #22201: Command-line interface of the zipfile module now correctly
|
||||
extracts ZIP files with directory entries. Patch by Ryan Wilson.
|
||||
|
||||
|
|
|
@@ -259,11 +259,25 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
|
|||
char * cs;
|
||||
int r = 1;
|
||||
|
||||
if (tok->cont_line)
|
||||
if (tok->cont_line) {
|
||||
/* It's a continuation line, so it can't be a coding spec. */
|
||||
tok->read_coding_spec = 1;
|
||||
return 1;
|
||||
}
|
||||
cs = get_coding_spec(line, size);
|
||||
if (cs != NULL) {
|
||||
if (!cs) {
|
||||
Py_ssize_t i;
|
||||
for (i = 0; i < size; i++) {
|
||||
if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')
|
||||
break;
|
||||
if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {
|
||||
/* Stop checking coding spec after a line containing
|
||||
* anything except a comment. */
|
||||
tok->read_coding_spec = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tok->read_coding_spec = 1;
|
||||
if (tok->encoding == NULL) {
|
||||
assert(tok->decoding_state == 1); /* raw */
|
||||
|
@@ -688,7 +702,7 @@ decode_str(const char *input, int single, struct tok_state *tok)
|
|||
if (newl[0]) {
|
||||
if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
|
||||
return error_ret(tok);
|
||||
if (tok->enc == NULL && newl[1]) {
|
||||
if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) {
|
||||
if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
|
||||
tok, buf_setreadl))
|
||||
return error_ret(tok);
|
||||
|
|
|
@@ -33,6 +33,7 @@ except ImportError:
|
|||
|
||||
|
||||
decl_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
|
||||
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
|
||||
|
||||
def get_declaration(line):
|
||||
match = decl_re.match(line)
|
||||
|
@@ -57,7 +58,8 @@ def needs_declaration(fullpath):
|
|||
line1 = infile.readline()
|
||||
line2 = infile.readline()
|
||||
|
||||
if get_declaration(line1) or get_declaration(line2):
|
||||
if (get_declaration(line1) or
|
||||
blank_re.match(line1) and get_declaration(line2)):
|
||||
# the file does have an encoding declaration, so trust it
|
||||
infile.close()
|
||||
return False
|
||||
|
|
Loading…
Reference in New Issue