From 3eb554fc828c812a31c1a3cd9f619eacbb708010 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 5 Sep 2014 10:22:05 +0300 Subject: [PATCH] Issue #22221: Backported fixes from Python 3 (issue #18960). * Now the source encoding declaration on the second line isn't effective if the first line contains anything except a comment. This affects compile(), eval() and exec() too. * IDLE now ignores the source encoding declaration on the second line if the first line contains anything except a comment. * 2to3 and the findnocoding.py script now ignore the source encoding declaration on the second line if the first line contains anything except a comment. --- Lib/idlelib/IOBinding.py | 3 +++ Lib/lib2to3/pgen2/tokenize.py | 3 +++ Lib/test/test_compile.py | 17 ++++++++++++++++- Misc/NEWS | 10 ++++++++++ Parser/tokenizer.c | 20 +++++++++++++++++--- Tools/scripts/findnocoding.py | 4 +++- 6 files changed, 52 insertions(+), 5 deletions(-) diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py index aedd37242cf..e3affa8992d 100644 --- a/Lib/idlelib/IOBinding.py +++ b/Lib/idlelib/IOBinding.py @@ -72,6 +72,7 @@ else: encoding = encoding.lower() coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') +blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)') class EncodingMessage(SimpleDialog): "Inform user that an encoding declaration is needed." @@ -130,6 +131,8 @@ def coding_spec(str): match = coding_re.match(line) if match is not None: break + if not blank_re.match(line): + return None else: return None name = match.group(1) diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py index 4cb2a41262e..d64a3e63e42 100644 --- a/Lib/lib2to3/pgen2/tokenize.py +++ b/Lib/lib2to3/pgen2/tokenize.py @@ -237,6 +237,7 @@ class Untokenizer: toks_append(tokval) cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') +blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)') def _get_normal_name(orig_enc): """Imitates get_normal_name in tokenizer.c.""" @@ -309,6 +310,8 @@ def detect_encoding(readline): encoding = find_cookie(first) if encoding: return encoding, [first] + if not blank_re.match(first): + return default, [first] second = read_or_stop() if not second: diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index ca396a974e1..cfc6389b59a 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -412,9 +412,24 @@ if 1: l = lambda: "foo" self.assertIsNone(l.__doc__) - def test_unicode_encoding(self): + @test_support.requires_unicode + def test_encoding(self): + code = b'# -*- coding: badencoding -*-\npass\n' + self.assertRaises(SyntaxError, compile, code, 'tmp', 'exec') code = u"# -*- coding: utf-8 -*-\npass\n" self.assertRaises(SyntaxError, compile, code, "tmp", "exec") + code = 'u"\xc2\xa4"\n' + self.assertEqual(eval(code), u'\xc2\xa4') + code = u'u"\xc2\xa4"\n' + self.assertEqual(eval(code), u'\xc2\xa4') + code = '# -*- coding: latin1 -*-\nu"\xc2\xa4"\n' + self.assertEqual(eval(code), u'\xc2\xa4') + code = '# -*- coding: utf-8 -*-\nu"\xc2\xa4"\n' + self.assertEqual(eval(code), u'\xa4') + code = '# -*- coding: iso8859-15 -*-\nu"\xc2\xa4"\n' + self.assertEqual(eval(code), test_support.u(r'\xc2\u20ac')) + code = 'u"""\\\n# -*- coding: utf-8 -*-\n\xc2\xa4"""\n' + self.assertEqual(eval(code), u'# -*- coding: utf-8 -*-\n\xc2\xa4') def test_subscripts(self): # SF bug 1448804 diff --git a/Misc/NEWS b/Misc/NEWS index e50de99653c..91e1db4589f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 2.7.9? Core and Builtins ----------------- +- Issue #22221: Now the source encoding declaration on the second line isn't + effective if the first line contains anything except a comment. + - Issue #22023: Fix ``%S``, ``%R`` and ``%V`` formats of :c:func:`PyUnicode_FromFormat`. @@ -124,6 +127,9 @@ _ Issue #21597: The separator between the turtledemo text pane and the drawing IDLE ---- +- Issue #22221: IDLE now ignores the source encoding declaration on the second + line if the first line contains anything except a comment. + - Issue #17390: Adjust Editor window title; remove 'Python', move version to end. @@ -140,6 +146,10 @@ Extension Modules Tools/Demos ----------- +- Issue #22221: 2to3 and the findnocoding.py script now ignore the source + encoding declaration on the second line if the first line contains anything + except a comment. + - Issue #22201: Command-line interface of the zipfile module now correctly extracts ZIP files with directory entries. Patch by Ryan Wilson. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 3e4af5334d7..d0e4a0e2745 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -259,11 +259,25 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, char * cs; int r = 1; - if (tok->cont_line) + if (tok->cont_line) { /* It's a continuation line, so it can't be a coding spec. */ + tok->read_coding_spec = 1; return 1; + } cs = get_coding_spec(line, size); - if (cs != NULL) { + if (!cs) { + Py_ssize_t i; + for (i = 0; i < size; i++) { + if (line[i] == '#' || line[i] == '\n' || line[i] == '\r') + break; + if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') { + /* Stop checking coding spec after a line containing + * anything except a comment. */ + tok->read_coding_spec = 1; + break; + } + } + } else { tok->read_coding_spec = 1; if (tok->encoding == NULL) { assert(tok->decoding_state == 1); /* raw */ @@ -688,7 +702,7 @@ decode_str(const char *input, int single, struct tok_state *tok) if (newl[0]) { if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) return error_ret(tok); - if (tok->enc == NULL && newl[1]) { + if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) { if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], tok, buf_setreadl)) return error_ret(tok); diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py index 5d932908d36..70b1a666133 100755 --- a/Tools/scripts/findnocoding.py +++ b/Tools/scripts/findnocoding.py @@ -33,6 +33,7 @@ except ImportError: decl_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') +blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)') def get_declaration(line): match = decl_re.match(line) @@ -57,7 +58,8 @@ def needs_declaration(fullpath): line1 = infile.readline() line2 = infile.readline() - if get_declaration(line1) or get_declaration(line2): + if (get_declaration(line1) or + blank_re.match(line1) and get_declaration(line2)): # the file does have an encoding declaration, so trust it infile.close() return False