Issue #26581: Use the first coding cookie on a line, not the last one.

This commit is contained in:
Serhiy Storchaka 2016-03-20 23:36:29 +02:00
parent 97eee1cfda
commit e431d3c9aa
8 changed files with 10 additions and 6 deletions

View File

@ -62,7 +62,7 @@ locale_encoding = locale_encoding.lower()
encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check! encoding = locale_encoding ### KBK 07Sep07 This is used all over IDLE, check!
### 'encoding' is used below in encode(), check! ### 'encoding' is used below in encode(), check!
coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
def coding_spec(data): def coding_spec(data):

View File

@ -236,7 +236,7 @@ class Untokenizer:
startline = False startline = False
toks_append(tokval) toks_append(tokval)
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
def _get_normal_name(orig_enc): def _get_normal_name(orig_enc):

View File

@ -14,7 +14,7 @@ import unittest
import warnings import warnings
CODING_RE = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
class EncodingTest: class EncodingTest:

View File

@ -178,7 +178,7 @@ class AbstractSourceEncodingTest:
def test_double_coding_same_line(self): def test_double_coding_same_line(self):
src = (b'#coding:iso8859-15 coding:latin1\n' src = (b'#coding:iso8859-15 coding:latin1\n'
b'print(ascii("\xc3\xa4"))\n') b'print(ascii("\xc3\xa4"))\n')
self.check_script_output(src, br"'\xc3\xa4'") self.check_script_output(src, br"'\xc3\u20ac'")
def test_first_non_utf8_coding_line(self): def test_first_non_utf8_coding_line(self):
src = (b'#coding:iso-8859-15 \xa4\n' src = (b'#coding:iso-8859-15 \xa4\n'

View File

@ -33,7 +33,7 @@ import re
import sys import sys
from token import * from token import *
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII) cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
import token import token

View File

@ -10,6 +10,9 @@ Release date: tba
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #26581: If a coding cookie is specified multiple times on a line in a
Python source code file, only the first one is taken into account.
- Issue #26464: Fix str.translate() when string is ASCII and first replacements - Issue #26464: Fix str.translate() when string is ASCII and first replacements
removes character, but next replacement uses a non-ASCII character or a removes character, but next replacement uses a non-ASCII character or a
string longer than 1 character. Regression introduced in Python 3.5.0. string longer than 1 character. Regression introduced in Python 3.5.0.

View File

@ -275,6 +275,7 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t
return 0; return 0;
} }
*spec = r; *spec = r;
break;
} }
} }
} }

View File

@ -32,7 +32,7 @@ except ImportError:
"no sophisticated Python source file search will be done.", file=sys.stderr) "no sophisticated Python source file search will be done.", file=sys.stderr)
decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)') blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')
def get_declaration(line): def get_declaration(line):