From 00888934476bf21fc7b0b76b96adf08cee7db57e Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 18 Mar 2010 22:37:38 +0000 Subject: [PATCH] kill py_compile's homemade encoding detection in favor of tokenize.detect_encoding() (see #8168) --- Lib/py_compile.py | 20 +++----------------- Misc/NEWS | 2 ++ 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/Lib/py_compile.py b/Lib/py_compile.py index 10af1bb58ea..859136550d3 100644 --- a/Lib/py_compile.py +++ b/Lib/py_compile.py @@ -7,8 +7,8 @@ import builtins import imp import marshal import os -import re import sys +import tokenize import traceback MAGIC = imp.get_magic() @@ -69,21 +69,6 @@ def wr_long(f, x): (x >> 16) & 0xff, (x >> 24) & 0xff])) -def read_encoding(file, default): - """Read the first two lines of the file looking for coding: xyzzy.""" - f = open(file, "rb") - try: - for i in range(2): - line = f.readline() - if not line: - break - m = re.match(br".*\bcoding:\s*(\S+)\b", line) - if m: - return m.group(1).decode("ascii") - return default - finally: - f.close() - def compile(file, cfile=None, dfile=None, doraise=False): """Byte-compile one Python source file to Python bytecode. @@ -119,7 +104,8 @@ def compile(file, cfile=None, dfile=None, doraise=False): directories). """ - encoding = read_encoding(file, "utf-8") + with open(file, "rb") as f: + encoding = tokenize.detect_encoding(f.readline)[0] with open(file, encoding=encoding) as f: try: timestamp = int(os.fstat(f.fileno()).st_mtime) diff --git a/Misc/NEWS b/Misc/NEWS index f1b068b47dc..5484d3941b7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -283,6 +283,8 @@ C-API Library ------- +- Issue #8168: py_compile now handles files with UTF-8 BOMs. + - ``tokenize.detect_encoding`` now returns ``'utf-8-sig'`` when a UTF-8 BOM is detected.