kill py_compile's homemade encoding detection in favor of tokenize.detect_encoding() (see #8168)

This commit is contained in:
Benjamin Peterson 2010-03-18 22:37:38 +00:00
parent 1613ed8108
commit 0088893447
2 changed files with 5 additions and 17 deletions

View File

@ -7,8 +7,8 @@ import builtins
import imp
import marshal
import os
import re
import sys
import tokenize
import traceback
MAGIC = imp.get_magic()
@ -69,21 +69,6 @@ def wr_long(f, x):
(x >> 16) & 0xff,
(x >> 24) & 0xff]))
def read_encoding(file, default):
    """Return the source encoding declared in *file*, or *default*.

    Scans the first two lines of the file for a PEP 263 ``coding:``
    declaration (e.g. ``# -*- coding: latin-1 -*-``) and returns the
    declared encoding name as a str; returns *default* if no
    declaration is found.

    NOTE(review): this does not recognize a UTF-8 BOM (issue #8168);
    tokenize.detect_encoding() is the complete replacement.
    """
    # Open in binary mode: we cannot decode the file yet, since its
    # encoding is precisely what we are trying to discover.  A "with"
    # block replaces the original open/try/finally/close dance and
    # guarantees the file is closed even if readline() raises.
    with open(file, "rb") as f:
        # PEP 263: the coding cookie is only valid on line 1 or 2.
        for _ in range(2):
            line = f.readline()
            if not line:
                # EOF before two lines; no declaration possible.
                break
            m = re.match(br".*\bcoding:\s*(\S+)\b", line)
            if m:
                # The cookie itself must be pure ASCII.
                return m.group(1).decode("ascii")
    return default
def compile(file, cfile=None, dfile=None, doraise=False):
"""Byte-compile one Python source file to Python bytecode.
@ -119,7 +104,8 @@ def compile(file, cfile=None, dfile=None, doraise=False):
directories).
"""
encoding = read_encoding(file, "utf-8")
with open(file, "rb") as f:
encoding = tokenize.detect_encoding(f.readline)[0]
with open(file, encoding=encoding) as f:
try:
timestamp = int(os.fstat(f.fileno()).st_mtime)

View File

@ -283,6 +283,8 @@ C-API
Library
-------
- Issue #8168: py_compile now handles files with UTF-8 BOMs.
- ``tokenize.detect_encoding`` now returns ``'utf-8-sig'`` when a UTF-8 BOM is
detected.