Issue #10335: Add tokenize.open(), which detects the file encoding using
tokenize.detect_encoding() and opens the file in read-only mode.
This commit is contained in:
parent
ae4836df6d
commit
58c0752a33
|
@ -101,14 +101,16 @@ function it uses to do this is available:
|
||||||
If no encoding is specified, then the default of ``'utf-8'`` will be
|
If no encoding is specified, then the default of ``'utf-8'`` will be
|
||||||
returned.
|
returned.
|
||||||
|
|
||||||
:func:`detect_encoding` is useful for robustly reading Python source files.
|
Use :func:`open` to open Python source files: it uses
|
||||||
A common pattern for this follows::
|
:func:`detect_encoding` to detect the file encoding.
|
||||||
|
|
||||||
def read_python_source(file_name):
|
|
||||||
with open(file_name, "rb") as fp:
|
.. function:: open(filename)
|
||||||
encoding = tokenize.detect_encoding(fp.readline)[0]
|
|
||||||
with open(file_name, "r", encoding=encoding) as fp:
|
Open a file in read-only mode using the encoding detected by
|
||||||
return fp.read()
|
:func:`detect_encoding`.
|
||||||
|
|
||||||
|
.. versionadded:: 3.2
|
||||||
|
|
||||||
|
|
||||||
Example of a script rewriter that transforms float literals into Decimal
|
Example of a script rewriter that transforms float literals into Decimal
|
||||||
|
@ -153,4 +155,3 @@ objects::
|
||||||
result.append((toknum, tokval))
|
result.append((toknum, tokval))
|
||||||
return untokenize(result).decode('utf-8')
|
return untokenize(result).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -123,9 +123,7 @@ def updatecache(filename, module_globals=None):
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
with open(fullname, 'rb') as fp:
|
with tokenize.open(fullname) as fp:
|
||||||
coding, line = tokenize.detect_encoding(fp.readline)
|
|
||||||
with open(fullname, 'r', encoding=coding) as fp:
|
|
||||||
lines = fp.readlines()
|
lines = fp.readlines()
|
||||||
except IOError:
|
except IOError:
|
||||||
return []
|
return []
|
||||||
|
|
|
@ -104,9 +104,7 @@ def compile(file, cfile=None, dfile=None, doraise=False):
|
||||||
byte-compile all installed files (or all files in selected
|
byte-compile all installed files (or all files in selected
|
||||||
directories).
|
directories).
|
||||||
"""
|
"""
|
||||||
with open(file, "rb") as f:
|
with tokenize.open(file) as f:
|
||||||
encoding = tokenize.detect_encoding(f.readline)[0]
|
|
||||||
with open(file, encoding=encoding) as f:
|
|
||||||
try:
|
try:
|
||||||
timestamp = int(os.fstat(f.fileno()).st_mtime)
|
timestamp = int(os.fstat(f.fileno()).st_mtime)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
|
|
@ -93,11 +93,8 @@ def check(file):
|
||||||
check(fullname)
|
check(fullname)
|
||||||
return
|
return
|
||||||
|
|
||||||
with open(file, 'rb') as f:
|
|
||||||
encoding, lines = tokenize.detect_encoding(f.readline)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f = open(file, encoding=encoding)
|
f = tokenize.open(file)
|
||||||
except IOError as msg:
|
except IOError as msg:
|
||||||
errprint("%r: I/O Error: %s" % (file, msg))
|
errprint("%r: I/O Error: %s" % (file, msg))
|
||||||
return
|
return
|
||||||
|
|
|
@ -564,7 +564,8 @@ Non-ascii identifiers
|
||||||
|
|
||||||
from test import support
|
from test import support
|
||||||
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
|
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
|
||||||
STRING, ENDMARKER, tok_name, detect_encoding)
|
STRING, ENDMARKER, tok_name, detect_encoding,
|
||||||
|
open as tokenize_open)
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
import os, sys, glob
|
import os, sys, glob
|
||||||
|
@ -857,6 +858,26 @@ class TestDetectEncoding(TestCase):
|
||||||
readline = self.get_readline((b'# coding: bad\n',))
|
readline = self.get_readline((b'# coding: bad\n',))
|
||||||
self.assertRaises(SyntaxError, detect_encoding, readline)
|
self.assertRaises(SyntaxError, detect_encoding, readline)
|
||||||
|
|
||||||
|
def test_open(self):
|
||||||
|
filename = support.TESTFN + '.py'
|
||||||
|
self.addCleanup(support.unlink, filename)
|
||||||
|
|
||||||
|
# test coding cookie
|
||||||
|
for encoding in ('iso-8859-15', 'utf-8'):
|
||||||
|
with open(filename, 'w', encoding=encoding) as fp:
|
||||||
|
print("# coding: %s" % encoding, file=fp)
|
||||||
|
print("print('euro:\u20ac')", file=fp)
|
||||||
|
with tokenize_open(filename) as fp:
|
||||||
|
assert fp.encoding == encoding
|
||||||
|
assert fp.mode == 'r'
|
||||||
|
|
||||||
|
# test BOM (no coding cookie)
|
||||||
|
with open(filename, 'w', encoding='utf-8-sig') as fp:
|
||||||
|
print("print('euro:\u20ac')", file=fp)
|
||||||
|
with tokenize_open(filename) as fp:
|
||||||
|
assert fp.encoding == 'utf-8-sig'
|
||||||
|
assert fp.mode == 'r'
|
||||||
|
|
||||||
class TestTokenize(TestCase):
|
class TestTokenize(TestCase):
|
||||||
|
|
||||||
def test_tokenize(self):
|
def test_tokenize(self):
|
||||||
|
|
|
@ -29,6 +29,7 @@ import sys
|
||||||
from token import *
|
from token import *
|
||||||
from codecs import lookup, BOM_UTF8
|
from codecs import lookup, BOM_UTF8
|
||||||
import collections
|
import collections
|
||||||
|
from io import TextIOWrapper
|
||||||
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
|
cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
|
||||||
|
|
||||||
import token
|
import token
|
||||||
|
@ -335,6 +336,20 @@ def detect_encoding(readline):
|
||||||
return default, [first, second]
|
return default, [first, second]
|
||||||
|
|
||||||
|
|
||||||
|
_builtin_open = open
|
||||||
|
|
||||||
|
def open(filename):
|
||||||
|
"""Open a file in read only mode using the encoding detected by
|
||||||
|
detect_encoding().
|
||||||
|
"""
|
||||||
|
buffer = _builtin_open(filename, 'rb')
|
||||||
|
encoding, lines = detect_encoding(buffer.readline)
|
||||||
|
buffer.seek(0)
|
||||||
|
text = TextIOWrapper(buffer, encoding, line_buffering=True)
|
||||||
|
text.mode = 'r'
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
def tokenize(readline):
|
def tokenize(readline):
|
||||||
"""
|
"""
|
||||||
The tokenize() generator requires one argument, readline, which
|
The tokenize() generator requires one argument, readline, which
|
||||||
|
|
|
@ -432,10 +432,9 @@ def find_strings(filename, encoding=None):
|
||||||
def find_executable_linenos(filename):
|
def find_executable_linenos(filename):
|
||||||
"""Return dict where keys are line numbers in the line number table."""
|
"""Return dict where keys are line numbers in the line number table."""
|
||||||
try:
|
try:
|
||||||
with io.FileIO(filename, 'r') as file:
|
with tokenize.open(filename) as f:
|
||||||
encoding, lines = tokenize.detect_encoding(file.readline)
|
|
||||||
with open(filename, "r", encoding=encoding) as f:
|
|
||||||
prog = f.read()
|
prog = f.read()
|
||||||
|
encoding = f.encoding
|
||||||
except IOError as err:
|
except IOError as err:
|
||||||
print(("Not printing coverage data for %r: %s"
|
print(("Not printing coverage data for %r: %s"
|
||||||
% (filename, err)), file=sys.stderr)
|
% (filename, err)), file=sys.stderr)
|
||||||
|
|
|
@ -60,6 +60,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #10335: Add tokenize.open(), which detects the file encoding using
|
||||||
|
tokenize.detect_encoding() and opens the file in read-only mode.
|
||||||
|
|
||||||
- Issue #10321: Added support for binary data to smtplib.SMTP.sendmail,
|
- Issue #10321: Added support for binary data to smtplib.SMTP.sendmail,
|
||||||
and a new method send_message to send an email.message.Message object.
|
and a new method send_message to send an email.message.Message object.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue