From d42e5820631cd66ee1eab8f610d4b58f3dfdd81c Mon Sep 17 00:00:00 2001 From: Barry Date: Tue, 14 Apr 2020 20:16:06 +0100 Subject: [PATCH] bpo-40260: Update modulefinder to use io.open_code() and respect coding comments (GH-19488) --- Lib/modulefinder.py | 30 ++++----- Lib/test/test_modulefinder.py | 65 +++++++++++++++++-- .../2020-04-12-21-18-56.bpo-40260.F6VWaE.rst | 1 + 3 files changed, 75 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-04-12-21-18-56.bpo-40260.F6VWaE.rst diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index e0d29984f86..84ddbdb6ee3 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -5,6 +5,7 @@ import importlib._bootstrap_external import importlib.machinery import marshal import os +import io import sys import types import warnings @@ -68,35 +69,32 @@ def _find_module(name, path=None): # Some special cases: if spec.loader is importlib.machinery.BuiltinImporter: - return None, None, ("", "", _C_BUILTIN) + return None, None, ("", _C_BUILTIN) if spec.loader is importlib.machinery.FrozenImporter: - return None, None, ("", "", _PY_FROZEN) + return None, None, ("", _PY_FROZEN) file_path = spec.origin if spec.loader.is_package(name): - return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) + return None, os.path.dirname(file_path), ("", _PKG_DIRECTORY) if isinstance(spec.loader, importlib.machinery.SourceFileLoader): kind = _PY_SOURCE - mode = "r" elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader): kind = _C_EXTENSION - mode = "rb" elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): kind = _PY_COMPILED - mode = "rb" else: # Should never happen. - return None, None, ("", "", _SEARCH_ERROR) + return None, None, ("", _SEARCH_ERROR) - file = open(file_path, mode) + file = io.open_code(file_path) suffix = os.path.splitext(file_path)[-1] - return file, file_path, (suffix, mode, kind) + return file, file_path, (suffix, kind) class Module: @@ -160,15 +158,15 @@ class ModuleFinder: def run_script(self, pathname): self.msg(2, "run_script", pathname) - with open(pathname) as fp: - stuff = ("", "r", _PY_SOURCE) + with io.open_code(pathname) as fp: + stuff = ("", _PY_SOURCE) self.load_module('__main__', fp, pathname, stuff) def load_file(self, pathname): dir, name = os.path.split(pathname) name, ext = os.path.splitext(name) - with open(pathname) as fp: - stuff = (ext, "r", _PY_SOURCE) + with io.open_code(pathname) as fp: + stuff = (ext, _PY_SOURCE) self.load_module(name, fp, pathname, stuff) def import_hook(self, name, caller=None, fromlist=None, level=-1): @@ -333,14 +331,14 @@ class ModuleFinder: return m def load_module(self, fqname, fp, pathname, file_info): - suffix, mode, type = file_info + suffix, type = file_info self.msgin(2, "load_module", fqname, fp and "fp", pathname) if type == _PKG_DIRECTORY: m = self.load_package(fqname, pathname) self.msgout(2, "load_module ->", m) return m if type == _PY_SOURCE: - co = compile(fp.read()+'\n', pathname, 'exec') + co = compile(fp.read()+b'\n', pathname, 'exec') elif type == _PY_COMPILED: try: data = fp.read() @@ -504,7 +502,7 @@ class ModuleFinder: if path is None: if name in sys.builtin_module_names: - return (None, None, ("", "", _C_BUILTIN)) + return (None, None, ("", _C_BUILTIN)) path = self.path diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index ebd96e1c8a2..1aa45011df0 100644 --- a/Lib/test/test_modulefinder.py +++ b/Lib/test/test_modulefinder.py @@ -40,7 +40,8 @@ a/module.py from c import something b/__init__.py from sys import * -"""] +""", +] maybe_test_new = [ "a.module", @@ -245,6 +246,48 @@ b/__init__.py b/c.py """] +coding_default_utf8_test = [ + "a_utf8", + ["a_utf8", "b_utf8"], + [], [], + """\ +a_utf8.py + # use the default of utf8 + print('Unicode test A code point 2090 \u2090 that is not valid in cp1252') + import b_utf8 +b_utf8.py + # use the default of utf8 + print('Unicode test B code point 2090 \u2090 that is not valid in cp1252') +"""] + +coding_explicit_utf8_test = [ + "a_utf8", + ["a_utf8", "b_utf8"], + [], [], + """\ +a_utf8.py + # coding=utf8 + print('Unicode test A code point 2090 \u2090 that is not valid in cp1252') + import b_utf8 +b_utf8.py + # use the default of utf8 + print('Unicode test B code point 2090 \u2090 that is not valid in cp1252') +"""] + +coding_explicit_cp1252_test = [ + "a_cp1252", + ["a_cp1252", "b_utf8"], + [], [], + b"""\ +a_cp1252.py + # coding=cp1252 + # 0xe2 is not allowed in utf8 + print('CP1252 test P\xe2t\xe9') + import b_utf8 +b_utf8.py + # use the default of utf8 + print('Unicode test A code point 2090 \u2090 that is not valid in cp1252') +"""] def open_file(path): dirname = os.path.dirname(path) @@ -253,18 +296,22 @@ def open_file(path): except OSError as e: if e.errno != errno.EEXIST: raise - return open(path, "w") + return open(path, 'wb') def create_package(source): ofi = None try: for line in source.splitlines(): - if line.startswith(" ") or line.startswith("\t"): - ofi.write(line.strip() + "\n") + if type(line) != bytes: + line = line.encode('utf-8') + if line.startswith(b' ') or line.startswith(b'\t'): + ofi.write(line.strip() + b'\n') else: if ofi: ofi.close() + if type(line) == bytes: + line = line.decode('utf-8') ofi = open_file(os.path.join(TEST_DIR, line.strip())) finally: if ofi: @@ -337,7 +384,7 @@ class ModuleFinderTest(unittest.TestCase): source_path = base_path + importlib.machinery.SOURCE_SUFFIXES[0] bytecode_path = base_path + importlib.machinery.BYTECODE_SUFFIXES[0] with open_file(source_path) as file: - file.write('testing_modulefinder = True\n') + file.write('testing_modulefinder = True\n'.encode('utf-8')) py_compile.compile(source_path, cfile=bytecode_path) os.remove(source_path) self._do_test(bytecode_test) @@ -365,6 +412,14 @@ b.py """ % list(range(2**16))] # 2**16 constants self._do_test(extended_opargs_test) + def test_coding_default_utf8(self): + self._do_test(coding_default_utf8_test) + + def test_coding_explicit_utf8(self): + self._do_test(coding_explicit_utf8_test) + + def test_coding_explicit_cp1252(self): + self._do_test(coding_explicit_cp1252_test) if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2020-04-12-21-18-56.bpo-40260.F6VWaE.rst b/Misc/NEWS.d/next/Library/2020-04-12-21-18-56.bpo-40260.F6VWaE.rst new file mode 100644 index 00000000000..decc073bf4d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-04-12-21-18-56.bpo-40260.F6VWaE.rst @@ -0,0 +1 @@ +Ensure :mod:`modulefinder` uses :func:`io.open_code` and respects coding comments.