From ce720d3e0674d6ac6f1b950c20a89be4cfde7853 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 1 Mar 2020 10:42:56 -0800 Subject: [PATCH] bpo-39769: Fix compileall ddir for subpkgs. (GH-18676) (GH-18718) Fix compileall.compile_dir() ddir= behavior on sub-packages. Fixes compileall.compile_dir's ddir parameter and compileall command line flag `-d` to no longer write the wrong pathname to the generated pyc file for submodules beneath the root of the directory tree being compiled. This fixes a regression introduced with Python 3.5. Tests backported from GH 02673352b5db6ca4d3dc804965facbedfe66425d, the implementation is different due to intervening code changes. But still quiet simple. Why was the bug ever introduced? The refactoring to add parallel execution kept the ddir -> dfile computations but discarded the results instead of sending them to compile_file(). This fixes that. Lack of tests meant this went unnoticed. --- Lib/compileall.py | 29 +++++++------ Lib/test/test_compileall.py | 41 +++++++++++++++++++ Lib/test/test_importlib/util.py | 11 +++++ .../2020-02-29-13-20-33.bpo-39769.hJmxu4.rst | 4 ++ 4 files changed, 73 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-02-29-13-20-33.bpo-39769.hJmxu4.rst diff --git a/Lib/compileall.py b/Lib/compileall.py index 49306d9dabb..bfac8efc804 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -41,7 +41,7 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0): else: dfile = None if not os.path.isdir(fullname): - yield fullname + yield fullname, ddir elif (maxlevels > 0 and name != os.curdir and name != os.pardir and os.path.isdir(fullname) and not os.path.islink(fullname)): yield from _walk_dir(fullname, ddir=dfile, @@ -76,28 +76,33 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, from concurrent.futures import ProcessPoolExecutor except ImportError: workers = 1 - files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels, - ddir=ddir) + files_and_ddirs = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels, + ddir=ddir) success = True if workers != 1 and ProcessPoolExecutor is not None: # If workers == 0, let ProcessPoolExecutor choose workers = workers or None with ProcessPoolExecutor(max_workers=workers) as executor: - results = executor.map(partial(compile_file, - ddir=ddir, force=force, - rx=rx, quiet=quiet, - legacy=legacy, - optimize=optimize, - invalidation_mode=invalidation_mode), - files) + results = executor.map( + partial(_compile_file_tuple, + force=force, rx=rx, quiet=quiet, + legacy=legacy, optimize=optimize, + invalidation_mode=invalidation_mode, + ), + files_and_ddirs) success = min(results, default=True) else: - for file in files: - if not compile_file(file, ddir, force, rx, quiet, + for file, dfile in files_and_ddirs: + if not compile_file(file, dfile, force, rx, quiet, legacy, optimize, invalidation_mode): success = False return success +def _compile_file_tuple(file_and_dfile, **kwargs): + """Needs to be toplevel for ProcessPoolExecutor.""" + file, dfile = file_and_dfile + return compile_file(file, dfile, **kwargs) + def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None): diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index 04f6e1e049d..64f092b53b5 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -577,6 +577,47 @@ class CommandLineTestsBase: self.assertTrue(compile_dir.called) self.assertEqual(compile_dir.call_args[-1]['workers'], 0) + def _test_ddir_only(self, *, ddir, parallel=True): + """Recursive compile_dir ddir must contain package paths; bpo39769.""" + fullpath = ["test", "foo"] + path = self.directory + mods = [] + for subdir in fullpath: + path = os.path.join(path, subdir) + os.mkdir(path) + script_helper.make_script(path, "__init__", "") + mods.append(script_helper.make_script(path, "mod", + "def fn(): 1/0\nfn()\n")) + compileall.compile_dir( + self.directory, quiet=True, ddir=ddir, + workers=2 if parallel else 1) + self.assertTrue(mods) + for mod in mods: + self.assertTrue(mod.startswith(self.directory), mod) + modcode = importlib.util.cache_from_source(mod) + modpath = mod[len(self.directory+os.sep):] + _, _, err = script_helper.assert_python_failure(modcode) + expected_in = os.path.join(ddir, modpath) + mod_code_obj = test.test_importlib.util._get_code_from_pyc(modcode) + self.assertEqual(mod_code_obj.co_filename, expected_in) + self.assertIn(f'"{expected_in}"', os.fsdecode(err)) + + def test_ddir_only_one_worker(self): + """Recursive compile_dir ddir= contains package paths; bpo39769.""" + return self._test_ddir_only(ddir="", parallel=False) + + def test_ddir_multiple_workers(self): + """Recursive compile_dir ddir= contains package paths; bpo39769.""" + return self._test_ddir_only(ddir="", parallel=True) + + def test_ddir_empty_only_one_worker(self): + """Recursive compile_dir ddir='' contains package paths; bpo39769.""" + return self._test_ddir_only(ddir="", parallel=False) + + def test_ddir_empty_multiple_workers(self): + """Recursive compile_dir ddir='' contains package paths; bpo39769.""" + return self._test_ddir_only(ddir="", parallel=True) + class CommmandLineTestsWithSourceEpoch(CommandLineTestsBase, unittest.TestCase, diff --git a/Lib/test/test_importlib/util.py b/Lib/test/test_importlib/util.py index e016ea49119..e6a1476875b 100644 --- a/Lib/test/test_importlib/util.py +++ b/Lib/test/test_importlib/util.py @@ -7,6 +7,7 @@ import importlib from importlib import machinery, util, invalidate_caches from importlib.abc import ResourceReader import io +import marshal import os import os.path from pathlib import Path, PurePath @@ -118,6 +119,16 @@ def submodule(parent, name, pkg_dir, content=''): return '{}.{}'.format(parent, name), path +def _get_code_from_pyc(pyc_path): + """Reads a pyc file and returns the unmarshalled code object within. + + No header validation is performed. + """ + with open(pyc_path, 'rb') as pyc_f: + pyc_f.seek(16) + return marshal.load(pyc_f) + + @contextlib.contextmanager def uncache(*names): """Uncache a module from sys.modules. diff --git a/Misc/NEWS.d/next/Library/2020-02-29-13-20-33.bpo-39769.hJmxu4.rst b/Misc/NEWS.d/next/Library/2020-02-29-13-20-33.bpo-39769.hJmxu4.rst new file mode 100644 index 00000000000..9b564bd10d3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-29-13-20-33.bpo-39769.hJmxu4.rst @@ -0,0 +1,4 @@ +The :func:`compileall.compile_dir` function's *ddir* parameter and the +compileall command line flag `-d` no longer write the wrong pathname to the +generated pyc file for submodules beneath the root of the directory tree +being compiled. This fixes a regression introduced with Python 3.5.