bpo-39769: Fix compileall ddir for subpkgs. (GH-18676) (GH-18718)

Fix compileall.compile_dir() ddir= behavior on sub-packages.

Fixes compileall.compile_dir's ddir parameter and compileall command
line flag `-d` to no longer write the wrong pathname to the generated
pyc file for submodules beneath the root of the directory tree being
compiled.  This fixes a regression introduced with Python 3.5.

Tests backported from GH 02673352b5; the
implementation is different due to intervening code changes, but it is
still quite simple.

Why was the bug ever introduced?  The refactoring to add parallel
execution kept the ddir -> dfile computations but discarded the results
instead of sending them to compile_file().  This fixes that.  Lack of tests
meant this went unnoticed.
This commit is contained in:
Gregory P. Smith 2020-03-01 10:42:56 -08:00 committed by GitHub
parent fec6681f7a
commit ce720d3e06
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 73 additions and 12 deletions

View File

@ -41,7 +41,7 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
else:
dfile = None
if not os.path.isdir(fullname):
yield fullname
yield fullname, ddir
elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
os.path.isdir(fullname) and not os.path.islink(fullname)):
yield from _walk_dir(fullname, ddir=dfile,
@ -76,28 +76,33 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
from concurrent.futures import ProcessPoolExecutor
except ImportError:
workers = 1
files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels,
ddir=ddir)
files_and_ddirs = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels,
ddir=ddir)
success = True
if workers != 1 and ProcessPoolExecutor is not None:
# If workers == 0, let ProcessPoolExecutor choose
workers = workers or None
with ProcessPoolExecutor(max_workers=workers) as executor:
results = executor.map(partial(compile_file,
ddir=ddir, force=force,
rx=rx, quiet=quiet,
legacy=legacy,
optimize=optimize,
invalidation_mode=invalidation_mode),
files)
results = executor.map(
partial(_compile_file_tuple,
force=force, rx=rx, quiet=quiet,
legacy=legacy, optimize=optimize,
invalidation_mode=invalidation_mode,
),
files_and_ddirs)
success = min(results, default=True)
else:
for file in files:
if not compile_file(file, ddir, force, rx, quiet,
for file, dfile in files_and_ddirs:
if not compile_file(file, dfile, force, rx, quiet,
legacy, optimize, invalidation_mode):
success = False
return success
def _compile_file_tuple(file_and_dfile, **kwargs):
    """Unpack one (file, dfile) pair and hand it to compile_file().

    This has to remain a module-level function so that it can be used
    with ProcessPoolExecutor.  Any keyword arguments are forwarded to
    compile_file() untouched, and its result is returned as-is.
    """
    source_path, display_path = file_and_dfile
    outcome = compile_file(source_path, display_path, **kwargs)
    return outcome
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1,
invalidation_mode=None):

View File

@ -577,6 +577,47 @@ class CommandLineTestsBase:
self.assertTrue(compile_dir.called)
self.assertEqual(compile_dir.call_args[-1]['workers'], 0)
def _test_ddir_only(self, *, ddir, parallel=True):
    """Verify that compile_dir(ddir=...) records sub-package paths.

    Regression check for bpo-39769.  With parallel=True the tree is
    compiled with two workers, otherwise with a single one.
    """
    # Create the nested packages test/ and test/foo/ below the scratch
    # directory.  Each level gets an empty __init__ plus a module that
    # raises ZeroDivisionError as soon as it is executed.
    package_dir = self.directory
    scripts = []
    for component in ("test", "foo"):
        package_dir = os.path.join(package_dir, component)
        os.mkdir(package_dir)
        script_helper.make_script(package_dir, "__init__", "")
        failing_script = script_helper.make_script(
            package_dir, "mod", "def fn(): 1/0\nfn()\n")
        scripts.append(failing_script)

    worker_count = 2 if parallel else 1
    compileall.compile_dir(
        self.directory, quiet=True, ddir=ddir, workers=worker_count)

    self.assertTrue(scripts)
    root_prefix_len = len(self.directory + os.sep)
    for script in scripts:
        self.assertTrue(script.startswith(self.directory), script)
        pyc_path = importlib.util.cache_from_source(script)
        relative_path = script[root_prefix_len:]
        # Running the pyc directly must fail; the traceback on stderr
        # shows the filename that was baked in at compile time.
        _, _, stderr = script_helper.assert_python_failure(pyc_path)
        expected_name = os.path.join(ddir, relative_path)
        code_obj = test.test_importlib.util._get_code_from_pyc(pyc_path)
        self.assertEqual(code_obj.co_filename, expected_name)
        self.assertIn(f'"{expected_name}"', os.fsdecode(stderr))
def test_ddir_only_one_worker(self):
    """Single worker, non-empty ddir: pyc records package paths (bpo-39769)."""
    prefix = "<a prefix>"
    return self._test_ddir_only(parallel=False, ddir=prefix)
def test_ddir_multiple_workers(self):
    """Multiple workers, non-empty ddir: pyc records package paths (bpo-39769)."""
    prefix = "<a prefix>"
    return self._test_ddir_only(parallel=True, ddir=prefix)
def test_ddir_empty_only_one_worker(self):
    """Single worker, ddir='': pyc records package paths (bpo-39769)."""
    prefix = ""
    return self._test_ddir_only(parallel=False, ddir=prefix)
def test_ddir_empty_multiple_workers(self):
    """Multiple workers, ddir='': pyc records package paths (bpo-39769)."""
    prefix = ""
    return self._test_ddir_only(parallel=True, ddir=prefix)
class CommmandLineTestsWithSourceEpoch(CommandLineTestsBase,
unittest.TestCase,

View File

@ -7,6 +7,7 @@ import importlib
from importlib import machinery, util, invalidate_caches
from importlib.abc import ResourceReader
import io
import marshal
import os
import os.path
from pathlib import Path, PurePath
@ -118,6 +119,16 @@ def submodule(parent, name, pkg_dir, content=''):
return '{}.{}'.format(parent, name), path
def _get_code_from_pyc(pyc_path):
    """Return the object unmarshalled from the pyc file at *pyc_path*.

    The first 16 bytes of the file are skipped without being inspected,
    so nothing in that header region is validated.
    """
    with open(pyc_path, 'rb') as stream:
        # Jump over the 16-byte header and unmarshal what follows it.
        stream.seek(16)
        code_obj = marshal.load(stream)
    return code_obj
@contextlib.contextmanager
def uncache(*names):
"""Uncache a module from sys.modules.

View File

@ -0,0 +1,4 @@
The :func:`compileall.compile_dir` function's *ddir* parameter and the
compileall command line flag ``-d`` no longer write the wrong pathname to the
generated pyc file for submodules beneath the root of the directory tree
being compiled. This fixes a regression introduced with Python 3.5.