bpo-40495: compileall option to hardlink duplicate pyc files (GH-19901)
compileall is now able to use hardlinks to prevent duplicates in a case when .pyc files for different optimization levels have the same content. Co-authored-by: Miro Hrončok <miro@hroncok.cz> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
7443d42021
commit
e77d428856
|
@ -113,6 +113,11 @@ compile Python sources.
|
||||||
|
|
||||||
Ignore symlinks pointing outside the given directory.
|
Ignore symlinks pointing outside the given directory.
|
||||||
|
|
||||||
|
.. cmdoption:: --hardlink-dupes
|
||||||
|
|
||||||
|
If two ``.pyc`` files with different optimization levels have
|
||||||
|
the same content, use hard links to consolidate duplicate files.
|
||||||
|
|
||||||
.. versionchanged:: 3.2
|
.. versionchanged:: 3.2
|
||||||
Added the ``-i``, ``-b`` and ``-h`` options.
|
Added the ``-i``, ``-b`` and ``-h`` options.
|
||||||
|
|
||||||
|
@ -125,7 +130,7 @@ compile Python sources.
|
||||||
Added the ``--invalidation-mode`` option.
|
Added the ``--invalidation-mode`` option.
|
||||||
|
|
||||||
.. versionchanged:: 3.9
|
.. versionchanged:: 3.9
|
||||||
Added the ``-s``, ``-p``, ``-e`` options.
|
Added the ``-s``, ``-p``, ``-e`` and ``--hardlink-dupes`` options.
|
||||||
Raised the default recursion limit from 10 to
|
Raised the default recursion limit from 10 to
|
||||||
:py:func:`sys.getrecursionlimit()`.
|
:py:func:`sys.getrecursionlimit()`.
|
||||||
Added the possibility to specify the ``-o`` option multiple times.
|
Added the possibility to specify the ``-o`` option multiple times.
|
||||||
|
@ -143,7 +148,7 @@ runtime.
|
||||||
Public functions
|
Public functions
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None)
|
.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False)
|
||||||
|
|
||||||
Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
|
Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
|
||||||
files along the way. Return a true value if all the files compiled successfully,
|
files along the way. Return a true value if all the files compiled successfully,
|
||||||
|
@ -193,6 +198,9 @@ Public functions
|
||||||
the ``-s``, ``-p`` and ``-e`` options described above.
|
the ``-s``, ``-p`` and ``-e`` options described above.
|
||||||
They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
|
They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
|
||||||
|
|
||||||
|
If *hardlink_dupes* is true and two ``.pyc`` files with different optimization
|
||||||
|
levels have the same content, use hard links to consolidate duplicate files.
|
||||||
|
|
||||||
.. versionchanged:: 3.2
|
.. versionchanged:: 3.2
|
||||||
Added the *legacy* and *optimize* parameter.
|
Added the *legacy* and *optimize* parameter.
|
||||||
|
|
||||||
|
@ -219,9 +227,9 @@ Public functions
|
||||||
Setting *workers* to 0 now chooses the optimal number of cores.
|
Setting *workers* to 0 now chooses the optimal number of cores.
|
||||||
|
|
||||||
.. versionchanged:: 3.9
|
.. versionchanged:: 3.9
|
||||||
Added *stripdir*, *prependdir* and *limit_sl_dest* arguments.
|
Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments.
|
||||||
|
|
||||||
.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None)
|
.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False)
|
||||||
|
|
||||||
Compile the file with path *fullname*. Return a true value if the file
|
Compile the file with path *fullname*. Return a true value if the file
|
||||||
compiled successfully, and a false value otherwise.
|
compiled successfully, and a false value otherwise.
|
||||||
|
@ -257,6 +265,9 @@ Public functions
|
||||||
the ``-s``, ``-p`` and ``-e`` options described above.
|
the ``-s``, ``-p`` and ``-e`` options described above.
|
||||||
They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
|
They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`.
|
||||||
|
|
||||||
|
If *hardlink_dupes* is true and two ``.pyc`` files with different optimization
|
||||||
|
levels have the same content, use hard links to consolidate duplicate files.
|
||||||
|
|
||||||
.. versionadded:: 3.2
|
.. versionadded:: 3.2
|
||||||
|
|
||||||
.. versionchanged:: 3.5
|
.. versionchanged:: 3.5
|
||||||
|
@ -273,7 +284,7 @@ Public functions
|
||||||
The *invalidation_mode* parameter's default value is updated to None.
|
The *invalidation_mode* parameter's default value is updated to None.
|
||||||
|
|
||||||
.. versionchanged:: 3.9
|
.. versionchanged:: 3.9
|
||||||
Added *stripdir*, *prependdir* and *limit_sl_dest* arguments.
|
Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments.
|
||||||
|
|
||||||
.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=None)
|
.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=None)
|
||||||
|
|
||||||
|
|
|
@ -245,6 +245,16 @@ that schedules a shutdown for the default executor that waits on the
|
||||||
Added :class:`asyncio.PidfdChildWatcher`, a Linux-specific child watcher
|
Added :class:`asyncio.PidfdChildWatcher`, a Linux-specific child watcher
|
||||||
implementation that polls process file descriptors. (:issue:`38692`)
|
implementation that polls process file descriptors. (:issue:`38692`)
|
||||||
|
|
||||||
|
compileall
|
||||||
|
----------
|
||||||
|
|
||||||
|
Added the possibility to use hard links for duplicate ``.pyc`` files: the *hardlink_dupes* parameter and the ``--hardlink-dupes`` command-line option.
|
||||||
|
(Contributed by Lumír 'Frenzy' Balhar in :issue:`40495`.)
|
||||||
|
|
||||||
|
Added new options for path manipulation in the resulting ``.pyc`` files: the *stripdir*, *prependdir* and *limit_sl_dest* parameters and the ``-s``, ``-p`` and ``-e`` command-line options.
|
||||||
|
Added the possibility to specify the option for an optimization level multiple times.
|
||||||
|
(Contributed by Lumír 'Frenzy' Balhar in :issue:`38112`.)
|
||||||
|
|
||||||
concurrent.futures
|
concurrent.futures
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,7 @@ import sys
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import py_compile
|
import py_compile
|
||||||
import struct
|
import struct
|
||||||
|
import filecmp
|
||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -47,7 +48,7 @@ def _walk_dir(dir, maxlevels, quiet=0):
|
||||||
def compile_dir(dir, maxlevels=None, ddir=None, force=False,
|
def compile_dir(dir, maxlevels=None, ddir=None, force=False,
|
||||||
rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
|
rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
|
||||||
invalidation_mode=None, *, stripdir=None,
|
invalidation_mode=None, *, stripdir=None,
|
||||||
prependdir=None, limit_sl_dest=None):
|
prependdir=None, limit_sl_dest=None, hardlink_dupes=False):
|
||||||
"""Byte-compile all modules in the given directory tree.
|
"""Byte-compile all modules in the given directory tree.
|
||||||
|
|
||||||
Arguments (only dir is required):
|
Arguments (only dir is required):
|
||||||
|
@ -70,6 +71,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
|
||||||
after stripdir
|
after stripdir
|
||||||
limit_sl_dest: ignore symlinks if they are pointing outside of
|
limit_sl_dest: ignore symlinks if they are pointing outside of
|
||||||
the defined path
|
the defined path
|
||||||
|
hardlink_dupes: hardlink duplicated pyc files
|
||||||
"""
|
"""
|
||||||
ProcessPoolExecutor = None
|
ProcessPoolExecutor = None
|
||||||
if ddir is not None and (stripdir is not None or prependdir is not None):
|
if ddir is not None and (stripdir is not None or prependdir is not None):
|
||||||
|
@ -104,7 +106,8 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
|
||||||
invalidation_mode=invalidation_mode,
|
invalidation_mode=invalidation_mode,
|
||||||
stripdir=stripdir,
|
stripdir=stripdir,
|
||||||
prependdir=prependdir,
|
prependdir=prependdir,
|
||||||
limit_sl_dest=limit_sl_dest),
|
limit_sl_dest=limit_sl_dest,
|
||||||
|
hardlink_dupes=hardlink_dupes),
|
||||||
files)
|
files)
|
||||||
success = min(results, default=True)
|
success = min(results, default=True)
|
||||||
else:
|
else:
|
||||||
|
@ -112,14 +115,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
|
||||||
if not compile_file(file, ddir, force, rx, quiet,
|
if not compile_file(file, ddir, force, rx, quiet,
|
||||||
legacy, optimize, invalidation_mode,
|
legacy, optimize, invalidation_mode,
|
||||||
stripdir=stripdir, prependdir=prependdir,
|
stripdir=stripdir, prependdir=prependdir,
|
||||||
limit_sl_dest=limit_sl_dest):
|
limit_sl_dest=limit_sl_dest,
|
||||||
|
hardlink_dupes=hardlink_dupes):
|
||||||
success = False
|
success = False
|
||||||
return success
|
return success
|
||||||
|
|
||||||
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
||||||
legacy=False, optimize=-1,
|
legacy=False, optimize=-1,
|
||||||
invalidation_mode=None, *, stripdir=None, prependdir=None,
|
invalidation_mode=None, *, stripdir=None, prependdir=None,
|
||||||
limit_sl_dest=None):
|
limit_sl_dest=None, hardlink_dupes=False):
|
||||||
"""Byte-compile one file.
|
"""Byte-compile one file.
|
||||||
|
|
||||||
Arguments (only fullname is required):
|
Arguments (only fullname is required):
|
||||||
|
@ -140,6 +144,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
||||||
after stripdir
|
after stripdir
|
||||||
limit_sl_dest: ignore symlinks if they are pointing outside of
|
limit_sl_dest: ignore symlinks if they are pointing outside of
|
||||||
the defined path.
|
the defined path.
|
||||||
|
hardlink_dupes: hardlink duplicated pyc files
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if ddir is not None and (stripdir is not None or prependdir is not None):
|
if ddir is not None and (stripdir is not None or prependdir is not None):
|
||||||
|
@ -176,6 +181,14 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
||||||
if isinstance(optimize, int):
|
if isinstance(optimize, int):
|
||||||
optimize = [optimize]
|
optimize = [optimize]
|
||||||
|
|
||||||
|
# Use set() to remove duplicates.
|
||||||
|
# Use sorted() to create pyc files in a deterministic order.
|
||||||
|
optimize = sorted(set(optimize))
|
||||||
|
|
||||||
|
if hardlink_dupes and len(optimize) < 2:
|
||||||
|
raise ValueError("Hardlinking of duplicated bytecode makes sense "
|
||||||
|
"only for more than one optimization level")
|
||||||
|
|
||||||
if rx is not None:
|
if rx is not None:
|
||||||
mo = rx.search(fullname)
|
mo = rx.search(fullname)
|
||||||
if mo:
|
if mo:
|
||||||
|
@ -220,10 +233,16 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
||||||
if not quiet:
|
if not quiet:
|
||||||
print('Compiling {!r}...'.format(fullname))
|
print('Compiling {!r}...'.format(fullname))
|
||||||
try:
|
try:
|
||||||
for opt_level, cfile in opt_cfiles.items():
|
for index, opt_level in enumerate(optimize):
|
||||||
|
cfile = opt_cfiles[opt_level]
|
||||||
ok = py_compile.compile(fullname, cfile, dfile, True,
|
ok = py_compile.compile(fullname, cfile, dfile, True,
|
||||||
optimize=opt_level,
|
optimize=opt_level,
|
||||||
invalidation_mode=invalidation_mode)
|
invalidation_mode=invalidation_mode)
|
||||||
|
if index > 0 and hardlink_dupes:
|
||||||
|
previous_cfile = opt_cfiles[optimize[index - 1]]
|
||||||
|
if filecmp.cmp(cfile, previous_cfile, shallow=False):
|
||||||
|
os.unlink(cfile)
|
||||||
|
os.link(previous_cfile, cfile)
|
||||||
except py_compile.PyCompileError as err:
|
except py_compile.PyCompileError as err:
|
||||||
success = False
|
success = False
|
||||||
if quiet >= 2:
|
if quiet >= 2:
|
||||||
|
@ -352,6 +371,9 @@ def main():
|
||||||
'Python interpreter itself (specified by -O).'))
|
'Python interpreter itself (specified by -O).'))
|
||||||
parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
|
parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
|
||||||
help='Ignore symlinks pointing outsite of the DIR')
|
help='Ignore symlinks pointing outsite of the DIR')
|
||||||
|
parser.add_argument('--hardlink-dupes', action='store_true',
|
||||||
|
dest='hardlink_dupes',
|
||||||
|
help='Hardlink duplicated pyc files')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
compile_dests = args.compile_dest
|
compile_dests = args.compile_dest
|
||||||
|
@ -371,6 +393,10 @@ def main():
|
||||||
if args.opt_levels is None:
|
if args.opt_levels is None:
|
||||||
args.opt_levels = [-1]
|
args.opt_levels = [-1]
|
||||||
|
|
||||||
|
if len(args.opt_levels) == 1 and args.hardlink_dupes:
|
||||||
|
parser.error(("Hardlinking of duplicated bytecode makes sense "
|
||||||
|
"only for more than one optimization level."))
|
||||||
|
|
||||||
if args.ddir is not None and (
|
if args.ddir is not None and (
|
||||||
args.stripdir is not None or args.prependdir is not None
|
args.stripdir is not None or args.prependdir is not None
|
||||||
):
|
):
|
||||||
|
@ -404,7 +430,8 @@ def main():
|
||||||
stripdir=args.stripdir,
|
stripdir=args.stripdir,
|
||||||
prependdir=args.prependdir,
|
prependdir=args.prependdir,
|
||||||
optimize=args.opt_levels,
|
optimize=args.opt_levels,
|
||||||
limit_sl_dest=args.limit_sl_dest):
|
limit_sl_dest=args.limit_sl_dest,
|
||||||
|
hardlink_dupes=args.hardlink_dupes):
|
||||||
success = False
|
success = False
|
||||||
else:
|
else:
|
||||||
if not compile_dir(dest, maxlevels, args.ddir,
|
if not compile_dir(dest, maxlevels, args.ddir,
|
||||||
|
@ -414,7 +441,8 @@ def main():
|
||||||
stripdir=args.stripdir,
|
stripdir=args.stripdir,
|
||||||
prependdir=args.prependdir,
|
prependdir=args.prependdir,
|
||||||
optimize=args.opt_levels,
|
optimize=args.opt_levels,
|
||||||
limit_sl_dest=args.limit_sl_dest):
|
limit_sl_dest=args.limit_sl_dest,
|
||||||
|
hardlink_dupes=args.hardlink_dupes):
|
||||||
success = False
|
success = False
|
||||||
return success
|
return success
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,16 +1,19 @@
|
||||||
import sys
|
|
||||||
import compileall
|
import compileall
|
||||||
|
import contextlib
|
||||||
|
import filecmp
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import test.test_importlib.util
|
import io
|
||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import py_compile
|
import py_compile
|
||||||
import shutil
|
import shutil
|
||||||
import struct
|
import struct
|
||||||
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import test.test_importlib.util
|
||||||
import time
|
import time
|
||||||
import unittest
|
import unittest
|
||||||
import io
|
|
||||||
|
|
||||||
from unittest import mock, skipUnless
|
from unittest import mock, skipUnless
|
||||||
try:
|
try:
|
||||||
|
@ -26,6 +29,24 @@ from .test_py_compile import without_source_date_epoch
|
||||||
from .test_py_compile import SourceDateEpochTestMeta
|
from .test_py_compile import SourceDateEpochTestMeta
|
||||||
|
|
||||||
|
|
||||||
|
def get_pyc(script, opt):
|
||||||
|
if not opt:
|
||||||
|
# Replace None and 0 with ''
|
||||||
|
opt = ''
|
||||||
|
return importlib.util.cache_from_source(script, optimization=opt)
|
||||||
|
|
||||||
|
|
||||||
|
def get_pycs(script):
|
||||||
|
return [get_pyc(script, opt) for opt in (0, 1, 2)]
|
||||||
|
|
||||||
|
|
||||||
|
def is_hardlink(filename1, filename2):
|
||||||
|
"""Returns True if two files have the same inode (hardlink)"""
|
||||||
|
inode1 = os.stat(filename1).st_ino
|
||||||
|
inode2 = os.stat(filename2).st_ino
|
||||||
|
return inode1 == inode2
|
||||||
|
|
||||||
|
|
||||||
class CompileallTestsBase:
|
class CompileallTestsBase:
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -825,6 +846,32 @@ class CommandLineTestsBase:
|
||||||
self.assertTrue(os.path.isfile(allowed_bc))
|
self.assertTrue(os.path.isfile(allowed_bc))
|
||||||
self.assertFalse(os.path.isfile(prohibited_bc))
|
self.assertFalse(os.path.isfile(prohibited_bc))
|
||||||
|
|
||||||
|
def test_hardlink_bad_args(self):
|
||||||
|
# Bad arguments combination, hardlink deduplication makes sense
|
||||||
|
# only for more than one optimization level
|
||||||
|
self.assertRunNotOK(self.directory, "-o 1", "--hardlink-dupes")
|
||||||
|
|
||||||
|
def test_hardlink(self):
|
||||||
|
# 'a = 0' code produces the same bytecode for the 3 optimization
|
||||||
|
# levels. All three .pyc files must have the same inode (hardlinks).
|
||||||
|
#
|
||||||
|
# If deduplication is disabled, all pyc files must have different
|
||||||
|
# inodes.
|
||||||
|
for dedup in (True, False):
|
||||||
|
with tempfile.TemporaryDirectory() as path:
|
||||||
|
with self.subTest(dedup=dedup):
|
||||||
|
script = script_helper.make_script(path, "script", "a = 0")
|
||||||
|
pycs = get_pycs(script)
|
||||||
|
|
||||||
|
args = ["-q", "-o 0", "-o 1", "-o 2"]
|
||||||
|
if dedup:
|
||||||
|
args.append("--hardlink-dupes")
|
||||||
|
self.assertRunOK(path, *args)
|
||||||
|
|
||||||
|
self.assertEqual(is_hardlink(pycs[0], pycs[1]), dedup)
|
||||||
|
self.assertEqual(is_hardlink(pycs[1], pycs[2]), dedup)
|
||||||
|
self.assertEqual(is_hardlink(pycs[0], pycs[2]), dedup)
|
||||||
|
|
||||||
|
|
||||||
class CommandLineTestsWithSourceEpoch(CommandLineTestsBase,
|
class CommandLineTestsWithSourceEpoch(CommandLineTestsBase,
|
||||||
unittest.TestCase,
|
unittest.TestCase,
|
||||||
|
@ -841,5 +888,176 @@ class CommandLineTestsNoSourceEpoch(CommandLineTestsBase,
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HardlinkDedupTestsBase:
|
||||||
|
# Test hardlink_dupes parameter of compileall.compile_dir()
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.path = None
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def temporary_directory(self):
|
||||||
|
with tempfile.TemporaryDirectory() as path:
|
||||||
|
self.path = path
|
||||||
|
yield path
|
||||||
|
self.path = None
|
||||||
|
|
||||||
|
def make_script(self, code, name="script"):
|
||||||
|
return script_helper.make_script(self.path, name, code)
|
||||||
|
|
||||||
|
def compile_dir(self, *, dedup=True, optimize=(0, 1, 2), force=False):
|
||||||
|
compileall.compile_dir(self.path, quiet=True, optimize=optimize,
|
||||||
|
hardlink_dupes=dedup, force=force)
|
||||||
|
|
||||||
|
def test_bad_args(self):
|
||||||
|
# Bad arguments combination, hardlink deduplication makes sense
|
||||||
|
# only for more than one optimization level
|
||||||
|
with self.temporary_directory():
|
||||||
|
self.make_script("pass")
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
compileall.compile_dir(self.path, quiet=True, optimize=0,
|
||||||
|
hardlink_dupes=True)
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
# same optimization level specified twice:
|
||||||
|
# compile_dir() removes duplicates
|
||||||
|
compileall.compile_dir(self.path, quiet=True, optimize=[0, 0],
|
||||||
|
hardlink_dupes=True)
|
||||||
|
|
||||||
|
def create_code(self, docstring=False, assertion=False):
|
||||||
|
lines = []
|
||||||
|
if docstring:
|
||||||
|
lines.append("'module docstring'")
|
||||||
|
lines.append('x = 1')
|
||||||
|
if assertion:
|
||||||
|
lines.append("assert x == 1")
|
||||||
|
return '\n'.join(lines)
|
||||||
|
|
||||||
|
def iter_codes(self):
|
||||||
|
for docstring in (False, True):
|
||||||
|
for assertion in (False, True):
|
||||||
|
code = self.create_code(docstring=docstring, assertion=assertion)
|
||||||
|
yield (code, docstring, assertion)
|
||||||
|
|
||||||
|
def test_disabled(self):
|
||||||
|
# Deduplication disabled, no hardlinks
|
||||||
|
for code, docstring, assertion in self.iter_codes():
|
||||||
|
with self.subTest(docstring=docstring, assertion=assertion):
|
||||||
|
with self.temporary_directory():
|
||||||
|
script = self.make_script(code)
|
||||||
|
pycs = get_pycs(script)
|
||||||
|
self.compile_dir(dedup=False)
|
||||||
|
self.assertFalse(is_hardlink(pycs[0], pycs[1]))
|
||||||
|
self.assertFalse(is_hardlink(pycs[0], pycs[2]))
|
||||||
|
self.assertFalse(is_hardlink(pycs[1], pycs[2]))
|
||||||
|
|
||||||
|
def check_hardlinks(self, script, docstring=False, assertion=False):
|
||||||
|
pycs = get_pycs(script)
|
||||||
|
self.assertEqual(is_hardlink(pycs[0], pycs[1]),
|
||||||
|
not assertion)
|
||||||
|
self.assertEqual(is_hardlink(pycs[0], pycs[2]),
|
||||||
|
not assertion and not docstring)
|
||||||
|
self.assertEqual(is_hardlink(pycs[1], pycs[2]),
|
||||||
|
not docstring)
|
||||||
|
|
||||||
|
def test_hardlink(self):
|
||||||
|
# Test deduplication on all combinations
|
||||||
|
for code, docstring, assertion in self.iter_codes():
|
||||||
|
with self.subTest(docstring=docstring, assertion=assertion):
|
||||||
|
with self.temporary_directory():
|
||||||
|
script = self.make_script(code)
|
||||||
|
self.compile_dir()
|
||||||
|
self.check_hardlinks(script, docstring, assertion)
|
||||||
|
|
||||||
|
def test_only_two_levels(self):
|
||||||
|
# Don't build the 3 optimization levels, but only 2
|
||||||
|
for opts in ((0, 1), (1, 2), (0, 2)):
|
||||||
|
with self.subTest(opts=opts):
|
||||||
|
with self.temporary_directory():
|
||||||
|
# code with no docstring and no assertion:
|
||||||
|
# same bytecode for all optimization levels
|
||||||
|
script = self.make_script(self.create_code())
|
||||||
|
self.compile_dir(optimize=opts)
|
||||||
|
pyc1 = get_pyc(script, opts[0])
|
||||||
|
pyc2 = get_pyc(script, opts[1])
|
||||||
|
self.assertTrue(is_hardlink(pyc1, pyc2))
|
||||||
|
|
||||||
|
def test_duplicated_levels(self):
|
||||||
|
# compile_dir() must not fail if optimize contains duplicated
|
||||||
|
# optimization levels and/or if optimization levels are not sorted.
|
||||||
|
with self.temporary_directory():
|
||||||
|
# code with no docstring and no assertion:
|
||||||
|
# same bytecode for all optimization levels
|
||||||
|
script = self.make_script(self.create_code())
|
||||||
|
self.compile_dir(optimize=[1, 0, 1, 0])
|
||||||
|
pyc1 = get_pyc(script, 0)
|
||||||
|
pyc2 = get_pyc(script, 1)
|
||||||
|
self.assertTrue(is_hardlink(pyc1, pyc2))
|
||||||
|
|
||||||
|
def test_recompilation(self):
|
||||||
|
# Test compile_dir() when pyc files already exist and the script
|
||||||
|
# content changed
|
||||||
|
with self.temporary_directory():
|
||||||
|
script = self.make_script("a = 0")
|
||||||
|
self.compile_dir()
|
||||||
|
# All three levels have the same inode
|
||||||
|
self.check_hardlinks(script)
|
||||||
|
|
||||||
|
pycs = get_pycs(script)
|
||||||
|
inode = os.stat(pycs[0]).st_ino
|
||||||
|
|
||||||
|
# Change of the module content
|
||||||
|
script = self.make_script("print(0)")
|
||||||
|
|
||||||
|
# Recompilation without -o 1
|
||||||
|
self.compile_dir(optimize=[0, 2], force=True)
|
||||||
|
|
||||||
|
# opt-1.pyc should have the same inode as before and others should not
|
||||||
|
self.assertEqual(inode, os.stat(pycs[1]).st_ino)
|
||||||
|
self.assertTrue(is_hardlink(pycs[0], pycs[2]))
|
||||||
|
self.assertNotEqual(inode, os.stat(pycs[2]).st_ino)
|
||||||
|
# opt-1.pyc and opt-2.pyc have different content
|
||||||
|
self.assertFalse(filecmp.cmp(pycs[1], pycs[2], shallow=True))
|
||||||
|
|
||||||
|
def test_import(self):
|
||||||
|
# Test that import updates a single pyc file when pyc files already
# exist and the script content changed
|
||||||
|
|
||||||
|
with self.temporary_directory():
|
||||||
|
script = self.make_script(self.create_code(), name="module")
|
||||||
|
self.compile_dir()
|
||||||
|
# All three levels have the same inode
|
||||||
|
self.check_hardlinks(script)
|
||||||
|
|
||||||
|
pycs = get_pycs(script)
|
||||||
|
inode = os.stat(pycs[0]).st_ino
|
||||||
|
|
||||||
|
# Change of the module content
|
||||||
|
script = self.make_script("print(0)", name="module")
|
||||||
|
|
||||||
|
# Import the module in Python with -O (optimization level 1)
|
||||||
|
script_helper.assert_python_ok(
|
||||||
|
"-O", "-c", "import module", __isolated=False, PYTHONPATH=self.path
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only opt-1.pyc is changed
|
||||||
|
self.assertEqual(inode, os.stat(pycs[0]).st_ino)
|
||||||
|
self.assertEqual(inode, os.stat(pycs[2]).st_ino)
|
||||||
|
self.assertFalse(is_hardlink(pycs[1], pycs[2]))
|
||||||
|
# opt-1.pyc and opt-2.pyc have different content
|
||||||
|
self.assertFalse(filecmp.cmp(pycs[1], pycs[2], shallow=True))
|
||||||
|
|
||||||
|
|
||||||
|
class HardlinkDedupTestsWithSourceEpoch(HardlinkDedupTestsBase,
|
||||||
|
unittest.TestCase,
|
||||||
|
metaclass=SourceDateEpochTestMeta,
|
||||||
|
source_date_epoch=True):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class HardlinkDedupTestsNoSourceEpoch(HardlinkDedupTestsBase,
|
||||||
|
unittest.TestCase,
|
||||||
|
metaclass=SourceDateEpochTestMeta,
|
||||||
|
source_date_epoch=False):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -86,6 +86,7 @@ Marcin Bachry
|
||||||
Alfonso Baciero
|
Alfonso Baciero
|
||||||
Dwayne Bailey
|
Dwayne Bailey
|
||||||
Stig Bakken
|
Stig Bakken
|
||||||
|
Lumír Balhar
|
||||||
Aleksandr Balezin
|
Aleksandr Balezin
|
||||||
Greg Ball
|
Greg Ball
|
||||||
Lewis Ball
|
Lewis Ball
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
:mod:`compileall` is now able to use hardlinks to prevent duplicates in a
|
||||||
|
case when ``.pyc`` files for different optimization levels have the same content.
|
Loading…
Reference in New Issue