bpo-34022: Stop forcing of hash-based invalidation with SOURCE_DATE_EPOCH (GH-9607)

Unconditional forcing of ``CHECKED_HASH`` invalidation was introduced in
3.7.0 in bpo-29708.  The change is bad, as it unconditionally overrides
*invalidation_mode*, even if it was passed as an explicit argument to
``py_compile.compile()`` or ``compileall``.  An environment variable
should *never* override an explicit argument to a library function.
That change leads to multiple test failures if the ``SOURCE_DATE_EPOCH``
environment variable is set.

This changes ``py_compile.compile()`` to only look at
``SOURCE_DATE_EPOCH`` if no explicit *invalidation_mode* was specified.
I also made various relevant tests run with explicit control over the
value of ``SOURCE_DATE_EPOCH``.

While looking at this, I noticed that ``zipimport`` does not work
with hash-based .pycs _at all_, though I left the fixes for
subsequent commits.
This commit is contained in:
Elvis Pranskevichus 2018-10-10 12:43:14 -04:00 committed by Victor Stinner
parent 7e18deef65
commit a6b3ec5b6d
8 changed files with 161 additions and 30 deletions

View File

@ -85,13 +85,16 @@ compile Python sources.
.. cmdoption:: --invalidation-mode [timestamp|checked-hash|unchecked-hash] .. cmdoption:: --invalidation-mode [timestamp|checked-hash|unchecked-hash]
Control how the generated pycs will be invalidated at runtime. The default Control how the generated byte-code files are invalidated at runtime.
setting, ``timestamp``, means that ``.pyc`` files with the source timestamp The ``timestamp`` value, means that ``.pyc`` files with the source timestamp
and size embedded will be generated. The ``checked-hash`` and and size embedded will be generated. The ``checked-hash`` and
``unchecked-hash`` values cause hash-based pycs to be generated. Hash-based ``unchecked-hash`` values cause hash-based pycs to be generated. Hash-based
pycs embed a hash of the source file contents rather than a timestamp. See pycs embed a hash of the source file contents rather than a timestamp. See
:ref:`pyc-invalidation` for more information on how Python validates bytecode :ref:`pyc-invalidation` for more information on how Python validates
cache files at runtime. bytecode cache files at runtime.
The default is ``timestamp`` if the :envvar:`SOURCE_DATE_EPOCH` environment
variable is not set, and ``checked-hash`` if the ``SOURCE_DATE_EPOCH``
environment variable is set.
.. versionchanged:: 3.2 .. versionchanged:: 3.2
Added the ``-i``, ``-b`` and ``-h`` options. Added the ``-i``, ``-b`` and ``-h`` options.

View File

@ -54,10 +54,10 @@ byte-code cache files in the directory containing the source code.
level of the current interpreter. level of the current interpreter.
*invalidation_mode* should be a member of the :class:`PycInvalidationMode` *invalidation_mode* should be a member of the :class:`PycInvalidationMode`
enum and controls how the generated ``.pyc`` files are invalidated at enum and controls how the generated bytecode cache is invalidated at
runtime. If the :envvar:`SOURCE_DATE_EPOCH` environment variable is set, runtime. The default is :attr:`PycInvalidationMode.CHECKED_HASH` if
*invalidation_mode* will be forced to the :envvar:`SOURCE_DATE_EPOCH` environment variable is set, otherwise
:attr:`PycInvalidationMode.CHECKED_HASH`. the default is :attr:`PycInvalidationMode.TIMESTAMP`.
.. versionchanged:: 3.2 .. versionchanged:: 3.2
Changed default value of *cfile* to be :PEP:`3147`-compliant. Previous Changed default value of *cfile* to be :PEP:`3147`-compliant. Previous
@ -77,6 +77,11 @@ byte-code cache files in the directory containing the source code.
*invalidation_mode* will be forced to *invalidation_mode* will be forced to
:attr:`PycInvalidationMode.CHECKED_HASH`. :attr:`PycInvalidationMode.CHECKED_HASH`.
.. versionchanged:: 3.7.2
The :envvar:`SOURCE_DATE_EPOCH` environment variable no longer
overrides the value of the *invalidation_mode* argument, and determines
its default value instead.
.. class:: PycInvalidationMode .. class:: PycInvalidationMode

View File

@ -53,7 +53,7 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
quiet=0, legacy=False, optimize=-1, workers=1, quiet=0, legacy=False, optimize=-1, workers=1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP): invalidation_mode=None):
"""Byte-compile all modules in the given directory tree. """Byte-compile all modules in the given directory tree.
Arguments (only dir is required): Arguments (only dir is required):
@ -96,7 +96,7 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1, legacy=False, optimize=-1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP): invalidation_mode=None):
"""Byte-compile one file. """Byte-compile one file.
Arguments (only fullname is required): Arguments (only fullname is required):
@ -182,7 +182,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0, def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
legacy=False, optimize=-1, legacy=False, optimize=-1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP): invalidation_mode=None):
"""Byte-compile all module on sys.path. """Byte-compile all module on sys.path.
Arguments (all optional): Arguments (all optional):
@ -255,9 +255,12 @@ def main():
type=int, help='Run compileall concurrently') type=int, help='Run compileall concurrently')
invalidation_modes = [mode.name.lower().replace('_', '-') invalidation_modes = [mode.name.lower().replace('_', '-')
for mode in py_compile.PycInvalidationMode] for mode in py_compile.PycInvalidationMode]
parser.add_argument('--invalidation-mode', default='timestamp', parser.add_argument('--invalidation-mode',
choices=sorted(invalidation_modes), choices=sorted(invalidation_modes),
help='How the pycs will be invalidated at runtime') help=('set .pyc invalidation mode; defaults to '
'"checked-hash" if the SOURCE_DATE_EPOCH '
'environment variable is set, and '
'"timestamp" otherwise.'))
args = parser.parse_args() args = parser.parse_args()
compile_dests = args.compile_dest compile_dests = args.compile_dest
@ -286,8 +289,11 @@ def main():
if args.workers is not None: if args.workers is not None:
args.workers = args.workers or None args.workers = args.workers or None
ivl_mode = args.invalidation_mode.replace('-', '_').upper() if args.invalidation_mode:
invalidation_mode = py_compile.PycInvalidationMode[ivl_mode] ivl_mode = args.invalidation_mode.replace('-', '_').upper()
invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
else:
invalidation_mode = None
success = True success = True
try: try:

View File

@ -69,8 +69,15 @@ class PycInvalidationMode(enum.Enum):
UNCHECKED_HASH = 3 UNCHECKED_HASH = 3
def _get_default_invalidation_mode():
if os.environ.get('SOURCE_DATE_EPOCH'):
return PycInvalidationMode.CHECKED_HASH
else:
return PycInvalidationMode.TIMESTAMP
def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1, def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1,
invalidation_mode=PycInvalidationMode.TIMESTAMP): invalidation_mode=None):
"""Byte-compile one Python source file to Python bytecode. """Byte-compile one Python source file to Python bytecode.
:param file: The source file name. :param file: The source file name.
@ -112,8 +119,8 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1,
the resulting file would be regular and thus not the same type of file as the resulting file would be regular and thus not the same type of file as
it was previously. it was previously.
""" """
if os.environ.get('SOURCE_DATE_EPOCH'): if invalidation_mode is None:
invalidation_mode = PycInvalidationMode.CHECKED_HASH invalidation_mode = _get_default_invalidation_mode()
if cfile is None: if cfile is None:
if optimize >= 0: if optimize >= 0:
optimization = optimize if optimize >= 1 else '' optimization = optimize if optimize >= 1 else ''

View File

@ -22,7 +22,11 @@ except ImportError:
from test import support from test import support
from test.support import script_helper from test.support import script_helper
class CompileallTests(unittest.TestCase): from .test_py_compile import without_source_date_epoch
from .test_py_compile import SourceDateEpochTestMeta
class CompileallTestsBase:
def setUp(self): def setUp(self):
self.directory = tempfile.mkdtemp() self.directory = tempfile.mkdtemp()
@ -46,7 +50,7 @@ class CompileallTests(unittest.TestCase):
with open(self.bad_source_path, 'w') as file: with open(self.bad_source_path, 'w') as file:
file.write('x (\n') file.write('x (\n')
def data(self): def timestamp_metadata(self):
with open(self.bc_path, 'rb') as file: with open(self.bc_path, 'rb') as file:
data = file.read(12) data = file.read(12)
mtime = int(os.stat(self.source_path).st_mtime) mtime = int(os.stat(self.source_path).st_mtime)
@ -57,16 +61,18 @@ class CompileallTests(unittest.TestCase):
def recreation_check(self, metadata): def recreation_check(self, metadata):
"""Check that compileall recreates bytecode when the new metadata is """Check that compileall recreates bytecode when the new metadata is
used.""" used."""
if os.environ.get('SOURCE_DATE_EPOCH'):
raise unittest.SkipTest('SOURCE_DATE_EPOCH is set')
py_compile.compile(self.source_path) py_compile.compile(self.source_path)
self.assertEqual(*self.data()) self.assertEqual(*self.timestamp_metadata())
with open(self.bc_path, 'rb') as file: with open(self.bc_path, 'rb') as file:
bc = file.read()[len(metadata):] bc = file.read()[len(metadata):]
with open(self.bc_path, 'wb') as file: with open(self.bc_path, 'wb') as file:
file.write(metadata) file.write(metadata)
file.write(bc) file.write(bc)
self.assertNotEqual(*self.data()) self.assertNotEqual(*self.timestamp_metadata())
compileall.compile_dir(self.directory, force=False, quiet=True) compileall.compile_dir(self.directory, force=False, quiet=True)
self.assertTrue(*self.data()) self.assertTrue(*self.timestamp_metadata())
def test_mtime(self): def test_mtime(self):
# Test a change in mtime leads to a new .pyc. # Test a change in mtime leads to a new .pyc.
@ -189,6 +195,21 @@ class CompileallTests(unittest.TestCase):
compileall.compile_dir(self.directory, quiet=True, workers=5) compileall.compile_dir(self.directory, quiet=True, workers=5)
self.assertTrue(compile_file_mock.called) self.assertTrue(compile_file_mock.called)
class CompileallTestsWithSourceEpoch(CompileallTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass
class CompileallTestsWithoutSourceEpoch(CompileallTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=False):
pass
class EncodingTest(unittest.TestCase): class EncodingTest(unittest.TestCase):
"""Issue 6716: compileall should escape source code when printing errors """Issue 6716: compileall should escape source code when printing errors
to stdout.""" to stdout."""
@ -212,7 +233,7 @@ class EncodingTest(unittest.TestCase):
sys.stdout = orig_stdout sys.stdout = orig_stdout
class CommandLineTests(unittest.TestCase): class CommandLineTestsBase:
"""Test compileall's CLI.""" """Test compileall's CLI."""
@classmethod @classmethod
@ -285,6 +306,7 @@ class CommandLineTests(unittest.TestCase):
self.assertNotCompiled(self.initfn) self.assertNotCompiled(self.initfn)
self.assertNotCompiled(self.barfn) self.assertNotCompiled(self.barfn)
@without_source_date_epoch # timestamp invalidation test
def test_no_args_respects_force_flag(self): def test_no_args_respects_force_flag(self):
self._skip_if_sys_path_not_writable() self._skip_if_sys_path_not_writable()
bazfn = script_helper.make_script(self.directory, 'baz', '') bazfn = script_helper.make_script(self.directory, 'baz', '')
@ -353,6 +375,7 @@ class CommandLineTests(unittest.TestCase):
self.assertTrue(os.path.exists(self.pkgdir_cachedir)) self.assertTrue(os.path.exists(self.pkgdir_cachedir))
self.assertFalse(os.path.exists(cachecachedir)) self.assertFalse(os.path.exists(cachecachedir))
@without_source_date_epoch # timestamp invalidation test
def test_force(self): def test_force(self):
self.assertRunOK('-q', self.pkgdir) self.assertRunOK('-q', self.pkgdir)
pycpath = importlib.util.cache_from_source(self.barfn) pycpath = importlib.util.cache_from_source(self.barfn)
@ -556,5 +579,20 @@ class CommandLineTests(unittest.TestCase):
self.assertEqual(compile_dir.call_args[-1]['workers'], None) self.assertEqual(compile_dir.call_args[-1]['workers'], None)
class CommmandLineTestsWithSourceEpoch(CommandLineTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass
class CommmandLineTestsNoSourceEpoch(CommandLineTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=False):
pass
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -19,6 +19,9 @@ import warnings
from test.support import make_legacy_pyc, unload from test.support import make_legacy_pyc, unload
from test.test_py_compile import without_source_date_epoch
from test.test_py_compile import SourceDateEpochTestMeta
class SimpleTest(abc.LoaderTests): class SimpleTest(abc.LoaderTests):
@ -359,6 +362,17 @@ class SimpleTest(abc.LoaderTests):
abc=importlib_abc, util=importlib_util) abc=importlib_abc, util=importlib_util)
class SourceDateEpochTestMeta(SourceDateEpochTestMeta,
type(Source_SimpleTest)):
pass
class SourceDateEpoch_SimpleTest(Source_SimpleTest,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass
class BadBytecodeTest: class BadBytecodeTest:
def import_(self, file, module_name): def import_(self, file, module_name):
@ -617,6 +631,7 @@ class SourceLoaderBadBytecodeTest:
# [bad timestamp] # [bad timestamp]
@util.writes_bytecode_files @util.writes_bytecode_files
@without_source_date_epoch
def test_old_timestamp(self): def test_old_timestamp(self):
# When the timestamp is older than the source, bytecode should be # When the timestamp is older than the source, bytecode should be
# regenerated. # regenerated.

View File

@ -1,3 +1,4 @@
import functools
import importlib.util import importlib.util
import os import os
import py_compile import py_compile
@ -10,7 +11,44 @@ import unittest
from test import support from test import support
class PyCompileTests(unittest.TestCase): def without_source_date_epoch(fxn):
"""Runs function with SOURCE_DATE_EPOCH unset."""
@functools.wraps(fxn)
def wrapper(*args, **kwargs):
with support.EnvironmentVarGuard() as env:
env.unset('SOURCE_DATE_EPOCH')
return fxn(*args, **kwargs)
return wrapper
def with_source_date_epoch(fxn):
"""Runs function with SOURCE_DATE_EPOCH set."""
@functools.wraps(fxn)
def wrapper(*args, **kwargs):
with support.EnvironmentVarGuard() as env:
env['SOURCE_DATE_EPOCH'] = '123456789'
return fxn(*args, **kwargs)
return wrapper
# Run tests with SOURCE_DATE_EPOCH set or unset explicitly.
class SourceDateEpochTestMeta(type(unittest.TestCase)):
def __new__(mcls, name, bases, dct, *, source_date_epoch):
cls = super().__new__(mcls, name, bases, dct)
for attr in dir(cls):
if attr.startswith('test_'):
meth = getattr(cls, attr)
if source_date_epoch:
wrapper = with_source_date_epoch(meth)
else:
wrapper = without_source_date_epoch(meth)
setattr(cls, attr, wrapper)
return cls
class PyCompileTestsBase:
def setUp(self): def setUp(self):
self.directory = tempfile.mkdtemp() self.directory = tempfile.mkdtemp()
@ -99,16 +137,18 @@ class PyCompileTests(unittest.TestCase):
importlib.util.cache_from_source(bad_coding))) importlib.util.cache_from_source(bad_coding)))
def test_source_date_epoch(self): def test_source_date_epoch(self):
testtime = 123456789 py_compile.compile(self.source_path, self.pyc_path)
with support.EnvironmentVarGuard() as env:
env["SOURCE_DATE_EPOCH"] = str(testtime)
py_compile.compile(self.source_path, self.pyc_path)
self.assertTrue(os.path.exists(self.pyc_path)) self.assertTrue(os.path.exists(self.pyc_path))
self.assertFalse(os.path.exists(self.cache_path)) self.assertFalse(os.path.exists(self.cache_path))
with open(self.pyc_path, 'rb') as fp: with open(self.pyc_path, 'rb') as fp:
flags = importlib._bootstrap_external._classify_pyc( flags = importlib._bootstrap_external._classify_pyc(
fp.read(), 'test', {}) fp.read(), 'test', {})
self.assertEqual(flags, 0b11) if os.environ.get('SOURCE_DATE_EPOCH'):
expected_flags = 0b11
else:
expected_flags = 0b00
self.assertEqual(flags, expected_flags)
@unittest.skipIf(sys.flags.optimize > 0, 'test does not work with -O') @unittest.skipIf(sys.flags.optimize > 0, 'test does not work with -O')
def test_double_dot_no_clobber(self): def test_double_dot_no_clobber(self):
@ -153,5 +193,19 @@ class PyCompileTests(unittest.TestCase):
self.assertEqual(flags, 0b1) self.assertEqual(flags, 0b1)
class PyCompileTestsWithSourceEpoch(PyCompileTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=True):
pass
class PyCompileTestsWithoutSourceEpoch(PyCompileTestsBase,
unittest.TestCase,
metaclass=SourceDateEpochTestMeta,
source_date_epoch=False):
pass
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -0,0 +1,3 @@
The :envvar:`SOURCE_DATE_EPOCH` environment variable no longer overrides the
value of the *invalidation_mode* argument to :func:`py_compile.compile`, and
determines its default value instead.