mirror of https://github.com/python/cpython
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the source contents rather than volatile source metadata. See the PEP for details. While a fairly straightforward idea, quite a lot of code had to be modified due to the pervasiveness of pyc implementation details in the codebase. Changes in this commit include:

- The core changes to importlib to understand how to read, validate, and regenerate hash-based pycs.
- Support for generating hash-based pycs in py_compile and compileall.
- Modifications to our siphash implementation to support passing a custom key. We then expose it to importlib through _imp.
- Updates to all places in the interpreter, standard library, and tests that manually generate or parse pyc files to grok the new format.
- Support in the interpreter command line code for long options like --check-hash-based-pycs.
- Tests and documentation for all of the above.
Parent: 28d8d14013
Commit: 42aa93b8ff
@ -458,6 +458,12 @@ Glossary
|
|||
is believed that overcoming this performance issue would make the
|
||||
implementation much more complicated and therefore costlier to maintain.
|
||||
|
||||
|
||||
hash-based pyc
|
||||
A bytecode cache file that uses the hash rather than the last-modified
|
||||
time of the corresponding source file to determine its validity. See
|
||||
:ref:`pyc-invalidation`.
|
||||
|
||||
hashable
|
||||
An object is *hashable* if it has a hash value which never changes during
|
||||
its lifetime (it needs a :meth:`__hash__` method), and can be compared to
|
||||
|
|
|
@ -83,6 +83,16 @@ compile Python sources.
|
|||
If ``0`` is used, then the result of :func:`os.cpu_count()`
|
||||
will be used.
|
||||
|
||||
.. cmdoption:: --invalidation-mode [timestamp|checked-hash|unchecked-hash]
|
||||
|
||||
Control how the generated pycs will be invalidated at runtime. The default
|
||||
setting, ``timestamp``, means that ``.pyc`` files with the source timestamp
|
||||
and size embedded will be generated. The ``checked-hash`` and
|
||||
``unchecked-hash`` values cause hash-based pycs to be generated. Hash-based
|
||||
pycs embed a hash of the source file contents rather than a timestamp. See
|
||||
:ref:`pyc-invalidation` for more information on how Python validates bytecode
|
||||
cache files at runtime.
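As a minimal sketch (the ``proj/`` directory name is hypothetical), the option
can also be driven from Python via :mod:`subprocess`::

   import subprocess
   import sys

   # Byte-compile a hypothetical project tree with checked hash-based pycs
   # instead of the default timestamp-based pycs.
   subprocess.run(
       [sys.executable, "-m", "compileall",
        "--invalidation-mode", "checked-hash", "proj/"],
       check=True,
   )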
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
Added the ``-i``, ``-b`` and ``-h`` options.
|
||||
|
||||
|
@ -91,6 +101,9 @@ compile Python sources.
|
|||
was changed to a multilevel value. ``-b`` will always produce a
|
||||
byte-code file ending in ``.pyc``, never ``.pyo``.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
Added the ``--invalidation-mode`` parameter.
|
||||
|
||||
|
||||
There is no command-line option to control the optimization level used by the
|
||||
:func:`compile` function, because the Python interpreter itself already
|
||||
|
@ -99,7 +112,7 @@ provides the option: :program:`python -O -m compileall`.
|
|||
Public functions
|
||||
----------------
|
||||
|
||||
.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1)
|
||||
.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP)
|
||||
|
||||
Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
|
||||
files along the way. Return a true value if all the files compiled successfully,
|
||||
|
@ -140,6 +153,10 @@ Public functions
|
|||
then sequential compilation will be used as a fallback. If *workers* is
|
||||
lower than ``0``, a :exc:`ValueError` will be raised.
|
||||
|
||||
*invalidation_mode* should be a member of the
|
||||
:class:`py_compile.PycInvalidationMode` enum and controls how the generated
|
||||
pycs are invalidated at runtime.
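For example, a minimal sketch using the Python API (the ``proj/`` directory is
hypothetical)::

   import compileall
   import py_compile

   # Recursively byte-compile a tree, embedding source hashes that the
   # import system re-checks at runtime.
   ok = compileall.compile_dir(
       "proj/",
       quiet=1,
       invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
   )
   print("all files compiled" if ok else "some files failed to compile")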
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
Added the *legacy* and *optimize* parameter.
|
||||
|
||||
|
@ -156,7 +173,10 @@ Public functions
|
|||
.. versionchanged:: 3.6
|
||||
Accepts a :term:`path-like object`.
|
||||
|
||||
.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1)
|
||||
.. versionchanged:: 3.7
|
||||
The *invalidation_mode* parameter was added.
|
||||
|
||||
.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP)
|
||||
|
||||
Compile the file with path *fullname*. Return a true value if the file
|
||||
compiled successfully, and a false value otherwise.
|
||||
|
@ -184,6 +204,10 @@ Public functions
|
|||
*optimize* specifies the optimization level for the compiler. It is passed to
|
||||
the built-in :func:`compile` function.
|
||||
|
||||
*invalidation_mode* should be a member of the
|
||||
:class:`py_compile.PycInvalidationMode` enum and controls how the generated
|
||||
pycs are invalidated at runtime.
|
||||
|
||||
.. versionadded:: 3.2
|
||||
|
||||
.. versionchanged:: 3.5
|
||||
|
@ -193,7 +217,10 @@ Public functions
|
|||
The *legacy* parameter only writes out ``.pyc`` files, not ``.pyo`` files
|
||||
no matter what the value of *optimize* is.
|
||||
|
||||
.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1)
|
||||
.. versionchanged:: 3.7
|
||||
The *invalidation_mode* parameter was added.
|
||||
|
||||
.. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP)
|
||||
|
||||
Byte-compile all the :file:`.py` files found along ``sys.path``. Return a
|
||||
true value if all the files compiled successfully, and a false value otherwise.
|
||||
|
@ -213,6 +240,9 @@ Public functions
|
|||
The *legacy* parameter only writes out ``.pyc`` files, not ``.pyo`` files
|
||||
no matter what the value of *optimize* is.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
The *invalidation_mode* parameter was added.
|
||||
|
||||
To force a recompile of all the :file:`.py` files in the :file:`Lib/`
|
||||
subdirectory and all its subdirectories::
|
||||
|
||||
|
|
|
@ -67,6 +67,9 @@ generically as an :term:`importer`) to participate in the import process.
|
|||
:pep:`489`
|
||||
Multi-phase extension module initialization
|
||||
|
||||
:pep:`552`
|
||||
Deterministic pycs
|
||||
|
||||
:pep:`3120`
|
||||
Using UTF-8 as the Default Source Encoding
|
||||
|
||||
|
@ -1327,6 +1330,14 @@ an :term:`importer`.
|
|||
.. versionchanged:: 3.6
|
||||
Accepts a :term:`path-like object`.
|
||||
|
||||
.. function:: source_hash(source_bytes)
|
||||
|
||||
Return the hash of *source_bytes* as bytes. A hash-based ``.pyc`` file embeds
|
||||
the :func:`source_hash` of the corresponding source file's contents in its
|
||||
header.
|
||||
|
||||
.. versionadded:: 3.7
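A minimal sketch of comparing this hash against an existing hash-based
``.pyc`` (``spam.py`` is a hypothetical module that already has a hash-based
``.pyc``; the 8-byte hash sits at offset 8 of the header)::

   import importlib.util

   source_path = "spam.py"
   pyc_path = importlib.util.cache_from_source(source_path)

   # Hash the current source contents.
   with open(source_path, "rb") as f:
       expected = importlib.util.source_hash(f.read())

   # Bytes 8-16 of a hash-based pyc header hold the recorded source hash.
   with open(pyc_path, "rb") as f:
       recorded = f.read(16)[8:16]

   print("pyc matches source:", recorded == expected)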
|
||||
|
||||
.. class:: LazyLoader(loader)
|
||||
|
||||
A class which postpones the execution of the loader of a module until the
|
||||
|
|
|
@ -27,7 +27,7 @@ byte-code cache files in the directory containing the source code.
|
|||
Exception raised when an error occurs while attempting to compile the file.
|
||||
|
||||
|
||||
.. function:: compile(file, cfile=None, dfile=None, doraise=False, optimize=-1)
|
||||
.. function:: compile(file, cfile=None, dfile=None, doraise=False, optimize=-1, invalidation_mode=PycInvalidationMode.TIMESTAMP)
|
||||
|
||||
Compile a source file to byte-code and write out the byte-code cache file.
|
||||
The source code is loaded from the file named *file*. The byte-code is
|
||||
|
@ -53,6 +53,10 @@ byte-code cache files in the directory containing the source code.
|
|||
:func:`compile` function. The default of ``-1`` selects the optimization
|
||||
level of the current interpreter.
|
||||
|
||||
*invalidation_mode* should be a member of the :class:`PycInvalidationMode`
|
||||
enum and controls how the generated ``.pyc`` files are invalidated at
|
||||
runtime.
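For example (a sketch; ``spam.py`` is a hypothetical source file)::

   import py_compile

   # Checked: at import time Python re-hashes spam.py and compares it with
   # the hash stored in the pyc header.
   py_compile.compile(
       "spam.py",
       invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
   )

   # Unchecked: the pyc is trusted as-is unless --check-hash-based-pycs=always
   # is passed to the interpreter.
   py_compile.compile(
       "spam.py",
       invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
   )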
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
Changed default value of *cfile* to be :PEP:`3147`-compliant. Previous
|
||||
default was *file* + ``'c'`` (``'o'`` if optimization was enabled).
|
||||
|
@ -65,6 +69,41 @@ byte-code cache files in the directory containing the source code.
|
|||
caveat that :exc:`FileExistsError` is raised if *cfile* is a symlink or
|
||||
non-regular file.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
The *invalidation_mode* parameter was added as specified in :pep:`552`.
|
||||
|
||||
|
||||
.. class:: PycInvalidationMode
|
||||
|
||||
An enumeration of possible methods the interpreter can use to determine
|
||||
whether a bytecode file is up to date with a source file. The ``.pyc`` file
|
||||
indicates the desired invalidation mode in its header. See
|
||||
:ref:`pyc-invalidation` for more information on how Python invalidates
|
||||
``.pyc`` files at runtime.
|
||||
|
||||
.. versionadded:: 3.7
|
||||
|
||||
.. attribute:: TIMESTAMP
|
||||
|
||||
The ``.pyc`` file includes the timestamp and size of the source file,
|
||||
which Python will compare against the metadata of the source file at
|
||||
runtime to determine if the ``.pyc`` file needs to be regenerated.
|
||||
|
||||
.. attribute:: CHECKED_HASH
|
||||
|
||||
The ``.pyc`` file includes a hash of the source file content, which Python
|
||||
will compare against the source at runtime to determine if the ``.pyc``
|
||||
file needs to be regenerated.
|
||||
|
||||
.. attribute:: UNCHECKED_HASH
|
||||
|
||||
Like :attr:`CHECKED_HASH`, the ``.pyc`` file includes a hash of the source
|
||||
file content. However, Python will at runtime assume the ``.pyc`` file is
|
||||
up to date and not validate the ``.pyc`` against the source file at all.
|
||||
|
||||
This option is useful when the ``.pyc`` files are kept up to date by some
|
||||
system external to Python like a build system.
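The mode used to write a given ``.pyc`` can be recovered from the flags word
in its header; the helper below is only an illustrative sketch of the bit
layout defined by :pep:`552` (bit 0 marks a hash-based pyc, bit 1 requests
source checking)::

   import struct
   import py_compile

   def pyc_invalidation_mode(pyc_path):
       """Best-effort guess of the mode used to write *pyc_path* (sketch)."""
       with open(pyc_path, "rb") as f:
           magic, flags = struct.unpack("<4sI", f.read(8))
       if flags & 0b1:          # bit 0: hash-based pyc
           if flags & 0b10:     # bit 1: check the source at import time
               return py_compile.PycInvalidationMode.CHECKED_HASH
           return py_compile.PycInvalidationMode.UNCHECKED_HASH
       return py_compile.PycInvalidationMode.TIMESTAMP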
|
||||
|
||||
|
||||
.. function:: main(args=None)
|
||||
|
||||
|
|
|
@ -675,6 +675,33 @@ Here are the exact rules used:
|
|||
:meth:`~importlib.abc.Loader.module_repr` method, if defined, before
|
||||
trying either approach described above. However, the method is deprecated.
|
||||
|
||||
.. _pyc-invalidation:
|
||||
|
||||
Cached bytecode invalidation
|
||||
----------------------------
|
||||
|
||||
Before Python loads cached bytecode from ``.pyc`` file, it checks whether the
|
||||
cache is up-to-date with the source ``.py`` file. By default, Python does this
|
||||
by storing the source's last-modified timestamp and size in the cache file when
|
||||
writing it. At runtime, the import system then validates the cache file by
|
||||
checking the stored metadata in the cache file against the source's
|
||||
metadata.
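A sketch of what that header looks like for a timestamp-based ``.pyc``
(``spam.py`` is a hypothetical, already-compiled module; the field names are
illustrative)::

   import importlib.util
   import struct

   pyc = importlib.util.cache_from_source("spam.py")
   with open(pyc, "rb") as f:
       magic, flags, mtime, size = struct.unpack("<4sIII", f.read(16))

   assert magic == importlib.util.MAGIC_NUMBER
   assert flags == 0            # 0 means timestamp-based invalidation
   print("recorded mtime:", mtime, "recorded size:", size)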
|
||||
|
||||
Python also supports "hash-based" cache files, which store a hash of the source
|
||||
file's contents rather than its metadata. There are two variants of hash-based
|
||||
``.pyc`` files: checked and unchecked. For checked hash-based ``.pyc`` files,
|
||||
Python validates the cache file by hashing the source file and comparing the
|
||||
resulting hash with the hash in the cache file. If a checked hash-based cache
|
||||
file is found to be invalid, Python regenerates it and writes a new checked
|
||||
hash-based cache file. For unchecked hash-based ``.pyc`` files, Python simply
|
||||
assumes the cache file is valid if it exists. The validation behavior of
hash-based ``.pyc`` files may be overridden with the :option:`--check-hash-based-pycs`
|
||||
flag.
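A behavioral sketch of the unchecked variant (run as a script from a writable
directory; the ``_demo`` module is hypothetical)::

   import py_compile

   with open("_demo.py", "w") as f:
       f.write("state = 'old'\n")
   py_compile.compile(
       "_demo.py",
       invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
   )

   # Edit the source; the unchecked pyc is still trusted at import time.
   with open("_demo.py", "w") as f:
       f.write("state = 'new'\n")

   import _demo
   print(_demo.state)   # 'old' under the default --check-hash-based-pycs mode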
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
Added hash-based ``.pyc`` files. Previously, Python only supported
|
||||
timestamp-based invalidation of bytecode caches.
|
||||
|
||||
|
||||
The Path Based Finder
|
||||
=====================
|
||||
|
|
|
@ -210,6 +210,20 @@ Miscellaneous options
|
|||
import of source modules. See also :envvar:`PYTHONDONTWRITEBYTECODE`.
|
||||
|
||||
|
||||
.. cmdoption:: --check-hash-based-pycs default|always|never
|
||||
|
||||
Control the validation behavior of hash-based ``.pyc`` files. See
|
||||
:ref:`pyc-invalidation`. When set to ``default``, checked and unchecked
|
||||
hash-based bytecode cache files are validated according to their default
|
||||
semantics. When set to ``always``, all hash-based ``.pyc`` files, whether
|
||||
checked or unchecked, are validated against their corresponding source
|
||||
file. When set to ``never``, hash-based ``.pyc`` files are not validated
|
||||
against their corresponding source files.
|
||||
|
||||
The semantics of timestamp-based ``.pyc`` files are unaffected by this
|
||||
option.
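The chosen mode is visible from Python code on the private ``_imp`` module (a
sketch; this is an internal attribute, shown here only for illustration)::

   import _imp

   # Prints 'default', 'always', or 'never' depending on the
   # --check-hash-based-pycs flag the interpreter was started with.
   print(_imp.check_hash_based_pycs)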
|
||||
|
||||
|
||||
.. cmdoption:: -d
|
||||
|
||||
Turn on parser debugging output (for expert only, depending on compilation
|
||||
|
|
|
@ -197,6 +197,33 @@ variable is not set in practice.
|
|||
|
||||
See :option:`-X` ``dev`` for the details.
|
||||
|
||||
Hash-based pycs
|
||||
---------------
|
||||
|
||||
Python has traditionally checked the up-to-dateness of bytecode cache files
|
||||
(i.e., ``.pyc`` files) by comparing the source metadata (last-modified timestamp
|
||||
and size) with source metadata saved in the cache file header when it was
|
||||
generated. While effective, this invalidation method has its drawbacks. When
|
||||
filesystem timestamps are too coarse, Python can miss source updates, leading to
|
||||
user confusion. Additionally, having a timestamp in the cache file is
|
||||
problematic for `build reproducibility <https://reproducible-builds.org/>`_ and
|
||||
content-based build systems.
|
||||
|
||||
:pep:`552` extends the pyc format to allow the hash of the source file to be
|
||||
used for invalidation instead of the source timestamp. Such ``.pyc`` files are
|
||||
called "hash-based". By default, Python still uses timestamp-based invalidation
|
||||
and does not generate hash-based ``.pyc`` files at runtime. Hash-based ``.pyc``
|
||||
files may be generated with :mod:`py_compile` or :mod:`compileall`.
|
||||
|
||||
Hash-based ``.pyc`` files come in two variants: checked and unchecked. Python
|
||||
validates checked hash-based ``.pyc`` files against the corresponding source
|
||||
files at runtime but doesn't do so for unchecked hash-based pycs. Unchecked
|
||||
hash-based ``.pyc`` files are a useful performance optimization for environments
|
||||
where a system external to Python (e.g., the build system) is responsible for
|
||||
keeping ``.pyc`` files up-to-date.
|
||||
|
||||
See :ref:`pyc-invalidation` for more information.
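A sketch of the property that motivates checked hash-based pycs: a source edit
is picked up even when the file's size and (faked) timestamp do not change
(run as a script from a writable directory; ``_demo`` is hypothetical)::

   import os
   import py_compile

   with open("_demo.py", "w") as f:
       f.write("state = 'old'\n")
   os.utime("_demo.py", (50, 50))
   py_compile.compile(
       "_demo.py",
       invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
   )

   # Same size, same mtime -- only the contents differ.
   with open("_demo.py", "w") as f:
       f.write("state = 'new'\n")
   os.utime("_demo.py", (50, 50))

   import _demo
   print(_demo.state)   # 'new': the hash mismatch forces recompilation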
|
||||
|
||||
|
||||
Other Language Changes
|
||||
======================
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
#ifndef Py_INTERNAL_HASH_H
|
||||
#define Py_INTERNAL_HASH_H
|
||||
|
||||
uint64_t _Py_KeyedHash(uint64_t, const char *, Py_ssize_t);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,6 @@
|
|||
#ifndef Py_INTERNAL_IMPORT_H
|
||||
#define Py_INTERNAL_IMPORT_H
|
||||
|
||||
extern const char *_Py_CheckHashBasedPycsMode;
|
||||
|
||||
#endif
|
|
@ -12,7 +12,14 @@ PyAPI_DATA(wchar_t *) _PyOS_optarg;
|
|||
|
||||
PyAPI_FUNC(void) _PyOS_ResetGetOpt(void);
|
||||
|
||||
PyAPI_FUNC(int) _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring);
|
||||
typedef struct {
|
||||
const wchar_t *name;
|
||||
int has_arg;
|
||||
int val;
|
||||
} _PyOS_LongOption;
|
||||
|
||||
PyAPI_FUNC(int) _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring,
|
||||
const _PyOS_LongOption *longopts, int *longindex);
|
||||
#endif /* !Py_LIMITED_API */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -52,7 +52,8 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
|
|||
maxlevels=maxlevels - 1, quiet=quiet)
|
||||
|
||||
def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
|
||||
quiet=0, legacy=False, optimize=-1, workers=1):
|
||||
quiet=0, legacy=False, optimize=-1, workers=1,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
|
||||
"""Byte-compile all modules in the given directory tree.
|
||||
|
||||
Arguments (only dir is required):
|
||||
|
@ -67,6 +68,7 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
|
|||
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
|
||||
optimize: optimization level or -1 for level of the interpreter
|
||||
workers: maximum number of parallel workers
|
||||
invalidation_mode: how the up-to-dateness of the pyc will be checked
|
||||
"""
|
||||
if workers is not None and workers < 0:
|
||||
raise ValueError('workers must be greater or equal to 0')
|
||||
|
@ -81,18 +83,20 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
|
|||
ddir=ddir, force=force,
|
||||
rx=rx, quiet=quiet,
|
||||
legacy=legacy,
|
||||
optimize=optimize),
|
||||
optimize=optimize,
|
||||
invalidation_mode=invalidation_mode),
|
||||
files)
|
||||
success = min(results, default=True)
|
||||
else:
|
||||
for file in files:
|
||||
if not compile_file(file, ddir, force, rx, quiet,
|
||||
legacy, optimize):
|
||||
legacy, optimize, invalidation_mode):
|
||||
success = False
|
||||
return success
|
||||
|
||||
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
||||
legacy=False, optimize=-1):
|
||||
legacy=False, optimize=-1,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
|
||||
"""Byte-compile one file.
|
||||
|
||||
Arguments (only fullname is required):
|
||||
|
@ -105,6 +109,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
|||
no output with 2
|
||||
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
|
||||
optimize: optimization level or -1 for level of the interpreter
|
||||
invalidation_mode: how the up-to-dateness of the pyc will be checked
|
||||
"""
|
||||
success = True
|
||||
if quiet < 2 and isinstance(fullname, os.PathLike):
|
||||
|
@ -134,10 +139,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
|||
if not force:
|
||||
try:
|
||||
mtime = int(os.stat(fullname).st_mtime)
|
||||
expect = struct.pack('<4sl', importlib.util.MAGIC_NUMBER,
|
||||
mtime)
|
||||
expect = struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
|
||||
0, mtime)
|
||||
with open(cfile, 'rb') as chandle:
|
||||
actual = chandle.read(8)
|
||||
actual = chandle.read(12)
|
||||
if expect == actual:
|
||||
return success
|
||||
except OSError:
|
||||
|
@ -146,7 +151,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
|||
print('Compiling {!r}...'.format(fullname))
|
||||
try:
|
||||
ok = py_compile.compile(fullname, cfile, dfile, True,
|
||||
optimize=optimize)
|
||||
optimize=optimize,
|
||||
invalidation_mode=invalidation_mode)
|
||||
except py_compile.PyCompileError as err:
|
||||
success = False
|
||||
if quiet >= 2:
|
||||
|
@ -175,7 +181,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
|
|||
return success
|
||||
|
||||
def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
|
||||
legacy=False, optimize=-1):
|
||||
legacy=False, optimize=-1,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
|
||||
"""Byte-compile all module on sys.path.
|
||||
|
||||
Arguments (all optional):
|
||||
|
@ -186,6 +193,7 @@ def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
|
|||
quiet: as for compile_dir() (default 0)
|
||||
legacy: as for compile_dir() (default False)
|
||||
optimize: as for compile_dir() (default -1)
|
||||
invalidation_mode: as for compile_dir()
|
||||
"""
|
||||
success = True
|
||||
for dir in sys.path:
|
||||
|
@ -193,9 +201,16 @@ def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
|
|||
if quiet < 2:
|
||||
print('Skipping current directory')
|
||||
else:
|
||||
success = success and compile_dir(dir, maxlevels, None,
|
||||
force, quiet=quiet,
|
||||
legacy=legacy, optimize=optimize)
|
||||
success = success and compile_dir(
|
||||
dir,
|
||||
maxlevels,
|
||||
None,
|
||||
force,
|
||||
quiet=quiet,
|
||||
legacy=legacy,
|
||||
optimize=optimize,
|
||||
invalidation_mode=invalidation_mode,
|
||||
)
|
||||
return success
|
||||
|
||||
|
||||
|
@ -238,6 +253,11 @@ def main():
|
|||
'to the equivalent of -l sys.path'))
|
||||
parser.add_argument('-j', '--workers', default=1,
|
||||
type=int, help='Run compileall concurrently')
|
||||
invalidation_modes = [mode.name.lower().replace('_', '-')
|
||||
for mode in py_compile.PycInvalidationMode]
|
||||
parser.add_argument('--invalidation-mode', default='timestamp',
|
||||
choices=sorted(invalidation_modes),
|
||||
help='How the pycs will be invalidated at runtime')
|
||||
|
||||
args = parser.parse_args()
|
||||
compile_dests = args.compile_dest
|
||||
|
@ -266,23 +286,29 @@ def main():
|
|||
if args.workers is not None:
|
||||
args.workers = args.workers or None
|
||||
|
||||
ivl_mode = args.invalidation_mode.replace('-', '_').upper()
|
||||
invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
|
||||
|
||||
success = True
|
||||
try:
|
||||
if compile_dests:
|
||||
for dest in compile_dests:
|
||||
if os.path.isfile(dest):
|
||||
if not compile_file(dest, args.ddir, args.force, args.rx,
|
||||
args.quiet, args.legacy):
|
||||
args.quiet, args.legacy,
|
||||
invalidation_mode=invalidation_mode):
|
||||
success = False
|
||||
else:
|
||||
if not compile_dir(dest, maxlevels, args.ddir,
|
||||
args.force, args.rx, args.quiet,
|
||||
args.legacy, workers=args.workers):
|
||||
args.legacy, workers=args.workers,
|
||||
invalidation_mode=invalidation_mode):
|
||||
success = False
|
||||
return success
|
||||
else:
|
||||
return compile_path(legacy=args.legacy, force=args.force,
|
||||
quiet=args.quiet)
|
||||
quiet=args.quiet,
|
||||
invalidation_mode=invalidation_mode)
|
||||
except KeyboardInterrupt:
|
||||
if args.quiet < 2:
|
||||
print("\n[interrupted]")
|
||||
|
|
|
@ -242,6 +242,7 @@ _code_type = type(_write_atomic.__code__)
|
|||
# Python 3.6rc1 3379 (more thorough __class__ validation #23722)
|
||||
# Python 3.7a0 3390 (add LOAD_METHOD and CALL_METHOD opcodes)
|
||||
# Python 3.7a0 3391 (update GET_AITER #31709)
|
||||
# Python 3.7a0 3392 (PEP 552: Deterministic pycs)
|
||||
#
|
||||
# MAGIC must change whenever the bytecode emitted by the compiler may no
|
||||
# longer be understood by older implementations of the eval loop (usually
|
||||
|
@ -250,7 +251,7 @@ _code_type = type(_write_atomic.__code__)
|
|||
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
|
||||
# in PC/launcher.c must also be updated.
|
||||
|
||||
MAGIC_NUMBER = (3391).to_bytes(2, 'little') + b'\r\n'
|
||||
MAGIC_NUMBER = (3392).to_bytes(2, 'little') + b'\r\n'
|
||||
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
|
||||
|
||||
_PYCACHE = '__pycache__'
|
||||
|
@ -429,63 +430,93 @@ def _find_module_shim(self, fullname):
|
|||
return loader
|
||||
|
||||
|
||||
def _validate_bytecode_header(data, source_stats=None, name=None, path=None):
|
||||
"""Validate the header of the passed-in bytecode against source_stats (if
|
||||
given) and returning the bytecode that can be compiled by compile().
|
||||
def _classify_pyc(data, name, exc_details):
|
||||
"""Perform basic validity checking of a pyc header and return the flags field,
|
||||
which determines how the pyc should be further validated against the source.
|
||||
|
||||
All other arguments are used to enhance error reporting.
|
||||
*data* is the contents of the pyc file. (Only the first 16 bytes are
|
||||
required, though.)
|
||||
|
||||
ImportError is raised when the magic number is incorrect or the bytecode is
|
||||
found to be stale. EOFError is raised when the data is found to be
|
||||
truncated.
|
||||
*name* is the name of the module being imported. It is used for logging.
|
||||
|
||||
*exc_details* is a dictionary passed to ImportError if it is raised for
|
||||
improved debugging.
|
||||
|
||||
ImportError is raised when the magic number is incorrect or when the flags
|
||||
field is invalid. EOFError is raised when the data is found to be truncated.
|
||||
|
||||
"""
|
||||
exc_details = {}
|
||||
if name is not None:
|
||||
exc_details['name'] = name
|
||||
else:
|
||||
# To prevent having to make all messages have a conditional name.
|
||||
name = '<bytecode>'
|
||||
if path is not None:
|
||||
exc_details['path'] = path
|
||||
magic = data[:4]
|
||||
raw_timestamp = data[4:8]
|
||||
raw_size = data[8:12]
|
||||
if magic != MAGIC_NUMBER:
|
||||
message = 'bad magic number in {!r}: {!r}'.format(name, magic)
|
||||
message = f'bad magic number in {name!r}: {magic!r}'
|
||||
_bootstrap._verbose_message('{}', message)
|
||||
raise ImportError(message, **exc_details)
|
||||
elif len(raw_timestamp) != 4:
|
||||
message = 'reached EOF while reading timestamp in {!r}'.format(name)
|
||||
if len(data) < 16:
|
||||
message = f'reached EOF while reading pyc header of {name!r}'
|
||||
_bootstrap._verbose_message('{}', message)
|
||||
raise EOFError(message)
|
||||
elif len(raw_size) != 4:
|
||||
message = 'reached EOF while reading size of source in {!r}'.format(name)
|
||||
flags = _r_long(data[4:8])
|
||||
# Only the first two flags are defined.
|
||||
if flags & ~0b11:
|
||||
message = f'invalid flags {flags!r} in {name!r}'
|
||||
raise ImportError(message, **exc_details)
|
||||
return flags
|
||||
|
||||
|
||||
def _validate_timestamp_pyc(data, source_mtime, source_size, name,
|
||||
exc_details):
|
||||
"""Validate a pyc against the source last-modified time.
|
||||
|
||||
*data* is the contents of the pyc file. (Only the first 16 bytes are
|
||||
required.)
|
||||
|
||||
*source_mtime* is the last modified timestamp of the source file.
|
||||
|
||||
*source_size* is None or the size of the source file in bytes.
|
||||
|
||||
*name* is the name of the module being imported. It is used for logging.
|
||||
|
||||
*exc_details* is a dictionary passed to ImportError if it is raised for
|
||||
improved debugging.
|
||||
|
||||
An ImportError is raised if the bytecode is stale.
|
||||
|
||||
"""
|
||||
if _r_long(data[8:12]) != (source_mtime & 0xFFFFFFFF):
|
||||
message = f'bytecode is stale for {name!r}'
|
||||
_bootstrap._verbose_message('{}', message)
|
||||
raise EOFError(message)
|
||||
if source_stats is not None:
|
||||
try:
|
||||
source_mtime = int(source_stats['mtime'])
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
if _r_long(raw_timestamp) != source_mtime:
|
||||
message = 'bytecode is stale for {!r}'.format(name)
|
||||
_bootstrap._verbose_message('{}', message)
|
||||
raise ImportError(message, **exc_details)
|
||||
try:
|
||||
source_size = source_stats['size'] & 0xFFFFFFFF
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
if _r_long(raw_size) != source_size:
|
||||
raise ImportError('bytecode is stale for {!r}'.format(name),
|
||||
**exc_details)
|
||||
return data[12:]
|
||||
raise ImportError(message, **exc_details)
|
||||
if (source_size is not None and
|
||||
_r_long(data[12:16]) != (source_size & 0xFFFFFFFF)):
|
||||
raise ImportError(f'bytecode is stale for {name!r}', **exc_details)
|
||||
|
||||
|
||||
def _validate_hash_pyc(data, source_hash, name, exc_details):
|
||||
"""Validate a hash-based pyc by checking the real source hash against the one in
|
||||
the pyc header.
|
||||
|
||||
*data* is the contents of the pyc file. (Only the first 16 bytes are
|
||||
required.)
|
||||
|
||||
*source_hash* is the importlib.util.source_hash() of the source file.
|
||||
|
||||
*name* is the name of the module being imported. It is used for logging.
|
||||
|
||||
*exc_details* is a dictionary passed to ImportError if it is raised for
|
||||
improved debugging.
|
||||
|
||||
An ImportError is raised if the bytecode is stale.
|
||||
|
||||
"""
|
||||
if data[8:16] != source_hash:
|
||||
raise ImportError(
|
||||
f'hash in bytecode doesn\'t match hash of source {name!r}',
|
||||
**exc_details,
|
||||
)
|
||||
|
||||
|
||||
def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None):
|
||||
"""Compile bytecode as returned by _validate_bytecode_header()."""
|
||||
"""Compile bytecode as found in a pyc."""
|
||||
code = marshal.loads(data)
|
||||
if isinstance(code, _code_type):
|
||||
_bootstrap._verbose_message('code object from {!r}', bytecode_path)
|
||||
|
@ -496,16 +527,28 @@ def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None):
|
|||
raise ImportError('Non-code object in {!r}'.format(bytecode_path),
|
||||
name=name, path=bytecode_path)
|
||||
|
||||
def _code_to_bytecode(code, mtime=0, source_size=0):
|
||||
"""Compile a code object into bytecode for writing out to a byte-compiled
|
||||
file."""
|
||||
|
||||
def _code_to_timestamp_pyc(code, mtime=0, source_size=0):
|
||||
"Produce the data for a timestamp-based pyc."
|
||||
data = bytearray(MAGIC_NUMBER)
|
||||
data.extend(_w_long(0))
|
||||
data.extend(_w_long(mtime))
|
||||
data.extend(_w_long(source_size))
|
||||
data.extend(marshal.dumps(code))
|
||||
return data
|
||||
|
||||
|
||||
def _code_to_hash_pyc(code, source_hash, checked=True):
|
||||
"Produce the data for a hash-based pyc."
|
||||
data = bytearray(MAGIC_NUMBER)
|
||||
flags = 0b1 | checked << 1
|
||||
data.extend(_w_long(flags))
|
||||
assert len(source_hash) == 8
|
||||
data.extend(source_hash)
|
||||
data.extend(marshal.dumps(code))
|
||||
return data
|
||||
|
||||
|
||||
def decode_source(source_bytes):
|
||||
"""Decode bytes representing source code and return the string.
|
||||
|
||||
|
@ -751,6 +794,10 @@ class SourceLoader(_LoaderBasics):
|
|||
"""
|
||||
source_path = self.get_filename(fullname)
|
||||
source_mtime = None
|
||||
source_bytes = None
|
||||
source_hash = None
|
||||
hash_based = False
|
||||
check_source = True
|
||||
try:
|
||||
bytecode_path = cache_from_source(source_path)
|
||||
except NotImplementedError:
|
||||
|
@ -767,10 +814,34 @@ class SourceLoader(_LoaderBasics):
|
|||
except OSError:
|
||||
pass
|
||||
else:
|
||||
exc_details = {
|
||||
'name': fullname,
|
||||
'path': bytecode_path,
|
||||
}
|
||||
try:
|
||||
bytes_data = _validate_bytecode_header(data,
|
||||
source_stats=st, name=fullname,
|
||||
path=bytecode_path)
|
||||
flags = _classify_pyc(data, fullname, exc_details)
|
||||
bytes_data = memoryview(data)[16:]
|
||||
hash_based = flags & 0b1 != 0
|
||||
if hash_based:
|
||||
check_source = flags & 0b10 != 0
|
||||
if (_imp.check_hash_based_pycs != 'never' and
|
||||
(check_source or
|
||||
_imp.check_hash_based_pycs == 'always')):
|
||||
source_bytes = self.get_data(source_path)
|
||||
source_hash = _imp.source_hash(
|
||||
_RAW_MAGIC_NUMBER,
|
||||
source_bytes,
|
||||
)
|
||||
_validate_hash_pyc(data, source_hash, fullname,
|
||||
exc_details)
|
||||
else:
|
||||
_validate_timestamp_pyc(
|
||||
data,
|
||||
source_mtime,
|
||||
st['size'],
|
||||
fullname,
|
||||
exc_details,
|
||||
)
|
||||
except (ImportError, EOFError):
|
||||
pass
|
||||
else:
|
||||
|
@ -779,13 +850,19 @@ class SourceLoader(_LoaderBasics):
|
|||
return _compile_bytecode(bytes_data, name=fullname,
|
||||
bytecode_path=bytecode_path,
|
||||
source_path=source_path)
|
||||
source_bytes = self.get_data(source_path)
|
||||
if source_bytes is None:
|
||||
source_bytes = self.get_data(source_path)
|
||||
code_object = self.source_to_code(source_bytes, source_path)
|
||||
_bootstrap._verbose_message('code object from {}', source_path)
|
||||
if (not sys.dont_write_bytecode and bytecode_path is not None and
|
||||
source_mtime is not None):
|
||||
data = _code_to_bytecode(code_object, source_mtime,
|
||||
len(source_bytes))
|
||||
if hash_based:
|
||||
if source_hash is None:
|
||||
source_hash = _imp.source_hash(source_bytes)
|
||||
data = _code_to_hash_pyc(code_object, source_hash, check_source)
|
||||
else:
|
||||
data = _code_to_timestamp_pyc(code_object, source_mtime,
|
||||
len(source_bytes))
|
||||
try:
|
||||
self._cache_bytecode(source_path, bytecode_path, data)
|
||||
_bootstrap._verbose_message('wrote {!r}', bytecode_path)
|
||||
|
@ -887,8 +964,18 @@ class SourcelessFileLoader(FileLoader, _LoaderBasics):
|
|||
def get_code(self, fullname):
|
||||
path = self.get_filename(fullname)
|
||||
data = self.get_data(path)
|
||||
bytes_data = _validate_bytecode_header(data, name=fullname, path=path)
|
||||
return _compile_bytecode(bytes_data, name=fullname, bytecode_path=path)
|
||||
# Call _classify_pyc to do basic validation of the pyc but ignore the
|
||||
# result. There's no source to check against.
|
||||
exc_details = {
|
||||
'name': fullname,
|
||||
'path': path,
|
||||
}
|
||||
_classify_pyc(data, fullname, exc_details)
|
||||
return _compile_bytecode(
|
||||
memoryview(data)[16:],
|
||||
name=fullname,
|
||||
bytecode_path=path,
|
||||
)
|
||||
|
||||
def get_source(self, fullname):
|
||||
"""Return None as there is no source code."""
|
||||
|
|
|
@ -5,18 +5,25 @@ from ._bootstrap import _resolve_name
|
|||
from ._bootstrap import spec_from_loader
|
||||
from ._bootstrap import _find_spec
|
||||
from ._bootstrap_external import MAGIC_NUMBER
|
||||
from ._bootstrap_external import _RAW_MAGIC_NUMBER
|
||||
from ._bootstrap_external import cache_from_source
|
||||
from ._bootstrap_external import decode_source
|
||||
from ._bootstrap_external import source_from_cache
|
||||
from ._bootstrap_external import spec_from_file_location
|
||||
|
||||
from contextlib import contextmanager
|
||||
import _imp
|
||||
import functools
|
||||
import sys
|
||||
import types
|
||||
import warnings
|
||||
|
||||
|
||||
def source_hash(source_bytes):
|
||||
"Return the hash of *source_bytes* as used in hash-based pyc files."
|
||||
return _imp.source_hash(_RAW_MAGIC_NUMBER, source_bytes)
|
||||
|
||||
|
||||
def resolve_name(name, package):
|
||||
"""Resolve a relative module name to an absolute one."""
|
||||
if not name.startswith('.'):
|
||||
|
|
|
@ -287,11 +287,12 @@ class ModuleFinder:
|
|||
co = compile(fp.read()+'\n', pathname, 'exec')
|
||||
elif type == imp.PY_COMPILED:
|
||||
try:
|
||||
marshal_data = importlib._bootstrap_external._validate_bytecode_header(fp.read())
|
||||
data = fp.read()
|
||||
importlib._bootstrap_external._classify_pyc(data, fqname, {})
|
||||
except ImportError as exc:
|
||||
self.msgout(2, "raise ImportError: " + str(exc), pathname)
|
||||
raise
|
||||
co = marshal.loads(marshal_data)
|
||||
co = marshal.loads(memoryview(data)[16:])
|
||||
else:
|
||||
co = None
|
||||
m = self.add_module(fqname)
|
||||
|
|
|
@ -46,7 +46,7 @@ def read_code(stream):
|
|||
if magic != importlib.util.MAGIC_NUMBER:
|
||||
return None
|
||||
|
||||
stream.read(8) # Skip timestamp and size
|
||||
stream.read(12) # Skip rest of the header
|
||||
return marshal.load(stream)
|
||||
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
This module has intimate knowledge of the format of .pyc files.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import importlib._bootstrap_external
|
||||
import importlib.machinery
|
||||
import importlib.util
|
||||
|
@ -11,7 +12,7 @@ import os.path
|
|||
import sys
|
||||
import traceback
|
||||
|
||||
__all__ = ["compile", "main", "PyCompileError"]
|
||||
__all__ = ["compile", "main", "PyCompileError", "PycInvalidationMode"]
|
||||
|
||||
|
||||
class PyCompileError(Exception):
|
||||
|
@ -62,7 +63,14 @@ class PyCompileError(Exception):
|
|||
return self.msg
|
||||
|
||||
|
||||
def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
|
||||
class PycInvalidationMode(enum.Enum):
|
||||
TIMESTAMP = 1
|
||||
CHECKED_HASH = 2
|
||||
UNCHECKED_HASH = 3
|
||||
|
||||
|
||||
def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1,
|
||||
invalidation_mode=PycInvalidationMode.TIMESTAMP):
|
||||
"""Byte-compile one Python source file to Python bytecode.
|
||||
|
||||
:param file: The source file name.
|
||||
|
@ -79,6 +87,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
|
|||
:param optimize: The optimization level for the compiler. Valid values
|
||||
are -1, 0, 1 and 2. A value of -1 means to use the optimization
|
||||
level of the current interpreter, as given by -O command line options.
|
||||
:param invalidation_mode:
|
||||
|
||||
:return: Path to the resulting byte compiled file.
|
||||
|
||||
|
@ -136,9 +145,17 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
|
|||
os.makedirs(dirname)
|
||||
except FileExistsError:
|
||||
pass
|
||||
source_stats = loader.path_stats(file)
|
||||
bytecode = importlib._bootstrap_external._code_to_bytecode(
|
||||
if invalidation_mode == PycInvalidationMode.TIMESTAMP:
|
||||
source_stats = loader.path_stats(file)
|
||||
bytecode = importlib._bootstrap_external._code_to_timestamp_pyc(
|
||||
code, source_stats['mtime'], source_stats['size'])
|
||||
else:
|
||||
source_hash = importlib.util.source_hash(source_bytes)
|
||||
bytecode = importlib._bootstrap_external._code_to_hash_pyc(
|
||||
code,
|
||||
source_hash,
|
||||
(invalidation_mode == PycInvalidationMode.CHECKED_HASH),
|
||||
)
|
||||
mode = importlib._bootstrap_external._calc_mode(file)
|
||||
importlib._bootstrap_external._write_atomic(cfile, bytecode, mode)
|
||||
return cfile
|
||||
|
|
|
@ -48,9 +48,9 @@ class CompileallTests(unittest.TestCase):
|
|||
|
||||
def data(self):
|
||||
with open(self.bc_path, 'rb') as file:
|
||||
data = file.read(8)
|
||||
data = file.read(12)
|
||||
mtime = int(os.stat(self.source_path).st_mtime)
|
||||
compare = struct.pack('<4sl', importlib.util.MAGIC_NUMBER, mtime)
|
||||
compare = struct.pack('<4sll', importlib.util.MAGIC_NUMBER, 0, mtime)
|
||||
return data, compare
|
||||
|
||||
@unittest.skipUnless(hasattr(os, 'stat'), 'test needs os.stat()')
|
||||
|
@ -70,8 +70,8 @@ class CompileallTests(unittest.TestCase):
|
|||
|
||||
def test_mtime(self):
|
||||
# Test a change in mtime leads to a new .pyc.
|
||||
self.recreation_check(struct.pack('<4sl', importlib.util.MAGIC_NUMBER,
|
||||
1))
|
||||
self.recreation_check(struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
|
||||
0, 1))
|
||||
|
||||
def test_magic_number(self):
|
||||
# Test a change in the magic number leads to a new .pyc.
|
||||
|
@ -519,6 +519,19 @@ class CommandLineTests(unittest.TestCase):
|
|||
out = self.assertRunOK('badfilename')
|
||||
self.assertRegex(out, b"Can't list 'badfilename'")
|
||||
|
||||
def test_pyc_invalidation_mode(self):
|
||||
script_helper.make_script(self.pkgdir, 'f1', '')
|
||||
pyc = importlib.util.cache_from_source(
|
||||
os.path.join(self.pkgdir, 'f1.py'))
|
||||
self.assertRunOK('--invalidation-mode=checked-hash', self.pkgdir)
|
||||
with open(pyc, 'rb') as fp:
|
||||
data = fp.read()
|
||||
self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b11)
|
||||
self.assertRunOK('--invalidation-mode=unchecked-hash', self.pkgdir)
|
||||
with open(pyc, 'rb') as fp:
|
||||
data = fp.read()
|
||||
self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b01)
|
||||
|
||||
@skipUnless(_have_multiprocessing, "requires multiprocessing")
|
||||
def test_workers(self):
|
||||
bar2fn = script_helper.make_script(self.directory, 'bar2', '')
|
||||
|
|
|
@ -4,11 +4,13 @@ import os
|
|||
import os.path
|
||||
import sys
|
||||
from test import support
|
||||
from test.support import script_helper
|
||||
import unittest
|
||||
import warnings
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('ignore', DeprecationWarning)
|
||||
import imp
|
||||
import _imp
|
||||
|
||||
|
||||
def requires_load_dynamic(meth):
|
||||
|
@ -329,6 +331,25 @@ class ImportTests(unittest.TestCase):
|
|||
with self.assertRaises(TypeError):
|
||||
create_dynamic(BadSpec())
|
||||
|
||||
def test_source_hash(self):
|
||||
self.assertEqual(_imp.source_hash(42, b'hi'), b'\xc6\xe7Z\r\x03:}\xab')
|
||||
self.assertEqual(_imp.source_hash(43, b'hi'), b'\x85\x9765\xf8\x9a\x8b9')
|
||||
|
||||
def test_pyc_invalidation_mode_from_cmdline(self):
|
||||
cases = [
|
||||
([], "default"),
|
||||
(["--check-hash-based-pycs", "default"], "default"),
|
||||
(["--check-hash-based-pycs", "always"], "always"),
|
||||
(["--check-hash-based-pycs", "never"], "never"),
|
||||
]
|
||||
for interp_args, expected in cases:
|
||||
args = interp_args + [
|
||||
"-c",
|
||||
"import _imp; print(_imp.check_hash_based_pycs)",
|
||||
]
|
||||
res = script_helper.assert_python_ok(*args)
|
||||
self.assertEqual(res.out.strip().decode('utf-8'), expected)
|
||||
|
||||
|
||||
class ReloadTests(unittest.TestCase):
|
||||
|
||||
|
|
|
@ -598,7 +598,7 @@ func_filename = func.__code__.co_filename
|
|||
def test_foreign_code(self):
|
||||
py_compile.compile(self.file_name)
|
||||
with open(self.compiled_name, "rb") as f:
|
||||
header = f.read(12)
|
||||
header = f.read(16)
|
||||
code = marshal.load(f)
|
||||
constants = list(code.co_consts)
|
||||
foreign_code = importlib.import_module.__code__
|
||||
|
|
|
@ -235,6 +235,123 @@ class SimpleTest(abc.LoaderTests):
|
|||
warnings.simplefilter('ignore', DeprecationWarning)
|
||||
loader.load_module('bad name')
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_checked_hash_based_pyc(self):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
source = mapping['_temp']
|
||||
pyc = self.util.cache_from_source(source)
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "old"')
|
||||
os.utime(source, (50, 50))
|
||||
py_compile.compile(
|
||||
source,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
|
||||
)
|
||||
loader = self.machinery.SourceFileLoader('_temp', source)
|
||||
mod = types.ModuleType('_temp')
|
||||
mod.__spec__ = self.util.spec_from_loader('_temp', loader)
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'old')
|
||||
# Write a new source with the same mtime and size as before.
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "new"')
|
||||
os.utime(source, (50, 50))
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'new')
|
||||
with open(pyc, 'rb') as fp:
|
||||
data = fp.read()
|
||||
self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b11)
|
||||
self.assertEqual(
|
||||
self.util.source_hash(b'state = "new"'),
|
||||
data[8:16],
|
||||
)
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_overriden_checked_hash_based_pyc(self):
|
||||
with util.create_modules('_temp') as mapping, \
|
||||
unittest.mock.patch('_imp.check_hash_based_pycs', 'never'):
|
||||
source = mapping['_temp']
|
||||
pyc = self.util.cache_from_source(source)
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "old"')
|
||||
os.utime(source, (50, 50))
|
||||
py_compile.compile(
|
||||
source,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
|
||||
)
|
||||
loader = self.machinery.SourceFileLoader('_temp', source)
|
||||
mod = types.ModuleType('_temp')
|
||||
mod.__spec__ = self.util.spec_from_loader('_temp', loader)
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'old')
|
||||
# Write a new source with the same mtime and size as before.
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "new"')
|
||||
os.utime(source, (50, 50))
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'old')
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_unchecked_hash_based_pyc(self):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
source = mapping['_temp']
|
||||
pyc = self.util.cache_from_source(source)
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "old"')
|
||||
os.utime(source, (50, 50))
|
||||
py_compile.compile(
|
||||
source,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
|
||||
)
|
||||
loader = self.machinery.SourceFileLoader('_temp', source)
|
||||
mod = types.ModuleType('_temp')
|
||||
mod.__spec__ = self.util.spec_from_loader('_temp', loader)
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'old')
|
||||
# Update the source file, which should be ignored.
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "new"')
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'old')
|
||||
with open(pyc, 'rb') as fp:
|
||||
data = fp.read()
|
||||
self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1)
|
||||
self.assertEqual(
|
||||
self.util.source_hash(b'state = "old"'),
|
||||
data[8:16],
|
||||
)
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_overiden_unchecked_hash_based_pyc(self):
|
||||
with util.create_modules('_temp') as mapping, \
|
||||
unittest.mock.patch('_imp.check_hash_based_pycs', 'always'):
|
||||
source = mapping['_temp']
|
||||
pyc = self.util.cache_from_source(source)
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "old"')
|
||||
os.utime(source, (50, 50))
|
||||
py_compile.compile(
|
||||
source,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
|
||||
)
|
||||
loader = self.machinery.SourceFileLoader('_temp', source)
|
||||
mod = types.ModuleType('_temp')
|
||||
mod.__spec__ = self.util.spec_from_loader('_temp', loader)
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'old')
|
||||
# Update the source file, which should be ignored.
|
||||
with open(source, 'wb') as fp:
|
||||
fp.write(b'state = "new"')
|
||||
loader.exec_module(mod)
|
||||
self.assertEqual(mod.state, 'new')
|
||||
with open(pyc, 'rb') as fp:
|
||||
data = fp.read()
|
||||
self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1)
|
||||
self.assertEqual(
|
||||
self.util.source_hash(b'state = "new"'),
|
||||
data[8:16],
|
||||
)
|
||||
|
||||
|
||||
(Frozen_SimpleTest,
|
||||
Source_SimpleTest
|
||||
|
@ -247,15 +364,17 @@ class BadBytecodeTest:
|
|||
def import_(self, file, module_name):
|
||||
raise NotImplementedError
|
||||
|
||||
def manipulate_bytecode(self, name, mapping, manipulator, *,
|
||||
del_source=False):
|
||||
def manipulate_bytecode(self,
|
||||
name, mapping, manipulator, *,
|
||||
del_source=False,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
|
||||
"""Manipulate the bytecode of a module by passing it into a callable
|
||||
that returns what to use as the new bytecode."""
|
||||
try:
|
||||
del sys.modules['_temp']
|
||||
except KeyError:
|
||||
pass
|
||||
py_compile.compile(mapping[name])
|
||||
py_compile.compile(mapping[name], invalidation_mode=invalidation_mode)
|
||||
if not del_source:
|
||||
bytecode_path = self.util.cache_from_source(mapping[name])
|
||||
else:
|
||||
|
@ -294,24 +413,51 @@ class BadBytecodeTest:
|
|||
del_source=del_source)
|
||||
test('_temp', mapping, bc_path)
|
||||
|
||||
def _test_partial_timestamp(self, test, *, del_source=False):
|
||||
def _test_partial_flags(self, test, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: bc[:7],
|
||||
del_source=del_source)
|
||||
lambda bc: bc[:7],
|
||||
del_source=del_source)
|
||||
test('_temp', mapping, bc_path)
|
||||
|
||||
def _test_partial_size(self, test, *, del_source=False):
|
||||
def _test_partial_hash(self, test, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode(
|
||||
'_temp',
|
||||
mapping,
|
||||
lambda bc: bc[:13],
|
||||
del_source=del_source,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
|
||||
)
|
||||
test('_temp', mapping, bc_path)
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode(
|
||||
'_temp',
|
||||
mapping,
|
||||
lambda bc: bc[:13],
|
||||
del_source=del_source,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
|
||||
)
|
||||
test('_temp', mapping, bc_path)
|
||||
|
||||
def _test_partial_timestamp(self, test, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: bc[:11],
|
||||
del_source=del_source)
|
||||
test('_temp', mapping, bc_path)
|
||||
|
||||
def _test_partial_size(self, test, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: bc[:15],
|
||||
del_source=del_source)
|
||||
test('_temp', mapping, bc_path)
|
||||
|
||||
def _test_no_marshal(self, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: bc[:12],
|
||||
lambda bc: bc[:16],
|
||||
del_source=del_source)
|
||||
file_path = mapping['_temp'] if not del_source else bc_path
|
||||
with self.assertRaises(EOFError):
|
||||
|
@ -320,7 +466,7 @@ class BadBytecodeTest:
|
|||
def _test_non_code_marshal(self, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bytecode_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: bc[:12] + marshal.dumps(b'abcd'),
|
||||
lambda bc: bc[:16] + marshal.dumps(b'abcd'),
|
||||
del_source=del_source)
|
||||
file_path = mapping['_temp'] if not del_source else bytecode_path
|
||||
with self.assertRaises(ImportError) as cm:
|
||||
|
@ -331,7 +477,7 @@ class BadBytecodeTest:
|
|||
def _test_bad_marshal(self, *, del_source=False):
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bytecode_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: bc[:12] + b'<test>',
|
||||
lambda bc: bc[:16] + b'<test>',
|
||||
del_source=del_source)
|
||||
file_path = mapping['_temp'] if not del_source else bytecode_path
|
||||
with self.assertRaises(EOFError):
|
||||
|
@ -376,7 +522,7 @@ class SourceLoaderBadBytecodeTest:
|
|||
def test(name, mapping, bytecode_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bytecode_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 12)
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_empty_file(test)
|
||||
|
||||
|
@ -384,7 +530,7 @@ class SourceLoaderBadBytecodeTest:
|
|||
def test(name, mapping, bytecode_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bytecode_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 12)
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_partial_magic(test)
|
||||
|
||||
|
@ -395,7 +541,7 @@ class SourceLoaderBadBytecodeTest:
|
|||
def test(name, mapping, bytecode_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bytecode_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 12)
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_magic_only(test)
|
||||
|
||||
|
@ -418,10 +564,30 @@ class SourceLoaderBadBytecodeTest:
|
|||
def test(name, mapping, bc_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bc_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 12)
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_partial_timestamp(test)
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_partial_flags(self):
|
||||
# When the flags field is partial, regenerate the .pyc, else raise EOFError.
|
||||
def test(name, mapping, bc_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bc_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_partial_flags(test)
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_partial_hash(self):
|
||||
# When the hash is partial, regenerate the .pyc, else raise EOFError.
|
||||
def test(name, mapping, bc_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bc_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_partial_hash(test)
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_partial_size(self):
|
||||
# When the size is partial, regenerate the .pyc, else
|
||||
|
@ -429,7 +595,7 @@ class SourceLoaderBadBytecodeTest:
|
|||
def test(name, mapping, bc_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bc_path, 'rb') as file:
|
||||
self.assertGreater(len(file.read()), 12)
|
||||
self.assertGreater(len(file.read()), 16)
|
||||
|
||||
self._test_partial_size(test)
|
||||
|
||||
|
@ -459,13 +625,13 @@ class SourceLoaderBadBytecodeTest:
|
|||
py_compile.compile(mapping['_temp'])
|
||||
bytecode_path = self.util.cache_from_source(mapping['_temp'])
|
||||
with open(bytecode_path, 'r+b') as bytecode_file:
|
||||
bytecode_file.seek(4)
|
||||
bytecode_file.seek(8)
|
||||
bytecode_file.write(zeros)
|
||||
self.import_(mapping['_temp'], '_temp')
|
||||
source_mtime = os.path.getmtime(mapping['_temp'])
|
||||
source_timestamp = self.importlib._w_long(source_mtime)
|
||||
with open(bytecode_path, 'rb') as bytecode_file:
|
||||
bytecode_file.seek(4)
|
||||
bytecode_file.seek(8)
|
||||
self.assertEqual(bytecode_file.read(4), source_timestamp)
|
||||
|
||||
# [bytecode read-only]
|
||||
|
@ -560,6 +726,20 @@ class SourcelessLoaderBadBytecodeTest:
|
|||
|
||||
self._test_partial_timestamp(test, del_source=True)
|
||||
|
||||
def test_partial_flags(self):
|
||||
def test(name, mapping, bytecode_path):
|
||||
with self.assertRaises(EOFError):
|
||||
self.import_(bytecode_path, name)
|
||||
|
||||
self._test_partial_flags(test, del_source=True)
|
||||
|
||||
def test_partial_hash(self):
|
||||
def test(name, mapping, bytecode_path):
|
||||
with self.assertRaises(EOFError):
|
||||
self.import_(bytecode_path, name)
|
||||
|
||||
self._test_partial_hash(test, del_source=True)
|
||||
|
||||
def test_partial_size(self):
|
||||
def test(name, mapping, bytecode_path):
|
||||
with self.assertRaises(EOFError):
|
||||
|
|
|
@ -673,6 +673,7 @@ class SourceLoader(SourceOnlyLoader):
|
|||
if magic is None:
|
||||
magic = self.util.MAGIC_NUMBER
|
||||
data = bytearray(magic)
|
||||
data.extend(self.init._w_long(0))
|
||||
data.extend(self.init._w_long(self.source_mtime))
|
||||
data.extend(self.init._w_long(self.source_size))
|
||||
code_object = compile(self.source, self.path, 'exec',
|
||||
|
@ -836,6 +837,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness):
|
|||
if bytecode_written:
|
||||
self.assertIn(self.cached, self.loader.written)
|
||||
data = bytearray(self.util.MAGIC_NUMBER)
|
||||
data.extend(self.init._w_long(0))
|
||||
data.extend(self.init._w_long(self.loader.source_mtime))
|
||||
data.extend(self.init._w_long(self.loader.source_size))
|
||||
data.extend(marshal.dumps(code_object))
|
||||
|
|
|
@ -122,6 +122,24 @@ class PyCompileTests(unittest.TestCase):
|
|||
# Specifying optimized bytecode should lead to a path reflecting that.
|
||||
self.assertIn('opt-2', py_compile.compile(self.source_path, optimize=2))
|
||||
|
||||
def test_invalidation_mode(self):
|
||||
py_compile.compile(
|
||||
self.source_path,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
|
||||
)
|
||||
with open(self.cache_path, 'rb') as fp:
|
||||
flags = importlib._bootstrap_external._classify_pyc(
|
||||
fp.read(), 'test', {})
|
||||
self.assertEqual(flags, 0b11)
|
||||
py_compile.compile(
|
||||
self.source_path,
|
||||
invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH,
|
||||
)
|
||||
with open(self.cache_path, 'rb') as fp:
|
||||
flags = importlib._bootstrap_external._classify_pyc(
|
||||
fp.read(), 'test', {})
|
||||
self.assertEqual(flags, 0b1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -40,7 +40,7 @@ def make_pyc(co, mtime, size):
|
|||
else:
|
||||
mtime = int(-0x100000000 + int(mtime))
|
||||
pyc = (importlib.util.MAGIC_NUMBER +
|
||||
struct.pack("<ii", int(mtime), size & 0xFFFFFFFF) + data)
|
||||
struct.pack("<iii", 0, int(mtime), size & 0xFFFFFFFF) + data)
|
||||
return pyc
|
||||
|
||||
def module_path_to_dotted_name(path):
|
||||
|
@ -187,6 +187,20 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
|
|||
TESTMOD + pyc_ext: (NOW, test_pyc)}
|
||||
self.doTest(pyc_ext, files, TESTMOD)
|
||||
|
||||
def testUncheckedHashBasedPyc(self):
|
||||
source = b"state = 'old'"
|
||||
source_hash = importlib.util.source_hash(source)
|
||||
bytecode = importlib._bootstrap_external._code_to_hash_pyc(
|
||||
compile(source, "???", "exec"),
|
||||
source_hash,
|
||||
False, # unchecked
|
||||
)
|
||||
files = {TESTMOD + ".py": (NOW, "state = 'new'"),
|
||||
TESTMOD + ".pyc": (NOW - 20, bytecode)}
|
||||
def check(mod):
|
||||
self.assertEqual(mod.state, 'old')
|
||||
self.doTest(None, files, TESTMOD, call=check)
|
||||
|
||||
def testEmptyPy(self):
|
||||
files = {TESTMOD + ".py": (NOW, "")}
|
||||
self.doTest(None, files, TESTMOD)
|
||||
|
@@ -215,7 +229,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
        badtime_pyc = bytearray(test_pyc)
        # flip the second bit -- not the first as that one isn't stored in the
        # .py's mtime in the zip archive.
-       badtime_pyc[7] ^= 0x02
+       badtime_pyc[11] ^= 0x02
        files = {TESTMOD + ".py": (NOW, test_src),
                 TESTMOD + pyc_ext: (NOW, badtime_pyc)}
        self.doTest(".py", files, TESTMOD)
@@ -0,0 +1,3 @@
Implement PEP 552 (Deterministic pycs). Python now supports invalidating
bytecode cache files based on a source content hash rather than source
last-modified time.
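For a quick look at the hash such pycs embed, the helper added to importlib.util can be called directly (the path is illustrative):

    import importlib.util

    with open('spam.py', 'rb') as fp:
        source = fp.read()
    # 8-byte digest stored in the header of a hash-based pyc for this source.
    print(importlib.util.source_hash(source).hex())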
@@ -72,6 +72,11 @@ python \- an interpreted, interactive, object-oriented programming language
]
.B \-?
]
.br
[
.B \--check-hash-based-pycs
\'default\'|\'always\'|\'never\'
]
.br
[
.B \-c
@@ -123,6 +128,9 @@ Specify the command to execute (see next section).
This terminates the option list (following options are passed as
arguments to the command).
.TP
.BI "\-\-check-hash-based-pycs " mode
Configure how Python evaluates the up-to-dateness of hash-based .pyc files.
.TP
.B \-d
Turn on parser debugging output (for expert only, depending on
compilation options).
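The documented flag is a long option that takes its mode as a separate argument; a small sketch of exercising it from a build or test script (the imported module name is illustrative):

    import subprocess, sys

    # Force hash-based pycs to be revalidated against their sources.
    subprocess.run([sys.executable, '--check-hash-based-pycs', 'always',
                    '-c', 'import spam'], check=True)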
@@ -2,6 +2,7 @@

#include "Python.h"
#include "osdefs.h"
#include "internal/import.h"
#include "internal/pystate.h"

#include <locale.h>
@@ -61,6 +62,11 @@ static int orig_argc;

#define PROGRAM_OPTS BASE_OPTS

static const _PyOS_LongOption longoptions[] = {
    {L"check-hash-based-pycs", 1, 0},
    {NULL, 0, 0},
};

/* Short usage message (with %s for argv0) */
static const char usage_line[] =
"usage: %ls [option] ... [-c cmd | -m mod | file | -] [arg] ...\n";
@@ -98,6 +104,8 @@ static const char usage_3[] = "\
         also PYTHONWARNINGS=arg\n\
-x     : skip first line of source, allowing use of non-Unix forms of #!cmd\n\
-X opt : set implementation-specific option\n\
--check-hash-based-pycs always|default|never:\n\
    control how Python invalidates hash-based .pyc files\n\
";
static const char usage_4[] = "\
file   : program read from script file\n\
@@ -393,6 +401,7 @@ typedef struct {
    int quiet_flag;                     /* Py_QuietFlag, -q */
    int skip_first_line;                /* -x option */
    _Py_OptList xoptions;               /* -X options */
    const char *check_hash_pycs_mode;   /* --check-hash-based-pycs */
#ifdef MS_WINDOWS
    int legacy_windows_fs_encoding;     /* Py_LegacyWindowsFSEncodingFlag,
                                           PYTHONLEGACYWINDOWSFSENCODING */
@@ -577,7 +586,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain)

    _PyOS_ResetGetOpt();
    do {
-       int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS);
+       int longindex = -1;
+       int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS,
+                            longoptions, &longindex);
        if (c == EOF) {
            break;
        }
@@ -608,6 +619,22 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
        }

        switch (c) {
        case 0:
            // Handle long option.
            assert(longindex == 0); // Only one long option now.
            if (!wcscmp(_PyOS_optarg, L"always")) {
                cmdline->check_hash_pycs_mode = "always";
            } else if (!wcscmp(_PyOS_optarg, L"never")) {
                cmdline->check_hash_pycs_mode = "never";
            } else if (!wcscmp(_PyOS_optarg, L"default")) {
                cmdline->check_hash_pycs_mode = "default";
            } else {
                fprintf(stderr, "--check-hash-based-pycs must be one of "
                        "'default', 'always', or 'never'\n");
                return 1;
            }
            break;

        case 'b':
            cmdline->bytes_warning++;
            break;
@@ -1085,6 +1112,8 @@ pymain_set_global_config(_PyMain *pymain)
    pymain_set_flag(&Py_UnbufferedStdioFlag, cmdline->use_unbuffered_io);
    pymain_set_flag(&Py_VerboseFlag, cmdline->verbosity);
    pymain_set_flag(&Py_QuietFlag, cmdline->quiet_flag);
    if (cmdline->check_hash_pycs_mode)
        _Py_CheckHashBasedPycsMode = cmdline->check_hash_pycs_mode;
#ifdef MS_WINDOWS
    pymain_set_flag(&Py_LegacyWindowsFSEncodingFlag, cmdline->legacy_windows_fs_encoding);
    pymain_set_flag(&Py_LegacyWindowsStdioFlag, cmdline->legacy_windows_stdio);
@@ -1,4 +1,5 @@
#include "Python.h"
#include "internal/import.h"
#include "internal/pystate.h"
#include "structmember.h"
#include "osdefs.h"
@@ -1305,7 +1306,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
    unsigned char *buf = (unsigned char *)PyBytes_AsString(data);
    Py_ssize_t size = PyBytes_Size(data);

-   if (size < 12) {
+   if (size < 16) {
        PyErr_SetString(ZipImportError,
                        "bad pyc data");
        return NULL;
@@ -1319,7 +1320,16 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
        Py_RETURN_NONE;  /* signal caller to try alternative */
    }

-   if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
+   uint32_t flags = get_uint32(buf + 4);
+   if (flags != 0) {
+       // Hash-based pyc. We currently refuse to handle checked hash-based
+       // pycs. We could validate hash-based pycs against the source, but it
+       // seems likely that most people putting hash-based pycs in a zipfile
+       // will use unchecked ones.
+       if (strcmp(_Py_CheckHashBasedPycsMode, "never") &&
+           (flags != 0x1 || !strcmp(_Py_CheckHashBasedPycsMode, "always")))
+           Py_RETURN_NONE;
+   } else if ((mtime != 0 && !eq_mtime(get_uint32(buf + 8), mtime))) {
        if (Py_VerboseFlag) {
            PySys_FormatStderr("# %R has bad mtime\n",
                               pathname);
@@ -1329,7 +1339,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)

    /* XXX the pyc's size field is ignored; timestamp collisions are probably
       unimportant with zip files. */
-   code = PyMarshal_ReadObjectFromString((char *)buf + 12, size - 12);
+   code = PyMarshal_ReadObjectFromString((char *)buf + 16, size - 16);
    if (code == NULL) {
        return NULL;
    }
@@ -354,6 +354,41 @@ exit:
    return return_value;
}

PyDoc_STRVAR(_imp_source_hash__doc__,
"source_hash($module, /, key, source)\n"
"--\n"
"\n");

#define _IMP_SOURCE_HASH_METHODDEF    \
    {"source_hash", (PyCFunction)_imp_source_hash, METH_FASTCALL|METH_KEYWORDS, _imp_source_hash__doc__},

static PyObject *
_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source);

static PyObject *
_imp_source_hash(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
{
    PyObject *return_value = NULL;
    static const char * const _keywords[] = {"key", "source", NULL};
    static _PyArg_Parser _parser = {"ly*:source_hash", _keywords, 0};
    long key;
    Py_buffer source = {NULL, NULL};

    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
        &key, &source)) {
        goto exit;
    }
    return_value = _imp_source_hash_impl(module, key, &source);

exit:
    /* Cleanup for source */
    if (source.obj) {
       PyBuffer_Release(&source);
    }

    return return_value;
}

#ifndef _IMP_CREATE_DYNAMIC_METHODDEF
    #define _IMP_CREATE_DYNAMIC_METHODDEF
#endif /* !defined(_IMP_CREATE_DYNAMIC_METHODDEF) */
@@ -361,4 +396,4 @@ exit:
#ifndef _IMP_EXEC_DYNAMIC_METHODDEF
    #define _IMP_EXEC_DYNAMIC_METHODDEF
#endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
-/*[clinic end generated code: output=d068dd493e513604 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=e8b2c0b0d0a75da8 input=a9049054013a1b77]*/
@@ -51,7 +51,8 @@ void _PyOS_ResetGetOpt(void)
    opt_ptr = L"";
}

-int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring)
+int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring,
+                 const _PyOS_LongOption *longopts, int *longindex)
{
    wchar_t *ptr;
    wchar_t option;
@@ -86,13 +87,41 @@ int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring)
            return 'V';
        }

        opt_ptr = &argv[_PyOS_optind++][1];
    }

    if ((option = *opt_ptr++) == L'\0')
        return -1;

    if (option == L'-') {
        // Parse long option.
        if (*opt_ptr == L'\0') {
            fprintf(stderr, "expected long option\n");
            return -1;
        }
        *longindex = 0;
        const _PyOS_LongOption *opt;
        for (opt = &longopts[*longindex]; opt->name; opt = &longopts[++(*longindex)]) {
            if (!wcscmp(opt->name, opt_ptr))
                break;
        }
        if (!opt->name) {
            fprintf(stderr, "unknown option %ls\n", argv[_PyOS_optind - 1]);
            return '_';
        }
        opt_ptr = L"";
        if (!opt->has_arg) {
            return opt->val;
        }
        if (_PyOS_optind >= argc) {
            fprintf(stderr, "Argument expected for the %ls options\n",
                    argv[_PyOS_optind - 1]);
            return '_';
        }
        _PyOS_optarg = argv[_PyOS_optind++];
        return opt->val;
    }

    if (option == 'J') {
        if (_PyOS_opterr)
            fprintf(stderr, "-J is reserved for Jython\n");
@@ -5,6 +5,8 @@

#include "Python-ast.h"
#undef Yield   /* undefine macro conflicting with winbase.h */
#include "internal/hash.h"
#include "internal/import.h"
#include "internal/pystate.h"
#include "errcode.h"
#include "marshal.h"
@@ -2184,6 +2186,34 @@ _imp_exec_builtin_impl(PyObject *module, PyObject *mod)
    return exec_builtin_or_dynamic(mod);
}

/*[clinic input]
_imp.source_hash

    key: long
    source: Py_buffer
[clinic start generated code]*/

static PyObject *
_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source)
/*[clinic end generated code: output=edb292448cf399ea input=9aaad1e590089789]*/
{
    uint64_t hash = _Py_KeyedHash((uint64_t)key, source->buf, source->len);
#if !PY_LITTLE_ENDIAN
    // Force to little-endian. There really ought to be a succinct standard way
    // to do this.
    union {
        uint64_t x;
        unsigned char data[sizeof(uint64_t)];
    } pun;
    pun.x = hash;
    for (size_t i = 0; i < sizeof(pun.data); i++) {
        pun.data[sizeof(pun.data) - i - 1] = pun.data[i];
    }
    hash = pun.x;
#endif
    return PyBytes_FromStringAndSize((const char *)&hash, sizeof(hash));
}


PyDoc_STRVAR(doc_imp,
"(Extremely) low-level import machinery bits as used by importlib and imp.");
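importlib passes its own key when it calls this function; from Python the private hook can also be exercised directly, which is handy when reasoning about the stored digest (the key below is an arbitrary value chosen for illustration):

    import _imp

    # 8-byte little-endian keyed-siphash digest of the source bytes.
    digest = _imp.source_hash(key=42, source=b"print('hello')")
    print(len(digest), digest.hex())   # prints 8 and the hex digest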
@@ -2203,6 +2233,7 @@ static PyMethodDef imp_methods[] = {
    _IMP_EXEC_DYNAMIC_METHODDEF
    _IMP_EXEC_BUILTIN_METHODDEF
    _IMP__FIX_CO_FILENAME_METHODDEF
    _IMP_SOURCE_HASH_METHODDEF
    {NULL, NULL}  /* sentinel */
};
@@ -2219,6 +2250,8 @@ static struct PyModuleDef impmodule = {
    NULL
};

const char *_Py_CheckHashBasedPycsMode = "default";

PyMODINIT_FUNC
PyInit_imp(void)
{
@@ -2230,6 +2263,15 @@ PyInit_imp(void)
    d = PyModule_GetDict(m);
    if (d == NULL)
        goto failure;
    PyObject *pyc_mode = PyUnicode_FromString(_Py_CheckHashBasedPycsMode);
    if (pyc_mode == NULL) {
        goto failure;
    }
    if (PyDict_SetItemString(d, "check_hash_based_pycs", pyc_mode) < 0) {
        Py_DECREF(pyc_mode);
        goto failure;
    }
    Py_DECREF(pyc_mode);

    return m;
failure:
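The string configured here is surfaced as an attribute of the _imp module, so the effective mode can be inspected from Python; a minimal sketch:

    import _imp

    # 'default' unless overridden with --check-hash-based-pycs on the command line.
    print(_imp.check_hash_based_pycs)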
File diff suppressed because it is too large
@@ -284,7 +284,6 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */


-#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
/* **************************************************************************
 <MIT License>
 Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
@@ -364,10 +363,10 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
        HALF_ROUND(v2,v1,v0,v3,17,21);


-static Py_hash_t
-siphash24(const void *src, Py_ssize_t src_sz) {
-    uint64_t k0 = _le64toh(_Py_HashSecret.siphash.k0);
-    uint64_t k1 = _le64toh(_Py_HashSecret.siphash.k1);
+static uint64_t
+siphash24(uint64_t key0, uint64_t key1, const void *src, Py_ssize_t src_sz) {
+    uint64_t k0 = _le64toh(key0);
+    uint64_t k1 = _le64toh(key1);
    uint64_t b = (uint64_t)src_sz << 56;
    const uint64_t *in = (uint64_t*)src;
@@ -412,12 +411,26 @@ siphash24(const void *src, Py_ssize_t src_sz) {

    /* modified */
    t = (v0 ^ v1) ^ (v2 ^ v3);
-   return (Py_hash_t)t;
+   return t;
}

-static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
+static Py_hash_t
+pysiphash(const void *src, Py_ssize_t src_sz) {
+    return (Py_hash_t)siphash24(
+        _Py_HashSecret.siphash.k0, _Py_HashSecret.siphash.k1,
+        src, src_sz);
+}

-#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */
+uint64_t
+_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
+{
+    return siphash24(key, 0, src, src_sz);
+}
+
+
+#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
+static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
+#endif

#ifdef __cplusplus
}
@@ -1053,7 +1053,8 @@ run_pyc_file(FILE *fp, const char *filename, PyObject *globals,
                        "Bad magic number in .pyc file");
        return NULL;
    }
-   /* Skip mtime and size */
+   /* Skip the rest of the header. */
    (void) PyMarshal_ReadLongFromFile(fp);
    (void) PyMarshal_ReadLongFromFile(fp);
    (void) PyMarshal_ReadLongFromFile(fp);
    if (PyErr_Occurred())