Issue #13645: pyc files now contain the size of the corresponding source

code, to avoid timestamp collisions (especially on filesystems with a low
timestamp resolution) when checking for freshness of the bytecode.
This commit is contained in:
Antoine Pitrou 2012-01-13 18:52:16 +01:00
parent 1f918c1480
commit 5136ac0ca2
14 changed files with 167 additions and 49 deletions

View File

@ -239,11 +239,30 @@ are also provided to help in implementing the core ABCs.
optimization to speed up loading by removing the parsing step of Python's
compiler, and so no bytecode-specific API is exposed.
.. method:: path_stats(self, path)
Optional abstract method which returns a :class:`dict` containing
metadata about the specifed path. Supported dictionary keys are:
- ``'mtime'`` (mandatory): an integer or floating-point number
representing the modification time of the source code;
- ``'size'`` (optional): the size in bytes of the source code.
Any other keys in the dictionary are ignored, to allow for future
extensions.
.. versionadded:: 3.3
.. method:: path_mtime(self, path)
Optional abstract method which returns the modification time for the
specified path.
.. deprecated:: 3.3
This method is deprecated in favour of :meth:`path_stats`. You don't
have to implement it, but it is still available for compatibility
purposes.
.. method:: set_data(self, path, data)
Optional abstract method which writes the specified bytes to a file

View File

@ -331,25 +331,40 @@ class _LoaderBasics:
filename = self.get_filename(fullname).rpartition(path_sep)[2]
return filename.rsplit('.', 1)[0] == '__init__'
def _bytes_from_bytecode(self, fullname, data, source_mtime):
def _bytes_from_bytecode(self, fullname, data, source_stats):
"""Return the marshalled bytes from bytecode, verifying the magic
number and timestamp along the way.
number, timestamp and source size along the way.
If source_mtime is None then skip the timestamp check.
If source_stats is None then skip the timestamp check.
"""
magic = data[:4]
raw_timestamp = data[4:8]
raw_size = data[8:12]
if len(magic) != 4 or magic != imp.get_magic():
raise ImportError("bad magic number in {}".format(fullname))
elif len(raw_timestamp) != 4:
raise EOFError("bad timestamp in {}".format(fullname))
elif source_mtime is not None:
elif len(raw_size) != 4:
raise EOFError("bad size in {}".format(fullname))
if source_stats is not None:
try:
source_mtime = int(source_stats['mtime'])
except KeyError:
pass
else:
if marshal._r_long(raw_timestamp) != source_mtime:
raise ImportError("bytecode is stale for {}".format(fullname))
try:
source_size = source_stats['size'] & 0xFFFFFFFF
except KeyError:
pass
else:
if marshal._r_long(raw_size) != source_size:
raise ImportError("bytecode is stale for {}".format(fullname))
# Can't return the code object as errors from marshal loading need to
# propagate even when source is available.
return data[8:]
return data[12:]
@module_for_loader
def _load_module(self, module, *, sourceless=False):
@ -377,12 +392,21 @@ class SourceLoader(_LoaderBasics):
def path_mtime(self, path):
"""Optional method that returns the modification time (an int) for the
specified path, where path is a str.
Implementing this method allows the loader to read bytecode files.
"""
raise NotImplementedError
def path_stats(self, path):
"""Optional method returning a metadata dict for the specified path
to by the path (str).
Possible keys:
- 'mtime' (mandatory) is the numeric timestamp of last source
code modification;
- 'size' (optional) is the size in bytes of the source code.
Implementing this method allows the loader to read bytecode files.
"""
return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Optional method which writes data (bytes) to a file path (a str).
@ -407,7 +431,7 @@ class SourceLoader(_LoaderBasics):
def get_code(self, fullname):
"""Concrete implementation of InspectLoader.get_code.
Reading of bytecode requires path_mtime to be implemented. To write
Reading of bytecode requires path_stats to be implemented. To write
bytecode, set_data must also be implemented.
"""
@ -416,10 +440,11 @@ class SourceLoader(_LoaderBasics):
source_mtime = None
if bytecode_path is not None:
try:
source_mtime = self.path_mtime(source_path)
st = self.path_stats(source_path)
except NotImplementedError:
pass
else:
source_mtime = int(st['mtime'])
try:
data = self.get_data(bytecode_path)
except IOError:
@ -427,7 +452,7 @@ class SourceLoader(_LoaderBasics):
else:
try:
bytes_data = self._bytes_from_bytecode(fullname, data,
source_mtime)
st)
except (ImportError, EOFError):
pass
else:
@ -448,6 +473,7 @@ class SourceLoader(_LoaderBasics):
# throw an exception.
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_mtime))
data.extend(marshal._w_long(len(source_bytes)))
data.extend(marshal.dumps(code_object))
try:
self.set_data(bytecode_path, data)
@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader):
"""Concrete implementation of SourceLoader using the file system."""
def path_mtime(self, path):
"""Return the modification time for the path."""
return int(_os.stat(path).st_mtime)
def path_stats(self, path):
"""Return the metadat for the path."""
st = _os.stat(path)
return {'mtime': st.st_mtime, 'size': st.st_size}
def set_data(self, path, data):
"""Write bytes data to a file."""

View File

@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
def path_mtime(self, path):
"""Return the (int) modification time for the path (str)."""
if self.path_stats.__func__ is SourceLoader.path_stats:
raise NotImplementedError
return int(self.path_stats(path)['mtime'])
def path_stats(self, path):
"""Return a metadata dict for the source pointed to by the path (str).
Possible keys:
- 'mtime' (mandatory) is the numeric timestamp of last source
code modification;
- 'size' (optional) is the size in bytes of the source code.
"""
if self.path_mtime.__func__ is SourceLoader.path_mtime:
raise NotImplementedError
return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Write the bytes to the path (if possible).

View File

@ -5,6 +5,7 @@ from .. import abc as testing_abc
from .. import util
from . import util as source_util
import collections
import imp
import inspect
import io
@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
def __init__(self, path, magic=imp.get_magic()):
super().__init__(path)
self.bytecode_path = imp.cache_from_source(self.path)
self.source_size = len(self.source)
data = bytearray(magic)
data.extend(marshal._w_long(self.source_mtime))
data.extend(marshal._w_long(self.source_size))
code_object = compile(self.source, self.path, 'exec',
dont_inherit=True)
data.extend(marshal.dumps(code_object))
@ -56,9 +59,9 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
else:
raise IOError
def path_mtime(self, path):
def path_stats(self, path):
assert path == self.path
return self.source_mtime
return {'mtime': self.source_mtime, 'size': self.source_size}
def set_data(self, path, data):
self.written[path] = bytes(data)
@ -657,6 +660,7 @@ class SourceLoaderBytecodeTests(SourceLoaderTestHarness):
self.assertIn(self.cached, self.loader.written)
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(self.loader.source_mtime))
data.extend(marshal._w_long(self.loader.source_size))
data.extend(marshal.dumps(code_object))
self.assertEqual(self.loader.written[self.cached], bytes(data))
@ -847,7 +851,7 @@ class AbstractMethodImplTests(unittest.TestCase):
# Required abstractmethods.
self.raises_NotImplementedError(ins, 'get_filename', 'get_data')
# Optional abstractmethods.
self.raises_NotImplementedError(ins,'path_mtime', 'set_data')
self.raises_NotImplementedError(ins,'path_stats', 'set_data')
def test_PyLoader(self):
self.raises_NotImplementedError(self.PyLoader(), 'source_path',

View File

@ -70,11 +70,6 @@ class SimpleTest(unittest.TestCase):
module_dict_id = id(module.__dict__)
with open(mapping['_temp'], 'w') as file:
file.write("testing_var = 42\n")
# For filesystems where the mtime is only to a second granularity,
# everything that has happened above can be too fast;
# force an mtime on the source that is guaranteed to be different
# than the original mtime.
loader.path_mtime = self.fake_mtime(loader.path_mtime)
module = loader.load_module('_temp')
self.assertTrue('testing_var' in module.__dict__,
"'testing_var' not in "
@ -190,10 +185,17 @@ class BadBytecodeTest(unittest.TestCase):
del_source=del_source)
test('_temp', mapping, bc_path)
def _test_partial_size(self, test, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:11],
del_source=del_source)
test('_temp', mapping, bc_path)
def _test_no_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8],
lambda bc: bc[:12],
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bc_path
with self.assertRaises(EOFError):
@ -202,7 +204,7 @@ class BadBytecodeTest(unittest.TestCase):
def _test_non_code_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8] + marshal.dumps(b'abcd'),
lambda bc: bc[:12] + marshal.dumps(b'abcd'),
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ImportError):
@ -211,7 +213,7 @@ class BadBytecodeTest(unittest.TestCase):
def _test_bad_marshal(self, *, del_source=False):
with source_util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:8] + b'<test>',
lambda bc: bc[:12] + b'<test>',
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(EOFError):
@ -235,7 +237,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_empty_file(test)
@ -243,7 +245,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_partial_magic(test)
@ -254,7 +256,7 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bytecode_path):
self.import_(mapping[name], name)
with open(bytecode_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_magic_only(test)
@ -276,10 +278,21 @@ class SourceLoaderBadBytecodeTest(BadBytecodeTest):
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
self.assertGreater(len(file.read()), 8)
self.assertGreater(len(file.read()), 12)
self._test_partial_timestamp(test)
@source_util.writes_bytecode_files
def test_partial_size(self):
# When the size is partial, regenerate the .pyc, else
# raise EOFError.
def test(name, mapping, bc_path):
self.import_(mapping[name], name)
with open(bc_path, 'rb') as file:
self.assertGreater(len(file.read()), 12)
self._test_partial_size(test)
@source_util.writes_bytecode_files
def test_no_marshal(self):
# When there is only the magic number and timestamp, raise EOFError.
@ -375,6 +388,13 @@ class SourcelessLoaderBadBytecodeTest(BadBytecodeTest):
self._test_partial_timestamp(test, del_source=True)
def test_partial_size(self):
def test(name, mapping, bytecode_path):
with self.assertRaises(EOFError):
self.import_(bytecode_path, name)
self._test_partial_size(test, del_source=True)
def test_no_marshal(self):
self._test_no_marshal(del_source=True)

View File

@ -21,7 +21,7 @@ def read_code(stream):
if magic != imp.get_magic():
return None
stream.read(4) # Skip timestamp
stream.read(8) # Skip timestamp and size
return marshal.load(stream)

View File

@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
"""
with tokenize.open(file) as f:
try:
timestamp = int(os.fstat(f.fileno()).st_mtime)
st = os.fstat(f.fileno())
except AttributeError:
timestamp = int(os.stat(file).st_mtime)
st = os.stat(file)
timestamp = int(st.st_mtime)
size = st.st_size & 0xFFFFFFFF
codestring = f.read()
try:
codeobject = builtins.compile(codestring, dfile or file, 'exec',
@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
with open(cfile, 'wb') as fc:
fc.write(b'\0\0\0\0')
wr_long(fc, timestamp)
wr_long(fc, size)
marshal.dump(codeobject, fc)
fc.flush()
fc.seek(0, 0)

View File

@ -380,7 +380,7 @@ func_filename = func.__code__.co_filename
def test_foreign_code(self):
py_compile.compile(self.file_name)
with open(self.compiled_name, "rb") as f:
header = f.read(8)
header = f.read(12)
code = marshal.load(f)
constants = list(code.co_consts)
foreign_code = test_main.__code__
@ -644,6 +644,16 @@ class PycacheTests(unittest.TestCase):
self.assertEqual(sys.modules['pep3147.foo'].__cached__,
os.path.join(os.curdir, foo_pyc))
def test_recompute_pyc_same_second(self):
# Even when the source file doesn't change timestamp, a change in
# source size is enough to trigger recomputation of the pyc file.
__import__(TESTFN)
unload(TESTFN)
with open(self.source, 'a') as fp:
print("x = 5", file=fp)
m = __import__(TESTFN)
self.assertEqual(m.x, 5)
class RelativeImportFromImportlibTests(test_relative_imports.RelativeImports):

View File

@ -19,7 +19,7 @@ import io
from traceback import extract_tb, extract_stack, print_tb
raise_src = 'def do_raise(): raise TypeError\n'
def make_pyc(co, mtime):
def make_pyc(co, mtime, size):
data = marshal.dumps(co)
if type(mtime) is type(0.0):
# Mac mtimes need a bit of special casing
@ -27,14 +27,14 @@ def make_pyc(co, mtime):
mtime = int(mtime)
else:
mtime = int(-0x100000000 + int(mtime))
pyc = imp.get_magic() + struct.pack("<i", int(mtime)) + data
pyc = imp.get_magic() + struct.pack("<ii", int(mtime), size & 0xFFFFFFFF) + data
return pyc
def module_path_to_dotted_name(path):
return path.replace(os.sep, '.')
NOW = time.time()
test_pyc = make_pyc(test_co, NOW)
test_pyc = make_pyc(test_co, NOW, len(test_src))
TESTMOD = "ziptestmodule"
@ -293,7 +293,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
return __file__
if __loader__.get_data("some.data") != b"some data":
raise AssertionError("bad data")\n"""
pyc = make_pyc(compile(src, "<???>", "exec"), NOW)
pyc = make_pyc(compile(src, "<???>", "exec"), NOW, len(src))
files = {TESTMOD + pyc_ext: (NOW, pyc),
"some.data": (NOW, "some data")}
self.doTest(pyc_ext, files, TESTMOD)
@ -313,7 +313,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
self.doTest(".py", files, TESTMOD, call=self.assertModuleSource)
def testGetCompiledSource(self):
pyc = make_pyc(compile(test_src, "<???>", "exec"), NOW)
pyc = make_pyc(compile(test_src, "<???>", "exec"), NOW, len(test_src))
files = {TESTMOD + ".py": (NOW, test_src),
TESTMOD + pyc_ext: (NOW, pyc)}
self.doTest(pyc_ext, files, TESTMOD, call=self.assertModuleSource)

View File

@ -10,6 +10,10 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Issue #13645: pyc files now contain the size of the corresponding source
code, to avoid timestamp collisions (especially on filesystems with a low
timestamp resolution) when checking for freshness of the bytecode.
- PEP 380, Issue #11682: Add "yield from <x>" to support easy delegation to
subgenerators (initial patch by Greg Ewing, integration into 3.3 by
Renaud Blanch, Ryan Kelly, Zbigniew Jędrzejewski-Szmek and Nick Coghlan)

View File

@ -1033,7 +1033,9 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
return Py_None; /* signal caller to try alternative */
}
code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
/* XXX the pyc's size field is ignored; timestamp collisions are probably
unimportant with zip files. */
code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
if (code == NULL)
return NULL;
if (!PyCode_Check(code)) {

View File

@ -2800,7 +2800,7 @@ ast2obj_expr(void* _o)
if (!result) goto failed;
value = ast2obj_int(o->v.Yield.is_from);
if (!value) goto failed;
if (PyObject_SetAttrString(result, "is_from", value) == -1)
if (_PyObject_SetAttrId(result, &PyId_is_from, value) == -1)
goto failed;
Py_DECREF(value);
value = ast2obj_expr(o->v.Yield.value);

View File

@ -104,6 +104,7 @@ typedef unsigned short mode_t;
Python 3.2a2 3180 (add DELETE_DEREF)
Python 3.3a0 3190 __class__ super closure changed
Python 3.3a0 3200 (__qualname__ added)
3210 (added size modulo 2**32 to the pyc header)
*/
/* MAGIC must change whenever the bytecode emitted by the compiler may no
@ -116,7 +117,7 @@ typedef unsigned short mode_t;
#define STRIFY(name) QUOTE(name)
#define MAJOR STRIFY(PY_MAJOR_VERSION)
#define MINOR STRIFY(PY_MINOR_VERSION)
#define MAGIC (3200 | ((long)'\r'<<16) | ((long)'\n'<<24))
#define MAGIC (3210 | ((long)'\r'<<16) | ((long)'\n'<<24))
#define TAG "cpython-" MAJOR MINOR;
#define CACHEDIR "__pycache__"
/* Current magic word and string tag as globals. */
@ -1071,11 +1072,12 @@ make_source_pathname(PyObject *path)
Doesn't set an exception. */
static FILE *
check_compiled_module(PyObject *pathname, time_t mtime, PyObject *cpathname)
check_compiled_module(PyObject *pathname, struct stat *srcstat, PyObject *cpathname)
{
FILE *fp;
long magic;
long pyc_mtime;
long pyc_size;
fp = _Py_fopen(cpathname, "rb");
if (fp == NULL)
@ -1088,12 +1090,19 @@ check_compiled_module(PyObject *pathname, time_t mtime, PyObject *cpathname)
return NULL;
}
pyc_mtime = PyMarshal_ReadLongFromFile(fp);
if (pyc_mtime != mtime) {
if (pyc_mtime != srcstat->st_mtime) {
if (Py_VerboseFlag)
PySys_FormatStderr("# %R has bad mtime\n", cpathname);
fclose(fp);
return NULL;
}
pyc_size = PyMarshal_ReadLongFromFile(fp);
if (pyc_size != (srcstat->st_size & 0xFFFFFFFF)) {
if (Py_VerboseFlag)
PySys_FormatStderr("# %R has bad size\n", cpathname);
fclose(fp);
return NULL;
}
if (Py_VerboseFlag)
PySys_FormatStderr("# %R matches %R\n", cpathname, pathname);
return fp;
@ -1136,6 +1145,8 @@ load_compiled_module(PyObject *name, PyObject *cpathname, FILE *fp)
"Bad magic number in %R", cpathname);
return NULL;
}
/* Skip mtime and size */
(void) PyMarshal_ReadLongFromFile(fp);
(void) PyMarshal_ReadLongFromFile(fp);
co = read_compiled_module(cpathname, fp);
if (co == NULL)
@ -1196,6 +1207,7 @@ write_compiled_module(PyCodeObject *co, PyObject *cpathname,
Py_UCS4 *cpathname_ucs4;
FILE *fp;
time_t mtime = srcstat->st_mtime;
long size = srcstat->st_size & 0xFFFFFFFF;
PyObject *cpathname_tmp;
#ifdef MS_WINDOWS /* since Windows uses different permissions */
mode_t mode = srcstat->st_mode & ~S_IEXEC;
@ -1326,14 +1338,16 @@ write_compiled_module(PyCodeObject *co, PyObject *cpathname,
return;
}
PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION);
/* First write a 0 for mtime */
/* First write a 0 for mtime and size */
PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION);
fflush(fp);
/* Now write the true mtime */
/* Now write the true mtime and size */
fseek(fp, 4L, 0);
assert(mtime < LONG_MAX);
PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION);
PyMarshal_WriteLongToFile(size, fp, Py_MARSHAL_VERSION);
if (fflush(fp) != 0 || ferror(fp)) {
if (Py_VerboseFlag)
PySys_FormatStderr("# can't write %R\n", cpathname);
@ -1478,7 +1492,7 @@ load_source_module(PyObject *name, PyObject *pathname, FILE *fp)
cpathname = make_compiled_pathname(pathname, !Py_OptimizeFlag);
if (cpathname != NULL)
fpc = check_compiled_module(pathname, st.st_mtime, cpathname);
fpc = check_compiled_module(pathname, &st, cpathname);
else
fpc = NULL;

View File

@ -1844,6 +1844,8 @@ run_pyc_file(FILE *fp, const char *filename, PyObject *globals,
"Bad magic number in .pyc file");
return NULL;
}
/* Skip mtime and size */
(void) PyMarshal_ReadLongFromFile(fp);
(void) PyMarshal_ReadLongFromFile(fp);
v = PyMarshal_ReadLastObjectFromFile(fp);
fclose(fp);