Issues #15169, #14599: Make PyImport_ExecCodeModuleWithPathnames() use

Lib/imp.py for imp.source_from_cache() instead of its own C version.

Also change PyImport_ExecCodeModuleObject() to not infer the source
path from the bytecode path like
PyImport_ExecCodeModuleWithPathnames() does. This makes the function
less magical.

This also has the side-effect of removing all uses of MAXPATHLEN from
Python/import.c; that fixed limit could cause failures on really long filenames.
This commit is contained in:
Brett Cannon 2012-07-13 13:57:03 -04:00
parent d104eef118
commit a6473f9cfd
6 changed files with 4086 additions and 4065 deletions

View File

@ -163,9 +163,14 @@ Importing Modules
.. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)
Like :c:func:`PyImport_ExecCodeModuleObject`, but *name*, *pathname* and
*cpathname* are UTF-8 encoded strings.
*cpathname* are UTF-8 encoded strings. Attempts are also made to figure out
what the value for *pathname* should be from *cpathname* if the former is
set to ``NULL``.
.. versionadded:: 3.2
.. versionchanged:: 3.3
Uses :func:`imp.source_from_cache()` in calculating the source path if
only the bytecode path is provided.
.. c:function:: long PyImport_GetMagicNumber()

View File

@ -13,7 +13,7 @@ from _imp import (lock_held, acquire_lock, release_lock,
# Directly exposed by this module
from importlib._bootstrap import new_module
from importlib._bootstrap import cache_from_source
from importlib._bootstrap import cache_from_source, source_from_cache
from importlib import _bootstrap
@ -58,29 +58,6 @@ def get_suffixes():
return extensions + source + bytecode
def source_from_cache(path):
    """Return the .py path that corresponds to the .pyc/.pyo file at *path*.

    The answer is computed from the text of *path* alone; the bytecode file
    does not need to exist.

    NotImplementedError is raised when sys.implementation.cache_tag is None.
    ValueError is raised when the file is not located directly inside the
    directory named by _bootstrap._PYCACHE, or when its file name does not
    contain exactly two dots (i.e. the path is not in PEP 3147 format).

    """
    if sys.implementation.cache_tag is None:
        raise NotImplementedError('sys.implementation.cache_tag is None')
    cache_dir, cached_name = os.path.split(path)
    source_dir, cache_dir_name = os.path.split(cache_dir)
    if cache_dir_name != _bootstrap._PYCACHE:
        template = '{} not bottom-level directory in {!r}'
        raise ValueError(template.format(_bootstrap._PYCACHE, path))
    if cached_name.count('.') != 2:
        raise ValueError('expected only 2 dots in {!r}'.format(cached_name))
    # Everything before the first dot is the module's base name.
    module_name, _, _ = cached_name.partition('.')
    source_name = module_name + machinery.SOURCE_SUFFIXES[0]
    return os.path.join(source_dir, source_name)
class NullImporter:
"""Null import object."""

View File

@ -428,6 +428,50 @@ def cache_from_source(path, debug_override=None):
return _path_join(head, _PYCACHE, filename)
def source_from_cache(path):
    """Return the .py path that corresponds to the .pyc/.pyo file at *path*.

    The answer is computed from the text of *path* alone; the bytecode file
    does not need to exist.

    NotImplementedError is raised when sys.implementation.cache_tag is None.
    ValueError is raised when the file is not located directly inside the
    _PYCACHE directory, or when its file name does not contain exactly two
    dots (i.e. the path is not in PEP 3147 format).

    """
    if sys.implementation.cache_tag is None:
        raise NotImplementedError('sys.implementation.cache_tag is None')
    cache_dir, cached_name = _path_split(path)
    source_dir, cache_dir_name = _path_split(cache_dir)
    if cache_dir_name != _PYCACHE:
        template = '{} not bottom-level directory in {!r}'
        raise ValueError(template.format(_PYCACHE, path))
    if cached_name.count('.') != 2:
        raise ValueError('expected only 2 dots in {!r}'.format(cached_name))
    # Everything before the first dot is the module's base name.
    module_name, _, _ = cached_name.partition('.')
    source_name = module_name + SOURCE_SUFFIXES[0]
    return _path_join(source_dir, source_name)
def _get_sourcefile(bytecode_path):
    """Convert a bytecode file path to a source path (if possible).

    This function exists purely for backwards-compatibility for
    PyImport_ExecCodeModuleWithPathnames() in the C API.

    Returns None for an empty path.  A path that does not end in a
    three-character ".py?" extension (e.g. .pyc/.pyo, in any case) is returned
    unchanged.  Otherwise the candidate source path is returned if it names an
    existing regular file, and *bytecode_path* itself is returned if not.

    """
    if len(bytecode_path) == 0:
        return None
    rest, _, extension = bytecode_path.rpartition('.')
    # Only "<something>.py?" qualifies; a bare ".pyc" has an empty `rest`.
    if not rest or len(extension) != 3 or extension[:2].lower() != 'py':
        return bytecode_path
    try:
        source_path = source_from_cache(bytecode_path)
    except (NotImplementedError, ValueError):
        # Not a PEP 3147 path: treat it as a legacy foo.pyc sitting next to
        # foo.py and simply drop the trailing character.
        source_path = bytecode_path[:-1]
    return source_path if _path_isfile(source_path) else bytecode_path
def _verbose_message(message, *args):
"""Print the message to stderr if -v/PYTHONVERBOSE is turned on."""
if sys.flags.verbose:

View File

@ -92,6 +92,15 @@ Library
- Issue #10924: Fixed mksalt() to use an RNG that is suitable for cryptographic
purposes.
C API
-----
- Issues #15169, #14599: Strip out the C implementation of
imp.source_from_cache() used by PyImport_ExecCodeModuleWithPathnames() and
use the Python code instead. As a consequence, PyImport_ExecCodeModuleObject()
no longer tries to infer the source path from the bytecode path the way
PyImport_ExecCodeModuleWithPathnames() does.
Extension Modules
-----------------

View File

@ -630,8 +630,6 @@ remove_module(PyObject *name)
"sys.modules failed");
}
static PyObject * get_sourcefile(PyObject *filename);
static PyObject *make_source_pathname(PyObject *pathname);
/* Execute a code object in a module and return the module object
* WITH INCREMENTED REFERENCE COUNT. If an error occurs, name is
@ -668,18 +666,37 @@ PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname,
if (nameobj == NULL)
return NULL;
if (pathname != NULL) {
pathobj = PyUnicode_DecodeFSDefault(pathname);
if (pathobj == NULL)
goto error;
} else
pathobj = NULL;
if (cpathname != NULL) {
cpathobj = PyUnicode_DecodeFSDefault(cpathname);
if (cpathobj == NULL)
goto error;
} else
}
else
cpathobj = NULL;
if (pathname != NULL) {
pathobj = PyUnicode_DecodeFSDefault(pathname);
if (pathobj == NULL)
goto error;
}
else if (cpathobj != NULL) {
PyInterpreterState *interp = PyThreadState_GET()->interp;
_Py_IDENTIFIER(_get_sourcefile);
if (interp == NULL) {
Py_FatalError("PyImport_ExecCodeModuleWithPathnames: "
"no interpreter!");
}
pathobj = _PyObject_CallMethodObjIdArgs(interp->importlib,
&PyId__get_sourcefile, cpathobj,
NULL);
if (pathobj == NULL)
PyErr_Clear();
}
else
pathobj = NULL;
m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj);
error:
Py_DECREF(nameobj);
@ -706,18 +723,13 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
PyEval_GetBuiltins()) != 0)
goto error;
}
/* Remember the filename as the __file__ attribute */
if (pathname != NULL) {
v = get_sourcefile(pathname);
if (v == NULL)
PyErr_Clear();
v = pathname;
}
else
v = NULL;
if (v == NULL) {
else {
v = ((PyCodeObject *)co)->co_filename;
Py_INCREF(v);
}
Py_INCREF(v);
if (PyDict_SetItemString(d, "__file__", v) != 0)
PyErr_Clear(); /* Not important enough to report */
Py_DECREF(v);
@ -752,100 +764,6 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
}
/* Counterpart of rightmost_sep for unicode objects: return the index of the
   last path separator (SEP, or ALTSEP where it is defined) located in
   o[start:end], or -1 when that range contains no separator. */
static Py_ssize_t
rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end)
{
    Py_ssize_t pos;

    /* Scan backwards so that the first hit is already the rightmost one. */
    for (pos = end - 1; pos >= start; pos--) {
        Py_UCS4 ch = PyUnicode_READ_CHAR(o, pos);
        if (ch == SEP)
            return pos;
#ifdef ALTSEP
        if (ch == ALTSEP)
            return pos;
#endif
    }
    return -1;
}
/* Given the path to a Python byte-compiled file, return a new unicode object
holding the path to the source file, if the path matches the PEP 3147 format.
No file existence check is made.  NULL is returned when the path is longer
than MAXPATHLEN, when it does not match PEP 3147 style, or when the result
object cannot be allocated.  The resulting path is always shorter than the
input.
NOTE(review): the "does not match" NULL returns below do not set an exception
in this function; callers should not assume one is pending.
(...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
static PyObject*
make_source_pathname(PyObject *path)
{
Py_ssize_t left, right, dot0, dot1, len;
Py_ssize_t i, j;
PyObject *result;
int kind;
void *data;
len = PyUnicode_GET_LENGTH(path);
/* Overlong paths are rejected outright rather than converted. */
if (len > MAXPATHLEN)
return NULL;
/* Look back two slashes from the end. In between these two slashes
must be the string __pycache__ or this is not a PEP 3147 style
path. It's possible for there to be only one slash.
*/
/* right: index of the separator in front of the file name. */
right = rightmost_sep_obj(path, 0, len);
if (right == -1)
return NULL;
/* left: index of the first character of the directory component that
contains the file (0 when no earlier separator exists). */
left = rightmost_sep_obj(path, 0, right);
if (left == -1)
left = 0;
else
left++;
/* That component must be exactly CACHEDIR: same length, same characters. */
if (right-left != sizeof(CACHEDIR)-1)
return NULL;
for (i = 0; i < sizeof(CACHEDIR)-1; i++)
if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i])
return NULL;
/* Now verify that the path component to the right of the last slash
has two dots in it.
*/
dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1);
if (dot0 < 0)
return NULL;
dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1);
if (dot1 < 0)
return NULL;
/* Too many dots? */
if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1)
return NULL;
/* This is a PEP 3147 path. Start by copying everything from the
start of pathname up to and including the leftmost slash. Then
copy the file's basename, removing the magic tag and adding a .py
suffix.
*/
/* Length: `left` characters of leading path, `dot0-right` characters of
base name including its first dot, plus 2 for "py". */
result = PyUnicode_New(left + (dot0-right) + 2,
PyUnicode_MAX_CHAR_VALUE(path));
if (!result)
return NULL;
kind = PyUnicode_KIND(result);
data = PyUnicode_DATA(result);
/* i and j record how many characters each copy is asked to write, so that
i+j is the offset at which "py" gets appended. */
PyUnicode_CopyCharacters(result, 0, path, 0, (i = left));
PyUnicode_CopyCharacters(result, left, path, right+1,
(j = dot0-right));
PyUnicode_WRITE(kind, data, i+j, 'p');
PyUnicode_WRITE(kind, data, i+j+1, 'y');
assert(_PyUnicode_CheckConsistency(result, 1));
return result;
}
static void
update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname)
{
@ -911,61 +829,6 @@ imp_fix_co_filename(PyObject *self, PyObject *args)
}
/* Get source file -> unicode or None
 * Returns the path to the py file if available, else the given path.
 *
 * An empty filename yields None.  A filename that does not end in ".py?"
 * (any case for "py", any final character) is returned as-is.  Otherwise a
 * candidate source path is built and returned only if it names a regular
 * file; in every other case, including internal errors (which are cleared),
 * the given filename is returned.  The result is always a new reference.
 */
static PyObject *
get_sourcefile(PyObject *filename)
{
    Py_ssize_t len;
    PyObject *py;
    struct stat statbuf;
    int err;
    void *data;
    unsigned int kind;

    len = PyUnicode_GET_LENGTH(filename);
    if (len == 0)
        Py_RETURN_NONE;

    /* Only names of the form "<something>.py?" are candidates (*.pyc, *.pyo). */
    data = PyUnicode_DATA(filename);
    kind = PyUnicode_KIND(filename);
    if (len < 5
        || PyUnicode_READ(kind, data, len-4) != '.'
        || (PyUnicode_READ(kind, data, len-3) != 'p'
            && PyUnicode_READ(kind, data, len-3) != 'P')
        || (PyUnicode_READ(kind, data, len-2) != 'y'
            && PyUnicode_READ(kind, data, len-2) != 'Y'))
        goto unchanged;

    /* Start by trying to turn PEP 3147 path into source path.  If that
     * fails, just chop off the trailing character, i.e. legacy pyc path
     * to py.
     */
    py = make_source_pathname(filename);
    if (py == NULL) {
        PyErr_Clear();
        py = PyUnicode_Substring(filename, 0, len - 1);
    }
    if (py == NULL)
        goto error;

    err = _Py_stat(py, &statbuf);
    if (err == 0 && S_ISREG(statbuf.st_mode))
        return py;

    /* The candidate is not handed to the caller on any remaining path, so
     * release it here; this also covers the _Py_stat() == -2 error case,
     * which must not leave the reference behind.
     */
    Py_DECREF(py);
    if (err == -2)
        goto error;
    goto unchanged;

error:
    PyErr_Clear();
unchanged:
    Py_INCREF(filename);
    return filename;
}
/* Forward */
static struct _frozen * find_frozen(PyObject *);

File diff suppressed because it is too large Load Diff