mirror of https://github.com/python/cpython
gh-94526: getpath_dirname() no longer encodes the path (#97645)
Fix the Python path configuration used to initialize sys.path at Python startup. Paths are no longer encoded to UTF-8/strict to avoid encoding errors if they contain surrogate characters (bytes paths are decoded with the surrogateescape error handler). getpath_basename() and getpath_dirname() functions no longer encode the path to UTF-8/strict, but work directly on Unicode strings. These functions now use PyUnicode_FindChar() and PyUnicode_Substring() on the Unicode path, rather than strrchr() on the encoded bytes string.
This commit is contained in:
parent
ff54dd96cb
commit
9f2f1dd131
|
@ -0,0 +1,4 @@
|
|||
Fix the Python path configuration used to initialize :data:`sys.path` at
|
||||
Python startup. Paths are no longer encoded to UTF-8/strict to avoid encoding
|
||||
errors if they contain surrogate characters (bytes paths are decoded with the
|
||||
surrogateescape error handler). Patch by Victor Stinner.
|
|
@ -82,27 +82,32 @@ getpath_abspath(PyObject *Py_UNUSED(self), PyObject *args)
|
|||
static PyObject *
|
||||
getpath_basename(PyObject *Py_UNUSED(self), PyObject *args)
|
||||
{
|
||||
const char *path;
|
||||
if (!PyArg_ParseTuple(args, "s", &path)) {
|
||||
PyObject *path;
|
||||
if (!PyArg_ParseTuple(args, "U", &path)) {
|
||||
return NULL;
|
||||
}
|
||||
const char *name = strrchr(path, SEP);
|
||||
return PyUnicode_FromString(name ? name + 1 : path);
|
||||
Py_ssize_t end = PyUnicode_GET_LENGTH(path);
|
||||
Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1);
|
||||
if (pos < 0) {
|
||||
return Py_NewRef(path);
|
||||
}
|
||||
return PyUnicode_Substring(path, pos + 1, end);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
getpath_dirname(PyObject *Py_UNUSED(self), PyObject *args)
|
||||
{
|
||||
const char *path;
|
||||
if (!PyArg_ParseTuple(args, "s", &path)) {
|
||||
PyObject *path;
|
||||
if (!PyArg_ParseTuple(args, "U", &path)) {
|
||||
return NULL;
|
||||
}
|
||||
const char *name = strrchr(path, SEP);
|
||||
if (!name) {
|
||||
Py_ssize_t end = PyUnicode_GET_LENGTH(path);
|
||||
Py_ssize_t pos = PyUnicode_FindChar(path, SEP, 0, end, -1);
|
||||
if (pos < 0) {
|
||||
return PyUnicode_FromStringAndSize(NULL, 0);
|
||||
}
|
||||
return PyUnicode_FromStringAndSize(path, (name - path));
|
||||
return PyUnicode_Substring(path, 0, pos);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue