bpo-37412: os.getcwdb() now uses UTF-8 on Windows (GH-14396)
The os.getcwdb() function now uses the UTF-8 encoding on Windows, rather than the ANSI code page: see PEP 529 for the rationale. The function is no longer deprecated on Windows. os.getcwd() and os.getcwdb() now detect integer overflow on memory allocations. On Unix, these functions properly report MemoryError on memory allocation failure.
This commit is contained in:
parent
c6a2320e87
commit
689830ee62
|
@ -1730,6 +1730,11 @@ features:
|
||||||
|
|
||||||
Return a bytestring representing the current working directory.
|
Return a bytestring representing the current working directory.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.8
|
||||||
|
The function now uses the UTF-8 encoding on Windows, rather than the ANSI
|
||||||
|
code page: see :pep:`529` for the rationale. The function is no longer
|
||||||
|
deprecated on Windows.
|
||||||
|
|
||||||
|
|
||||||
.. function:: lchflags(path, flags)
|
.. function:: lchflags(path, flags)
|
||||||
|
|
||||||
|
|
|
@ -1231,6 +1231,11 @@ Changes in Python behavior
|
||||||
Changes in the Python API
|
Changes in the Python API
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
* The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
|
||||||
|
rather than the ANSI code page: see :pep:`529` for the rationale. The
|
||||||
|
function is no longer deprecated on Windows.
|
||||||
|
(Contributed by Victor Stinner in :issue:`37412`.)
|
||||||
|
|
||||||
* :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
|
* :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
|
||||||
for better performance. On Windows Subsystem for Linux and QEMU User
|
for better performance. On Windows Subsystem for Linux and QEMU User
|
||||||
Emulation, Popen constructor using :func:`os.posix_spawn` no longer raises an
|
Emulation, Popen constructor using :func:`os.posix_spawn` no longer raises an
|
||||||
|
|
|
@ -82,6 +82,17 @@ def create_file(filename, content=b'content'):
|
||||||
fp.write(content)
|
fp.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
class MiscTests(unittest.TestCase):
|
||||||
|
def test_getcwd(self):
|
||||||
|
cwd = os.getcwd()
|
||||||
|
self.assertIsInstance(cwd, str)
|
||||||
|
|
||||||
|
def test_getcwdb(self):
|
||||||
|
cwd = os.getcwdb()
|
||||||
|
self.assertIsInstance(cwd, bytes)
|
||||||
|
self.assertEqual(os.fsdecode(cwd), os.getcwd())
|
||||||
|
|
||||||
|
|
||||||
# Tests creating TESTFN
|
# Tests creating TESTFN
|
||||||
class FileTests(unittest.TestCase):
|
class FileTests(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
|
||||||
|
rather than the ANSI code page: see :pep:`529` for the rationale. The function
|
||||||
|
is no longer deprecated on Windows.
|
|
@ -506,17 +506,6 @@ void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
|
||||||
ULONG, struct _Py_stat_struct *);
|
ULONG, struct _Py_stat_struct *);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
|
||||||
static int
|
|
||||||
win32_warn_bytes_api()
|
|
||||||
{
|
|
||||||
return PyErr_WarnEx(PyExc_DeprecationWarning,
|
|
||||||
"The Windows bytes API has been deprecated, "
|
|
||||||
"use Unicode filenames instead",
|
|
||||||
1);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef MS_WINDOWS
|
#ifndef MS_WINDOWS
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -3334,83 +3323,99 @@ os_lchown_impl(PyObject *module, path_t *path, uid_t uid, gid_t gid)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
posix_getcwd(int use_bytes)
|
posix_getcwd(int use_bytes)
|
||||||
{
|
{
|
||||||
char *buf, *tmpbuf;
|
|
||||||
char *cwd;
|
|
||||||
const size_t chunk = 1024;
|
|
||||||
size_t buflen = 0;
|
|
||||||
PyObject *obj;
|
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
if (!use_bytes) {
|
wchar_t wbuf[MAXPATHLEN];
|
||||||
wchar_t wbuf[MAXPATHLEN];
|
wchar_t *wbuf2 = wbuf;
|
||||||
wchar_t *wbuf2 = wbuf;
|
DWORD len;
|
||||||
PyObject *resobj;
|
|
||||||
DWORD len;
|
Py_BEGIN_ALLOW_THREADS
|
||||||
Py_BEGIN_ALLOW_THREADS
|
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
|
||||||
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
|
/* If the buffer is large enough, len does not include the
|
||||||
/* If the buffer is large enough, len does not include the
|
terminating \0. If the buffer is too small, len includes
|
||||||
terminating \0. If the buffer is too small, len includes
|
the space needed for the terminator. */
|
||||||
the space needed for the terminator. */
|
if (len >= Py_ARRAY_LENGTH(wbuf)) {
|
||||||
if (len >= Py_ARRAY_LENGTH(wbuf)) {
|
if (len <= PY_SSIZE_T_MAX / sizeof(wchar_t)) {
|
||||||
wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
|
wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
|
||||||
if (wbuf2)
|
|
||||||
len = GetCurrentDirectoryW(len, wbuf2);
|
|
||||||
}
|
}
|
||||||
Py_END_ALLOW_THREADS
|
else {
|
||||||
if (!wbuf2) {
|
wbuf2 = NULL;
|
||||||
PyErr_NoMemory();
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
if (!len) {
|
if (wbuf2) {
|
||||||
if (wbuf2 != wbuf)
|
len = GetCurrentDirectoryW(len, wbuf2);
|
||||||
PyMem_RawFree(wbuf2);
|
|
||||||
return PyErr_SetFromWindowsErr(0);
|
|
||||||
}
|
}
|
||||||
resobj = PyUnicode_FromWideChar(wbuf2, len);
|
}
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
|
||||||
|
if (!wbuf2) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (!len) {
|
||||||
if (wbuf2 != wbuf)
|
if (wbuf2 != wbuf)
|
||||||
PyMem_RawFree(wbuf2);
|
PyMem_RawFree(wbuf2);
|
||||||
return resobj;
|
return PyErr_SetFromWindowsErr(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (win32_warn_bytes_api())
|
PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len);
|
||||||
return NULL;
|
if (wbuf2 != wbuf) {
|
||||||
#endif
|
PyMem_RawFree(wbuf2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (use_bytes) {
|
||||||
|
if (resobj == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Py_SETREF(resobj, PyUnicode_EncodeFSDefault(resobj));
|
||||||
|
}
|
||||||
|
|
||||||
|
return resobj;
|
||||||
|
#else
|
||||||
|
const size_t chunk = 1024;
|
||||||
|
|
||||||
|
char *buf = NULL;
|
||||||
|
char *cwd = NULL;
|
||||||
|
size_t buflen = 0;
|
||||||
|
|
||||||
buf = cwd = NULL;
|
|
||||||
Py_BEGIN_ALLOW_THREADS
|
Py_BEGIN_ALLOW_THREADS
|
||||||
do {
|
do {
|
||||||
buflen += chunk;
|
char *newbuf;
|
||||||
#ifdef MS_WINDOWS
|
if (buflen <= PY_SSIZE_T_MAX - chunk) {
|
||||||
if (buflen > INT_MAX) {
|
buflen += chunk;
|
||||||
PyErr_NoMemory();
|
newbuf = PyMem_RawRealloc(buf, buflen);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
newbuf = NULL;
|
||||||
|
}
|
||||||
|
if (newbuf == NULL) {
|
||||||
|
PyMem_RawFree(buf);
|
||||||
|
buf = NULL;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
buf = newbuf;
|
||||||
tmpbuf = PyMem_RawRealloc(buf, buflen);
|
|
||||||
if (tmpbuf == NULL)
|
|
||||||
break;
|
|
||||||
|
|
||||||
buf = tmpbuf;
|
|
||||||
#ifdef MS_WINDOWS
|
|
||||||
cwd = getcwd(buf, (int)buflen);
|
|
||||||
#else
|
|
||||||
cwd = getcwd(buf, buflen);
|
cwd = getcwd(buf, buflen);
|
||||||
#endif
|
|
||||||
} while (cwd == NULL && errno == ERANGE);
|
} while (cwd == NULL && errno == ERANGE);
|
||||||
Py_END_ALLOW_THREADS
|
Py_END_ALLOW_THREADS
|
||||||
|
|
||||||
|
if (buf == NULL) {
|
||||||
|
return PyErr_NoMemory();
|
||||||
|
}
|
||||||
if (cwd == NULL) {
|
if (cwd == NULL) {
|
||||||
PyMem_RawFree(buf);
|
PyMem_RawFree(buf);
|
||||||
return posix_error();
|
return posix_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (use_bytes)
|
PyObject *obj;
|
||||||
|
if (use_bytes) {
|
||||||
obj = PyBytes_FromStringAndSize(buf, strlen(buf));
|
obj = PyBytes_FromStringAndSize(buf, strlen(buf));
|
||||||
else
|
}
|
||||||
|
else {
|
||||||
obj = PyUnicode_DecodeFSDefault(buf);
|
obj = PyUnicode_DecodeFSDefault(buf);
|
||||||
|
}
|
||||||
PyMem_RawFree(buf);
|
PyMem_RawFree(buf);
|
||||||
|
|
||||||
return obj;
|
return obj;
|
||||||
|
#endif /* !MS_WINDOWS */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue