bpo-37412: os.getcwdb() now uses UTF-8 on Windows (GH-14396)

The os.getcwdb() function now uses the UTF-8 encoding on Windows,
rather than the ANSI code page: see PEP 529 for the rationale. The
function is no longer deprecated on Windows.

os.getcwd() and os.getcwdb() now detect integer overflow on memory
allocations. On Unix, these functions properly report MemoryError on
memory allocation failure.
This commit is contained in:
Victor Stinner 2019-06-26 17:31:12 +02:00 committed by GitHub
parent c6a2320e87
commit 689830ee62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 88 additions and 59 deletions

View File

@ -1730,6 +1730,11 @@ features:
Return a bytestring representing the current working directory.
.. versionchanged:: 3.8
The function now uses the UTF-8 encoding on Windows, rather than the ANSI
code page: see :pep:`529` for the rationale. The function is no longer
deprecated on Windows.
.. function:: lchflags(path, flags)

View File

@ -1231,6 +1231,11 @@ Changes in Python behavior
Changes in the Python API
-------------------------
* The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
rather than the ANSI code page: see :pep:`529` for the rationale. The
function is no longer deprecated on Windows.
(Contributed by Victor Stinner in :issue:`37412`.)
* :class:`subprocess.Popen` can now use :func:`os.posix_spawn` in some cases
for better performance. On Windows Subsystem for Linux and QEMU User
Emulation, the Popen constructor using :func:`os.posix_spawn` no longer raises an

View File

@ -82,6 +82,17 @@ def create_file(filename, content=b'content'):
fp.write(content)
class MiscTests(unittest.TestCase):
    """Sanity checks for the current-working-directory helpers in os."""

    def test_getcwd(self):
        """os.getcwd() returns the working directory as str."""
        current_dir = os.getcwd()
        self.assertIsInstance(current_dir, str)

    def test_getcwdb(self):
        """os.getcwdb() returns bytes that decode to what os.getcwd() gives."""
        current_dir_bytes = os.getcwdb()
        self.assertIsInstance(current_dir_bytes, bytes)
        # The bytes form must round-trip through the filesystem encoding.
        self.assertEqual(os.fsdecode(current_dir_bytes), os.getcwd())
# Tests creating TESTFN
class FileTests(unittest.TestCase):
def setUp(self):

View File

@ -0,0 +1,3 @@
The :func:`os.getcwdb` function now uses the UTF-8 encoding on Windows,
rather than the ANSI code page: see :pep:`529` for the rationale. The function
is no longer deprecated on Windows.

View File

@ -506,17 +506,6 @@ void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *,
ULONG, struct _Py_stat_struct *);
#endif
#ifdef MS_WINDOWS
/* Emit the DeprecationWarning for the Windows bytes filename API.

   Returns the value of PyErr_WarnEx() unchanged; a non-zero result is
   treated by callers as "stop and propagate the error". */
static int
win32_warn_bytes_api(void)
{
    return PyErr_WarnEx(PyExc_DeprecationWarning,
                        "The Windows bytes API has been deprecated, "
                        "use Unicode filenames instead",
                        1);  /* stack level passed to PyErr_WarnEx() */
}
#endif
#ifndef MS_WINDOWS
PyObject *
@ -3334,83 +3323,99 @@ os_lchown_impl(PyObject *module, path_t *path, uid_t uid, gid_t gid)
static PyObject *
posix_getcwd(int use_bytes)
{
char *buf, *tmpbuf;
char *cwd;
const size_t chunk = 1024;
size_t buflen = 0;
PyObject *obj;
#ifdef MS_WINDOWS
if (!use_bytes) {
wchar_t wbuf[MAXPATHLEN];
wchar_t *wbuf2 = wbuf;
PyObject *resobj;
DWORD len;
Py_BEGIN_ALLOW_THREADS
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
/* If the buffer is large enough, len does not include the
terminating \0. If the buffer is too small, len includes
the space needed for the terminator. */
if (len >= Py_ARRAY_LENGTH(wbuf)) {
wchar_t wbuf[MAXPATHLEN];
wchar_t *wbuf2 = wbuf;
DWORD len;
Py_BEGIN_ALLOW_THREADS
len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf);
/* If the buffer is large enough, len does not include the
terminating \0. If the buffer is too small, len includes
the space needed for the terminator. */
if (len >= Py_ARRAY_LENGTH(wbuf)) {
if (len >= PY_SSIZE_T_MAX / sizeof(wchar_t)) {
wbuf2 = PyMem_RawMalloc(len * sizeof(wchar_t));
if (wbuf2)
len = GetCurrentDirectoryW(len, wbuf2);
}
Py_END_ALLOW_THREADS
if (!wbuf2) {
PyErr_NoMemory();
return NULL;
else {
wbuf2 = NULL;
}
if (!len) {
if (wbuf2 != wbuf)
PyMem_RawFree(wbuf2);
return PyErr_SetFromWindowsErr(0);
if (wbuf2) {
len = GetCurrentDirectoryW(len, wbuf2);
}
resobj = PyUnicode_FromWideChar(wbuf2, len);
}
Py_END_ALLOW_THREADS
if (!wbuf2) {
PyErr_NoMemory();
return NULL;
}
if (!len) {
if (wbuf2 != wbuf)
PyMem_RawFree(wbuf2);
return resobj;
return PyErr_SetFromWindowsErr(0);
}
if (win32_warn_bytes_api())
return NULL;
#endif
PyObject *resobj = PyUnicode_FromWideChar(wbuf2, len);
if (wbuf2 != wbuf) {
PyMem_RawFree(wbuf2);
}
if (use_bytes) {
if (resobj == NULL) {
return NULL;
}
Py_SETREF(resobj, PyUnicode_EncodeFSDefault(resobj));
}
return resobj;
#else
const size_t chunk = 1024;
char *buf = NULL;
char *cwd = NULL;
size_t buflen = 0;
buf = cwd = NULL;
Py_BEGIN_ALLOW_THREADS
do {
buflen += chunk;
#ifdef MS_WINDOWS
if (buflen > INT_MAX) {
PyErr_NoMemory();
char *newbuf;
if (buflen <= PY_SSIZE_T_MAX - chunk) {
buflen += chunk;
newbuf = PyMem_RawRealloc(buf, buflen);
}
else {
newbuf = NULL;
}
if (newbuf == NULL) {
PyMem_RawFree(buf);
buf = NULL;
break;
}
#endif
tmpbuf = PyMem_RawRealloc(buf, buflen);
if (tmpbuf == NULL)
break;
buf = newbuf;
buf = tmpbuf;
#ifdef MS_WINDOWS
cwd = getcwd(buf, (int)buflen);
#else
cwd = getcwd(buf, buflen);
#endif
} while (cwd == NULL && errno == ERANGE);
Py_END_ALLOW_THREADS
if (buf == NULL) {
return PyErr_NoMemory();
}
if (cwd == NULL) {
PyMem_RawFree(buf);
return posix_error();
}
if (use_bytes)
PyObject *obj;
if (use_bytes) {
obj = PyBytes_FromStringAndSize(buf, strlen(buf));
else
}
else {
obj = PyUnicode_DecodeFSDefault(buf);
}
PyMem_RawFree(buf);
return obj;
#endif /* !MS_WINDOWS */
}