From 313a120ab6515f1bcddb13a9403a857078a9e474 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Fri, 11 Jun 2010 23:56:51 +0000
Subject: [PATCH] Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode filenames and enable os.fsencode().

---
 Doc/library/os.rst      |  8 ++++----
 Lib/os.py               | 21 ++++++++++++---------
 Lib/test/test_ssl.py    |  9 ++++-----
 Misc/NEWS               |  3 +++
 Objects/unicodeobject.c | 14 ++++++++++----
 5 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 970725a1c50..75036972c52 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -159,10 +159,10 @@ process and user.
 .. function:: fsencode(value)
 
    Encode *value* to bytes for use in the file system, environment variables or
-   the command line. Uses :func:`sys.getfilesystemencoding` and
-   ``'surrogateescape'`` error handler for strings and returns bytes unchanged.
-
-   Availability: Unix.
+   the command line. Use :func:`sys.getfilesystemencoding` and
+   ``'surrogateescape'`` error handler for strings and return bytes unchanged.
+   On Windows, use ``'strict'`` error handler for strings if the file system
+   encoding is ``'mbcs'`` (which is the default encoding).
 
    .. versionadded:: 3.2
 
diff --git a/Lib/os.py b/Lib/os.py
index 8f47137f3bf..e9d44cc614b 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -533,16 +533,19 @@ if supports_bytes_environ:
         return environb.get(key, default)
     __all__.append("getenvb")
 
-if name != 'nt':
-    def fsencode(value):
-        """Encode value for use in the file system, environment variables
-        or the command line."""
-        if isinstance(value, bytes):
-            return value
-        elif isinstance(value, str):
-            return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
+def fsencode(value):
+    """Encode value for use in the file system, environment variables
+    or the command line."""
+    if isinstance(value, bytes):
+        return value
+    elif isinstance(value, str):
+        encoding = sys.getfilesystemencoding()
+        if encoding == 'mbcs':
+            return value.encode(encoding)
         else:
-            raise TypeError("expect bytes or str, not %s" % type(value).__name__)
+            return value.encode(encoding, 'surrogateescape')
+    else:
+        raise TypeError("expect bytes or str, not %s" % type(value).__name__)
 
 def _exists(name):
     return name in globals()
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index c4644401806..9c0d263e0f1 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -33,16 +33,15 @@ else:
 HOST = support.HOST
 
 data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
-fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
 
 CERTFILE = data_file("keycert.pem")
-BYTES_CERTFILE = fsencode(CERTFILE)
+BYTES_CERTFILE = os.fsencode(CERTFILE)
 ONLYCERT = data_file("ssl_cert.pem")
 ONLYKEY = data_file("ssl_key.pem")
-BYTES_ONLYCERT = fsencode(ONLYCERT)
-BYTES_ONLYKEY = fsencode(ONLYKEY)
+BYTES_ONLYCERT = os.fsencode(ONLYCERT)
+BYTES_ONLYKEY = os.fsencode(ONLYKEY)
 CAPATH = data_file("capath")
-BYTES_CAPATH = fsencode(CAPATH)
+BYTES_CAPATH = os.fsencode(CAPATH)
 
 SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 5a59310ebf1..3fd017847b5 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
+  filenames and enable os.fsencode().
+
 - Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
   the interpreter with characters outside the Basic Multilingual Plane
   (higher than 0x10000).
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index de92787cc69..8d75b205de7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
 
 PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
-    if (Py_FileSystemDefaultEncoding)
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
+            return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
+                                        PyUnicode_GET_SIZE(unicode),
+                                        NULL);
+#endif
         return PyUnicode_AsEncodedString(unicode,
                                          Py_FileSystemDefaultEncoding,
                                          "surrogateescape");
-    else
+    } else
         return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     "surrogateescape");
@@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
     if (Py_FileSystemDefaultEncoding) {
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
         if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
-            return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
+            return PyUnicode_DecodeMBCS(s, size, NULL);
         }
 #elif defined(__APPLE__)
         if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
@@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
 #endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
-    
+
     q = (unsigned char *)s;
     e = q + size;
 