Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode filenames and enable os.fsencode().
This commit is contained in:
Victor Stinner 2010-06-11 23:56:51 +00:00
parent 0f35e2c0f4
commit 313a120ab6
5 changed files with 33 additions and 22 deletions

View File

@ -159,10 +159,10 @@ process and user.
.. function:: fsencode(value)
Encode *value* to bytes for use in the file system, environment variables or
the command line. Uses :func:`sys.getfilesystemencoding` and
``'surrogateescape'`` error handler for strings and returns bytes unchanged.
Availability: Unix.
the command line. Use :func:`sys.getfilesystemencoding` and the
``'surrogateescape'`` error handler for strings, and return bytes unchanged.
On Windows, use the ``'strict'`` error handler for strings if the file system
encoding is ``'mbcs'`` (which is the default encoding on Windows).
.. versionadded:: 3.2

View File

@ -533,16 +533,19 @@ if supports_bytes_environ:
return environb.get(key, default)
__all__.append("getenvb")
if name != 'nt':
def fsencode(value):
"""Encode value for use in the file system, environment variables
or the command line."""
if isinstance(value, bytes):
return value
elif isinstance(value, str):
return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
def fsencode(value):
"""Encode value for use in the file system, environment variables
or the command line."""
if isinstance(value, bytes):
return value
elif isinstance(value, str):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
return value.encode(encoding)
else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
return value.encode(encoding, 'surrogateescape')
else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
def _exists(name):
return name in globals()

View File

@ -33,16 +33,15 @@ else:
HOST = support.HOST
data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
CERTFILE = data_file("keycert.pem")
BYTES_CERTFILE = fsencode(CERTFILE)
BYTES_CERTFILE = os.fsencode(CERTFILE)
ONLYCERT = data_file("ssl_cert.pem")
ONLYKEY = data_file("ssl_key.pem")
BYTES_ONLYCERT = fsencode(ONLYCERT)
BYTES_ONLYKEY = fsencode(ONLYKEY)
BYTES_ONLYCERT = os.fsencode(ONLYCERT)
BYTES_ONLYKEY = os.fsencode(ONLYKEY)
CAPATH = data_file("capath")
BYTES_CAPATH = fsencode(CAPATH)
BYTES_CAPATH = os.fsencode(CAPATH)
SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")

View File

@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins
-----------------
- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
- Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
the interpreter with characters outside the Basic Multilingual Plane
(0x10000 and higher).

View File

@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
{
if (Py_FileSystemDefaultEncoding)
if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
NULL);
#endif
return PyUnicode_AsEncodedString(unicode,
Py_FileSystemDefaultEncoding,
"surrogateescape");
else
} else
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
"surrogateescape");
@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
return PyUnicode_DecodeMBCS(s, size, NULL);
}
#elif defined(__APPLE__)
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
#endif
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
q = (unsigned char *)s;
e = q + size;