Create os.fsdecode(): decode from the filesystem encoding with surrogateescape

error handler, or strict error handler on Windows.

 * Rewrite os.fsencode() documentation
 * Improve os.fsencode() and os.fsdecode() tests using the new PYTHONFSENCODING
   environment variable
This commit is contained in:
Victor Stinner 2010-08-19 01:05:19 +00:00
parent dbe6042f0a
commit e8d5145e18
5 changed files with 95 additions and 34 deletions

View File

@ -155,13 +155,26 @@ process and user.
These functions are described in :ref:`os-file-dir`.
.. function:: fsencode(value)
.. function:: fsencode(filename)
Encode *value* to bytes for use in the file system, environment variables or
the command line. Use :func:`sys.getfilesystemencoding` and
``'surrogateescape'`` error handler for strings and return bytes unchanged.
On Windows, use ``'strict'`` error handler for strings if the file system
encoding is ``'mbcs'`` (which is the default encoding).
Encode *filename* to the filesystem encoding with ``'surrogateescape'``
error handler, return :class:`bytes` unchanged. On Windows, use ``'strict'``
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
encoding).
:func:`fsdecode` is the reverse function.
.. versionadded:: 3.2
.. function:: fsdecode(filename)
Decode *filename* from the filesystem encoding with ``'surrogateescape'``
error handler, return :class:`str` unchanged. On Windows, use ``'strict'``
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
encoding).
:func:`fsencode` is the reverse function.
.. versionadded:: 3.2

View File

@ -237,13 +237,16 @@ Major performance enhancements have been added:
* Stub
Unicode
=======
Filenames and Unicode
=====================
The filesystem encoding can be specified by setting the
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
The :mod:`os` module has two new functions: :func:`os.fsencode` and
:func:`os.fsdecode`.
IDLE
====

View File

@ -402,8 +402,7 @@ def get_exec_path(env=None):
path_list = path_listb
if path_list is not None and isinstance(path_list, bytes):
path_list = path_list.decode(sys.getfilesystemencoding(),
'surrogateescape')
path_list = fsdecode(path_list)
if path_list is None:
path_list = defpath
@ -536,19 +535,39 @@ if supports_bytes_environ:
__all__.extend(("environb", "getenvb"))
def fsencode(value):
"""Encode value for use in the file system, environment variables
or the command line."""
if isinstance(value, bytes):
return value
elif isinstance(value, str):
def fsencode(filename):
"""
Encode filename to the filesystem encoding with 'surrogateescape' error
handler, return bytes unchanged. On Windows, use 'strict' error handler if
the file system encoding is 'mbcs' (which is the default encoding).
"""
if isinstance(filename, bytes):
return filename
elif isinstance(filename, str):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
return value.encode(encoding)
return filename.encode(encoding)
else:
return value.encode(encoding, 'surrogateescape')
return filename.encode(encoding, 'surrogateescape')
else:
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
def fsdecode(filename):
    """
    Decode filename from the filesystem encoding, return str unchanged.

    The encoding is the one reported by sys.getfilesystemencoding().
    Bytes are decoded with the 'surrogateescape' error handler, except
    when the filesystem encoding is 'mbcs' (the default encoding on
    Windows): in that case the codec's default 'strict' handler is used.

    Raise TypeError if filename is neither str nor bytes.
    """
    if isinstance(filename, str):
        return filename
    if not isinstance(filename, bytes):
        raise TypeError("expect bytes or str, not %s"
                        % type(filename).__name__)
    encoding = sys.getfilesystemencoding()
    if encoding == 'mbcs':
        # No explicit error handler: bytes.decode() defaults to 'strict'.
        return filename.decode(encoding)
    return filename.decode(encoding, 'surrogateescape')
def _exists(name):
return name in globals()

View File

@ -897,14 +897,6 @@ if sys.platform != 'win32':
class Pep383Tests(unittest.TestCase):
def setUp(self):
def fsdecode(filename):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
errors = 'strict'
else:
errors = 'surrogateescape'
return filename.decode(encoding, errors)
if support.TESTFN_UNENCODABLE:
self.dir = support.TESTFN_UNENCODABLE
else:
@ -930,7 +922,7 @@ if sys.platform != 'win32':
for fn in bytesfn:
f = open(os.path.join(self.bdir, fn), "w")
f.close()
fn = fsdecode(fn)
fn = os.fsdecode(fn)
if fn in self.unicodefn:
raise ValueError("duplicate filename")
self.unicodefn.add(fn)
@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase):
self.assertNotEqual(os.lstat(link), os.stat(link))
class MiscTests(unittest.TestCase):
class FSEncodingTests(unittest.TestCase):
def test_nop(self):
self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff')
self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141')
@unittest.skipIf(os.name == "nt", "POSIX specific test")
def test_fsencode(self):
self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff')
self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff')
def test_identity(self):
# assert fsdecode(fsencode(x)) == x
for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
try:
bytesfn = os.fsencode(fn)
except UnicodeEncodeError:
continue
self.assertEquals(os.fsdecode(bytesfn), fn)
def get_output(self, fs_encoding, func):
env = os.environ.copy()
env['PYTHONIOENCODING'] = 'utf-8'
env['PYTHONFSENCODING'] = fs_encoding
code = 'import os; print(%s, end="")' % func
process = subprocess.Popen(
[sys.executable, "-c", code],
stdout=subprocess.PIPE, env=env)
stdout, stderr = process.communicate()
self.assertEqual(process.returncode, 0)
return stdout.decode('utf-8')
def test_encodings(self):
def check(encoding, bytesfn, unicodefn):
encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
self.assertEqual(encoded, repr(bytesfn))
decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
self.assertEqual(decoded, repr(unicodefn))
check('ascii', b'abc\xff', 'abc\udcff')
check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
def test_main():
@ -1163,7 +1186,7 @@ def test_main():
Pep383Tests,
Win32KillTests,
Win32SymlinkTests,
MiscTests,
FSEncodingTests,
)
if __name__ == "__main__":

View File

@ -116,6 +116,9 @@ Extensions
Library
-------
- Create os.fsdecode(): decode from the filesystem encoding with
surrogateescape error handler, or strict error handler on Windows.
- Issue #3488: Provide convenient shorthand functions ``gzip.compress``
and ``gzip.decompress``. Original patch by Anand B. Pillai.