Issue #8622: Add PYTHONFSENCODING environment variable to override the

filesystem encoding.

initfsencoding() also displays a better error message if get_codeset() fails.
This commit is contained in:
Victor Stinner 2010-08-18 21:23:25 +00:00
parent 56ab01b66a
commit 94908bbc15
7 changed files with 93 additions and 34 deletions

View File

@ -442,11 +442,20 @@ These environment variables influence Python's behavior.
import of source modules.
.. envvar:: PYTHONFSENCODING
If this is set before running the interpreter, it overrides the filesystem
encoding (see :func:`sys.getfilesystemencoding`).
.. versionadded:: 3.2
.. envvar:: PYTHONIOENCODING
Overrides the encoding used for stdin/stdout/stderr, in the syntax
``encodingname:errorhandler``. The ``:errorhandler`` part is optional and
has the same meaning as in :func:`str.encode`.
If this is set before running the interpreter, it overrides the encoding used
for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
``:errorhandler`` part is optional and has the same meaning as in
:func:`str.encode`.
For stderr, the ``:errorhandler`` part is ignored; the handler will always be
``'backslashreplace'``.

View File

@ -232,6 +232,15 @@ Major performance enhancements have been added:
* Stub
Unicode
=======
The filesystem encoding can be specified by setting the
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
IDLE
====

View File

@ -43,7 +43,7 @@ if sys.platform != 'darwin':
# Is it Unicode-friendly?
if not os.path.supports_unicode_filenames:
fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
fsencoding = sys.getfilesystemencoding()
try:
for name in filenames:
name.encode(fsencoding)

View File

@ -863,16 +863,24 @@ class SizeofTest(unittest.TestCase):
def test_getfilesystemencoding(self):
import codecs
def check_fsencoding(fs_encoding):
def check_fsencoding(fs_encoding, expected=None):
self.assertIsNotNone(fs_encoding)
if sys.platform == 'darwin':
self.assertEqual(fs_encoding, 'utf-8')
codecs.lookup(fs_encoding)
if expected:
self.assertEqual(fs_encoding, expected)
fs_encoding = sys.getfilesystemencoding()
check_fsencoding(fs_encoding)
# Even in C locale
def get_fsencoding(env):
output = subprocess.check_output(
[sys.executable, "-c",
"import sys; print(sys.getfilesystemencoding())"],
env=env)
return output.rstrip().decode('ascii')
try:
sys.executable.encode('ascii')
except UnicodeEncodeError:
@ -880,14 +888,22 @@ class SizeofTest(unittest.TestCase):
# see issue #8611
pass
else:
# Even in C locale
env = os.environ.copy()
env['LANG'] = 'C'
output = subprocess.check_output(
[sys.executable, "-c",
"import sys; print(sys.getfilesystemencoding())"],
env=env)
fs_encoding = output.rstrip().decode('ascii')
check_fsencoding(fs_encoding)
try:
del env['PYTHONFSENCODING']
except KeyError:
pass
check_fsencoding(get_fsencoding(env), 'ascii')
# Filesystem encoding is hardcoded on Windows and Mac OS X
if sys.platform not in ('win32', 'darwin'):
for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
env = os.environ.copy()
env['PYTHONFSENCODING'] = encoding
check_fsencoding(get_fsencoding(env), encoding)
def test_setfilesystemencoding(self):
old = sys.getfilesystemencoding()

View File

@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2?
Core and Builtins
-----------------
- Issue #8622: Add PYTHONFSENCODING environment variable to override the
filesystem encoding.
- Issue #5127: The C functions that access the Unicode Database now accept and
return characters from the full Unicode range, even on narrow unicode builds
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference

View File

@ -99,6 +99,7 @@ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
The default module search path uses %s.\n\
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
PYTHONFSENCODING: Encoding used for the filesystem.\n\
";
FILE *

View File

@ -134,18 +134,13 @@ add_flag(int flag, const char *envs)
return flag;
}
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
static char*
get_codeset(void)
get_codec_name(const char *encoding)
{
char* codeset, *name_str;
char *name_utf8, *name_str;
PyObject *codec, *name = NULL;
codeset = nl_langinfo(CODESET);
if (!codeset || codeset[0] == '\0')
return NULL;
codec = _PyCodec_Lookup(codeset);
codec = _PyCodec_Lookup(encoding);
if (!codec)
goto error;
@ -154,18 +149,34 @@ get_codeset(void)
if (!name)
goto error;
name_str = _PyUnicode_AsString(name);
name_utf8 = _PyUnicode_AsString(name);
if (name == NULL)
goto error;
codeset = strdup(name_str);
name_str = strdup(name_utf8);
Py_DECREF(name);
return codeset;
if (name_str == NULL) {
PyErr_NoMemory();
return NULL;
}
return name_str;
error:
Py_XDECREF(codec);
Py_XDECREF(name);
return NULL;
}
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Look up the locale's character set with nl_langinfo(CODESET) and hand it
   to get_codec_name().

   Returns the result of get_codec_name() for that charset name.  If
   nl_langinfo() yields NULL or an empty string, a ValueError is set and
   NULL is returned. */
static char*
get_codeset(void)
{
    const char *locale_charset = nl_langinfo(CODESET);

    if (locale_charset != NULL && locale_charset[0] != '\0')
        return get_codec_name(locale_charset);

    /* Nothing usable was reported: raise instead of guessing a charset. */
    PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
    return NULL;
}
#endif
void
@ -706,25 +717,35 @@ initfsencoding(void)
{
PyObject *codec;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
char *codeset;
char *codeset = NULL;
if (Py_FileSystemDefaultEncoding == NULL) {
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */
codeset = get_codeset();
const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
if (env_encoding != NULL) {
codeset = get_codec_name(env_encoding);
if (!codeset) {
fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
PyErr_Print();
}
}
if (!codeset) {
/* On Unix, set the file system encoding according to the
user's preference, if the CODESET names a well-known
Python codec, and Py_FileSystemDefaultEncoding isn't
initialized by other means. Also set the encoding of
stdin and stdout if these are terminals. */
codeset = get_codeset();
}
if (codeset != NULL) {
Py_FileSystemDefaultEncoding = codeset;
Py_HasFileSystemDefaultEncoding = 0;
return;
} else {
fprintf(stderr, "Unable to get the locale encoding:\n");
PyErr_Print();
}
PyErr_Clear();
fprintf(stderr,
"Unable to get the locale encoding: "
"fallback to utf-8\n");
fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
Py_FileSystemDefaultEncoding = "utf-8";
Py_HasFileSystemDefaultEncoding = 1;
}