Issue #8622: Add PYTHONFSENCODING environment variable to override the
filesystem encoding. initfsencoding() also displays a better error message if get_codeset() fails.
This commit is contained in:
parent
56ab01b66a
commit
94908bbc15
|
@ -442,11 +442,20 @@ These environment variables influence Python's behavior.
|
|||
import of source modules.
|
||||
|
||||
|
||||
.. envvar:: PYTHONFSENCODING
|
||||
|
||||
If this is set before running the interpreter, it overrides the encoding used
|
||||
for the filesystem encoding (see :func:`sys.getfilesystemencoding`).
|
||||
|
||||
.. versionadded:: 3.2
|
||||
|
||||
|
||||
.. envvar:: PYTHONIOENCODING
|
||||
|
||||
Overrides the encoding used for stdin/stdout/stderr, in the syntax
|
||||
``encodingname:errorhandler``. The ``:errorhandler`` part is optional and
|
||||
has the same meaning as in :func:`str.encode`.
|
||||
If this is set before running the interpreter, it overrides the encoding used
|
||||
for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
|
||||
``:errorhandler`` part is optional and has the same meaning as in
|
||||
:func:`str.encode`.
|
||||
|
||||
For stderr, the ``:errorhandler`` part is ignored; the handler will always be
|
||||
``'backslashreplace'``.
|
||||
|
|
|
@ -232,6 +232,15 @@ Major performance enhancements have been added:
|
|||
|
||||
* Stub
|
||||
|
||||
|
||||
Unicode
|
||||
=======
|
||||
|
||||
The filesystem encoding can be specified by setting the
|
||||
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
|
||||
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
|
||||
|
||||
|
||||
IDLE
|
||||
====
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ if sys.platform != 'darwin':
|
|||
|
||||
# Is it Unicode-friendly?
|
||||
if not os.path.supports_unicode_filenames:
|
||||
fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
|
||||
fsencoding = sys.getfilesystemencoding()
|
||||
try:
|
||||
for name in filenames:
|
||||
name.encode(fsencoding)
|
||||
|
|
|
@ -863,16 +863,24 @@ class SizeofTest(unittest.TestCase):
|
|||
def test_getfilesystemencoding(self):
|
||||
import codecs
|
||||
|
||||
def check_fsencoding(fs_encoding):
|
||||
def check_fsencoding(fs_encoding, expected=None):
|
||||
self.assertIsNotNone(fs_encoding)
|
||||
if sys.platform == 'darwin':
|
||||
self.assertEqual(fs_encoding, 'utf-8')
|
||||
codecs.lookup(fs_encoding)
|
||||
if expected:
|
||||
self.assertEqual(fs_encoding, expected)
|
||||
|
||||
fs_encoding = sys.getfilesystemencoding()
|
||||
check_fsencoding(fs_encoding)
|
||||
|
||||
# Even in C locale
|
||||
def get_fsencoding(env):
|
||||
output = subprocess.check_output(
|
||||
[sys.executable, "-c",
|
||||
"import sys; print(sys.getfilesystemencoding())"],
|
||||
env=env)
|
||||
return output.rstrip().decode('ascii')
|
||||
|
||||
try:
|
||||
sys.executable.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
|
@ -880,14 +888,22 @@ class SizeofTest(unittest.TestCase):
|
|||
# see issue #8611
|
||||
pass
|
||||
else:
|
||||
# Even in C locale
|
||||
env = os.environ.copy()
|
||||
env['LANG'] = 'C'
|
||||
output = subprocess.check_output(
|
||||
[sys.executable, "-c",
|
||||
"import sys; print(sys.getfilesystemencoding())"],
|
||||
env=env)
|
||||
fs_encoding = output.rstrip().decode('ascii')
|
||||
check_fsencoding(fs_encoding)
|
||||
try:
|
||||
del env['PYTHONFSENCODING']
|
||||
except KeyError:
|
||||
pass
|
||||
check_fsencoding(get_fsencoding(env), 'ascii')
|
||||
|
||||
# Filesystem encoding is hardcoded on Windows and Mac OS X
|
||||
if sys.platform not in ('win32', 'darwin'):
|
||||
for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
|
||||
env = os.environ.copy()
|
||||
env['PYTHONFSENCODING'] = encoding
|
||||
check_fsencoding(get_fsencoding(env), encoding)
|
||||
|
||||
|
||||
def test_setfilesystemencoding(self):
|
||||
old = sys.getfilesystemencoding()
|
||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #8622: Add PYTHONFSENCODING environment variable to override the
|
||||
filesystem encoding.
|
||||
|
||||
- Issue #5127: The C functions that access the Unicode Database now accept and
|
||||
return characters from the full Unicode range, even on narrow unicode builds
|
||||
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference
|
||||
|
|
|
@ -99,6 +99,7 @@ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
|
|||
The default module search path uses %s.\n\
|
||||
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
|
||||
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
|
||||
PYTHONFSENCODING: Encoding used for the filesystem.\n\
|
||||
";
|
||||
|
||||
FILE *
|
||||
|
|
|
@ -134,18 +134,13 @@ add_flag(int flag, const char *envs)
|
|||
return flag;
|
||||
}
|
||||
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
static char*
|
||||
get_codeset(void)
|
||||
get_codec_name(const char *encoding)
|
||||
{
|
||||
char* codeset, *name_str;
|
||||
char *name_utf8, *name_str;
|
||||
PyObject *codec, *name = NULL;
|
||||
|
||||
codeset = nl_langinfo(CODESET);
|
||||
if (!codeset || codeset[0] == '\0')
|
||||
return NULL;
|
||||
|
||||
codec = _PyCodec_Lookup(codeset);
|
||||
codec = _PyCodec_Lookup(encoding);
|
||||
if (!codec)
|
||||
goto error;
|
||||
|
||||
|
@ -154,18 +149,34 @@ get_codeset(void)
|
|||
if (!name)
|
||||
goto error;
|
||||
|
||||
name_str = _PyUnicode_AsString(name);
|
||||
name_utf8 = _PyUnicode_AsString(name);
|
||||
if (name == NULL)
|
||||
goto error;
|
||||
codeset = strdup(name_str);
|
||||
name_str = strdup(name_utf8);
|
||||
Py_DECREF(name);
|
||||
return codeset;
|
||||
if (name_str == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
return name_str;
|
||||
|
||||
error:
|
||||
Py_XDECREF(codec);
|
||||
Py_XDECREF(name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
static char*
|
||||
get_codeset(void)
|
||||
{
|
||||
char* codeset = nl_langinfo(CODESET);
|
||||
if (!codeset || codeset[0] == '\0') {
|
||||
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
|
||||
return NULL;
|
||||
}
|
||||
return get_codec_name(codeset);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
|
@ -706,25 +717,35 @@ initfsencoding(void)
|
|||
{
|
||||
PyObject *codec;
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
char *codeset;
|
||||
char *codeset = NULL;
|
||||
|
||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||
const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
|
||||
if (env_encoding != NULL) {
|
||||
codeset = get_codec_name(env_encoding);
|
||||
if (!codeset) {
|
||||
fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
|
||||
PyErr_Print();
|
||||
}
|
||||
}
|
||||
if (!codeset) {
|
||||
/* On Unix, set the file system encoding according to the
|
||||
user's preference, if the CODESET names a well-known
|
||||
Python codec, and Py_FileSystemDefaultEncoding isn't
|
||||
initialized by other means. Also set the encoding of
|
||||
stdin and stdout if these are terminals. */
|
||||
codeset = get_codeset();
|
||||
}
|
||||
if (codeset != NULL) {
|
||||
Py_FileSystemDefaultEncoding = codeset;
|
||||
Py_HasFileSystemDefaultEncoding = 0;
|
||||
return;
|
||||
} else {
|
||||
fprintf(stderr, "Unable to get the locale encoding:\n");
|
||||
PyErr_Print();
|
||||
}
|
||||
|
||||
PyErr_Clear();
|
||||
fprintf(stderr,
|
||||
"Unable to get the locale encoding: "
|
||||
"fallback to utf-8\n");
|
||||
fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
|
||||
Py_FileSystemDefaultEncoding = "utf-8";
|
||||
Py_HasFileSystemDefaultEncoding = 1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue