Issue #8610: Load file system codec at startup, and display a fatal error on
failure. Set the file system encoding to utf-8 (instead of None) if getting the locale encoding failed, or if the nl_langinfo(CODESET) function is missing.
This commit is contained in:
parent
06ba9ade85
commit
b744ba1d14
|
@ -298,15 +298,13 @@ always available.
|
||||||
|
|
||||||
.. function:: getfilesystemencoding()
|
.. function:: getfilesystemencoding()
|
||||||
|
|
||||||
Return the name of the encoding used to convert Unicode filenames into system
|
Return the name of the encoding used to convert Unicode filenames into
|
||||||
file names, or ``None`` if the system default encoding is used. The result value
|
system file names. The result value depends on the operating system:
|
||||||
depends on the operating system:
|
|
||||||
|
|
||||||
* On Mac OS X, the encoding is ``'utf-8'``.
|
* On Mac OS X, the encoding is ``'utf-8'``.
|
||||||
|
|
||||||
* On Unix, the encoding is the user's preference according to the result of
|
* On Unix, the encoding is the user's preference according to the result of
|
||||||
nl_langinfo(CODESET), or ``None`` if the ``nl_langinfo(CODESET)``
|
nl_langinfo(CODESET), or ``'utf-8'`` if ``nl_langinfo(CODESET)`` failed.
|
||||||
failed.
|
|
||||||
|
|
||||||
* On Windows NT+, file names are Unicode natively, so no conversion is
|
* On Windows NT+, file names are Unicode natively, so no conversion is
|
||||||
performed. :func:`getfilesystemencoding` still returns ``'mbcs'``, as
|
performed. :func:`getfilesystemencoding` still returns ``'mbcs'``, as
|
||||||
|
@ -316,6 +314,10 @@ always available.
|
||||||
|
|
||||||
* On Windows 9x, the encoding is ``'mbcs'``.
|
* On Windows 9x, the encoding is ``'mbcs'``.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.2
|
||||||
|
On Unix, use ``'utf-8'`` instead of ``None`` if ``nl_langinfo(CODESET)``
|
||||||
|
failed. The result of :func:`getfilesystemencoding` can no longer be ``None``.
|
||||||
|
|
||||||
|
|
||||||
.. function:: getrefcount(object)
|
.. function:: getrefcount(object)
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #8610: Load file system codec at startup, and display a fatal error on
|
||||||
|
failure. Set the file system encoding to utf-8 (instead of None) if getting
|
||||||
|
the locale encoding failed, or if the nl_langinfo(CODESET) function is missing.
|
||||||
|
|
||||||
- PyFile_FromFd() uses PyUnicode_DecodeFSDefault() instead of
|
- PyFile_FromFd() uses PyUnicode_DecodeFSDefault() instead of
|
||||||
PyUnicode_FromString() to support surrogates in the filename and use the
|
PyUnicode_FromString() to support surrogates in the filename and use the
|
||||||
right encoding
|
right encoding
|
||||||
|
|
|
@ -9,6 +9,10 @@
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#ifdef HAVE_LANGINFO_H
|
||||||
|
#include <langinfo.h> /* CODESET */
|
||||||
|
#endif
|
||||||
|
|
||||||
/* The default encoding used by the platform file system APIs
|
/* The default encoding used by the platform file system APIs
|
||||||
Can remain NULL for all platforms that don't have such a concept
|
Can remain NULL for all platforms that don't have such a concept
|
||||||
|
|
||||||
|
@ -21,9 +25,12 @@ int Py_HasFileSystemDefaultEncoding = 1;
|
||||||
#elif defined(__APPLE__)
|
#elif defined(__APPLE__)
|
||||||
const char *Py_FileSystemDefaultEncoding = "utf-8";
|
const char *Py_FileSystemDefaultEncoding = "utf-8";
|
||||||
int Py_HasFileSystemDefaultEncoding = 1;
|
int Py_HasFileSystemDefaultEncoding = 1;
|
||||||
#else
|
#elif defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||||
const char *Py_FileSystemDefaultEncoding = NULL; /* use default */
|
const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
|
||||||
int Py_HasFileSystemDefaultEncoding = 0;
|
int Py_HasFileSystemDefaultEncoding = 0;
|
||||||
|
#else
|
||||||
|
const char *Py_FileSystemDefaultEncoding = "utf-8";
|
||||||
|
int Py_HasFileSystemDefaultEncoding = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -57,6 +57,7 @@ extern grammar _PyParser_Grammar; /* From graminit.c */
|
||||||
|
|
||||||
/* Forward */
|
/* Forward */
|
||||||
static void initmain(void);
|
static void initmain(void);
|
||||||
|
static void initfsencoding(void);
|
||||||
static void initsite(void);
|
static void initsite(void);
|
||||||
static int initstdio(void);
|
static int initstdio(void);
|
||||||
static void flush_io(void);
|
static void flush_io(void);
|
||||||
|
@ -159,7 +160,6 @@ get_codeset(void)
|
||||||
|
|
||||||
error:
|
error:
|
||||||
Py_XDECREF(codec);
|
Py_XDECREF(codec);
|
||||||
PyErr_Clear();
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -171,9 +171,6 @@ Py_InitializeEx(int install_sigs)
|
||||||
PyThreadState *tstate;
|
PyThreadState *tstate;
|
||||||
PyObject *bimod, *sysmod, *pstderr;
|
PyObject *bimod, *sysmod, *pstderr;
|
||||||
char *p;
|
char *p;
|
||||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
|
||||||
char *codeset;
|
|
||||||
#endif
|
|
||||||
extern void _Py_ReadyTypes(void);
|
extern void _Py_ReadyTypes(void);
|
||||||
|
|
||||||
if (initialized)
|
if (initialized)
|
||||||
|
@ -264,21 +261,7 @@ Py_InitializeEx(int install_sigs)
|
||||||
|
|
||||||
_PyImportHooks_Init();
|
_PyImportHooks_Init();
|
||||||
|
|
||||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
initfsencoding();
|
||||||
/* On Unix, set the file system encoding according to the
|
|
||||||
user's preference, if the CODESET names a well-known
|
|
||||||
Python codec, and Py_FileSystemDefaultEncoding isn't
|
|
||||||
initialized by other means. Also set the encoding of
|
|
||||||
stdin and stdout if these are terminals. */
|
|
||||||
|
|
||||||
codeset = get_codeset();
|
|
||||||
if (codeset) {
|
|
||||||
if (!Py_FileSystemDefaultEncoding)
|
|
||||||
Py_FileSystemDefaultEncoding = codeset;
|
|
||||||
else
|
|
||||||
free(codeset);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (install_sigs)
|
if (install_sigs)
|
||||||
initsigs(); /* Signal handling stuff, including initintr() */
|
initsigs(); /* Signal handling stuff, including initintr() */
|
||||||
|
@ -496,7 +479,7 @@ Py_Finalize(void)
|
||||||
_PyUnicode_Fini();
|
_PyUnicode_Fini();
|
||||||
|
|
||||||
/* reset file system default encoding */
|
/* reset file system default encoding */
|
||||||
if (!Py_HasFileSystemDefaultEncoding) {
|
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
||||||
free((char*)Py_FileSystemDefaultEncoding);
|
free((char*)Py_FileSystemDefaultEncoding);
|
||||||
Py_FileSystemDefaultEncoding = NULL;
|
Py_FileSystemDefaultEncoding = NULL;
|
||||||
}
|
}
|
||||||
|
@ -707,6 +690,45 @@ initmain(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
initfsencoding(void)
|
||||||
|
{
|
||||||
|
PyObject *codec;
|
||||||
|
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||||
|
char *codeset;
|
||||||
|
|
||||||
|
/* On Unix, set the file system encoding according to the
|
||||||
|
user's preference, if the CODESET names a well-known
|
||||||
|
Python codec, and Py_FileSystemDefaultEncoding isn't
|
||||||
|
initialized by other means. Also set the encoding of
|
||||||
|
stdin and stdout if these are terminals. */
|
||||||
|
codeset = get_codeset();
|
||||||
|
if (codeset != NULL) {
|
||||||
|
Py_FileSystemDefaultEncoding = codeset;
|
||||||
|
Py_HasFileSystemDefaultEncoding = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyErr_Clear();
|
||||||
|
fprintf(stderr,
|
||||||
|
"Unable to get the locale encoding: "
|
||||||
|
"fallback to utf-8\n");
|
||||||
|
Py_FileSystemDefaultEncoding = "utf-8";
|
||||||
|
Py_HasFileSystemDefaultEncoding = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* the encoding is mbcs, utf-8 or ascii */
|
||||||
|
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
|
||||||
|
if (!codec) {
|
||||||
|
/* Such an error can only occur in critical situations: no more
|
||||||
|
* memory, import a module of the standard library failed,
|
||||||
|
* etc. */
|
||||||
|
Py_FatalError("Py_Initialize: unable to load the file system codec");
|
||||||
|
} else {
|
||||||
|
Py_DECREF(codec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Import the site module (not into __main__ though) */
|
/* Import the site module (not into __main__ though) */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
Loading…
Reference in New Issue