From de427556746aa41a8b5198924ce423021bc0c718 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 29 Aug 2018 23:26:55 +0200 Subject: [PATCH] bpo-34523: Py_FileSystemDefaultEncoding NULL by default (GH-9003) * Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors default value is now NULL: initfsencoding() sets them during Python initialization. * Document how Python chooses the filesystem encoding and error handler. * Add an assertion to _PyCoreConfig_Read(). --- Include/coreconfig.h | 29 ++++++++++++++++++++++++++--- Python/coreconfig.c | 24 ++++++------------------ 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/Include/coreconfig.h b/Include/coreconfig.h index f46bc9d7e9f..431c292b816 100644 --- a/Include/coreconfig.h +++ b/Include/coreconfig.h @@ -66,11 +66,34 @@ typedef struct { int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */ int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */ - /* Python filesystem encoding and error handler: see + /* Python filesystem encoding and error handler: sys.getfilesystemencoding() and sys.getfilesystemencodeerrors(). - Updated later by initfsencoding(). On Windows, can be updated by - sys._enablelegacywindowsfsencoding() at runtime. + Default encoding and error handler: + + * if Py_SetStandardStreamEncoding() has been called: they have the + highest priority; + * PYTHONIOENCODING environment variable; + * The UTF-8 Mode uses UTF-8/surrogateescape; + * locale encoding: ANSI code page on Windows, UTF-8 on Android, + LC_CTYPE locale encoding on other platforms; + * On Windows, "surrogateescape" error handler; + * "surrogateescape" error handler if the LC_CTYPE locale is "C" or "POSIX"; + * "surrogateescape" error handler if the LC_CTYPE locale has been coerced + (PEP 538); + * "strict" error handler. + + Supported error handlers: "strict", "surrogateescape" and + "surrogatepass".
The surrogatepass error handler is only supported + if Py_DecodeLocale() and Py_EncodeLocale() directly use the UTF-8 codec; + it's only used on Windows. + + initfsencoding() updates the encoding to the Python codec name. + For example, "ANSI_X3.4-1968" is replaced with "ascii". + + On Windows, sys._enablelegacywindowsfsencoding() sets the + encoding/errors to mbcs/replace at runtime. + See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors. */ diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 625c743a419..fae32e533aa 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -19,25 +19,12 @@ /* Global configuration variables */ -/* The default encoding used by the platform file system APIs - Can remain NULL for all platforms that don't have such a concept - - Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the - values for Py_FileSystemDefaultEncoding! -*/ -#if defined(__APPLE__) -const char *Py_FileSystemDefaultEncoding = "utf-8"; -int Py_HasFileSystemDefaultEncoding = 1; -#elif defined(MS_WINDOWS) -/* may be changed by initfsencoding(), but should never be free()d */ -const char *Py_FileSystemDefaultEncoding = "utf-8"; -int Py_HasFileSystemDefaultEncoding = 1; -#else -const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ +/* The filesystem encoding is chosen by config_init_fs_encoding(), + see also initfsencoding(). */ +const char *Py_FileSystemDefaultEncoding = NULL; int Py_HasFileSystemDefaultEncoding = 0; -#endif -const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape"; -static int _Py_HasFileSystemDefaultEncodeErrors = 1; +const char *Py_FileSystemDefaultEncodeErrors = NULL; +static int _Py_HasFileSystemDefaultEncodeErrors = 0; /* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change stdin and stdout error handler to "surrogateescape".
It is equal to @@ -1362,6 +1349,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config) assert(config->filesystem_errors != NULL); assert(config->stdio_encoding != NULL); assert(config->stdio_errors != NULL); + assert(config->_check_hash_pycs_mode != NULL); return _Py_INIT_OK(); }