mirror of https://github.com/python/cpython
bpo-34523: Add _PyCoreConfig.filesystem_encoding (GH-8963)
_PyCoreConfig_Read() is now responsible for choosing the filesystem encoding and error handler. When using Py_Main(), the encoding is now chosen even before calling Py_Initialize(). _PyCoreConfig.filesystem_encoding is now the reference, instead of Py_FileSystemDefaultEncoding, for the Python filesystem encoding. Changes: * Add filesystem_encoding and filesystem_errors to _PyCoreConfig. * _PyCoreConfig_Read() now reads the locale encoding for the filesystem encoding. * PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize() now use the interpreter configuration rather than the Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors global configuration variables. * Add the _Py_SetFileSystemEncoding() and _Py_ClearFileSystemEncoding() private functions so that Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors are only modified in coreconfig.c. * _Py_CoerceLegacyLocale() now takes an int rather than a _PyCoreConfig for the warning.
This commit is contained in:
parent
dfe0dc7453
commit
b2457efc78
|
@ -66,6 +66,17 @@ typedef struct {
|
||||||
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
|
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
|
||||||
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
|
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
|
||||||
|
|
||||||
|
/* Python filesystem encoding and error handler: see
|
||||||
|
sys.getfilesystemencoding() and sys.getfilesystemencodeerrors().
|
||||||
|
|
||||||
|
Updated later by initfsencoding(). On Windows, can be updated by
|
||||||
|
sys._enablelegacywindowsfsencoding() at runtime.
|
||||||
|
|
||||||
|
See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
|
||||||
|
*/
|
||||||
|
char *filesystem_encoding;
|
||||||
|
char *filesystem_errors;
|
||||||
|
|
||||||
/* Enable UTF-8 mode?
|
/* Enable UTF-8 mode?
|
||||||
Set by -X utf8 command line option and PYTHONUTF8 environment variable.
|
Set by -X utf8 command line option and PYTHONUTF8 environment variable.
|
||||||
If set to -1 (default), inherit Py_UTF8Mode value. */
|
If set to -1 (default), inherit Py_UTF8Mode value. */
|
||||||
|
@ -325,6 +336,14 @@ PyAPI_FUNC(int) _PyCoreConfig_GetEnvDup(
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef Py_BUILD_CORE
|
||||||
|
PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
|
||||||
|
const char *encoding,
|
||||||
|
const char *errors);
|
||||||
|
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -175,7 +175,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
|
||||||
|
|
||||||
/* Legacy locale support */
|
/* Legacy locale support */
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
|
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(int warn);
|
||||||
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
|
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
|
||||||
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
|
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -251,6 +251,8 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
|
|
||||||
class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
maxDiff = 4096
|
maxDiff = 4096
|
||||||
|
UTF8_MODE_ERRORS = ('surrogatepass' if sys.platform == 'win32'
|
||||||
|
else 'surrogateescape')
|
||||||
DEFAULT_CONFIG = {
|
DEFAULT_CONFIG = {
|
||||||
'install_signal_handlers': 1,
|
'install_signal_handlers': 1,
|
||||||
'use_environment': 1,
|
'use_environment': 1,
|
||||||
|
@ -265,8 +267,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
'show_alloc_count': 0,
|
'show_alloc_count': 0,
|
||||||
'dump_refs': 0,
|
'dump_refs': 0,
|
||||||
'malloc_stats': 0,
|
'malloc_stats': 0,
|
||||||
'utf8_mode': 0,
|
|
||||||
|
|
||||||
|
# None means that the default encoding is read at runtime:
|
||||||
|
# see get_locale_encoding().
|
||||||
|
'filesystem_encoding': None,
|
||||||
|
'filesystem_errors': sys.getfilesystemencodeerrors(),
|
||||||
|
'utf8_mode': 0,
|
||||||
'coerce_c_locale': 0,
|
'coerce_c_locale': 0,
|
||||||
'coerce_c_locale_warn': 0,
|
'coerce_c_locale_warn': 0,
|
||||||
|
|
||||||
|
@ -297,6 +303,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
'_frozen': 0,
|
'_frozen': 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_stdio_encoding(self, env):
|
def get_stdio_encoding(self, env):
|
||||||
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
|
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
|
||||||
args = (sys.executable, '-c', code)
|
args = (sys.executable, '-c', code)
|
||||||
|
@ -308,6 +315,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
out = proc.stdout.rstrip()
|
out = proc.stdout.rstrip()
|
||||||
return out.split()
|
return out.split()
|
||||||
|
|
||||||
|
def get_locale_encoding(self, isolated):
    """Return the expected default filesystem encoding as a codec name.

    On Windows, macOS and Android the answer is always "utf-8".  On other
    platforms a child interpreter is spawned to read the LC_CTYPE locale
    encoding and normalize it with codecs.lookup().

    When *isolated* is false, PYTHONCOERCECLOCALE and PYTHONUTF8 are both
    set to '0' in the child's environment.

    Raise Exception if the child process exits with a non-zero status.
    """
    # Windows, macOS and Android use UTF-8
    if sys.platform in ('win32', 'darwin') or support.is_android:
        return "utf-8"

    script = '; '.join((
        'import codecs, locale, sys',
        'locale.setlocale(locale.LC_CTYPE, "")',
        'enc = locale.nl_langinfo(locale.CODESET)',
        'enc = codecs.lookup(enc).name',
        'print(enc)',
    ))

    child_env = dict(os.environ)
    if not isolated:
        child_env.update(PYTHONCOERCECLOCALE='0', PYTHONUTF8='0')

    result = subprocess.run((sys.executable, '-c', script),
                            text=True,
                            env=child_env,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    if result.returncode:
        raise Exception(f"failed to get the locale encoding: "
                        f"stdout={result.stdout!r} stderr={result.stderr!r}")
    return result.stdout.rstrip()
|
||||||
|
|
||||||
def check_config(self, testname, expected):
|
def check_config(self, testname, expected):
|
||||||
expected = dict(self.DEFAULT_CONFIG, **expected)
|
expected = dict(self.DEFAULT_CONFIG, **expected)
|
||||||
|
|
||||||
|
@ -326,6 +356,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
expected['stdio_encoding'] = res[0]
|
expected['stdio_encoding'] = res[0]
|
||||||
if expected['stdio_errors'] is None:
|
if expected['stdio_errors'] is None:
|
||||||
expected['stdio_errors'] = res[1]
|
expected['stdio_errors'] = res[1]
|
||||||
|
if expected['filesystem_encoding'] is None:
|
||||||
|
expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
|
||||||
for key, value in expected.items():
|
for key, value in expected.items():
|
||||||
expected[key] = str(value)
|
expected[key] = str(value)
|
||||||
|
|
||||||
|
@ -357,7 +389,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
'utf8_mode': 1,
|
'utf8_mode': 1,
|
||||||
'stdio_encoding': 'utf-8',
|
'stdio_encoding': 'utf-8',
|
||||||
'stdio_errors': 'surrogateescape',
|
'stdio_errors': 'surrogateescape',
|
||||||
|
'filesystem_encoding': 'utf-8',
|
||||||
|
'filesystem_errors': self.UTF8_MODE_ERRORS,
|
||||||
'user_site_directory': 0,
|
'user_site_directory': 0,
|
||||||
'_frozen': 1,
|
'_frozen': 1,
|
||||||
}
|
}
|
||||||
|
@ -378,6 +411,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
'utf8_mode': 1,
|
'utf8_mode': 1,
|
||||||
'stdio_encoding': 'iso8859-1',
|
'stdio_encoding': 'iso8859-1',
|
||||||
'stdio_errors': 'replace',
|
'stdio_errors': 'replace',
|
||||||
|
'filesystem_encoding': 'utf-8',
|
||||||
|
'filesystem_errors': self.UTF8_MODE_ERRORS,
|
||||||
|
|
||||||
'pycache_prefix': 'conf_pycache_prefix',
|
'pycache_prefix': 'conf_pycache_prefix',
|
||||||
'program_name': './conf_program_name',
|
'program_name': './conf_program_name',
|
||||||
|
@ -409,6 +444,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
'import_time': 1,
|
'import_time': 1,
|
||||||
'malloc_stats': 1,
|
'malloc_stats': 1,
|
||||||
'utf8_mode': 1,
|
'utf8_mode': 1,
|
||||||
|
'filesystem_encoding': 'utf-8',
|
||||||
|
'filesystem_errors': self.UTF8_MODE_ERRORS,
|
||||||
'inspect': 1,
|
'inspect': 1,
|
||||||
'optimization_level': 2,
|
'optimization_level': 2,
|
||||||
'pycache_prefix': 'env_pycache_prefix',
|
'pycache_prefix': 'env_pycache_prefix',
|
||||||
|
|
|
@ -861,6 +861,16 @@ class SysModuleTest(unittest.TestCase):
|
||||||
def test_no_duplicates_in_meta_path(self):
|
def test_no_duplicates_in_meta_path(self):
|
||||||
self.assertEqual(len(sys.meta_path), len(set(sys.meta_path)))
|
self.assertEqual(len(sys.meta_path), len(set(sys.meta_path)))
|
||||||
|
|
||||||
|
@unittest.skipUnless(hasattr(sys, "_enablelegacywindowsfsencoding"),
                     'needs sys._enablelegacywindowsfsencoding()')
def test__enablelegacywindowsfsencoding(self):
    # Run in a child interpreter: the call changes the filesystem
    # encoding of the process that makes it.
    statements = (
        'import sys',
        'sys._enablelegacywindowsfsencoding()',
        'print(sys.getfilesystemencoding(), sys.getfilesystemencodeerrors())',
    )
    rc, out, err = assert_python_ok('-c', '; '.join(statements))
    reported = out.decode('ascii', 'replace').rstrip()
    self.assertEqual(reported, 'mbcs replace')
|
||||||
|
|
||||||
|
|
||||||
@test.support.cpython_only
|
@test.support.cpython_only
|
||||||
class SizeofTest(unittest.TestCase):
|
class SizeofTest(unittest.TestCase):
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The Python filesystem encoding is now read earlier during the Python
|
||||||
|
initialization.
|
|
@ -1339,7 +1339,7 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
|
||||||
*/
|
*/
|
||||||
if (config->coerce_c_locale && !locale_coerced) {
|
if (config->coerce_c_locale && !locale_coerced) {
|
||||||
locale_coerced = 1;
|
locale_coerced = 1;
|
||||||
_Py_CoerceLegacyLocale(config);
|
_Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
|
||||||
encoding_changed = 1;
|
encoding_changed = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3410,27 +3410,24 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||||
/* Encode a Unicode object using the filesystem encoding and error handler
   stored in the current interpreter's core configuration. */
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    const _PyCoreConfig *cfg = &interp->core_config;
#if defined(__APPLE__)
    /* On macOS the UTF-8 encoder is called directly; only the error handler
       comes from the configuration. */
    return _PyUnicode_AsUTF8String(unicode, cfg->filesystem_errors);
#else
    /* Bootstrap check: if the filesystem codec is implemented in Python, it
       cannot be used to encode and decode filenames before it is loaded,
       because loading the Python codec requires encoding at least its own
       filename. Use the C implementation of the locale codec until the codec
       registry is initialized and the Python codec is loaded.
       See initfsencoding(). */
    if (!interp->fscodec_initialized) {
        return unicode_encode_locale(unicode,
                                     cfg->filesystem_errors, 0);
    }

    return PyUnicode_AsEncodedString(unicode,
                                     cfg->filesystem_encoding,
                                     cfg->filesystem_errors);
#endif
}
|
||||||
|
@ -3636,27 +3633,24 @@ PyUnicode_DecodeFSDefault(const char *s) {
|
||||||
PyObject*
|
PyObject*
|
||||||
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__)
|
|
||||||
return PyUnicode_DecodeUTF8Stateful(s, size, Py_FileSystemDefaultEncodeErrors, NULL);
|
|
||||||
#else
|
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||||
|
const _PyCoreConfig *config = &interp->core_config;
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
|
||||||
|
#else
|
||||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||||
cannot use it to encode and decode filenames before it is loaded. Load
|
cannot use it to encode and decode filenames before it is loaded. Load
|
||||||
the Python codec requires to encode at least its own filename. Use the C
|
the Python codec requires to encode at least its own filename. Use the C
|
||||||
version of the locale codec until the codec registry is initialized and
|
implementation of the locale codec until the codec registry is
|
||||||
the Python codec is loaded.
|
initialized and the Python codec is loaded. See initfsencoding(). */
|
||||||
|
if (interp->fscodec_initialized) {
|
||||||
Py_FileSystemDefaultEncoding is shared between all interpreters, we
|
|
||||||
cannot only rely on it: check also interp->fscodec_initialized for
|
|
||||||
subinterpreters. */
|
|
||||||
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
|
|
||||||
return PyUnicode_Decode(s, size,
|
return PyUnicode_Decode(s, size,
|
||||||
Py_FileSystemDefaultEncoding,
|
config->filesystem_encoding,
|
||||||
Py_FileSystemDefaultEncodeErrors);
|
config->filesystem_errors);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return unicode_decode_locale(s, size,
|
return unicode_decode_locale(s, size,
|
||||||
Py_FileSystemDefaultEncodeErrors, 0);
|
config->filesystem_errors, 0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,8 +81,15 @@ main(int argc, char *argv[])
|
||||||
config.program_name = L"./_freeze_importlib";
|
config.program_name = L"./_freeze_importlib";
|
||||||
/* Don't install importlib, since it could execute outdated bytecode. */
|
/* Don't install importlib, since it could execute outdated bytecode. */
|
||||||
config._install_importlib = 0;
|
config._install_importlib = 0;
|
||||||
config.install_signal_handlers = 1;
|
|
||||||
config._frozen = 1;
|
config._frozen = 1;
|
||||||
|
#ifdef MS_WINDOWS
|
||||||
|
/* bpo-34523: initfsencoding() is not called if _install_importlib=0,
|
||||||
|
so interp->fscodec_initialized value remains 0.
|
||||||
|
PyUnicode_EncodeFSDefault() doesn't support the "surrogatepass" error
|
||||||
|
handler in such case, whereas it's the default error handler on Windows.
|
||||||
|
Force the "strict" error handler to work around this bootstrap issue. */
|
||||||
|
config.filesystem_errors = "strict";
|
||||||
|
#endif
|
||||||
|
|
||||||
_PyInitError err = _Py_InitializeFromConfig(&config);
|
_PyInitError err = _Py_InitializeFromConfig(&config);
|
||||||
/* No need to call _PyCoreConfig_Clear() since we didn't allocate any
|
/* No need to call _PyCoreConfig_Clear() since we didn't allocate any
|
||||||
|
|
|
@ -328,6 +328,8 @@ dump_config(void)
|
||||||
printf("dump_refs = %i\n", config->dump_refs);
|
printf("dump_refs = %i\n", config->dump_refs);
|
||||||
printf("malloc_stats = %i\n", config->malloc_stats);
|
printf("malloc_stats = %i\n", config->malloc_stats);
|
||||||
|
|
||||||
|
printf("filesystem_encoding = %s\n", config->filesystem_encoding);
|
||||||
|
printf("filesystem_errors = %s\n", config->filesystem_errors);
|
||||||
printf("coerce_c_locale = %i\n", config->coerce_c_locale);
|
printf("coerce_c_locale = %i\n", config->coerce_c_locale);
|
||||||
printf("coerce_c_locale_warn = %i\n", config->coerce_c_locale_warn);
|
printf("coerce_c_locale_warn = %i\n", config->coerce_c_locale_warn);
|
||||||
printf("utf8_mode = %i\n", config->utf8_mode);
|
printf("utf8_mode = %i\n", config->utf8_mode);
|
||||||
|
|
|
@ -5,6 +5,11 @@
|
||||||
# include <langinfo.h>
|
# include <langinfo.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <locale.h> /* setlocale() */
|
||||||
|
#ifdef HAVE_LANGINFO_H
|
||||||
|
#include <langinfo.h> /* nl_langinfo(CODESET) */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define DECODE_LOCALE_ERR(NAME, LEN) \
|
#define DECODE_LOCALE_ERR(NAME, LEN) \
|
||||||
(((LEN) == -2) \
|
(((LEN) == -2) \
|
||||||
|
@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
|
||||||
int Py_HasFileSystemDefaultEncoding = 0;
|
int Py_HasFileSystemDefaultEncoding = 0;
|
||||||
#endif
|
#endif
|
||||||
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
|
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
|
||||||
|
static int _Py_HasFileSystemDefaultEncodeErrors = 1;
|
||||||
|
|
||||||
/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
|
/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
|
||||||
stdin and stdout error handler to "surrogateescape". It is equal to
|
stdin and stdout error handler to "surrogateescape". It is equal to
|
||||||
-1 by default: unknown, will be set by Py_Main() */
|
-1 by default: unknown, will be set by Py_Main() */
|
||||||
|
@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
_Py_ClearFileSystemEncoding(void)
|
||||||
|
{
|
||||||
|
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
||||||
|
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
|
||||||
|
Py_FileSystemDefaultEncoding = NULL;
|
||||||
|
}
|
||||||
|
if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) {
|
||||||
|
PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
|
||||||
|
Py_FileSystemDefaultEncodeErrors = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||||
|
global configuration variables. */
|
||||||
|
int
|
||||||
|
_Py_SetFileSystemEncoding(const char *encoding, const char *errors)
|
||||||
|
{
|
||||||
|
char *encoding2 = _PyMem_RawStrdup(encoding);
|
||||||
|
if (encoding2 == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *errors2 = _PyMem_RawStrdup(errors);
|
||||||
|
if (errors2 == NULL) {
|
||||||
|
PyMem_RawFree(encoding2);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
_Py_ClearFileSystemEncoding();
|
||||||
|
|
||||||
|
Py_FileSystemDefaultEncoding = encoding2;
|
||||||
|
Py_HasFileSystemDefaultEncoding = 0;
|
||||||
|
|
||||||
|
Py_FileSystemDefaultEncodeErrors = errors2;
|
||||||
|
_Py_HasFileSystemDefaultEncodeErrors = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Helper to allow an embedding application to override the normal
|
/* Helper to allow an embedding application to override the normal
|
||||||
* mechanism that attempts to figure out an appropriate IO encoding
|
* mechanism that attempts to figure out an appropriate IO encoding
|
||||||
*/
|
*/
|
||||||
|
@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
|
||||||
#endif
|
#endif
|
||||||
CLEAR(config->base_exec_prefix);
|
CLEAR(config->base_exec_prefix);
|
||||||
|
|
||||||
|
CLEAR(config->filesystem_encoding);
|
||||||
|
CLEAR(config->filesystem_errors);
|
||||||
CLEAR(config->stdio_encoding);
|
CLEAR(config->stdio_encoding);
|
||||||
CLEAR(config->stdio_errors);
|
CLEAR(config->stdio_errors);
|
||||||
#undef CLEAR
|
#undef CLEAR
|
||||||
|
@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
||||||
COPY_ATTR(quiet);
|
COPY_ATTR(quiet);
|
||||||
COPY_ATTR(user_site_directory);
|
COPY_ATTR(user_site_directory);
|
||||||
COPY_ATTR(buffered_stdio);
|
COPY_ATTR(buffered_stdio);
|
||||||
|
COPY_STR_ATTR(filesystem_encoding);
|
||||||
|
COPY_STR_ATTR(filesystem_errors);
|
||||||
COPY_STR_ATTR(stdio_encoding);
|
COPY_STR_ATTR(stdio_encoding);
|
||||||
COPY_STR_ATTR(stdio_errors);
|
COPY_STR_ATTR(stdio_errors);
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
|
@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
||||||
COPY_ATTR(_frozen);
|
COPY_ATTR(_frozen);
|
||||||
|
|
||||||
#undef COPY_ATTR
|
#undef COPY_ATTR
|
||||||
|
#undef COPY_STR_ATTR
|
||||||
#undef COPY_WSTR_ATTR
|
#undef COPY_WSTR_ATTR
|
||||||
#undef COPY_WSTRLIST
|
#undef COPY_WSTRLIST
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
_PyInitError
|
static _PyInitError
|
||||||
_Py_get_locale_encoding(char **locale_encoding)
|
get_locale_encoding(char **locale_encoding)
|
||||||
{
|
{
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
char encoding[20];
|
char encoding[20];
|
||||||
|
@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config)
|
||||||
|
|
||||||
/* Choose the default error handler based on the current locale. */
|
/* Choose the default error handler based on the current locale. */
|
||||||
if (config->stdio_encoding == NULL) {
|
if (config->stdio_encoding == NULL) {
|
||||||
_PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
|
_PyInitError err = get_locale_encoding(&config->stdio_encoding);
|
||||||
if (_Py_INIT_FAILED(err)) {
|
if (_Py_INIT_FAILED(err)) {
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static _PyInitError
|
||||||
|
config_init_fs_encoding(_PyCoreConfig *config)
|
||||||
|
{
|
||||||
|
#ifdef MS_WINDOWS
|
||||||
|
if (config->legacy_windows_fs_encoding) {
|
||||||
|
/* Legacy Windows filesystem encoding: mbcs/replace */
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
config->filesystem_encoding = _PyMem_RawStrdup("mbcs");
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (config->filesystem_errors == NULL) {
|
||||||
|
config->filesystem_errors = _PyMem_RawStrdup("replace");
|
||||||
|
if (config->filesystem_errors == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Windows defaults to utf-8/surrogatepass (PEP 529) */
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (config->filesystem_errors == NULL) {
|
||||||
|
config->filesystem_errors = _PyMem_RawStrdup("surrogatepass");
|
||||||
|
if (config->filesystem_errors == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (config->utf8_mode) {
|
||||||
|
/* UTF-8 Mode use: utf-8/surrogateescape */
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* errors defaults to surrogateescape above */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
/* macOS and Android use UTF-8, other platforms use
|
||||||
|
the locale encoding. */
|
||||||
|
char *locale_encoding;
|
||||||
|
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||||
|
locale_encoding = "UTF-8";
|
||||||
|
#else
|
||||||
|
_PyInitError err = get_locale_encoding(&locale_encoding);
|
||||||
|
if (_Py_INIT_FAILED(err)) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
config->filesystem_encoding = _PyMem_RawStrdup(locale_encoding);
|
||||||
|
if (config->filesystem_encoding == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config->filesystem_errors == NULL) {
|
||||||
|
/* by default, use the "surrogateescape" error handler */
|
||||||
|
config->filesystem_errors = _PyMem_RawStrdup("surrogateescape");
|
||||||
|
if (config->filesystem_errors == NULL) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return _Py_INIT_OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Read configuration settings from standard locations
|
/* Read configuration settings from standard locations
|
||||||
*
|
*
|
||||||
* This function doesn't make any changes to the interpreter state - it
|
* This function doesn't make any changes to the interpreter state - it
|
||||||
|
@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
||||||
config->argc = 0;
|
config->argc = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (config->filesystem_encoding == NULL && config->filesystem_errors == NULL) {
|
||||||
|
err = config_init_fs_encoding(config);
|
||||||
|
if (_Py_INIT_FAILED(err)) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = config_init_stdio_encoding(config);
|
err = config_init_stdio_encoding(config);
|
||||||
if (_Py_INIT_FAILED(err)) {
|
if (_Py_INIT_FAILED(err)) {
|
||||||
return err;
|
return err;
|
||||||
|
@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
||||||
|
|
||||||
assert(config->coerce_c_locale >= 0);
|
assert(config->coerce_c_locale >= 0);
|
||||||
assert(config->use_environment >= 0);
|
assert(config->use_environment >= 0);
|
||||||
|
assert(config->filesystem_encoding != NULL);
|
||||||
|
assert(config->filesystem_errors != NULL);
|
||||||
|
assert(config->stdio_encoding != NULL);
|
||||||
|
assert(config->stdio_errors != NULL);
|
||||||
|
|
||||||
return _Py_INIT_OK();
|
return _Py_INIT_OK();
|
||||||
}
|
}
|
||||||
|
|
|
@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] =
|
||||||
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
|
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
|
||||||
|
|
||||||
static void
|
static void
|
||||||
_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
|
_coerce_default_locale_settings(int warn, const _LocaleCoercionTarget *target)
|
||||||
{
|
{
|
||||||
const char *newloc = target->locale_name;
|
const char *newloc = target->locale_name;
|
||||||
|
|
||||||
|
@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
|
||||||
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (config->coerce_c_locale_warn) {
|
if (warn) {
|
||||||
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
|
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void
|
void
|
||||||
_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
|
_Py_CoerceLegacyLocale(int warn)
|
||||||
{
|
{
|
||||||
#ifdef PY_COERCE_C_LOCALE
|
#ifdef PY_COERCE_C_LOCALE
|
||||||
const char *locale_override = getenv("LC_ALL");
|
const char *locale_override = getenv("LC_ALL");
|
||||||
|
@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* Successfully configured locale, so make it the default */
|
/* Successfully configured locale, so make it the default */
|
||||||
_coerce_default_locale_settings(config, target);
|
_coerce_default_locale_settings(warn, target);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1162,11 +1162,7 @@ Py_FinalizeEx(void)
|
||||||
/* Cleanup Unicode implementation */
|
/* Cleanup Unicode implementation */
|
||||||
_PyUnicode_Fini();
|
_PyUnicode_Fini();
|
||||||
|
|
||||||
/* reset file system default encoding */
|
_Py_ClearFileSystemEncoding();
|
||||||
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
|
||||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
|
|
||||||
Py_FileSystemDefaultEncoding = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* XXX Still allocated:
|
/* XXX Still allocated:
|
||||||
- various static ad-hoc pointers to interned strings
|
- various static ad-hoc pointers to interned strings
|
||||||
|
@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp)
|
||||||
static _PyInitError
|
static _PyInitError
|
||||||
initfsencoding(PyInterpreterState *interp)
|
initfsencoding(PyInterpreterState *interp)
|
||||||
{
|
{
|
||||||
PyObject *codec;
|
_PyCoreConfig *config = &interp->core_config;
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
char *encoding = get_codec_name(config->filesystem_encoding);
|
||||||
if (Py_LegacyWindowsFSEncodingFlag) {
|
if (encoding == NULL) {
|
||||||
Py_FileSystemDefaultEncoding = "mbcs";
|
|
||||||
Py_FileSystemDefaultEncodeErrors = "replace";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Py_FileSystemDefaultEncoding = "utf-8";
|
|
||||||
Py_FileSystemDefaultEncodeErrors = "surrogatepass";
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
|
||||||
if (interp->core_config.utf8_mode) {
|
|
||||||
Py_FileSystemDefaultEncoding = "utf-8";
|
|
||||||
Py_HasFileSystemDefaultEncoding = 1;
|
|
||||||
}
|
|
||||||
else if (_Py_GetForceASCII()) {
|
|
||||||
Py_FileSystemDefaultEncoding = "ascii";
|
|
||||||
Py_HasFileSystemDefaultEncoding = 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
extern _PyInitError _Py_get_locale_encoding(char **locale_encoding);
|
|
||||||
|
|
||||||
char *locale_encoding;
|
|
||||||
_PyInitError err = _Py_get_locale_encoding(&locale_encoding);
|
|
||||||
if (_Py_INIT_FAILED(err)) {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
|
|
||||||
PyMem_RawFree(locale_encoding);
|
|
||||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
|
||||||
return _Py_INIT_ERR("failed to get the Python codec "
|
|
||||||
"of the locale encoding");
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_HasFileSystemDefaultEncoding = 0;
|
|
||||||
interp->fscodec_initialized = 1;
|
|
||||||
return _Py_INIT_OK();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* the encoding is mbcs, utf-8 or ascii */
|
|
||||||
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
|
|
||||||
if (!codec) {
|
|
||||||
/* Such error can only occurs in critical situations: no more
|
/* Such error can only occurs in critical situations: no more
|
||||||
* memory, import a module of the standard library failed,
|
memory, import a module of the standard library failed, etc. */
|
||||||
* etc. */
|
return _Py_INIT_ERR("failed to get the Python codec "
|
||||||
return _Py_INIT_ERR("unable to load the file system codec");
|
"of the filesystem encoding");
|
||||||
}
|
}
|
||||||
Py_DECREF(codec);
|
|
||||||
|
/* Update the filesystem encoding to the normalized Python codec name.
|
||||||
|
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||||
|
(Python codec name). */
|
||||||
|
PyMem_RawFree(config->filesystem_encoding);
|
||||||
|
config->filesystem_encoding = encoding;
|
||||||
|
|
||||||
|
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||||
|
global configuration variables. */
|
||||||
|
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||||
|
config->filesystem_errors) < 0) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PyUnicode can now use the Python codec rather than C implementation
|
||||||
|
for the filesystem encoding */
|
||||||
interp->fscodec_initialized = 1;
|
interp->fscodec_initialized = 1;
|
||||||
return _Py_INIT_OK();
|
return _Py_INIT_OK();
|
||||||
}
|
}
|
||||||
|
|
|
@ -389,11 +389,9 @@ implementation."
|
||||||
/* sys.getfilesystemencoding(): return the filesystem encoding of the current
   interpreter configuration as a str.

   Raise RuntimeError if the encoding has not been set in the configuration,
   rather than passing a NULL pointer to PyUnicode_FromString(). */
static PyObject *
sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    const _PyCoreConfig *config = &interp->core_config;

    if (config->filesystem_encoding == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "filesystem encoding is not initialized");
        return NULL;
    }
    return PyUnicode_FromString(config->filesystem_encoding);
}
|
||||||
|
|
||||||
PyDoc_STRVAR(getfilesystemencoding_doc,
|
PyDoc_STRVAR(getfilesystemencoding_doc,
|
||||||
|
@ -406,11 +404,9 @@ operating system filenames."
|
||||||
/* sys.getfilesystemencodeerrors(): return the filesystem error handler of the
   current interpreter configuration as a str.

   Raise RuntimeError if the error handler has not been set in the
   configuration, rather than passing a NULL pointer to
   PyUnicode_FromString(). */
static PyObject *
sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    const _PyCoreConfig *config = &interp->core_config;

    if (config->filesystem_errors == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "filesystem encoding is not initialized");
        return NULL;
    }
    return PyUnicode_FromString(config->filesystem_errors);
}
|
||||||
|
|
||||||
PyDoc_STRVAR(getfilesystemencodeerrors_doc,
|
PyDoc_STRVAR(getfilesystemencodeerrors_doc,
|
||||||
|
@ -1150,8 +1146,30 @@ environment variable before launching Python."
|
||||||
/* sys._enablelegacywindowsfsencoding(): switch the filesystem encoding of the
   current interpreter to mbcs/replace (PEP 529) at runtime.

   Updates both the interpreter configuration and, through
   _Py_SetFileSystemEncoding(), the Py_FileSystemDefaultEncoding and
   Py_FileSystemDefaultEncodeErrors globals.

   Return None on success; raise MemoryError and return NULL on allocation
   failure. */
static PyObject *
sys_enablelegacywindowsfsencoding(PyObject *self)
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    _PyCoreConfig *config = &interp->core_config;

    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
    char *encoding = _PyMem_RawStrdup("mbcs");
    char *errors = _PyMem_RawStrdup("replace");
    if (encoding == NULL || errors == NULL) {
        /* The copies come from the raw allocator, so they must be released
           with PyMem_RawFree(), not PyMem_Free(). */
        PyMem_RawFree(encoding);
        PyMem_RawFree(errors);
        PyErr_NoMemory();
        return NULL;
    }

    /* The configuration takes ownership of the new strings. */
    PyMem_RawFree(config->filesystem_encoding);
    config->filesystem_encoding = encoding;
    PyMem_RawFree(config->filesystem_errors);
    config->filesystem_errors = errors;

    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
                                  config->filesystem_errors) < 0) {
        PyErr_NoMemory();
        return NULL;
    }

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue