bpo-34523: Add _PyCoreConfig.filesystem_encoding (GH-8963)
_PyCoreConfig_Read() is now responsible for choosing the filesystem encoding and error handler. When Py_Main() is used, the encoding is now chosen even before Py_Initialize() is called. _PyCoreConfig.filesystem_encoding is now the reference for the Python filesystem encoding, instead of Py_FileSystemDefaultEncoding. Changes: * Add filesystem_encoding and filesystem_errors to _PyCoreConfig. * _PyCoreConfig_Read() now reads the locale encoding to choose the filesystem encoding. * PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize() now use the interpreter configuration rather than the Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors global configuration variables. * Add the _Py_SetFileSystemEncoding() and _Py_ClearFileSystemEncoding() private functions, so that Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors are only modified in coreconfig.c. * _Py_CoerceLegacyLocale() now takes an int (the warning flag) rather than a _PyCoreConfig.
This commit is contained in:
parent
dfe0dc7453
commit
b2457efc78
|
@ -66,6 +66,17 @@ typedef struct {
|
|||
int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */
|
||||
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
|
||||
|
||||
/* Python filesystem encoding and error handler: see
|
||||
sys.getfilesystemencoding() and sys.getfilesystemencodeerrors().
|
||||
|
||||
Updated later by initfsencoding(). On Windows, can be updated by
|
||||
sys._enablelegacywindowsfsencoding() at runtime.
|
||||
|
||||
See Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors.
|
||||
*/
|
||||
char *filesystem_encoding;
|
||||
char *filesystem_errors;
|
||||
|
||||
/* Enable UTF-8 mode?
|
||||
Set by -X utf8 command line option and PYTHONUTF8 environment variable.
|
||||
If set to -1 (default), inherit Py_UTF8Mode value. */
|
||||
|
@ -325,6 +336,14 @@ PyAPI_FUNC(int) _PyCoreConfig_GetEnvDup(
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef Py_BUILD_CORE
|
||||
PyAPI_FUNC(int) _Py_SetFileSystemEncoding(
|
||||
const char *encoding,
|
||||
const char *errors);
|
||||
PyAPI_FUNC(void) _Py_ClearFileSystemEncoding(void);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -175,7 +175,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size);
|
|||
|
||||
/* Legacy locale support */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config);
|
||||
PyAPI_FUNC(void) _Py_CoerceLegacyLocale(int warn);
|
||||
PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void);
|
||||
PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category);
|
||||
#endif
|
||||
|
|
|
@ -251,6 +251,8 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
|
||||
class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||
maxDiff = 4096
|
||||
UTF8_MODE_ERRORS = ('surrogatepass' if sys.platform == 'win32'
|
||||
else 'surrogateescape')
|
||||
DEFAULT_CONFIG = {
|
||||
'install_signal_handlers': 1,
|
||||
'use_environment': 1,
|
||||
|
@ -265,8 +267,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'show_alloc_count': 0,
|
||||
'dump_refs': 0,
|
||||
'malloc_stats': 0,
|
||||
'utf8_mode': 0,
|
||||
|
||||
# None means that the default encoding is read at runtime:
|
||||
# see get_locale_encoding().
|
||||
'filesystem_encoding': None,
|
||||
'filesystem_errors': sys.getfilesystemencodeerrors(),
|
||||
'utf8_mode': 0,
|
||||
'coerce_c_locale': 0,
|
||||
'coerce_c_locale_warn': 0,
|
||||
|
||||
|
@ -297,6 +303,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'_frozen': 0,
|
||||
}
|
||||
|
||||
|
||||
def get_stdio_encoding(self, env):
|
||||
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
|
||||
args = (sys.executable, '-c', code)
|
||||
|
@ -308,6 +315,29 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
out = proc.stdout.rstrip()
|
||||
return out.split()
|
||||
|
||||
def get_locale_encoding(self, isolated):
    """Return the codec name expected for the filesystem encoding.

    On Windows, macOS and Android the answer is always "utf-8".  On other
    platforms a child interpreter is started to report the normalized
    codec name of the LC_CTYPE locale encoding.  Unless *isolated* is
    true, the child runs with PYTHONCOERCECLOCALE=0 and PYTHONUTF8=0.

    Raise Exception if the child interpreter exits with a non-zero status.
    """
    always_utf8 = sys.platform in ('win32', 'darwin') or support.is_android
    if always_utf8:
        return "utf-8"

    # One-liner executed by the child: look up the locale encoding and
    # print the name of the matching Python codec.
    script = '; '.join([
        'import codecs, locale, sys',
        'locale.setlocale(locale.LC_CTYPE, "")',
        'enc = locale.nl_langinfo(locale.CODESET)',
        'enc = codecs.lookup(enc).name',
        'print(enc)',
    ])

    child_env = dict(os.environ)
    if not isolated:
        child_env.update(PYTHONCOERCECLOCALE='0', PYTHONUTF8='0')

    result = subprocess.run((sys.executable, '-c', script),
                            text=True,
                            env=child_env,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    if result.returncode != 0:
        raise Exception(f"failed to get the locale encoding: "
                        f"stdout={result.stdout!r} stderr={result.stderr!r}")
    return result.stdout.rstrip()
|
||||
|
||||
def check_config(self, testname, expected):
|
||||
expected = dict(self.DEFAULT_CONFIG, **expected)
|
||||
|
||||
|
@ -326,6 +356,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
expected['stdio_encoding'] = res[0]
|
||||
if expected['stdio_errors'] is None:
|
||||
expected['stdio_errors'] = res[1]
|
||||
if expected['filesystem_encoding'] is None:
|
||||
expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
|
||||
for key, value in expected.items():
|
||||
expected[key] = str(value)
|
||||
|
||||
|
@ -357,7 +389,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'utf8_mode': 1,
|
||||
'stdio_encoding': 'utf-8',
|
||||
'stdio_errors': 'surrogateescape',
|
||||
|
||||
'filesystem_encoding': 'utf-8',
|
||||
'filesystem_errors': self.UTF8_MODE_ERRORS,
|
||||
'user_site_directory': 0,
|
||||
'_frozen': 1,
|
||||
}
|
||||
|
@ -378,6 +411,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'utf8_mode': 1,
|
||||
'stdio_encoding': 'iso8859-1',
|
||||
'stdio_errors': 'replace',
|
||||
'filesystem_encoding': 'utf-8',
|
||||
'filesystem_errors': self.UTF8_MODE_ERRORS,
|
||||
|
||||
'pycache_prefix': 'conf_pycache_prefix',
|
||||
'program_name': './conf_program_name',
|
||||
|
@ -409,6 +444,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'import_time': 1,
|
||||
'malloc_stats': 1,
|
||||
'utf8_mode': 1,
|
||||
'filesystem_encoding': 'utf-8',
|
||||
'filesystem_errors': self.UTF8_MODE_ERRORS,
|
||||
'inspect': 1,
|
||||
'optimization_level': 2,
|
||||
'pycache_prefix': 'env_pycache_prefix',
|
||||
|
|
|
@ -861,6 +861,16 @@ class SysModuleTest(unittest.TestCase):
|
|||
def test_no_duplicates_in_meta_path(self):
|
||||
self.assertEqual(len(sys.meta_path), len(set(sys.meta_path)))
|
||||
|
||||
@unittest.skipUnless(hasattr(sys, "_enablelegacywindowsfsencoding"),
                     'needs sys._enablelegacywindowsfsencoding()')
def test__enablelegacywindowsfsencoding(self):
    # Run in a child interpreter: the call changes the filesystem
    # encoding of the process it runs in.
    script = '; '.join([
        'import sys',
        'sys._enablelegacywindowsfsencoding()',
        'print(sys.getfilesystemencoding(), sys.getfilesystemencodeerrors())',
    ])
    _, stdout, _ = assert_python_ok('-c', script)
    reported = stdout.decode('ascii', 'replace').rstrip()
    self.assertEqual(reported, 'mbcs replace')
|
||||
|
||||
|
||||
@test.support.cpython_only
|
||||
class SizeofTest(unittest.TestCase):
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
The Python filesystem encoding is now read earlier during the Python
|
||||
initialization.
|
|
@ -1339,7 +1339,7 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
|
|||
*/
|
||||
if (config->coerce_c_locale && !locale_coerced) {
|
||||
locale_coerced = 1;
|
||||
_Py_CoerceLegacyLocale(config);
|
||||
_Py_CoerceLegacyLocale(config->coerce_c_locale_warn);
|
||||
encoding_changed = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -3410,27 +3410,24 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
|||
PyObject *
|
||||
PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||
{
|
||||
#if defined(__APPLE__)
|
||||
return _PyUnicode_AsUTF8String(unicode, Py_FileSystemDefaultEncodeErrors);
|
||||
#else
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
#if defined(__APPLE__)
|
||||
return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
|
||||
#else
|
||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||
cannot use it to encode and decode filenames before it is loaded. Load
|
||||
the Python codec requires to encode at least its own filename. Use the C
|
||||
version of the locale codec until the codec registry is initialized and
|
||||
the Python codec is loaded.
|
||||
|
||||
Py_FileSystemDefaultEncoding is shared between all interpreters, we
|
||||
cannot only rely on it: check also interp->fscodec_initialized for
|
||||
subinterpreters. */
|
||||
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
|
||||
implementation of the locale codec until the codec registry is
|
||||
initialized and the Python codec is loaded. See initfsencoding(). */
|
||||
if (interp->fscodec_initialized) {
|
||||
return PyUnicode_AsEncodedString(unicode,
|
||||
Py_FileSystemDefaultEncoding,
|
||||
Py_FileSystemDefaultEncodeErrors);
|
||||
config->filesystem_encoding,
|
||||
config->filesystem_errors);
|
||||
}
|
||||
else {
|
||||
return unicode_encode_locale(unicode,
|
||||
Py_FileSystemDefaultEncodeErrors, 0);
|
||||
config->filesystem_errors, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -3636,27 +3633,24 @@ PyUnicode_DecodeFSDefault(const char *s) {
|
|||
PyObject*
|
||||
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||
{
|
||||
#if defined(__APPLE__)
|
||||
return PyUnicode_DecodeUTF8Stateful(s, size, Py_FileSystemDefaultEncodeErrors, NULL);
|
||||
#else
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
#if defined(__APPLE__)
|
||||
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
|
||||
#else
|
||||
/* Bootstrap check: if the filesystem codec is implemented in Python, we
|
||||
cannot use it to encode and decode filenames before it is loaded. Load
|
||||
the Python codec requires to encode at least its own filename. Use the C
|
||||
version of the locale codec until the codec registry is initialized and
|
||||
the Python codec is loaded.
|
||||
|
||||
Py_FileSystemDefaultEncoding is shared between all interpreters, we
|
||||
cannot only rely on it: check also interp->fscodec_initialized for
|
||||
subinterpreters. */
|
||||
if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
|
||||
implementation of the locale codec until the codec registry is
|
||||
initialized and the Python codec is loaded. See initfsencoding(). */
|
||||
if (interp->fscodec_initialized) {
|
||||
return PyUnicode_Decode(s, size,
|
||||
Py_FileSystemDefaultEncoding,
|
||||
Py_FileSystemDefaultEncodeErrors);
|
||||
config->filesystem_encoding,
|
||||
config->filesystem_errors);
|
||||
}
|
||||
else {
|
||||
return unicode_decode_locale(s, size,
|
||||
Py_FileSystemDefaultEncodeErrors, 0);
|
||||
config->filesystem_errors, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -81,8 +81,15 @@ main(int argc, char *argv[])
|
|||
config.program_name = L"./_freeze_importlib";
|
||||
/* Don't install importlib, since it could execute outdated bytecode. */
|
||||
config._install_importlib = 0;
|
||||
config.install_signal_handlers = 1;
|
||||
config._frozen = 1;
|
||||
#ifdef MS_WINDOWS
|
||||
/* bpo-34523: initfsencoding() is not called if _install_importlib=0,
|
||||
so interp->fscodec_initialized value remains 0.
|
||||
PyUnicode_EncodeFSDefault() doesn't support the "surrogatepass" error
|
||||
handler in such case, whereas it's the default error handler on Windows.
|
||||
Force the "strict" error handler to work around this bootstrap issue. */
|
||||
config.filesystem_errors = "strict";
|
||||
#endif
|
||||
|
||||
_PyInitError err = _Py_InitializeFromConfig(&config);
|
||||
/* No need to call _PyCoreConfig_Clear() since we didn't allocate any
|
||||
|
|
|
@ -328,6 +328,8 @@ dump_config(void)
|
|||
printf("dump_refs = %i\n", config->dump_refs);
|
||||
printf("malloc_stats = %i\n", config->malloc_stats);
|
||||
|
||||
printf("filesystem_encoding = %s\n", config->filesystem_encoding);
|
||||
printf("filesystem_errors = %s\n", config->filesystem_errors);
|
||||
printf("coerce_c_locale = %i\n", config->coerce_c_locale);
|
||||
printf("coerce_c_locale_warn = %i\n", config->coerce_c_locale_warn);
|
||||
printf("utf8_mode = %i\n", config->utf8_mode);
|
||||
|
|
|
@ -5,6 +5,11 @@
|
|||
# include <langinfo.h>
|
||||
#endif
|
||||
|
||||
#include <locale.h> /* setlocale() */
|
||||
#ifdef HAVE_LANGINFO_H
|
||||
#include <langinfo.h> /* nl_langinfo(CODESET) */
|
||||
#endif
|
||||
|
||||
|
||||
#define DECODE_LOCALE_ERR(NAME, LEN) \
|
||||
(((LEN) == -2) \
|
||||
|
@ -32,6 +37,8 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
|
|||
int Py_HasFileSystemDefaultEncoding = 0;
|
||||
#endif
|
||||
const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
|
||||
static int _Py_HasFileSystemDefaultEncodeErrors = 1;
|
||||
|
||||
/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change
|
||||
stdin and stdout error handler to "surrogateescape". It is equal to
|
||||
-1 by default: unknown, will be set by Py_Main() */
|
||||
|
@ -88,6 +95,47 @@ _Py_wstrlist_copy(int len, wchar_t **list)
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
_Py_ClearFileSystemEncoding(void)
|
||||
{
|
||||
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
|
||||
Py_FileSystemDefaultEncoding = NULL;
|
||||
}
|
||||
if (!_Py_HasFileSystemDefaultEncodeErrors && Py_FileSystemDefaultEncodeErrors) {
|
||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncodeErrors);
|
||||
Py_FileSystemDefaultEncodeErrors = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
int
|
||||
_Py_SetFileSystemEncoding(const char *encoding, const char *errors)
|
||||
{
|
||||
char *encoding2 = _PyMem_RawStrdup(encoding);
|
||||
if (encoding2 == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *errors2 = _PyMem_RawStrdup(errors);
|
||||
if (errors2 == NULL) {
|
||||
PyMem_RawFree(encoding2);
|
||||
return -1;
|
||||
}
|
||||
|
||||
_Py_ClearFileSystemEncoding();
|
||||
|
||||
Py_FileSystemDefaultEncoding = encoding2;
|
||||
Py_HasFileSystemDefaultEncoding = 0;
|
||||
|
||||
Py_FileSystemDefaultEncodeErrors = errors2;
|
||||
_Py_HasFileSystemDefaultEncodeErrors = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Helper to allow an embedding application to override the normal
|
||||
* mechanism that attempts to figure out an appropriate IO encoding
|
||||
*/
|
||||
|
@ -209,6 +257,8 @@ _PyCoreConfig_Clear(_PyCoreConfig *config)
|
|||
#endif
|
||||
CLEAR(config->base_exec_prefix);
|
||||
|
||||
CLEAR(config->filesystem_encoding);
|
||||
CLEAR(config->filesystem_errors);
|
||||
CLEAR(config->stdio_encoding);
|
||||
CLEAR(config->stdio_errors);
|
||||
#undef CLEAR
|
||||
|
@ -302,6 +352,8 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
COPY_ATTR(quiet);
|
||||
COPY_ATTR(user_site_directory);
|
||||
COPY_ATTR(buffered_stdio);
|
||||
COPY_STR_ATTR(filesystem_encoding);
|
||||
COPY_STR_ATTR(filesystem_errors);
|
||||
COPY_STR_ATTR(stdio_encoding);
|
||||
COPY_STR_ATTR(stdio_errors);
|
||||
#ifdef MS_WINDOWS
|
||||
|
@ -312,6 +364,7 @@ _PyCoreConfig_Copy(_PyCoreConfig *config, const _PyCoreConfig *config2)
|
|||
COPY_ATTR(_frozen);
|
||||
|
||||
#undef COPY_ATTR
|
||||
#undef COPY_STR_ATTR
|
||||
#undef COPY_WSTR_ATTR
|
||||
#undef COPY_WSTRLIST
|
||||
return 0;
|
||||
|
@ -976,8 +1029,8 @@ get_stdio_errors(const _PyCoreConfig *config)
|
|||
}
|
||||
|
||||
|
||||
_PyInitError
|
||||
_Py_get_locale_encoding(char **locale_encoding)
|
||||
static _PyInitError
|
||||
get_locale_encoding(char **locale_encoding)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
char encoding[20];
|
||||
|
@ -1087,7 +1140,7 @@ config_init_stdio_encoding(_PyCoreConfig *config)
|
|||
|
||||
/* Choose the default error handler based on the current locale. */
|
||||
if (config->stdio_encoding == NULL) {
|
||||
_PyInitError err = _Py_get_locale_encoding(&config->stdio_encoding);
|
||||
_PyInitError err = get_locale_encoding(&config->stdio_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
@ -1104,6 +1157,81 @@ config_init_stdio_encoding(_PyCoreConfig *config)
|
|||
}
|
||||
|
||||
|
||||
static _PyInitError
|
||||
config_init_fs_encoding(_PyCoreConfig *config)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
if (config->legacy_windows_fs_encoding) {
|
||||
/* Legacy Windows filesystem encoding: mbcs/replace */
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
config->filesystem_encoding = _PyMem_RawStrdup("mbcs");
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
if (config->filesystem_errors == NULL) {
|
||||
config->filesystem_errors = _PyMem_RawStrdup("replace");
|
||||
if (config->filesystem_errors == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Windows defaults to utf-8/surrogatepass (PEP 529) */
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
if (config->filesystem_errors == NULL) {
|
||||
config->filesystem_errors = _PyMem_RawStrdup("surrogatepass");
|
||||
if (config->filesystem_errors == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (config->utf8_mode) {
|
||||
/* UTF-8 Mode use: utf-8/surrogateescape */
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
config->filesystem_encoding = _PyMem_RawStrdup("utf-8");
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
/* errors defaults to surrogateescape above */
|
||||
}
|
||||
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
/* macOS and Android use UTF-8, other platforms use
|
||||
the locale encoding. */
|
||||
char *locale_encoding;
|
||||
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||
locale_encoding = "UTF-8";
|
||||
#else
|
||||
_PyInitError err = get_locale_encoding(&locale_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
config->filesystem_encoding = _PyMem_RawStrdup(locale_encoding);
|
||||
if (config->filesystem_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
|
||||
if (config->filesystem_errors == NULL) {
|
||||
/* by default, use the "surrogateescape" error handler */
|
||||
config->filesystem_errors = _PyMem_RawStrdup("surrogateescape");
|
||||
if (config->filesystem_errors == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
/* Read configuration settings from standard locations
|
||||
*
|
||||
* This function doesn't make any changes to the interpreter state - it
|
||||
|
@ -1216,6 +1344,13 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
|||
config->argc = 0;
|
||||
}
|
||||
|
||||
if (config->filesystem_encoding == NULL && config->filesystem_errors == NULL) {
|
||||
err = config_init_fs_encoding(config);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = config_init_stdio_encoding(config);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
|
@ -1223,6 +1358,10 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
|||
|
||||
assert(config->coerce_c_locale >= 0);
|
||||
assert(config->use_environment >= 0);
|
||||
assert(config->filesystem_encoding != NULL);
|
||||
assert(config->filesystem_errors != NULL);
|
||||
assert(config->stdio_encoding != NULL);
|
||||
assert(config->stdio_errors != NULL);
|
||||
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
|
|
@ -339,7 +339,7 @@ static const char C_LOCALE_COERCION_WARNING[] =
|
|||
"or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior).\n";
|
||||
|
||||
static void
|
||||
_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target)
|
||||
_coerce_default_locale_settings(int warn, const _LocaleCoercionTarget *target)
|
||||
{
|
||||
const char *newloc = target->locale_name;
|
||||
|
||||
|
@ -352,7 +352,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
|
|||
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
||||
return;
|
||||
}
|
||||
if (config->coerce_c_locale_warn) {
|
||||
if (warn) {
|
||||
fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc);
|
||||
}
|
||||
|
||||
|
@ -362,7 +362,7 @@ _coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoerci
|
|||
#endif
|
||||
|
||||
void
|
||||
_Py_CoerceLegacyLocale(const _PyCoreConfig *config)
|
||||
_Py_CoerceLegacyLocale(int warn)
|
||||
{
|
||||
#ifdef PY_COERCE_C_LOCALE
|
||||
const char *locale_override = getenv("LC_ALL");
|
||||
|
@ -385,7 +385,7 @@ defined(HAVE_LANGINFO_H) && defined(CODESET)
|
|||
}
|
||||
#endif
|
||||
/* Successfully configured locale, so make it the default */
|
||||
_coerce_default_locale_settings(config, target);
|
||||
_coerce_default_locale_settings(warn, target);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1162,11 +1162,7 @@ Py_FinalizeEx(void)
|
|||
/* Cleanup Unicode implementation */
|
||||
_PyUnicode_Fini();
|
||||
|
||||
/* reset file system default encoding */
|
||||
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
|
||||
PyMem_RawFree((char*)Py_FileSystemDefaultEncoding);
|
||||
Py_FileSystemDefaultEncoding = NULL;
|
||||
}
|
||||
_Py_ClearFileSystemEncoding();
|
||||
|
||||
/* XXX Still allocated:
|
||||
- various static ad-hoc pointers to interned strings
|
||||
|
@ -1475,59 +1471,31 @@ add_main_module(PyInterpreterState *interp)
|
|||
static _PyInitError
|
||||
initfsencoding(PyInterpreterState *interp)
|
||||
{
|
||||
PyObject *codec;
|
||||
_PyCoreConfig *config = &interp->core_config;
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
if (Py_LegacyWindowsFSEncodingFlag) {
|
||||
Py_FileSystemDefaultEncoding = "mbcs";
|
||||
Py_FileSystemDefaultEncodeErrors = "replace";
|
||||
}
|
||||
else {
|
||||
Py_FileSystemDefaultEncoding = "utf-8";
|
||||
Py_FileSystemDefaultEncodeErrors = "surrogatepass";
|
||||
}
|
||||
#else
|
||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||
if (interp->core_config.utf8_mode) {
|
||||
Py_FileSystemDefaultEncoding = "utf-8";
|
||||
Py_HasFileSystemDefaultEncoding = 1;
|
||||
}
|
||||
else if (_Py_GetForceASCII()) {
|
||||
Py_FileSystemDefaultEncoding = "ascii";
|
||||
Py_HasFileSystemDefaultEncoding = 1;
|
||||
}
|
||||
else {
|
||||
extern _PyInitError _Py_get_locale_encoding(char **locale_encoding);
|
||||
|
||||
char *locale_encoding;
|
||||
_PyInitError err = _Py_get_locale_encoding(&locale_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
|
||||
PyMem_RawFree(locale_encoding);
|
||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the locale encoding");
|
||||
}
|
||||
|
||||
Py_HasFileSystemDefaultEncoding = 0;
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* the encoding is mbcs, utf-8 or ascii */
|
||||
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
|
||||
if (!codec) {
|
||||
char *encoding = get_codec_name(config->filesystem_encoding);
|
||||
if (encoding == NULL) {
|
||||
/* Such error can only occurs in critical situations: no more
|
||||
* memory, import a module of the standard library failed,
|
||||
* etc. */
|
||||
return _Py_INIT_ERR("unable to load the file system codec");
|
||||
memory, import a module of the standard library failed, etc. */
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the filesystem encoding");
|
||||
}
|
||||
Py_DECREF(codec);
|
||||
|
||||
/* Update the filesystem encoding to the normalized Python codec name.
|
||||
For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
|
||||
(Python codec name). */
|
||||
PyMem_RawFree(config->filesystem_encoding);
|
||||
config->filesystem_encoding = encoding;
|
||||
|
||||
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
|
||||
global configuration variables. */
|
||||
if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
|
||||
config->filesystem_errors) < 0) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
|
||||
/* PyUnicode can now use the Python codec rather than C implementation
|
||||
for the filesystem encoding */
|
||||
interp->fscodec_initialized = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
|
|
@ -389,11 +389,9 @@ implementation."
|
|||
static PyObject *
|
||||
sys_getfilesystemencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
if (Py_FileSystemDefaultEncoding)
|
||||
return PyUnicode_FromString(Py_FileSystemDefaultEncoding);
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"filesystem encoding is not initialized");
|
||||
return NULL;
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
return PyUnicode_FromString(config->filesystem_encoding);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(getfilesystemencoding_doc,
|
||||
|
@ -406,11 +404,9 @@ operating system filenames."
|
|||
static PyObject *
|
||||
sys_getfilesystemencodeerrors(PyObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
if (Py_FileSystemDefaultEncodeErrors)
|
||||
return PyUnicode_FromString(Py_FileSystemDefaultEncodeErrors);
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"filesystem encoding is not initialized");
|
||||
return NULL;
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
|
||||
const _PyCoreConfig *config = &interp->core_config;
|
||||
return PyUnicode_FromString(config->filesystem_errors);
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(getfilesystemencodeerrors_doc,
|
||||
|
@ -1150,8 +1146,30 @@ environment variable before launching Python."
|
|||
/* Implementation of sys._enablelegacywindowsfsencoding(): switch the
   filesystem encoding of the current interpreter to mbcs/replace
   (PEP 529).

   Updates both the interpreter configuration and, through
   _Py_SetFileSystemEncoding(), the Py_FileSystemDefaultEncoding and
   Py_FileSystemDefaultEncodeErrors globals.

   Return None on success. On memory allocation failure, set MemoryError
   and return NULL. */
static PyObject *
sys_enablelegacywindowsfsencoding(PyObject *self)
{
    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
    _PyCoreConfig *config = &interp->core_config;

    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
    char *encoding = _PyMem_RawStrdup("mbcs");
    char *errors = _PyMem_RawStrdup("replace");
    if (encoding == NULL || errors == NULL) {
        /* The strings come from the "raw" allocator, so they must be
           released with PyMem_RawFree(), not PyMem_Free(). Freeing NULL
           is a no-op. */
        PyMem_RawFree(encoding);
        PyMem_RawFree(errors);
        PyErr_NoMemory();
        return NULL;
    }

    /* The configuration takes ownership of the new strings. */
    PyMem_RawFree(config->filesystem_encoding);
    config->filesystem_encoding = encoding;
    PyMem_RawFree(config->filesystem_errors);
    config->filesystem_errors = errors;

    /* Keep the global configuration variables in sync. */
    if (_Py_SetFileSystemEncoding(config->filesystem_encoding,
                                  config->filesystem_errors) < 0) {
        PyErr_NoMemory();
        return NULL;
    }

    Py_RETURN_NONE;
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue