From 9454060e84a669dde63824d9e2fcaf295e34f687 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 16 Dec 2017 04:54:22 +0100 Subject: [PATCH] bpo-29240, bpo-32030: Py_Main() re-reads config if encoding changes (#4899) bpo-29240, bpo-32030: If the encoding changes (C locale coerced or UTF-8 Mode changed), Py_Main() now reads the configuration again with the new encoding. Changes: * Add _Py_UnixMain() called by main(). * Rename pymain_free_pymain() to pymain_clear_pymain(); it can now be called multiple times. * Rename pymain_parse_cmdline_envvars() to pymain_read_conf(). * Py_Main() now clears orig_argc and orig_argv at exit. * Remove argv_copy2: Py_Main() doesn't modify argv anymore, so there is no longer any need for two copies of the wchar_t** argv. * _PyCoreConfig: add coerce_c_locale and coerce_c_locale_warn. * Py_UTF8Mode is now initialized to -1. * Locale coercion (PEP 538) now respects -I and -E options. --- Doc/using/cmdline.rst | 4 +- Include/pylifecycle.h | 5 +- Include/pystate.h | 8 +- Lib/test/test_c_locale_coercion.py | 5 +- Lib/test/test_cmd_line.py | 2 +- Lib/test/test_utf8_mode.py | 30 ++- Modules/getpath.c | 2 +- Modules/main.c | 302 +++++++++++++++++++++++------ Programs/python.c | 91 +-------- Python/bltinmodule.c | 7 +- Python/fileutils.c | 4 +- Python/pylifecycle.c | 80 +++----- 12 files changed, 325 insertions(+), 215 deletions(-) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 3915f336a5a..b1bd47fa6b9 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -779,9 +779,7 @@ conflict. If set to the value ``0``, causes the main Python command line application to skip coercing the legacy ASCII-based C locale to a more capable UTF-8 - based alternative. Note that this setting is checked even when the - :option:`-E` or :option:`-I` options are used, as it is handled prior to - the processing of command line options. + based alternative. 
If this variable is *not* set, or is set to a value other than ``0``, and the current locale reported for the ``LC_CTYPE`` category is the default diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index dcb7fcb8495..3ea8ad65088 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -105,6 +105,9 @@ PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *); /* Bootstrap __main__ (defined in Modules/main.c) */ PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv); +#ifdef Py_BUILD_CORE +PyAPI_FUNC(int) _Py_UnixMain(int argc, char **argv); +#endif /* In getpath.c */ PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void); @@ -194,7 +197,7 @@ PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size); /* Legacy locale support */ #ifndef Py_LIMITED_API -PyAPI_FUNC(void) _Py_CoerceLegacyLocale(void); +PyAPI_FUNC(void) _Py_CoerceLegacyLocale(const _PyCoreConfig *config); PyAPI_FUNC(int) _Py_LegacyLocaleDetected(void); PyAPI_FUNC(char *) _Py_SetLocaleFromEnv(int category); #endif diff --git a/Include/pystate.h b/Include/pystate.h index a56c9b4ea6c..fff134a4970 100644 --- a/Include/pystate.h +++ b/Include/pystate.h @@ -38,7 +38,10 @@ typedef struct { int show_alloc_count; /* -X showalloccount */ int dump_refs; /* PYTHONDUMPREFS */ int malloc_stats; /* PYTHONMALLOCSTATS */ - int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable */ + int coerce_c_locale; /* PYTHONCOERCECLOCALE, -1 means unknown */ + int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */ + int utf8_mode; /* -X utf8 or PYTHONUTF8 environment variable, + -1 means unknown */ wchar_t *module_search_path_env; /* PYTHONPATH environment variable */ wchar_t *home; /* PYTHONHOME environment variable, @@ -46,7 +49,8 @@ typedef struct { wchar_t *program_name; /* Program name, see also Py_GetProgramName() */ } _PyCoreConfig; -#define _PyCoreConfig_INIT (_PyCoreConfig){.use_hash_seed = -1} +#define _PyCoreConfig_INIT \ + (_PyCoreConfig){.use_hash_seed = -1, .coerce_c_locale = -1, .utf8_mode = 
-1} /* Note: _PyCoreConfig_INIT sets other fields to 0/NULL */ /* Placeholders while working on the new configuration API diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py index c0845d75a29..37dd834781c 100644 --- a/Lib/test/test_c_locale_coercion.py +++ b/Lib/test/test_c_locale_coercion.py @@ -65,7 +65,7 @@ def _set_locale_in_subprocess(locale_name): # If there's no valid CODESET, we expect coercion to be skipped cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))" cmd = cmd_fmt.format(locale_name) - result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) + result, py_cmd = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='') return result.rc == 0 @@ -131,7 +131,6 @@ class EncodingDetails(_EncodingDetails): """ result, py_cmd = run_python_until_end( "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT, - __isolated=True, **env_vars ) if not result.rc == 0: @@ -236,6 +235,7 @@ class LocaleConfigurationTests(_LocaleHandlingTestCase): "LANG": "", "LC_CTYPE": "", "LC_ALL": "", + "PYTHONCOERCECLOCALE": "", } for env_var in ("LANG", "LC_CTYPE"): for locale_to_set in AVAILABLE_TARGETS: @@ -294,6 +294,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase): "LANG": "", "LC_CTYPE": "", "LC_ALL": "", + "PYTHONCOERCECLOCALE": "", } base_var_dict.update(extra_vars) for env_var in ("LANG", "LC_CTYPE"): diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 2b14c301c7d..54ea3773a06 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -551,7 +551,7 @@ class CmdLineTest(unittest.TestCase): self.assertEqual(out, "True") # Warnings - code = ("import sys, warnings; " + code = ("import warnings; " "print(' '.join('%s::%s' % (f[0], f[2].__name__) " "for f in warnings.filters))") if Py_DEBUG: diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 275a6ea8ed6..73d1bd424ca 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -7,6 +7,7 @@ 
import os import sys import textwrap import unittest +from test import support from test.support.script_helper import assert_python_ok, assert_python_failure @@ -14,9 +15,11 @@ MS_WINDOWS = (sys.platform == 'win32') class UTF8ModeTests(unittest.TestCase): - # Override PYTHONUTF8 and PYTHONLEGACYWINDOWSFSENCODING environment - # variables by default - DEFAULT_ENV = {'PYTHONUTF8': '', 'PYTHONLEGACYWINDOWSFSENCODING': ''} + DEFAULT_ENV = { + 'PYTHONUTF8': '', + 'PYTHONLEGACYWINDOWSFSENCODING': '', + 'PYTHONCOERCECLOCALE': '0', + } def posix_locale(self): loc = locale.setlocale(locale.LC_CTYPE, None) @@ -53,7 +56,7 @@ class UTF8ModeTests(unittest.TestCase): self.assertEqual(out, '0') if MS_WINDOWS: - # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 + # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode # and has the priority over -X utf8 out = self.get_output('-X', 'utf8', '-c', code, PYTHONLEGACYWINDOWSFSENCODING='1') @@ -201,6 +204,25 @@ class UTF8ModeTests(unittest.TestCase): out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C') self.assertEqual(out, 'UTF-8 UTF-8') + @unittest.skipIf(MS_WINDOWS, 'test specific to Unix') + def test_cmd_line(self): + arg = 'h\xe9\u20ac'.encode('utf-8') + arg_utf8 = arg.decode('utf-8') + arg_ascii = arg.decode('ascii', 'surrogateescape') + code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))' + + def check(utf8_opt, expected, **kw): + out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw) + args = out.partition(':')[2].rstrip() + self.assertEqual(args, ascii(expected), out) + + check('utf8', [arg_utf8]) + if sys.platform == 'darwin' or support.is_android: + c_arg = arg_utf8 + else: + c_arg = arg_ascii + check('utf8=0', [c_arg], LC_ALL='C') + if __name__ == "__main__": unittest.main() diff --git a/Modules/getpath.c b/Modules/getpath.c index 6208a17f02e..b4b33437b6f 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -112,7 +112,7 @@ extern "C" { #define 
DECODE_LOCALE_ERR(NAME, LEN) \ ((LEN) == (size_t)-2) \ - ? _Py_INIT_USER_ERR("cannot decode " #NAME) \ + ? _Py_INIT_USER_ERR("cannot decode " NAME) \ : _Py_INIT_NO_MEMORY() typedef struct { diff --git a/Modules/main.c b/Modules/main.c index 6b602cf9b77..00de7f0d181 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -38,14 +38,14 @@ extern "C" { #define DECODE_LOCALE_ERR(NAME, LEN) \ (((LEN) == -2) \ - ? _Py_INIT_USER_ERR("cannot decode " #NAME) \ + ? _Py_INIT_USER_ERR("cannot decode " NAME) \ : _Py_INIT_NO_MEMORY()) #define SET_DECODE_ERROR(NAME, LEN) \ do { \ if ((LEN) == (size_t)-2) { \ - pymain->err = _Py_INIT_USER_ERR("cannot decode " #NAME); \ + pymain->err = _Py_INIT_USER_ERR("cannot decode " NAME); \ } \ else { \ pymain->err = _Py_INIT_NO_MEMORY(); \ @@ -53,8 +53,8 @@ extern "C" { } while (0) /* For Py_GetArgcArgv(); set by main() */ -static wchar_t **orig_argv; -static int orig_argc; +static wchar_t **orig_argv = NULL; +static int orig_argc = 0; /* command line options */ #define BASE_OPTS L"bBc:dEhiIJm:OqRsStuvVW:xX:?" 
@@ -427,8 +427,11 @@ typedef struct { _PyInitError err; /* PYTHONWARNINGS env var */ _Py_OptList env_warning_options; + int argc; wchar_t **argv; + int use_bytes_argv; + char **bytes_argv; int sys_argc; wchar_t **sys_argv; @@ -466,7 +469,6 @@ pymain_free_globals(_PyMain *pymain) { _PyPathConfig_Clear(&_Py_path_config); _PyImport_Fini2(); - _PyCoreConfig_Clear(&pymain->core_config); #ifdef __INSURE__ /* Insure++ is a memory analysis tool that aids in discovering @@ -483,22 +485,69 @@ pymain_free_globals(_PyMain *pymain) } +/* Clear argv allocated by pymain_decode_bytes_argv() */ static void -pymain_free_pymain(_PyMain *pymain) +pymain_clear_bytes_argv(_PyMain *pymain, int argc) +{ + if (pymain->use_bytes_argv && pymain->argv != NULL) { + for (int i = 0; i < argc; i++) { + PyMem_RawFree(pymain->argv[i]); + } + PyMem_RawFree(pymain->argv); + pymain->argv = NULL; + } +} + + +static int +pymain_decode_bytes_argv(_PyMain *pymain) +{ + assert(pymain->argv == NULL); + + /* +1 for a the NULL terminator */ + size_t size = sizeof(wchar_t*) * (pymain->argc + 1); + pymain->argv = (wchar_t **)PyMem_RawMalloc(size); + if (pymain->argv == NULL) { + pymain->err = _Py_INIT_NO_MEMORY(); + return -1; + } + + for (int i = 0; i < pymain->argc; i++) { + size_t len; + pymain->argv[i] = Py_DecodeLocale(pymain->bytes_argv[i], &len); + if (pymain->argv[i] == NULL) { + pymain_clear_bytes_argv(pymain, i); + pymain->err = DECODE_LOCALE_ERR("command line arguments", + (Py_ssize_t)len); + return -1; + } + } + pymain->argv[pymain->argc] = NULL; + return 0; +} + + +static void +pymain_clear_pymain(_PyMain *pymain) { _Py_CommandLineDetails *cmdline = &pymain->cmdline; pymain_optlist_clear(&cmdline->warning_options); pymain_optlist_clear(&cmdline->xoptions); PyMem_RawFree(cmdline->command); + cmdline->command = NULL; PyMem_RawFree(pymain->sys_argv); + pymain->sys_argv = NULL; pymain_optlist_clear(&pymain->env_warning_options); + pymain_clear_bytes_argv(pymain, pymain->argc); + + 
_PyCoreConfig_Clear(&pymain->core_config); } /* Clear Python ojects */ static void -pymain_free_python(_PyMain *pymain) +pymain_clear_python(_PyMain *pymain) { Py_CLEAR(pymain->main_importer_path); @@ -509,12 +558,12 @@ pymain_free_python(_PyMain *pymain) static void pymain_free(_PyMain *pymain) { - /* Force the allocator used by pymain_parse_cmdline_envvars() */ + /* Force the allocator used by pymain_read_conf() */ PyMemAllocatorEx old_alloc; _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - pymain_free_python(pymain); - pymain_free_pymain(pymain); + pymain_clear_python(pymain); + pymain_clear_pymain(pymain); pymain_free_globals(pymain); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); @@ -743,6 +792,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain) cmdline->filename = pymain->argv[_PyOS_optind]; } + pymain->run_code = (cmdline->command != NULL || cmdline->filename != NULL + || cmdline->module != NULL); + /* -c and -m options are exclusive */ assert(!(cmdline->command != NULL && cmdline->module != NULL)); @@ -1434,8 +1486,6 @@ pymain_repl(_PyMain *pymain) static int pymain_parse_cmdline(_PyMain *pymain) { - _Py_CommandLineDetails *cmdline = &pymain->cmdline; - int res = pymain_parse_cmdline_impl(pymain); if (res < 0) { return -1; @@ -1446,21 +1496,6 @@ pymain_parse_cmdline(_PyMain *pymain) return 1; } - if (cmdline->print_help) { - pymain_usage(0, pymain->argv[0]); - pymain->status = 0; - return 1; - } - - if (cmdline->print_version) { - printf("Python %s\n", - (cmdline->print_version >= 2) ? 
Py_GetVersion() : PY_VERSION); - return 1; - } - - pymain->run_code = (cmdline->command != NULL || cmdline->filename != NULL - || cmdline->module != NULL); - return 0; } @@ -1852,6 +1887,19 @@ pymain_parse_envvars(_PyMain *pymain) pymain->core_config.malloc_stats = 1; } + const char* env = pymain_get_env_var("PYTHONCOERCECLOCALE"); + if (env) { + if (strcmp(env, "0") == 0) { + pymain->core_config.coerce_c_locale = 0; + } + else if (strcmp(env, "warn") == 0) { + pymain->core_config.coerce_c_locale_warn = 1; + } + else { + pymain->core_config.coerce_c_locale = 1; + } + } + if (pymain_init_utf8_mode(pymain) < 0) { return -1; } @@ -1867,23 +1915,19 @@ pymain_parse_envvars(_PyMain *pymain) Return 1 if Python is done and must exit. Set pymain->err and return -1 on error. */ static int -pymain_parse_cmdline_envvars_impl(_PyMain *pymain) +pymain_read_conf_impl(_PyMain *pymain) { int res = pymain_parse_cmdline(pymain); - if (res < 0) { - return -1; - } - if (res > 0) { - return 1; + if (res != 0) { + return res; } - /* Set Py_IgnoreEnvironmentFlag needed by Py_GETENV() */ - pymain_set_global_config(pymain); + /* Set Py_IgnoreEnvironmentFlag for Py_GETENV() */ + Py_IgnoreEnvironmentFlag = pymain->core_config.ignore_environment; if (pymain_parse_envvars(pymain) < 0) { return -1; } - /* FIXME: if utf8_mode value changed, parse again cmdline */ if (pymain_init_sys_argv(pymain) < 0) { return -1; @@ -1899,14 +1943,101 @@ pymain_parse_cmdline_envvars_impl(_PyMain *pymain) static int -pymain_parse_cmdline_envvars(_PyMain *pymain) +pymain_read_conf(_PyMain *pymain) { + int res = -1; + /* Force default allocator, since pymain_free() must use the same allocator than this function. 
*/ PyMemAllocatorEx old_alloc; _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - int res = pymain_parse_cmdline_envvars_impl(pymain); + char *oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL)); + if (oldloc == NULL) { + pymain->err = _Py_INIT_NO_MEMORY(); + goto done; + } + + /* Reconfigure the locale to the default for this process */ + _Py_SetLocaleFromEnv(LC_ALL); + + int locale_coerced = 0; + int loops = 0; + int init_ignore_env = pymain->core_config.ignore_environment; + + while (1) { + int utf8_mode = pymain->core_config.utf8_mode; + int encoding_changed = 0; + + /* Watchdog to prevent an infinite loop */ + loops++; + if (loops == 3) { + pymain->err = _Py_INIT_ERR("Encoding changed twice while " + "reading the configuration"); + goto done; + } + + if (pymain->use_bytes_argv) { + if (pymain_decode_bytes_argv(pymain) < 0) { + goto done; + } + } + + res = pymain_read_conf_impl(pymain); + if (res != 0) { + goto done; + } + + /* The legacy C locale assumes ASCII as the default text encoding, which + * causes problems not only for the CPython runtime, but also other + * components like GNU readline. + * + * Accordingly, when the CLI detects it, it attempts to coerce it to a + * more capable UTF-8 based alternative. + * + * See the documentation of the PYTHONCOERCECLOCALE setting for more + * details. + */ + if (pymain->core_config.coerce_c_locale == 1 && !locale_coerced) { + locale_coerced = 1; + _Py_CoerceLegacyLocale(&pymain->core_config); + encoding_changed = 1; + } + + if (utf8_mode == -1) { + if (pymain->core_config.utf8_mode == 1) { + /* UTF-8 Mode enabled */ + encoding_changed = 1; + } + } + else { + if (pymain->core_config.utf8_mode != utf8_mode) { + encoding_changed = 1; + } + } + + if (!encoding_changed) { + break; + } + + /* Reset the configuration, except UTF-8 Mode. Set Py_UTF8Mode for + Py_DecodeLocale(). Reset Py_IgnoreEnvironmentFlag, modified by + pymain_read_conf_impl(). 
*/ + Py_UTF8Mode = pymain->core_config.utf8_mode; + Py_IgnoreEnvironmentFlag = init_ignore_env; + pymain_clear_pymain(pymain); + pymain_get_global_config(pymain); + + /* The encoding changed: read again the configuration + with the new encoding */ + } + res = 0; + +done: + if (oldloc != NULL) { + setlocale(LC_ALL, oldloc); + PyMem_RawFree(oldloc); + } PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); return res; @@ -1940,6 +2071,24 @@ _PyCoreConfig_Read(_PyCoreConfig *config) } } + if (config->utf8_mode < 0 || config->coerce_c_locale < 0) { + if (_Py_LegacyLocaleDetected()) { + if (config->utf8_mode < 0) { + config->utf8_mode = 1; + } + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 1; + } + } + + if (config->coerce_c_locale < 0) { + config->coerce_c_locale = 0; + } + if (config->utf8_mode < 0) { + config->utf8_mode = 0; + } + } + return _Py_INIT_OK(); } @@ -2247,17 +2396,24 @@ pymain_run_python(_PyMain *pymain) static int pymain_init(_PyMain *pymain) { + /* 754 requires that FP exceptions run in "no stop" mode by default, + * and until C vendors implement C99's ways to control FP exceptions, + * Python requires non-stop mode. Alas, some platforms enable FP + * exceptions by default. Here we disable them. 
+ */ +#ifdef __FreeBSD__ + fedisableexcept(FE_OVERFLOW); +#endif + pymain->err = _PyRuntime_Initialize(); if (_Py_INIT_FAILED(pymain->err)) { return -1; } - pymain->core_config.utf8_mode = Py_UTF8Mode; pymain->core_config._disable_importlib = 0; pymain->config.install_signal_handlers = 1; - orig_argc = pymain->argc; /* For Py_GetArgcArgv() */ - orig_argv = pymain->argv; + pymain_get_global_config(pymain); return 0; } @@ -2265,14 +2421,13 @@ pymain_init(_PyMain *pymain) static int pymain_impl(_PyMain *pymain) { - int res = pymain_init(pymain); - if (res < 0) { + if (pymain_init(pymain) < 0) { return -1; } - pymain_get_global_config(pymain); - - res = pymain_parse_cmdline_envvars(pymain); + /* Read the configuration, but initialize also the LC_CTYPE locale: + enable UTF-8 mode (PEP 540) and/or coerce the C locale (PEP 538) */ + int res = pymain_read_conf(pymain); if (res < 0) { return -1; } @@ -2281,6 +2436,21 @@ pymain_impl(_PyMain *pymain) return 0; } + _Py_CommandLineDetails *cmdline = &pymain->cmdline; + if (cmdline->print_help) { + pymain_usage(0, pymain->argv[0]); + return 0; + } + + if (cmdline->print_version) { + printf("Python %s\n", + (cmdline->print_version >= 2) ? 
Py_GetVersion() : PY_VERSION); + return 0; + } + + orig_argc = pymain->argc; /* For Py_GetArgcArgv() */ + orig_argv = pymain->argv; + res = pymain_init_python_core(pymain); if (res < 0) { return -1; @@ -2293,7 +2463,7 @@ pymain_impl(_PyMain *pymain) pymain_run_python(pymain); - pymain_free_python(pymain); + pymain_clear_python(pymain); if (Py_FinalizeEx() < 0) { /* Value unlikely to be confused with a non-error exit status or @@ -2304,22 +2474,46 @@ pymain_impl(_PyMain *pymain) } +static int +pymain_main(_PyMain *pymain) +{ + memset(&pymain->cmdline, 0, sizeof(pymain->cmdline)); + + if (pymain_impl(pymain) < 0) { + _Py_FatalInitError(pymain->err); + } + pymain_free(pymain); + + orig_argc = 0; + orig_argv = NULL; + + return pymain->status; +} + + int Py_Main(int argc, wchar_t **argv) { _PyMain pymain = _PyMain_INIT; - memset(&pymain.cmdline, 0, sizeof(pymain.cmdline)); pymain.argc = argc; pymain.argv = argv; - if (pymain_impl(&pymain) < 0) { - _Py_FatalInitError(pymain.err); - } - pymain_free(&pymain); - - return pymain.status; + return pymain_main(&pymain); } + +int +_Py_UnixMain(int argc, char **argv) +{ + _PyMain pymain = _PyMain_INIT; + pymain.argc = argc; + pymain.use_bytes_argv = 1; + pymain.bytes_argv = argv; + + return pymain_main(&pymain); +} + + /* this is gonna seem *real weird*, but if you put some other code between Py_Main() and Py_GetArgcArgv() you will need to adjust the test in the while statement in Misc/gdbinit:ppystack */ diff --git a/Programs/python.c b/Programs/python.c index aef7122517a..a295486d73f 100644 --- a/Programs/python.c +++ b/Programs/python.c @@ -17,98 +17,9 @@ wmain(int argc, wchar_t **argv) #else -static void _Py_NO_RETURN -fatal_error(const char *msg) -{ - fprintf(stderr, "Fatal Python error: %s\n", msg); - fflush(stderr); - exit(1); -} - - int main(int argc, char **argv) { - wchar_t **argv_copy; - /* We need a second copy, as Python might modify the first one. 
*/ - wchar_t **argv_copy2; - int i, status; - char *oldloc; - - _PyInitError err = _PyRuntime_Initialize(); - if (_Py_INIT_FAILED(err)) { - fatal_error(err.msg); - } - - /* Force default allocator, to be able to release memory above - with a known allocator. */ - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL); - - argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1)); - argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1)); - if (!argv_copy || !argv_copy2) { - fatal_error("out of memory"); - return 1; - } - - /* 754 requires that FP exceptions run in "no stop" mode by default, - * and until C vendors implement C99's ways to control FP exceptions, - * Python requires non-stop mode. Alas, some platforms enable FP - * exceptions by default. Here we disable them. - */ -#ifdef __FreeBSD__ - fedisableexcept(FE_OVERFLOW); -#endif - - oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL)); - if (!oldloc) { - fatal_error("out of memory"); - return 1; - } - - /* Reconfigure the locale to the default for this process */ - _Py_SetLocaleFromEnv(LC_ALL); - - /* The legacy C locale assumes ASCII as the default text encoding, which - * causes problems not only for the CPython runtime, but also other - * components like GNU readline. - * - * Accordingly, when the CLI detects it, it attempts to coerce it to a - * more capable UTF-8 based alternative. - * - * See the documentation of the PYTHONCOERCECLOCALE setting for more - * details. 
- */ - if (_Py_LegacyLocaleDetected()) { - Py_UTF8Mode = 1; - _Py_CoerceLegacyLocale(); - } - - /* Convert from char to wchar_t based on the locale settings */ - for (i = 0; i < argc; i++) { - argv_copy[i] = Py_DecodeLocale(argv[i], NULL); - if (!argv_copy[i]) { - PyMem_RawFree(oldloc); - fatal_error("unable to decode the command line arguments"); - } - argv_copy2[i] = argv_copy[i]; - } - argv_copy2[argc] = argv_copy[argc] = NULL; - - setlocale(LC_ALL, oldloc); - PyMem_RawFree(oldloc); - - status = Py_Main(argc, argv_copy); - - /* Py_Main() can change PyMem_RawMalloc() allocator, so restore the default - to release memory blocks allocated before Py_Main() */ - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL); - - for (i = 0; i < argc; i++) { - PyMem_RawFree(argv_copy2[i]); - } - PyMem_RawFree(argv_copy); - PyMem_RawFree(argv_copy2); - return status; + return _Py_UnixMain(argc, argv); } #endif diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 604493dc5e2..e702f7c6e9e 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -29,9 +29,10 @@ const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ int Py_HasFileSystemDefaultEncoding = 0; #endif const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape"; -/* UTF-8 mode (PEP 540): if non-zero, use the UTF-8 encoding, and change stdin - and stdout error handler to "surrogateescape". */ -int Py_UTF8Mode = 0; +/* UTF-8 mode (PEP 540): if equals to 1, use the UTF-8 encoding, and change + stdin and stdout error handler to "surrogateescape". 
It is equal to + -1 by default: unknown, will be set by Py_Main() */ +int Py_UTF8Mode = -1; _Py_IDENTIFIER(__builtins__); _Py_IDENTIFIER(__dict__); diff --git a/Python/fileutils.c b/Python/fileutils.c index 4b69049ce58..c4d495d0d63 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -393,7 +393,7 @@ Py_DecodeLocale(const char* arg, size_t *size) #if defined(__APPLE__) || defined(__ANDROID__) return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); #else - if (Py_UTF8Mode) { + if (Py_UTF8Mode == 1) { return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); } @@ -539,7 +539,7 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos) #if defined(__APPLE__) || defined(__ANDROID__) return _Py_EncodeLocaleUTF8(text, error_pos); #else /* __APPLE__ */ - if (Py_UTF8Mode) { + if (Py_UTF8Mode == 1) { return _Py_EncodeLocaleUTF8(text, error_pos); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8c626075d5d..6500995ee24 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -385,18 +385,10 @@ static const char *_C_LOCALE_WARNING = "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " "locales is recommended.\n"; -static int -_legacy_locale_warnings_enabled(void) -{ - const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); - return (coerce_c_locale != NULL && - strncmp(coerce_c_locale, "warn", 5) == 0); -} - static void -_emit_stderr_warning_for_legacy_locale(void) +_emit_stderr_warning_for_legacy_locale(const _PyCoreConfig *core_config) { - if (_legacy_locale_warnings_enabled()) { + if (core_config->coerce_c_locale_warn) { if (_Py_LegacyLocaleDetected()) { fprintf(stderr, "%s", _C_LOCALE_WARNING); } @@ -440,12 +432,12 @@ get_default_standard_stream_error_handler(void) } #ifdef PY_COERCE_C_LOCALE -static const char _C_LOCALE_COERCION_WARNING[] = +static const char C_LOCALE_COERCION_WARNING[] = "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale " "or PYTHONCOERCECLOCALE=0 to disable this 
locale coercion behavior).\n"; static void -_coerce_default_locale_settings(const _LocaleCoercionTarget *target) +_coerce_default_locale_settings(const _PyCoreConfig *config, const _LocaleCoercionTarget *target) { const char *newloc = target->locale_name; @@ -458,8 +450,8 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) "Error setting LC_CTYPE, skipping C locale coercion\n"); return; } - if (_legacy_locale_warnings_enabled()) { - fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); + if (config->coerce_c_locale_warn) { + fprintf(stderr, C_LOCALE_COERCION_WARNING, newloc); } /* Reconfigure with the overridden environment variables */ @@ -468,47 +460,31 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) #endif void -_Py_CoerceLegacyLocale(void) +_Py_CoerceLegacyLocale(const _PyCoreConfig *config) { #ifdef PY_COERCE_C_LOCALE - /* We ignore the Python -E and -I flags here, as the CLI needs to sort out - * the locale settings *before* we try to do anything with the command - * line arguments. For cross-platform debugging purposes, we also need - * to give end users a way to force even scripts that are otherwise - * isolated from their environment to use the legacy ASCII-centric C - * locale. - * - * Ignoring -E and -I is safe from a security perspective, as we only use - * the setting to turn *off* the implicit locale coercion, and anyone with - * access to the process environment already has the ability to set - * `LC_ALL=C` to override the C level locale settings anyway. 
- */ - const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); - if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { - /* PYTHONCOERCECLOCALE is not set, or is set to something other than "0" */ - const char *locale_override = getenv("LC_ALL"); - if (locale_override == NULL || *locale_override == '\0') { - /* LC_ALL is also not set (or is set to an empty string) */ - const _LocaleCoercionTarget *target = NULL; - for (target = _TARGET_LOCALES; target->locale_name; target++) { - const char *new_locale = setlocale(LC_CTYPE, - target->locale_name); - if (new_locale != NULL) { + const char *locale_override = getenv("LC_ALL"); + if (locale_override == NULL || *locale_override == '\0') { + /* LC_ALL is also not set (or is set to an empty string) */ + const _LocaleCoercionTarget *target = NULL; + for (target = _TARGET_LOCALES; target->locale_name; target++) { + const char *new_locale = setlocale(LC_CTYPE, + target->locale_name); + if (new_locale != NULL) { #if !defined(__APPLE__) && !defined(__ANDROID__) && \ - defined(HAVE_LANGINFO_H) && defined(CODESET) - /* Also ensure that nl_langinfo works in this locale */ - char *codeset = nl_langinfo(CODESET); - if (!codeset || *codeset == '\0') { - /* CODESET is not set or empty, so skip coercion */ - new_locale = NULL; - _Py_SetLocaleFromEnv(LC_CTYPE); - continue; - } -#endif - /* Successfully configured locale, so make it the default */ - _coerce_default_locale_settings(target); - return; +defined(HAVE_LANGINFO_H) && defined(CODESET) + /* Also ensure that nl_langinfo works in this locale */ + char *codeset = nl_langinfo(CODESET); + if (!codeset || *codeset == '\0') { + /* CODESET is not set or empty, so skip coercion */ + new_locale = NULL; + _Py_SetLocaleFromEnv(LC_CTYPE); + continue; } +#endif + /* Successfully configured locale, so make it the default */ + _coerce_default_locale_settings(config, target); + return; } } } @@ -648,7 +624,7 @@ _Py_InitializeCore(const _PyCoreConfig *core_config) the 
locale's charset without having to switch locales. */ _Py_SetLocaleFromEnv(LC_CTYPE); - _emit_stderr_warning_for_legacy_locale(); + _emit_stderr_warning_for_legacy_locale(core_config); #endif err = _Py_HashRandomization_Init(core_config);