mirror of https://github.com/python/cpython
bpo-34485: Enhance init_sys_streams() (GH-8978)
Python now gets the locale encoding with C code to initialize the encoding of standard streams like sys.stdout. Moreover, the encoding is now initialized to the Python codec name to get a normalized encoding name and to ensure that the codec is loaded. The change avoids importing _bootlocale and _locale modules at startup by default. When the PYTHONIOENCODING environment variable only contains an encoding, the error handler is now set explicitly to "strict". Also rename get_default_standard_stream_error_handler() to get_stdio_errors(). Reduce the size of the buffer used to format the "cpXXX" string (Windows locale encoding).
This commit is contained in:
parent
d500e5307a
commit
9e4994d410
|
@ -171,17 +171,17 @@ class EmbeddingTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
"stdout: {out_encoding}:ignore",
|
||||
"stderr: {out_encoding}:backslashreplace",
|
||||
"--- Set encoding only ---",
|
||||
"Expected encoding: latin-1",
|
||||
"Expected encoding: iso8859-1",
|
||||
"Expected errors: default",
|
||||
"stdin: latin-1:{errors}",
|
||||
"stdout: latin-1:{errors}",
|
||||
"stderr: latin-1:backslashreplace",
|
||||
"stdin: iso8859-1:{errors}",
|
||||
"stdout: iso8859-1:{errors}",
|
||||
"stderr: iso8859-1:backslashreplace",
|
||||
"--- Set encoding and errors ---",
|
||||
"Expected encoding: latin-1",
|
||||
"Expected encoding: iso8859-1",
|
||||
"Expected errors: replace",
|
||||
"stdin: latin-1:replace",
|
||||
"stdout: latin-1:replace",
|
||||
"stderr: latin-1:backslashreplace"])
|
||||
"stdin: iso8859-1:replace",
|
||||
"stdout: iso8859-1:replace",
|
||||
"stderr: iso8859-1:backslashreplace"])
|
||||
expected_output = expected_output.format(
|
||||
in_encoding=expected_stream_encoding,
|
||||
out_encoding=expected_stream_encoding,
|
||||
|
|
|
@ -668,7 +668,7 @@ class SysModuleTest(unittest.TestCase):
|
|||
'dump("stdout")',
|
||||
'dump("stderr")',
|
||||
))
|
||||
args = [sys.executable, "-c", code]
|
||||
args = [sys.executable, "-X", "utf8=0", "-c", code]
|
||||
if isolated:
|
||||
args.append("-I")
|
||||
if encoding is not None:
|
||||
|
@ -712,8 +712,8 @@ class SysModuleTest(unittest.TestCase):
|
|||
# have no any effect
|
||||
out = self.c_locale_get_error_handler(encoding=':')
|
||||
self.assertEqual(out,
|
||||
'stdin: strict\n'
|
||||
'stdout: strict\n'
|
||||
'stdin: surrogateescape\n'
|
||||
'stdout: surrogateescape\n'
|
||||
'stderr: backslashreplace\n')
|
||||
out = self.c_locale_get_error_handler(encoding='')
|
||||
self.assertEqual(out,
|
||||
|
|
|
@ -139,16 +139,16 @@ class UTF8ModeTests(unittest.TestCase):
|
|||
out = self.get_output('-X', 'utf8', '-c', code,
|
||||
PYTHONIOENCODING="latin1")
|
||||
self.assertEqual(out.splitlines(),
|
||||
['stdin: latin1/strict',
|
||||
'stdout: latin1/strict',
|
||||
'stderr: latin1/backslashreplace'])
|
||||
['stdin: iso8859-1/strict',
|
||||
'stdout: iso8859-1/strict',
|
||||
'stderr: iso8859-1/backslashreplace'])
|
||||
|
||||
out = self.get_output('-X', 'utf8', '-c', code,
|
||||
PYTHONIOENCODING=":namereplace")
|
||||
self.assertEqual(out.splitlines(),
|
||||
['stdin: UTF-8/namereplace',
|
||||
'stdout: UTF-8/namereplace',
|
||||
'stderr: UTF-8/backslashreplace'])
|
||||
['stdin: utf-8/namereplace',
|
||||
'stdout: utf-8/namereplace',
|
||||
'stderr: utf-8/backslashreplace'])
|
||||
|
||||
def test_io(self):
|
||||
code = textwrap.dedent('''
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
Python now gets the locale encoding with C code to initialize the encoding
|
||||
of standard streams like sys.stdout. Moreover, the encoding is now
|
||||
initialized to the Python codec name to get a normalized encoding name and
|
||||
to ensure that the codec is loaded. The change avoids importing _bootlocale
|
||||
and _locale modules at startup by default.
|
|
@ -0,0 +1,3 @@
|
|||
Fix the error handler of standard streams like sys.stdout:
|
||||
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
|
||||
"strict".
|
|
@ -319,7 +319,7 @@ exit:
|
|||
static PyObject*
|
||||
PyLocale_getdefaultlocale(PyObject* self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
char encoding[100];
|
||||
char encoding[20];
|
||||
char locale[100];
|
||||
|
||||
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
|
||||
|
|
|
@ -113,9 +113,9 @@ static int test_forced_io_encoding(void)
|
|||
printf("--- Set errors only ---\n");
|
||||
check_stdio_details(NULL, "ignore");
|
||||
printf("--- Set encoding only ---\n");
|
||||
check_stdio_details("latin-1", NULL);
|
||||
check_stdio_details("iso8859-1", NULL);
|
||||
printf("--- Set encoding and errors ---\n");
|
||||
check_stdio_details("latin-1", "replace");
|
||||
check_stdio_details("iso8859-1", "replace");
|
||||
|
||||
/* Check calling after initialization fails */
|
||||
Py_Initialize();
|
||||
|
|
|
@ -244,22 +244,26 @@ error:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static char*
|
||||
get_locale_encoding(void)
|
||||
static _PyInitError
|
||||
get_locale_encoding(char **locale_encoding)
|
||||
{
|
||||
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
|
||||
char* codeset = nl_langinfo(CODESET);
|
||||
if (!codeset || codeset[0] == '\0') {
|
||||
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
|
||||
return NULL;
|
||||
}
|
||||
return get_codec_name(codeset);
|
||||
#ifdef MS_WINDOWS
|
||||
char encoding[20];
|
||||
PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP());
|
||||
#elif defined(__ANDROID__)
|
||||
return get_codec_name("UTF-8");
|
||||
const char *encoding = "UTF-8";
|
||||
#else
|
||||
PyErr_SetNone(PyExc_NotImplementedError);
|
||||
return NULL;
|
||||
const char *encoding = nl_langinfo(CODESET);
|
||||
if (!encoding || encoding[0] == '\0') {
|
||||
return _Py_INIT_USER_ERR("failed to get the locale encoding: "
|
||||
"nl_langinfo(CODESET) failed");
|
||||
}
|
||||
#endif
|
||||
*locale_encoding = _PyMem_RawStrdup(encoding);
|
||||
if (*locale_encoding == NULL) {
|
||||
return _Py_INIT_NO_MEMORY();
|
||||
}
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
static _PyInitError
|
||||
|
@ -397,7 +401,7 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = {
|
|||
};
|
||||
|
||||
static const char *
|
||||
get_default_standard_stream_error_handler(void)
|
||||
get_stdio_errors(void)
|
||||
{
|
||||
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
||||
if (ctype_loc != NULL) {
|
||||
|
@ -417,8 +421,7 @@ get_default_standard_stream_error_handler(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Otherwise return NULL to request the typical default error handler */
|
||||
return NULL;
|
||||
return "strict";
|
||||
}
|
||||
|
||||
#ifdef PY_COERCE_C_LOCALE
|
||||
|
@ -1586,9 +1589,17 @@ initfsencoding(PyInterpreterState *interp)
|
|||
Py_HasFileSystemDefaultEncoding = 1;
|
||||
}
|
||||
else {
|
||||
Py_FileSystemDefaultEncoding = get_locale_encoding();
|
||||
char *locale_encoding;
|
||||
_PyInitError err = get_locale_encoding(&locale_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
|
||||
Py_FileSystemDefaultEncoding = get_codec_name(locale_encoding);
|
||||
PyMem_RawFree(locale_encoding);
|
||||
if (Py_FileSystemDefaultEncoding == NULL) {
|
||||
return _Py_INIT_ERR("Unable to get the locale encoding");
|
||||
return _Py_INIT_ERR("failed to get the Python codec "
|
||||
"of the locale encoding");
|
||||
}
|
||||
|
||||
Py_HasFileSystemDefaultEncoding = 0;
|
||||
|
@ -1787,6 +1798,8 @@ init_sys_streams(PyInterpreterState *interp)
|
|||
PyObject * encoding_attr;
|
||||
char *pythonioencoding = NULL;
|
||||
const char *encoding, *errors;
|
||||
char *locale_encoding = NULL;
|
||||
char *codec_name = NULL;
|
||||
_PyInitError res = _Py_INIT_OK();
|
||||
|
||||
/* Hack to avoid a nasty recursion issue when Python is invoked
|
||||
|
@ -1838,21 +1851,46 @@ init_sys_streams(PyInterpreterState *interp)
|
|||
errors = err;
|
||||
}
|
||||
}
|
||||
if (*pythonioencoding && !encoding) {
|
||||
if (!encoding && *pythonioencoding) {
|
||||
encoding = pythonioencoding;
|
||||
if (!errors) {
|
||||
errors = "strict";
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (interp->core_config.utf8_mode) {
|
||||
encoding = "utf-8";
|
||||
errors = "surrogateescape";
|
||||
|
||||
if (interp->core_config.utf8_mode) {
|
||||
if (!encoding) {
|
||||
encoding = "utf-8";
|
||||
}
|
||||
if (!errors) {
|
||||
errors = "surrogateescape";
|
||||
}
|
||||
}
|
||||
|
||||
if (!errors && !pythonioencoding) {
|
||||
if (!errors) {
|
||||
/* Choose the default error handler based on the current locale */
|
||||
errors = get_default_standard_stream_error_handler();
|
||||
errors = get_stdio_errors();
|
||||
}
|
||||
}
|
||||
|
||||
if (encoding == NULL) {
|
||||
_PyInitError err = get_locale_encoding(&locale_encoding);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
encoding = locale_encoding;
|
||||
}
|
||||
|
||||
codec_name = get_codec_name(encoding);
|
||||
if (codec_name == NULL) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"failed to get the Python codec name "
|
||||
"of stdio encoding");
|
||||
goto error;
|
||||
}
|
||||
encoding = codec_name;
|
||||
|
||||
/* Set sys.stdin */
|
||||
fd = fileno(stdin);
|
||||
/* Under some conditions stdin, stdout and stderr may not be connected
|
||||
|
@ -1928,6 +1966,8 @@ done:
|
|||
|
||||
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
|
||||
|
||||
PyMem_RawFree(locale_encoding);
|
||||
PyMem_RawFree(codec_name);
|
||||
PyMem_Free(pythonioencoding);
|
||||
Py_XDECREF(bimod);
|
||||
Py_XDECREF(iomod);
|
||||
|
|
Loading…
Reference in New Issue