Issue #16129: Add `Py_SetStandardStreamEncoding`
This new pre-initialization API allows embedding applications like Blender to force a particular encoding and error handler for the standard IO streams. Also refactors Modules/_testembed.c to let us start testing multiple embedding scenarios. (Initial patch by Bastien Montagne)
This commit is contained in:
parent
26f92680da
commit
7d270ee05d
|
@ -86,6 +86,33 @@ Process-wide parameters
|
||||||
=======================
|
=======================
|
||||||
|
|
||||||
|
|
||||||
|
.. c:function:: int Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
|
||||||
|
|
||||||
|
.. index::
|
||||||
|
single: Py_Initialize()
|
||||||
|
single: main()
|
||||||
|
triple: stdin; stdout; stderr
|
||||||
|
|
||||||
|
This function should be called before :c:func:`Py_Initialize`. It
|
||||||
|
specifies which encoding and error handling to use with standard IO,
|
||||||
|
with the same meanings as in :func:`str.encode`.
|
||||||
|
|
||||||
|
It overrides :envvar:`PYTHONIOENCODING` values, and allows embedding code
|
||||||
|
to control IO encoding when the environment variable does not work.
|
||||||
|
|
||||||
|
``encoding`` and/or ``errors`` may be NULL to use
|
||||||
|
:envvar:`PYTHONIOENCODING` and/or default values (depending on other
|
||||||
|
settings).
|
||||||
|
|
||||||
|
Note that :data:`sys.stderr` always uses the "backslashreplace" error
|
||||||
|
handler, regardless of this (or any other) setting.
|
||||||
|
|
||||||
|
If :c:func:`Py_Finalize` is called, this function will need to be called
|
||||||
|
again in order to affect subsequent calls to :c:func:`Py_Initialize`.
|
||||||
|
|
||||||
|
Returns 0 if successful, a nonzero value on error (e.g. calling after the
interpreter has already been initialized).
|
||||||
|
|
||||||
|
|
||||||
.. c:function:: void Py_SetProgramName(wchar_t *name)
|
.. c:function:: void Py_SetProgramName(wchar_t *name)
|
||||||
|
|
||||||
.. index::
|
.. index::
|
||||||
|
|
|
@ -564,7 +564,10 @@ Build and C API Changes
|
||||||
|
|
||||||
Changes to Python's build process and to the C API include:
|
Changes to Python's build process and to the C API include:
|
||||||
|
|
||||||
* None yet.
|
* The new :c:func:`Py_SetStandardStreamEncoding` pre-initialization API
|
||||||
|
allows applications embedding the CPython interpreter to reliably force
|
||||||
|
a particular encoding and error handler for the standard streams.
|
||||||
|
(Contributed by Bastien Montagne and Nick Coghlan in :issue:`16129`.)
|
||||||
|
|
||||||
|
|
||||||
Deprecated
|
Deprecated
|
||||||
|
|
|
@ -38,6 +38,8 @@ PyAPI_FUNC(int) Py_IsInitialized(void);
|
||||||
PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void);
|
PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void);
|
||||||
PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *);
|
PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *);
|
||||||
|
|
||||||
|
PyAPI_FUNC(int) Py_SetStandardStreamEncoding(const char *encoding, const char *errors);
|
||||||
|
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *);
|
PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *);
|
||||||
PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *);
|
PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *);
|
||||||
|
|
|
@ -9,6 +9,7 @@ import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import unittest
|
import unittest
|
||||||
|
import textwrap
|
||||||
from test import support
|
from test import support
|
||||||
try:
|
try:
|
||||||
import _posixsubprocess
|
import _posixsubprocess
|
||||||
|
@ -218,36 +219,81 @@ class Test6012(unittest.TestCase):
|
||||||
self.assertEqual(_testcapi.argparsing("Hello", "World"), 1)
|
self.assertEqual(_testcapi.argparsing("Hello", "World"), 1)
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingTest(unittest.TestCase):
|
@unittest.skipIf(
|
||||||
|
sys.platform.startswith('win'),
|
||||||
|
"interpreter embedding tests aren't built under Windows")
|
||||||
|
class EmbeddingTests(unittest.TestCase):
|
||||||
|
# XXX only tested under Unix checkouts
|
||||||
|
|
||||||
@unittest.skipIf(
|
def setUp(self):
|
||||||
sys.platform.startswith('win'),
|
|
||||||
"test doesn't work under Windows")
|
|
||||||
def test_subinterps(self):
|
|
||||||
# XXX only tested under Unix checkouts
|
|
||||||
basepath = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
basepath = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
|
||||||
oldcwd = os.getcwd()
|
self.test_exe = exe = os.path.join(basepath, "Modules", "_testembed")
|
||||||
|
if not os.path.exists(exe):
|
||||||
|
self.skipTest("%r doesn't exist" % exe)
|
||||||
# This is needed otherwise we get a fatal error:
|
# This is needed otherwise we get a fatal error:
|
||||||
# "Py_Initialize: Unable to get the locale encoding
|
# "Py_Initialize: Unable to get the locale encoding
|
||||||
# LookupError: no codec search functions registered: can't find encoding"
|
# LookupError: no codec search functions registered: can't find encoding"
|
||||||
|
self.oldcwd = os.getcwd()
|
||||||
os.chdir(basepath)
|
os.chdir(basepath)
|
||||||
try:
|
|
||||||
exe = os.path.join(basepath, "Modules", "_testembed")
|
def tearDown(self):
|
||||||
if not os.path.exists(exe):
|
os.chdir(self.oldcwd)
|
||||||
self.skipTest("%r doesn't exist" % exe)
|
|
||||||
p = subprocess.Popen([exe],
|
def run_embedded_interpreter(self, *args):
|
||||||
stdout=subprocess.PIPE,
|
"""Runs a test in the embedded interpreter"""
|
||||||
stderr=subprocess.PIPE)
|
cmd = [self.test_exe]
|
||||||
(out, err) = p.communicate()
|
cmd.extend(args)
|
||||||
self.assertEqual(p.returncode, 0,
|
p = subprocess.Popen(cmd,
|
||||||
"bad returncode %d, stderr is %r" %
|
stdout=subprocess.PIPE,
|
||||||
(p.returncode, err))
|
stderr=subprocess.PIPE)
|
||||||
if support.verbose:
|
(out, err) = p.communicate()
|
||||||
print()
|
self.assertEqual(p.returncode, 0,
|
||||||
print(out.decode('latin1'))
|
"bad returncode %d, stderr is %r" %
|
||||||
print(err.decode('latin1'))
|
(p.returncode, err))
|
||||||
finally:
|
return out.decode("latin1"), err.decode("latin1")
|
||||||
os.chdir(oldcwd)
|
|
||||||
|
def test_subinterps(self):
|
||||||
|
# This is just a "don't crash" test
|
||||||
|
out, err = self.run_embedded_interpreter()
|
||||||
|
if support.verbose:
|
||||||
|
print()
|
||||||
|
print(out)
|
||||||
|
print(err)
|
||||||
|
|
||||||
|
def test_forced_io_encoding(self):
|
||||||
|
# Checks forced configuration of embedded interpreter IO streams
|
||||||
|
out, err = self.run_embedded_interpreter("forced_io_encoding")
|
||||||
|
if support.verbose:
|
||||||
|
print()
|
||||||
|
print(out)
|
||||||
|
print(err)
|
||||||
|
expected_output = textwrap.dedent("""\
|
||||||
|
--- Use defaults ---
|
||||||
|
Expected encoding: default
|
||||||
|
Expected errors: default
|
||||||
|
stdin: {0.stdin.encoding}:strict
|
||||||
|
stdout: {0.stdout.encoding}:strict
|
||||||
|
stderr: {0.stderr.encoding}:backslashreplace
|
||||||
|
--- Set errors only ---
|
||||||
|
Expected encoding: default
|
||||||
|
Expected errors: surrogateescape
|
||||||
|
stdin: {0.stdin.encoding}:surrogateescape
|
||||||
|
stdout: {0.stdout.encoding}:surrogateescape
|
||||||
|
stderr: {0.stderr.encoding}:backslashreplace
|
||||||
|
--- Set encoding only ---
|
||||||
|
Expected encoding: latin-1
|
||||||
|
Expected errors: default
|
||||||
|
stdin: latin-1:strict
|
||||||
|
stdout: latin-1:strict
|
||||||
|
stderr: latin-1:backslashreplace
|
||||||
|
--- Set encoding and errors ---
|
||||||
|
Expected encoding: latin-1
|
||||||
|
Expected errors: surrogateescape
|
||||||
|
stdin: latin-1:surrogateescape
|
||||||
|
stdout: latin-1:surrogateescape
|
||||||
|
stderr: latin-1:backslashreplace""").format(sys)
|
||||||
|
|
||||||
|
self.assertEqual(out.strip(), expected_output)
|
||||||
|
|
||||||
class SkipitemTest(unittest.TestCase):
|
class SkipitemTest(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -358,7 +404,7 @@ class TestThreadState(unittest.TestCase):
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
support.run_unittest(CAPITest, TestPendingCalls, Test6012,
|
support.run_unittest(CAPITest, TestPendingCalls, Test6012,
|
||||||
EmbeddingTest, SkipitemTest, TestThreadState,
|
EmbeddingTests, SkipitemTest, TestThreadState,
|
||||||
SubinterpreterTest)
|
SubinterpreterTest)
|
||||||
|
|
||||||
for name in dir(_testcapi):
|
for name in dir(_testcapi):
|
||||||
|
|
|
@ -872,6 +872,7 @@ Zubin Mithra
|
||||||
Florian Mladitsch
|
Florian Mladitsch
|
||||||
Doug Moen
|
Doug Moen
|
||||||
The Dragon De Monsyne
|
The Dragon De Monsyne
|
||||||
|
Bastien Montagne
|
||||||
Skip Montanaro
|
Skip Montanaro
|
||||||
Peter Moody
|
Peter Moody
|
||||||
Paul Moore
|
Paul Moore
|
||||||
|
|
|
@ -120,6 +120,14 @@ Library
|
||||||
- Issue #4366: Fix building extensions on all platforms when --enable-shared
|
- Issue #4366: Fix building extensions on all platforms when --enable-shared
|
||||||
is used.
|
is used.
|
||||||
|
|
||||||
|
C API
|
||||||
|
-----
|
||||||
|
|
||||||
|
- Issue #16129: Added a `Py_SetStandardStreamEncoding` pre-initialization API
|
||||||
|
to allow embedding applications like Blender to force a particular
|
||||||
|
encoding and error handler for the standard IO streams (initial patch by
|
||||||
|
Bastien Montagne).
|
||||||
|
|
||||||
Tests
|
Tests
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,26 @@
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
void print_subinterp(void)
|
/*********************************************************
|
||||||
|
* Embedded interpreter tests that need a custom exe
|
||||||
|
*
|
||||||
|
* Executed via 'EmbeddingTests' in Lib/test/test_capi.py
|
||||||
|
*********************************************************/
|
||||||
|
|
||||||
|
static void _testembed_Py_Initialize(void)
|
||||||
|
{
|
||||||
|
/* HACK: the "./" at front avoids a search along the PATH in
|
||||||
|
Modules/getpath.c */
|
||||||
|
Py_SetProgramName(L"./_testembed");
|
||||||
|
Py_Initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*****************************************************
|
||||||
|
* Test repeated initialisation and subinterpreters
|
||||||
|
*****************************************************/
|
||||||
|
|
||||||
|
static void print_subinterp(void)
|
||||||
{
|
{
|
||||||
/* Just output some debug stuff */
|
/* Just output some debug stuff */
|
||||||
PyThreadState *ts = PyThreadState_Get();
|
PyThreadState *ts = PyThreadState_Get();
|
||||||
|
@ -14,7 +33,7 @@ void print_subinterp(void)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
static void test_repeated_init_and_subinterpreters(void)
|
||||||
{
|
{
|
||||||
PyThreadState *mainstate, *substate;
|
PyThreadState *mainstate, *substate;
|
||||||
#ifdef WITH_THREAD
|
#ifdef WITH_THREAD
|
||||||
|
@ -24,10 +43,7 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
for (i=0; i<3; i++) {
|
for (i=0; i<3; i++) {
|
||||||
printf("--- Pass %d ---\n", i);
|
printf("--- Pass %d ---\n", i);
|
||||||
/* HACK: the "./" at front avoids a search along the PATH in
|
_testembed_Py_Initialize();
|
||||||
Modules/getpath.c */
|
|
||||||
Py_SetProgramName(L"./_testembed");
|
|
||||||
Py_Initialize();
|
|
||||||
mainstate = PyThreadState_Get();
|
mainstate = PyThreadState_Get();
|
||||||
|
|
||||||
#ifdef WITH_THREAD
|
#ifdef WITH_THREAD
|
||||||
|
@ -54,5 +70,71 @@ int main(int argc, char *argv[])
|
||||||
PyEval_RestoreThread(mainstate);
|
PyEval_RestoreThread(mainstate);
|
||||||
Py_Finalize();
|
Py_Finalize();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*****************************************************
|
||||||
|
* Test forcing a particular IO encoding
|
||||||
|
*****************************************************/
|
||||||
|
|
||||||
|
static void check_stdio_details(const char *encoding, const char * errors)
|
||||||
|
{
|
||||||
|
/* Output info for the test case to check */
|
||||||
|
if (encoding) {
|
||||||
|
printf("Expected encoding: %s\n", encoding);
|
||||||
|
} else {
|
||||||
|
printf("Expected encoding: default\n");
|
||||||
|
}
|
||||||
|
if (errors) {
|
||||||
|
printf("Expected errors: %s\n", errors);
|
||||||
|
} else {
|
||||||
|
printf("Expected errors: default\n");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
|
/* Force the given IO encoding */
|
||||||
|
Py_SetStandardStreamEncoding(encoding, errors);
|
||||||
|
_testembed_Py_Initialize();
|
||||||
|
PyRun_SimpleString(
|
||||||
|
"import sys;"
|
||||||
|
"print('stdin: {0.encoding}:{0.errors}'.format(sys.stdin));"
|
||||||
|
"print('stdout: {0.encoding}:{0.errors}'.format(sys.stdout));"
|
||||||
|
"print('stderr: {0.encoding}:{0.errors}'.format(sys.stderr));"
|
||||||
|
"sys.stdout.flush()"
|
||||||
|
);
|
||||||
|
Py_Finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_forced_io_encoding(void)
|
||||||
|
{
|
||||||
|
/* Check various combinations */
|
||||||
|
printf("--- Use defaults ---\n");
|
||||||
|
check_stdio_details(NULL, NULL);
|
||||||
|
printf("--- Set errors only ---\n");
|
||||||
|
check_stdio_details(NULL, "surrogateescape");
|
||||||
|
printf("--- Set encoding only ---\n");
|
||||||
|
check_stdio_details("latin-1", NULL);
|
||||||
|
printf("--- Set encoding and errors ---\n");
|
||||||
|
check_stdio_details("latin-1", "surrogateescape");
|
||||||
|
|
||||||
|
/* Check calling after initialization fails */
|
||||||
|
Py_Initialize();
|
||||||
|
|
||||||
|
if (Py_SetStandardStreamEncoding(NULL, NULL) == 0) {
|
||||||
|
printf("Unexpected success calling Py_SetStandardStreamEncoding");
|
||||||
|
}
|
||||||
|
Py_Finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Different embedding tests */
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
|
||||||
|
/* TODO: Check the argument string to allow for more test cases */
|
||||||
|
if (argc > 1) {
|
||||||
|
/* For now: assume "forced_io_encoding" */
|
||||||
|
test_forced_io_encoding();
|
||||||
|
} else {
|
||||||
|
/* Run the original embedding test case by default */
|
||||||
|
test_repeated_init_and_subinterpreters();
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,6 +134,40 @@ Py_IsInitialized(void)
|
||||||
return initialized;
|
return initialized;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper to allow an embedding application to override the normal
|
||||||
|
* mechanism that attempts to figure out an appropriate IO encoding
|
||||||
|
*/
|
||||||
|
|
||||||
|
static char *_Py_StandardStreamEncoding = NULL;
|
||||||
|
static char *_Py_StandardStreamErrors = NULL;
|
||||||
|
|
||||||
|
int
|
||||||
|
Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
|
||||||
|
{
|
||||||
|
if (Py_IsInitialized()) {
|
||||||
|
/* This is too late to have any effect */
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (encoding) {
|
||||||
|
_Py_StandardStreamEncoding = _PyMem_RawStrdup(encoding);
|
||||||
|
if (!_Py_StandardStreamEncoding) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (errors) {
|
||||||
|
_Py_StandardStreamErrors = _PyMem_RawStrdup(errors);
|
||||||
|
if (!_Py_StandardStreamErrors) {
|
||||||
|
if (_Py_StandardStreamEncoding) {
|
||||||
|
PyMem_RawFree(_Py_StandardStreamEncoding);
|
||||||
|
}
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Global initializations. Can be undone by Py_Finalize(). Don't
|
/* Global initializations. Can be undone by Py_Finalize(). Don't
|
||||||
call this twice without an intervening Py_Finalize() call. When
|
call this twice without an intervening Py_Finalize() call. When
|
||||||
initializations fail, a fatal error is issued and the function does
|
initializations fail, a fatal error is issued and the function does
|
||||||
|
@ -1088,23 +1122,29 @@ initstdio(void)
|
||||||
}
|
}
|
||||||
Py_DECREF(wrapper);
|
Py_DECREF(wrapper);
|
||||||
|
|
||||||
pythonioencoding = Py_GETENV("PYTHONIOENCODING");
|
encoding = _Py_StandardStreamEncoding;
|
||||||
encoding = errors = NULL;
|
errors = _Py_StandardStreamErrors;
|
||||||
if (pythonioencoding) {
|
if (!encoding || !errors) {
|
||||||
pythonioencoding = _PyMem_Strdup(pythonioencoding);
|
pythonioencoding = Py_GETENV("PYTHONIOENCODING");
|
||||||
if (pythonioencoding == NULL) {
|
if (pythonioencoding) {
|
||||||
PyErr_NoMemory();
|
char *err;
|
||||||
goto error;
|
pythonioencoding = _PyMem_Strdup(pythonioencoding);
|
||||||
|
if (pythonioencoding == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
err = strchr(pythonioencoding, ':');
|
||||||
|
if (err) {
|
||||||
|
*err = '\0';
|
||||||
|
err++;
|
||||||
|
if (*err && !errors) {
|
||||||
|
errors = err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (*pythonioencoding && !encoding) {
|
||||||
|
encoding = pythonioencoding;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
errors = strchr(pythonioencoding, ':');
|
|
||||||
if (errors) {
|
|
||||||
*errors = '\0';
|
|
||||||
errors++;
|
|
||||||
if (!*errors)
|
|
||||||
errors = NULL;
|
|
||||||
}
|
|
||||||
if (*pythonioencoding)
|
|
||||||
encoding = pythonioencoding;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set sys.stdin */
|
/* Set sys.stdin */
|
||||||
|
@ -1184,6 +1224,15 @@ initstdio(void)
|
||||||
status = -1;
|
status = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We won't need them anymore. */
|
||||||
|
if (_Py_StandardStreamEncoding) {
|
||||||
|
PyMem_RawFree(_Py_StandardStreamEncoding);
|
||||||
|
_Py_StandardStreamEncoding = NULL;
|
||||||
|
}
|
||||||
|
if (_Py_StandardStreamErrors) {
|
||||||
|
PyMem_RawFree(_Py_StandardStreamErrors);
|
||||||
|
_Py_StandardStreamErrors = NULL;
|
||||||
|
}
|
||||||
PyMem_Free(pythonioencoding);
|
PyMem_Free(pythonioencoding);
|
||||||
Py_XDECREF(bimod);
|
Py_XDECREF(bimod);
|
||||||
Py_XDECREF(iomod);
|
Py_XDECREF(iomod);
|
||||||
|
|
Loading…
Reference in New Issue