Issue #16129: Add `Py_SetStandardStreamEncoding`

This new pre-initialization API allows embedding
applications like Blender to force a particular
encoding and error handler for the standard IO streams.

Also refactors Modules/_testembed.c to let us start
testing multiple embedding scenarios.

(Initial patch by Bastien Montagne)
This commit is contained in:
Nick Coghlan 2013-10-17 22:35:35 +10:00
parent 26f92680da
commit 7d270ee05d
8 changed files with 266 additions and 48 deletions

View File

@ -86,6 +86,33 @@ Process-wide parameters
======================= =======================
.. c:function:: int Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
.. index::
single: Py_Initialize()
single: main()
triple: stdin; stdout; stderr
This function should be called before :c:func:`Py_Initialize`. It
specifies which encoding and error handling to use with standard io,
with the same meanings as in :func:`str.encode`.
It overrides :envvar:`PYTHONIOENCODING` values, and allows embedding code
to control io encoding when the environment variable does not work.
``encoding`` and/or ``errors`` may be NULL to use
:envvar:`PYTHONIOENCODING` and/or default values (depending on other
settings).
Note that :data:`sys.stderr` always uses the "backslashreplace" error
handler, regardless of this (or any other) setting.
If :c:func:`Py_Finalize` is called, this function will need to be called
again in order to affect subsequent calls to :c:func:`Py_Initialize`.
Returns 0 if successful, a nonzero value on error (e.g. calling after the
interpreter has already been initialized).
.. c:function:: void Py_SetProgramName(wchar_t *name) .. c:function:: void Py_SetProgramName(wchar_t *name)
.. index:: .. index::

View File

@ -564,7 +564,10 @@ Build and C API Changes
Changes to Python's build process and to the C API include: Changes to Python's build process and to the C API include:
* None yet. * The new :c:func:`Py_SetStandardStreamEncoding` pre-initialization API
allows applications embedding the CPython interpreter to reliably force
a particular encoding and error handler for the standard streams
(Contributed by Bastien Montagne and Nick Coghlan in :issue:`16129`.)
Deprecated Deprecated

View File

@ -38,6 +38,8 @@ PyAPI_FUNC(int) Py_IsInitialized(void);
PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void); PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void);
PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *); PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *);
PyAPI_FUNC(int) Py_SetStandardStreamEncoding(const char *encoding, const char *errors);
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *); PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *);
PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *); PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *);

View File

@ -9,6 +9,7 @@ import subprocess
import sys import sys
import time import time
import unittest import unittest
import textwrap
from test import support from test import support
try: try:
import _posixsubprocess import _posixsubprocess
@ -218,36 +219,81 @@ class Test6012(unittest.TestCase):
self.assertEqual(_testcapi.argparsing("Hello", "World"), 1) self.assertEqual(_testcapi.argparsing("Hello", "World"), 1)
class EmbeddingTest(unittest.TestCase): @unittest.skipIf(
sys.platform.startswith('win'),
"interpreter embedding tests aren't built under Windows")
class EmbeddingTests(unittest.TestCase):
# XXX only tested under Unix checkouts
@unittest.skipIf( def setUp(self):
sys.platform.startswith('win'),
"test doesn't work under Windows")
def test_subinterps(self):
# XXX only tested under Unix checkouts
basepath = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) basepath = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
oldcwd = os.getcwd() self.test_exe = exe = os.path.join(basepath, "Modules", "_testembed")
if not os.path.exists(exe):
self.skipTest("%r doesn't exist" % exe)
# This is needed otherwise we get a fatal error: # This is needed otherwise we get a fatal error:
# "Py_Initialize: Unable to get the locale encoding # "Py_Initialize: Unable to get the locale encoding
# LookupError: no codec search functions registered: can't find encoding" # LookupError: no codec search functions registered: can't find encoding"
self.oldcwd = os.getcwd()
os.chdir(basepath) os.chdir(basepath)
try:
exe = os.path.join(basepath, "Modules", "_testembed") def tearDown(self):
if not os.path.exists(exe): os.chdir(self.oldcwd)
self.skipTest("%r doesn't exist" % exe)
p = subprocess.Popen([exe], def run_embedded_interpreter(self, *args):
stdout=subprocess.PIPE, """Runs a test in the embedded interpreter"""
stderr=subprocess.PIPE) cmd = [self.test_exe]
(out, err) = p.communicate() cmd.extend(args)
self.assertEqual(p.returncode, 0, p = subprocess.Popen(cmd,
"bad returncode %d, stderr is %r" % stdout=subprocess.PIPE,
(p.returncode, err)) stderr=subprocess.PIPE)
if support.verbose: (out, err) = p.communicate()
print() self.assertEqual(p.returncode, 0,
print(out.decode('latin1')) "bad returncode %d, stderr is %r" %
print(err.decode('latin1')) (p.returncode, err))
finally: return out.decode("latin1"), err.decode("latin1")
os.chdir(oldcwd)
def test_subinterps(self):
# This is just a "don't crash" test
out, err = self.run_embedded_interpreter()
if support.verbose:
print()
print(out)
print(err)
def test_forced_io_encoding(self):
# Checks forced configuration of embedded interpreter IO streams
out, err = self.run_embedded_interpreter("forced_io_encoding")
if support.verbose:
print()
print(out)
print(err)
expected_output = textwrap.dedent("""\
--- Use defaults ---
Expected encoding: default
Expected errors: default
stdin: {0.stdin.encoding}:strict
stdout: {0.stdout.encoding}:strict
stderr: {0.stderr.encoding}:backslashreplace
--- Set errors only ---
Expected encoding: default
Expected errors: surrogateescape
stdin: {0.stdin.encoding}:surrogateescape
stdout: {0.stdout.encoding}:surrogateescape
stderr: {0.stderr.encoding}:backslashreplace
--- Set encoding only ---
Expected encoding: latin-1
Expected errors: default
stdin: latin-1:strict
stdout: latin-1:strict
stderr: latin-1:backslashreplace
--- Set encoding and errors ---
Expected encoding: latin-1
Expected errors: surrogateescape
stdin: latin-1:surrogateescape
stdout: latin-1:surrogateescape
stderr: latin-1:backslashreplace""").format(sys)
self.assertEqual(out.strip(), expected_output)
class SkipitemTest(unittest.TestCase): class SkipitemTest(unittest.TestCase):
@ -358,7 +404,7 @@ class TestThreadState(unittest.TestCase):
def test_main(): def test_main():
support.run_unittest(CAPITest, TestPendingCalls, Test6012, support.run_unittest(CAPITest, TestPendingCalls, Test6012,
EmbeddingTest, SkipitemTest, TestThreadState, EmbeddingTests, SkipitemTest, TestThreadState,
SubinterpreterTest) SubinterpreterTest)
for name in dir(_testcapi): for name in dir(_testcapi):

View File

@ -872,6 +872,7 @@ Zubin Mithra
Florian Mladitsch Florian Mladitsch
Doug Moen Doug Moen
The Dragon De Monsyne The Dragon De Monsyne
Bastien Montagne
Skip Montanaro Skip Montanaro
Peter Moody Peter Moody
Paul Moore Paul Moore

View File

@ -120,6 +120,14 @@ Library
- Issue #4366: Fix building extensions on all platforms when --enable-shared - Issue #4366: Fix building extensions on all platforms when --enable-shared
is used. is used.
C API
-----
- Issue #16129: Added a `Py_SetStandardStreamEncoding` pre-initialization API
to allow embedding applications like Blender to force a particular
encoding and error handler for the standard IO streams (initial patch by
Bastien Montagne)
Tests Tests
----- -----

View File

@ -1,7 +1,26 @@
#include <Python.h> #include <Python.h>
#include <stdio.h> #include <stdio.h>
void print_subinterp(void) /*********************************************************
* Embedded interpreter tests that need a custom exe
*
* Executed via 'EmbeddingTests' in Lib/test/test_capi.py
*********************************************************/
/* Shared start-up helper for the embedding tests in this file: sets the
 * program name to "./_testembed" and then calls Py_Initialize().
 */
static void _testembed_Py_Initialize(void)
{
/* HACK: the "./" at front avoids a search along the PATH in
Modules/getpath.c */
Py_SetProgramName(L"./_testembed");
Py_Initialize();
}
/*****************************************************
* Test repeated initialisation and subinterpreters
*****************************************************/
static void print_subinterp(void)
{ {
/* Just output some debug stuff */ /* Just output some debug stuff */
PyThreadState *ts = PyThreadState_Get(); PyThreadState *ts = PyThreadState_Get();
@ -14,7 +33,7 @@ void print_subinterp(void)
); );
} }
int main(int argc, char *argv[]) static void test_repeated_init_and_subinterpreters(void)
{ {
PyThreadState *mainstate, *substate; PyThreadState *mainstate, *substate;
#ifdef WITH_THREAD #ifdef WITH_THREAD
@ -24,10 +43,7 @@ int main(int argc, char *argv[])
for (i=0; i<3; i++) { for (i=0; i<3; i++) {
printf("--- Pass %d ---\n", i); printf("--- Pass %d ---\n", i);
/* HACK: the "./" at front avoids a search along the PATH in _testembed_Py_Initialize();
Modules/getpath.c */
Py_SetProgramName(L"./_testembed");
Py_Initialize();
mainstate = PyThreadState_Get(); mainstate = PyThreadState_Get();
#ifdef WITH_THREAD #ifdef WITH_THREAD
@ -54,5 +70,71 @@ int main(int argc, char *argv[])
PyEval_RestoreThread(mainstate); PyEval_RestoreThread(mainstate);
Py_Finalize(); Py_Finalize();
} }
}
/*****************************************************
* Test forcing a particular IO encoding
*****************************************************/
/* Run one forced-IO-encoding scenario and print what the test case checks.
 *
 * First prints the expected encoding and error handler (a NULL argument is
 * reported as "default"), then forces the given settings with
 * Py_SetStandardStreamEncoding(), starts an interpreter, has it print the
 * encoding and error handler of sys.stdin, sys.stdout and sys.stderr, and
 * finalizes the interpreter again.
 *
 * If the settings cannot be applied, an extra diagnostic line is printed so
 * that the exact-output comparison in the Python test case fails visibly
 * instead of the failure being silently ignored.
 */
static void check_stdio_details(const char *encoding, const char * errors)
{
    /* Output info for the test case to check */
    printf("Expected encoding: %s\n", encoding ? encoding : "default");
    printf("Expected errors: %s\n", errors ? errors : "default");

    /* Force the given IO encoding */
    if (Py_SetStandardStreamEncoding(encoding, errors) != 0) {
        printf("Unexpected failure calling Py_SetStandardStreamEncoding\n");
    }

    /* Flush the C-level buffer before the interpreter starts writing to the
       same stream, so the lines above stay ahead of the interpreter's. */
    fflush(stdout);

    _testembed_Py_Initialize();
    PyRun_SimpleString(
        "import sys;"
        "print('stdin: {0.encoding}:{0.errors}'.format(sys.stdin));"
        "print('stdout: {0.encoding}:{0.errors}'.format(sys.stdout));"
        "print('stderr: {0.encoding}:{0.errors}'.format(sys.stderr));"
        "sys.stdout.flush()"
    );
    Py_Finalize();
}
/* Drive check_stdio_details() through every combination of forced and
 * defaulted encoding / error handler, then confirm that
 * Py_SetStandardStreamEncoding() reports failure once an interpreter is
 * already running.
 */
static void test_forced_io_encoding(void)
{
    /* One row per scenario: banner line, encoding, error handler */
    static const struct {
        const char *banner;
        const char *encoding;
        const char *errors;
    } scenarios[] = {
        {"--- Use defaults ---\n", NULL, NULL},
        {"--- Set errors only ---\n", NULL, "surrogateescape"},
        {"--- Set encoding only ---\n", "latin-1", NULL},
        {"--- Set encoding and errors ---\n", "latin-1", "surrogateescape"}
    };
    size_t idx;
    int late_result;

    for (idx = 0; idx < sizeof(scenarios) / sizeof(scenarios[0]); idx++) {
        fputs(scenarios[idx].banner, stdout);
        check_stdio_details(scenarios[idx].encoding, scenarios[idx].errors);
    }

    /* Check calling after initialization fails */
    Py_Initialize();
    late_result = Py_SetStandardStreamEncoding(NULL, NULL);
    if (late_result == 0) {
        printf("Unexpected success calling Py_SetStandardStreamEncoding");
    }
    Py_Finalize();
}
/* Different embedding tests */
int main(int argc, char *argv[])
{
/* TODO: Check the argument string to allow for more test cases */
if (argc > 1) {
/* For now: assume "forced_io_encoding" */
test_forced_io_encoding();
} else {
/* Run the original embedding test case by default */
test_repeated_init_and_subinterpreters();
}
return 0; return 0;
} }

View File

@ -134,6 +134,40 @@ Py_IsInitialized(void)
return initialized; return initialized;
} }
/* Helper to allow an embedding application to override the normal
* mechanism that attempts to figure out an appropriate IO encoding
*/
static char *_Py_StandardStreamEncoding = NULL;
static char *_Py_StandardStreamErrors = NULL;
/* Store the encoding and/or error handler that initstdio() should use for
 * the standard streams during the next interpreter initialization.
 *
 * encoding, errors: NUL-terminated strings, or NULL.  A non-NULL argument is
 * copied (the caller keeps ownership of its string) and replaces any value
 * stored by an earlier call.  A NULL argument leaves the corresponding
 * stored value as it is.
 *
 * Returns 0 on success.  Returns -1 if the interpreter is already
 * initialized or if a copy could not be allocated; in both cases the
 * stored values are left untouched.
 */
int
Py_SetStandardStreamEncoding(const char *encoding, const char *errors)
{
    char *new_encoding = NULL;
    char *new_errors = NULL;

    if (Py_IsInitialized()) {
        /* This is too late to have any effect */
        return -1;
    }

    /* From here on the interpreter is known NOT to be initialized, so
     * PyErr_NoMemory() must not be called: there is no thread state to
     * attach an exception to.  Failures are reported through the return
     * value only.
     *
     * Both copies are made before either global is touched, so a failed
     * allocation can never leave a global pointing at freed memory.
     */
    if (encoding) {
        new_encoding = _PyMem_RawStrdup(encoding);
        if (!new_encoding) {
            return -1;
        }
    }
    if (errors) {
        new_errors = _PyMem_RawStrdup(errors);
        if (!new_errors) {
            if (new_encoding) {
                PyMem_RawFree(new_encoding);
            }
            return -1;
        }
    }

    /* Commit: release values left over from an earlier call (which would
     * otherwise leak) and install the new copies.
     */
    if (new_encoding) {
        if (_Py_StandardStreamEncoding) {
            PyMem_RawFree(_Py_StandardStreamEncoding);
        }
        _Py_StandardStreamEncoding = new_encoding;
    }
    if (new_errors) {
        if (_Py_StandardStreamErrors) {
            PyMem_RawFree(_Py_StandardStreamErrors);
        }
        _Py_StandardStreamErrors = new_errors;
    }
    return 0;
}
/* Global initializations. Can be undone by Py_Finalize(). Don't /* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call. When call this twice without an intervening Py_Finalize() call. When
initializations fail, a fatal error is issued and the function does initializations fail, a fatal error is issued and the function does
@ -1088,23 +1122,29 @@ initstdio(void)
} }
Py_DECREF(wrapper); Py_DECREF(wrapper);
pythonioencoding = Py_GETENV("PYTHONIOENCODING"); encoding = _Py_StandardStreamEncoding;
encoding = errors = NULL; errors = _Py_StandardStreamErrors;
if (pythonioencoding) { if (!encoding || !errors) {
pythonioencoding = _PyMem_Strdup(pythonioencoding); pythonioencoding = Py_GETENV("PYTHONIOENCODING");
if (pythonioencoding == NULL) { if (pythonioencoding) {
PyErr_NoMemory(); char *err;
goto error; pythonioencoding = _PyMem_Strdup(pythonioencoding);
if (pythonioencoding == NULL) {
PyErr_NoMemory();
goto error;
}
err = strchr(pythonioencoding, ':');
if (err) {
*err = '\0';
err++;
if (*err && !errors) {
errors = err;
}
}
if (*pythonioencoding && !encoding) {
encoding = pythonioencoding;
}
} }
errors = strchr(pythonioencoding, ':');
if (errors) {
*errors = '\0';
errors++;
if (!*errors)
errors = NULL;
}
if (*pythonioencoding)
encoding = pythonioencoding;
} }
/* Set sys.stdin */ /* Set sys.stdin */
@ -1184,6 +1224,15 @@ initstdio(void)
status = -1; status = -1;
} }
/* We won't need them anymore. */
if (_Py_StandardStreamEncoding) {
PyMem_RawFree(_Py_StandardStreamEncoding);
_Py_StandardStreamEncoding = NULL;
}
if (_Py_StandardStreamErrors) {
PyMem_RawFree(_Py_StandardStreamErrors);
_Py_StandardStreamErrors = NULL;
}
PyMem_Free(pythonioencoding); PyMem_Free(pythonioencoding);
Py_XDECREF(bimod); Py_XDECREF(bimod);
Py_XDECREF(iomod); Py_XDECREF(iomod);