bpo-20891: Fix PyGILState_Ensure() (#4650) (#4655)

When PyGILState_Ensure() is called in a non-Python thread before PyEval_InitThreads(), only call PyEval_InitThreads() after calling PyThreadState_New() to fix a crash. Add an unit test in test_embed. Enhance also embedded tests, backport from master: * Add test_pre_initialization_api() * Set PYTHONIOENCODING environment variable in test_forced_io_encoding() (cherry picked from commit b4d1e1f7c1)
2017-11-30 23:36:49 +01:00 · 2017-11-30 23:36:49 +01:00 · e10c9de9d7
parent 29cb50ba34
commit e10c9de9d7
4 changed files with 179 additions and 31 deletions
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@ -401,23 +401,30 @@ class EmbeddingTests(unittest.TestCase):
    def tearDown(self):
        os.chdir(self.oldcwd)

-    def run_embedded_interpreter(self, *args):
+    def run_embedded_interpreter(self, *args, env=None):
        """Runs a test in the embedded interpreter"""
        cmd = [self.test_exe]
        cmd.extend(args)
+        if env is not None and sys.platform == 'win32':
+            # Windows requires at least the SYSTEMROOT environment variable to
+            # start Python.
+            env = env.copy()
+            env['SYSTEMROOT'] = os.environ['SYSTEMROOT']
+
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
-                             universal_newlines=True)
+                             universal_newlines=True,
+                             env=env)
        (out, err) = p.communicate()
        self.assertEqual(p.returncode, 0,
                         "bad returncode %d, stderr is %r" %
                         (p.returncode, err))
        return out, err

-    def test_subinterps(self):
+    def test_repeated_init_and_subinterpreters(self):
        # This is just a "don't crash" test
-        out, err = self.run_embedded_interpreter()
+        out, err = self.run_embedded_interpreter('repeated_init_and_subinterpreters')
        if support.verbose:
            print()
            print(out)
@ -435,13 +442,14 @@ class EmbeddingTests(unittest.TestCase):

    def test_forced_io_encoding(self):
        # Checks forced configuration of embedded interpreter IO streams
-        out, err = self.run_embedded_interpreter("forced_io_encoding")
+        env = dict(os.environ, PYTHONIOENCODING="utf-8:surrogateescape")
+        out, err = self.run_embedded_interpreter("forced_io_encoding", env=env)
        if support.verbose:
            print()
            print(out)
            print(err)
-        expected_errors = sys.__stdout__.errors
-        expected_stdin_encoding = sys.__stdin__.encoding
+        expected_stream_encoding = "utf-8"
+        expected_errors = "surrogateescape"
        expected_pipe_encoding = self._get_default_pipe_encoding()
        expected_output = '\n'.join([
        "--- Use defaults ---",
@ -469,13 +477,33 @@ class EmbeddingTests(unittest.TestCase):
        "stdout: latin-1:replace",
        "stderr: latin-1:backslashreplace"])
        expected_output = expected_output.format(
-                                in_encoding=expected_stdin_encoding,
-                                out_encoding=expected_pipe_encoding,
+                                in_encoding=expected_stream_encoding,
+                                out_encoding=expected_stream_encoding,
                                errors=expected_errors)
        # This is useful if we ever trip over odd platform behaviour
        self.maxDiff = None
        self.assertEqual(out.strip(), expected_output)

+    def test_pre_initialization_api(self):
+        """
+        Checks the few parts of the C-API that work before the runtine
+        is initialized (via Py_Initialize()).
+        """
+        env = dict(os.environ, PYTHONPATH=os.pathsep.join(sys.path))
+        out, err = self.run_embedded_interpreter("pre_initialization_api", env=env)
+        self.assertEqual(out, '')
+        self.assertEqual(err, '')
+
+    def test_bpo20891(self):
+        """
+        bpo-20891: Calling PyGILState_Ensure in a non-Python thread before
+        calling PyEval_InitThreads() must not crash. PyGILState_Ensure() must
+        call PyEval_InitThreads() for us in this case.
+        """
+        out, err = self.run_embedded_interpreter("bpo20891")
+        self.assertEqual(out, '')
+        self.assertEqual(err, '')
+

 class SkipitemTest(unittest.TestCase):

--- a/API/2017-11-30-18-13-45.bpo-20891.wBnMdF.rst
+++ b/API/2017-11-30-18-13-45.bpo-20891.wBnMdF.rst
@ -0,0 +1,3 @@
+Fix PyGILState_Ensure(). When PyGILState_Ensure() is called in a non-Python
+thread before PyEval_InitThreads(), only call PyEval_InitThreads() after
+calling PyThreadState_New() to fix a crash.
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@ -1,4 +1,5 @@
 #include <Python.h>
+#include "pythread.h"
 #include <stdio.h>

 /*********************************************************
@ -33,7 +34,7 @@ static void print_subinterp(void)
    );
 }

-static void test_repeated_init_and_subinterpreters(void)
+static int test_repeated_init_and_subinterpreters(void)
 {
    PyThreadState *mainstate, *substate;
 #ifdef WITH_THREAD
@ -70,6 +71,7 @@ static void test_repeated_init_and_subinterpreters(void)
        PyEval_RestoreThread(mainstate);
        Py_Finalize();
    }
+    return 0;
 }

 /*****************************************************
@ -103,7 +105,7 @@ static void check_stdio_details(const char *encoding, const char * errors)
    Py_Finalize();
 }

-static void test_forced_io_encoding(void)
+static int test_forced_io_encoding(void)
 {
    /* Check various combinations */
    printf("--- Use defaults ---\n");
@ -122,19 +124,123 @@ static void test_forced_io_encoding(void)
        printf("Unexpected success calling Py_SetStandardStreamEncoding");
    }
    Py_Finalize();
-}
-
-/* Different embedding tests */
-int main(int argc, char *argv[])
-{
-
-    /* TODO: Check the argument string to allow for more test cases */
-    if (argc > 1) {
-        /* For now: assume "forced_io_encoding */
-        test_forced_io_encoding();
-    } else {
-        /* Run the original embedding test case by default */
-        test_repeated_init_and_subinterpreters();
-    }
    return 0;
 }
+
+
+/*********************************************************
+ * Test parts of the C-API that work before initialization
+ *********************************************************/
+
+static int test_pre_initialization_api(void)
+{
+    /* Leading "./" ensures getpath.c can still find the standard library */
+    wchar_t *program = Py_DecodeLocale("./spam", NULL);
+    if (program == NULL) {
+        fprintf(stderr, "Fatal error: cannot decode program name\n");
+        return 1;
+    }
+    Py_SetProgramName(program);
+
+    Py_Initialize();
+    Py_Finalize();
+
+    PyMem_RawFree(program);
+    return 0;
+}
+
+static void bpo20891_thread(void *lockp)
+{
+    PyThread_type_lock lock = *((PyThread_type_lock*)lockp);
+
+    PyGILState_STATE state = PyGILState_Ensure();
+    if (!PyGILState_Check()) {
+        fprintf(stderr, "PyGILState_Check failed!");
+        abort();
+    }
+
+    PyGILState_Release(state);
+
+    PyThread_release_lock(lock);
+
+    PyThread_exit_thread();
+}
+
+static int test_bpo20891(void)
+{
+    /* bpo-20891: Calling PyGILState_Ensure in a non-Python thread before
+       calling PyEval_InitThreads() must not crash. PyGILState_Ensure() must
+       call PyEval_InitThreads() for us in this case. */
+    PyThread_type_lock lock = PyThread_allocate_lock();
+    if (!lock) {
+        fprintf(stderr, "PyThread_allocate_lock failed!");
+        return 1;
+    }
+
+    _testembed_Py_Initialize();
+
+    long thrd = PyThread_start_new_thread(bpo20891_thread, &lock);
+    if (thrd == -1) {
+        fprintf(stderr, "PyThread_start_new_thread failed!");
+        return 1;
+    }
+    PyThread_acquire_lock(lock, WAIT_LOCK);
+
+    Py_BEGIN_ALLOW_THREADS
+    /* wait until the thread exit */
+    PyThread_acquire_lock(lock, WAIT_LOCK);
+    Py_END_ALLOW_THREADS
+
+    PyThread_free_lock(lock);
+
+    return 0;
+}
+
+
+/* *********************************************************
+ * List of test cases and the function that implements it.
+ *
+ * Names are compared case-sensitively with the first
+ * argument. If no match is found, or no first argument was
+ * provided, the names of all test cases are printed and
+ * the exit code will be -1.
+ *
+ * The int returned from test functions is used as the exit
+ * code, and test_capi treats all non-zero exit codes as a
+ * failed test.
+ *********************************************************/
+struct TestCase
+{
+    const char *name;
+    int (*func)(void);
+};
+
+static struct TestCase TestCases[] = {
+    { "forced_io_encoding", test_forced_io_encoding },
+    { "repeated_init_and_subinterpreters", test_repeated_init_and_subinterpreters },
+    { "pre_initialization_api", test_pre_initialization_api },
+    { "bpo20891", test_bpo20891 },
+    { NULL, NULL }
+};
+
+int main(int argc, char *argv[])
+{
+    if (argc > 1) {
+        for (struct TestCase *tc = TestCases; tc && tc->name; tc++) {
+            if (strcmp(argv[1], tc->name) == 0)
+                return (*tc->func)();
+        }
+    }
+
+    /* No match found, or no test name provided, so display usage */
+    printf("Python " PY_VERSION " _testembed executable for embedded interpreter tests\n"
+           "Normally executed via 'EmbeddingTests' in Lib/test/test_capi.py\n\n"
+           "Usage: %s TESTNAME\n\nAll available tests:\n", argv[0]);
+    for (struct TestCase *tc = TestCases; tc && tc->name; tc++) {
+        printf("  %s\n", tc->name);
+    }
+
+    /* Non-zero exit code will cause test_capi.py tests to fail.
+       This is intentional. */
+    return -1;
+}
--- a/Python/pystate.c
+++ b/Python/pystate.c
@ -865,6 +865,8 @@ PyGILState_Ensure(void)
 {
    int current;
    PyThreadState *tcur;
+    int need_init_threads = 0;
+
    /* Note that we do not auto-init Python here - apart from
       potential races with 2 threads auto-initializing, pep-311
       spells out other issues.  Embedders are expected to have
@ -873,10 +875,7 @@ PyGILState_Ensure(void)
    assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */
    tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey);
    if (tcur == NULL) {
-        /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
-           called from a new thread for the first time, we need the create the
-           GIL. */
-        PyEval_InitThreads();
+        need_init_threads = 1;

        /* Create a new thread state for this thread */
        tcur = PyThreadState_New(autoInterpreterState);
@ -887,16 +886,28 @@ PyGILState_Ensure(void)
        tcur->gilstate_counter = 0;
        current = 0; /* new thread state is never current */
    }
-    else
+    else {
        current = PyThreadState_IsCurrent(tcur);
-    if (current == 0)
+    }
+
+    if (current == 0) {
        PyEval_RestoreThread(tcur);
+    }
+
    /* Update our counter in the thread-state - no need for locks:
       - tcur will remain valid as we hold the GIL.
       - the counter is safe as we are the only thread "allowed"
         to modify this value
    */
    ++tcur->gilstate_counter;
+
+    if (need_init_threads) {
+        /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
+           called from a new thread for the first time, we need the create the
+           GIL. */
+        PyEval_InitThreads();
+    }
+
    return current ? PyGILState_LOCKED : PyGILState_UNLOCKED;
 }