From be0c106789322273f1f76d232c768c09880a14bd Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Sun, 21 May 2023 03:12:24 -0700 Subject: [PATCH] gh-103295: expose API for writing perf map files (#103546) Co-authored-by: Aniket Panse Co-authored-by: Gregory P. Smith Co-authored-by: Carl Meyer --- Doc/c-api/perfmaps.rst | 50 +++++++++++ Doc/c-api/utilities.rst | 1 + Doc/howto/perf_profiling.rst | 4 +- Include/sysmodule.h | 13 +++ Lib/test/test_perfmaps.py | 19 ++++ ...-04-14-23-05-52.gh-issue-103295.GRHY1Z.rst | 5 ++ Modules/_testinternalcapi.c | 27 ++++++ Python/perf_trampoline.c | 86 ++++--------------- Python/pylifecycle.c | 1 + Python/sysmodule.c | 80 ++++++++++++++++- Tools/c-analyzer/cpython/ignored.tsv | 1 + 11 files changed, 214 insertions(+), 73 deletions(-) create mode 100644 Doc/c-api/perfmaps.rst create mode 100644 Lib/test/test_perfmaps.py create mode 100644 Misc/NEWS.d/next/C API/2023-04-14-23-05-52.gh-issue-103295.GRHY1Z.rst diff --git a/Doc/c-api/perfmaps.rst b/Doc/c-api/perfmaps.rst new file mode 100644 index 00000000000..3d44d2eb6bf --- /dev/null +++ b/Doc/c-api/perfmaps.rst @@ -0,0 +1,50 @@ +.. highlight:: c + +.. _perfmaps: + +Support for Perf Maps +---------------------- + +On supported platforms (as of this writing, only Linux), the runtime can take +advantage of *perf map files* to make Python functions visible to an external +profiling tool (such as `perf `_). +A running process may create a file in the ``/tmp`` directory, which contains entries +that can map a section of executable code to a name. This interface is described in the +`documentation of the Linux Perf tool `_. + +In Python, these helper APIs can be used by libraries and features that rely +on generating machine code on the fly. + +Note that holding the Global Interpreter Lock (GIL) is not required for these APIs. + +.. c:function:: int PyUnstable_PerfMapState_Init(void) + + Open the ``/tmp/perf-$pid.map`` file, unless it's already opened, and create + a lock to ensure thread-safe writes to the file (provided the writes are + done through :c:func:`PyUnstable_WritePerfMapEntry`). Normally, there's no need + to call this explicitly; just use :c:func:`PyUnstable_WritePerfMapEntry` + and it will initialize the state on first call. + + Returns ``0`` on success, ``-1`` on failure to create/open the perf map file, + or ``-2`` on failure to create a lock. Check ``errno`` for more information + about the cause of a failure. + +.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name) + + Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is + thread safe. Here is what an example entry looks like:: + + # address size name + 7f3529fcf759 b py::bar:/run/t.py + + Will call :c:func:`PyUnstable_PerfMapState_Init` before writing the entry, if + the perf map file is not already opened. Returns ``0`` on success, or the + same error codes as :c:func:`PyUnstable_PerfMapState_Init` on failure. + +.. c:function:: void PyUnstable_PerfMapState_Fini(void) + + Close the perf map file opened by :c:func:`PyUnstable_PerfMapState_Init`. + This is called by the runtime itself during interpreter shut-down. In + general, there shouldn't be a reason to explicitly call this, except to + handle specific scenarios such as forking. diff --git a/Doc/c-api/utilities.rst b/Doc/c-api/utilities.rst index a805b564763..ccbf14e1850 100644 --- a/Doc/c-api/utilities.rst +++ b/Doc/c-api/utilities.rst @@ -19,3 +19,4 @@ and parsing function arguments and constructing Python values from C values. conversion.rst reflection.rst codec.rst + perfmaps.rst diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 6af5536166f..61812c19ae6 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -24,7 +24,7 @@ functions to appear in the output of the ``perf`` profiler. When this mode is enabled, the interpreter will interpose a small piece of code compiled on the fly before the execution of every Python function and it will teach ``perf`` the relationship between this piece of code and the associated Python function using -`perf map files`_. +:doc:`perf map files <../c-api/perfmaps>`. .. note:: @@ -206,5 +206,3 @@ You can check if your system has been compiled with this flag by running:: If you don't see any output it means that your interpreter has not been compiled with frame pointers and therefore it may not be able to show Python functions in the output of ``perf``. - -.. _perf map files: https://github.com/torvalds/linux/blob/0513e464f9007b70b96740271a948ca5ab6e7dd7/tools/perf/Documentation/jit-interface.txt diff --git a/Include/sysmodule.h b/Include/sysmodule.h index b5087119b1c..96f883870b3 100644 --- a/Include/sysmodule.h +++ b/Include/sysmodule.h @@ -29,6 +29,19 @@ Py_DEPRECATED(3.11) PyAPI_FUNC(int) PySys_HasWarnOptions(void); Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_AddXOption(const wchar_t *); PyAPI_FUNC(PyObject *) PySys_GetXOptions(void); +#if !defined(Py_LIMITED_API) +typedef struct { + FILE* perf_map; + PyThread_type_lock map_lock; +} PerfMapState; + +PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void); + +PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name); + +PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); +#endif + #ifndef Py_LIMITED_API # define Py_CPYTHON_SYSMODULE_H # include "cpython/sysmodule.h" diff --git a/Lib/test/test_perfmaps.py b/Lib/test/test_perfmaps.py new file mode 100644 index 00000000000..a17adb89f55 --- /dev/null +++ b/Lib/test/test_perfmaps.py @@ -0,0 +1,19 @@ +import os +import sys +import unittest + +from _testinternalcapi import perf_map_state_teardown, write_perf_map_entry + +if sys.platform != 'linux': + raise unittest.SkipTest('Linux only') + + +class TestPerfMapWriting(unittest.TestCase): + def test_write_perf_map_entry(self): + self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0) + self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0) + with open(f"/tmp/perf-{os.getpid()}.map") as f: + perf_file_contents = f.read() + self.assertIn("1234 162e entry1", perf_file_contents) + self.assertIn("2345 1a85 entry2", perf_file_contents) + perf_map_state_teardown() diff --git a/Misc/NEWS.d/next/C API/2023-04-14-23-05-52.gh-issue-103295.GRHY1Z.rst b/Misc/NEWS.d/next/C API/2023-04-14-23-05-52.gh-issue-103295.GRHY1Z.rst new file mode 100644 index 00000000000..88222239858 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-04-14-23-05-52.gh-issue-103295.GRHY1Z.rst @@ -0,0 +1,5 @@ +Introduced :c:func:`PyUnstable_WritePerfMapEntry`, :c:func:`PyUnstable_PerfMapState_Init` and +:c:func:`PyUnstable_PerfMapState_Fini`. These allow extension modules (JIT compilers in +particular) to write to perf-map files in a thread safe manner. The +:doc:`../howto/perf_profiling` also uses these APIs to write +entries in the perf-map file. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 3ee33239308..b91f7b620fd 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -759,6 +759,31 @@ clear_extension(PyObject *self, PyObject *args) Py_RETURN_NONE; } +static PyObject * +write_perf_map_entry(PyObject *self, PyObject *args) +{ + const void *code_addr; + unsigned int code_size; + const char *entry_name; + + if (!PyArg_ParseTuple(args, "KIs", &code_addr, &code_size, &entry_name)) + return NULL; + + int ret = PyUnstable_WritePerfMapEntry(code_addr, code_size, entry_name); + if (ret == -1) { + PyErr_SetString(PyExc_OSError, "Failed to write performance map entry"); + return NULL; + } + return Py_BuildValue("i", ret); +} + +static PyObject * +perf_map_state_teardown(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) +{ + PyUnstable_PerfMapState_Fini(); + Py_RETURN_NONE; +} + static PyObject * iframe_getcode(PyObject *self, PyObject *frame) { @@ -815,6 +840,8 @@ static PyMethodDef module_functions[] = { _TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF {"get_interp_settings", get_interp_settings, METH_VARARGS, NULL}, {"clear_extension", clear_extension, METH_VARARGS, NULL}, + {"write_perf_map_entry", write_perf_map_entry, METH_VARARGS}, + {"perf_map_state_teardown", perf_map_state_teardown, METH_NOARGS}, {"iframe_getcode", iframe_getcode, METH_O, NULL}, {"iframe_getline", iframe_getline, METH_O, NULL}, {"iframe_getlasti", iframe_getlasti, METH_O, NULL}, diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 3b183280e1f..6a6f223d095 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -193,75 +193,33 @@ typedef struct trampoline_api_st trampoline_api_t; #define trampoline_api _PyRuntime.ceval.perf.trampoline_api #define perf_map_file _PyRuntime.ceval.perf.map_file -static void * -perf_map_get_file(void) -{ - if (perf_map_file) { - return perf_map_file; - } - char filename[100]; - pid_t pid = getpid(); - // Location and file name of perf map is hard-coded in perf tool. - // Use exclusive create flag wit nofollow to prevent symlink attacks. - int flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW | O_CLOEXEC; - snprintf(filename, sizeof(filename) - 1, "/tmp/perf-%jd.map", - (intmax_t)pid); - int fd = open(filename, flags, 0600); - if (fd == -1) { - perf_status = PERF_STATUS_FAILED; - PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename); - return NULL; - } - perf_map_file = fdopen(fd, "w"); - if (!perf_map_file) { - perf_status = PERF_STATUS_FAILED; - PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename); - close(fd); - return NULL; - } - return perf_map_file; -} - -static int -perf_map_close(void *state) -{ - FILE *fp = (FILE *)state; - int ret = 0; - if (fp) { - ret = fclose(fp); - } - perf_map_file = NULL; - perf_status = PERF_STATUS_NO_INIT; - return ret; -} static void perf_map_write_entry(void *state, const void *code_addr, unsigned int code_size, PyCodeObject *co) { - assert(state != NULL); - FILE *method_file = (FILE *)state; - const char *entry = PyUnicode_AsUTF8(co->co_qualname); - if (entry == NULL) { - _PyErr_WriteUnraisableMsg("Failed to get qualname from code object", - NULL); + const char *entry = ""; + if (co->co_qualname != NULL) { + entry = PyUnicode_AsUTF8(co->co_qualname); + } + const char *filename = ""; + if (co->co_filename != NULL) { + filename = PyUnicode_AsUTF8(co->co_filename); + } + size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; + char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); + if (perf_map_entry == NULL) { return; } - const char *filename = PyUnicode_AsUTF8(co->co_filename); - if (filename == NULL) { - _PyErr_WriteUnraisableMsg("Failed to get filename from code object", - NULL); - return; - } - fprintf(method_file, "%" PRIxPTR " %x py::%s:%s\n", (uintptr_t) code_addr, code_size, entry, - filename); - fflush(method_file); + snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); + PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry); + PyMem_RawFree(perf_map_entry); } _PyPerf_Callbacks _Py_perfmap_callbacks = { - &perf_map_get_file, + NULL, &perf_map_write_entry, - &perf_map_close + NULL, }; static int @@ -465,13 +423,6 @@ _PyPerfTrampoline_Init(int activate) if (new_code_arena() < 0) { return -1; } - if (trampoline_api.state == NULL) { - void *state = trampoline_api.init_state(); - if (state == NULL) { - return -1; - } - trampoline_api.state = state; - } extra_code_index = _PyEval_RequestCodeExtraIndex(NULL); if (extra_code_index == -1) { return -1; @@ -491,10 +442,6 @@ _PyPerfTrampoline_Fini(void) tstate->interp->eval_frame = NULL; } free_code_arenas(); - if (trampoline_api.state != NULL) { - trampoline_api.free_state(trampoline_api.state); - trampoline_api.state = NULL; - } extra_code_index = -1; #endif return 0; @@ -507,6 +454,7 @@ _PyPerfTrampoline_AfterFork_Child(void) // Restart trampoline in file in child. int was_active = _PyIsPerfTrampolineActive(); _PyPerfTrampoline_Fini(); + PyUnstable_PerfMapState_Fini(); if (was_active) { _PyPerfTrampoline_Init(1); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 93ad1fe0980..06c43459624 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1775,6 +1775,7 @@ Py_FinalizeEx(void) */ _PyAtExit_Call(tstate->interp); + PyUnstable_PerfMapState_Fini(); /* Copy the core config, PyInterpreterState_Delete() free the core config memory */ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 7510be538ca..33147f012b6 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -52,6 +52,10 @@ extern const char *PyWin_DLLVersionString; #include #endif +#ifdef HAVE_FCNTL_H +#include +#endif + /*[clinic input] module sys [clinic start generated code]*/ @@ -2144,7 +2148,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend) if (strcmp(backend, "perf") == 0) { _PyPerf_Callbacks cur_cb; _PyPerfTrampoline_GetCallbacks(&cur_cb); - if (cur_cb.init_state != _Py_perfmap_callbacks.init_state) { + if (cur_cb.write_state != _Py_perfmap_callbacks.write_state) { if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_callbacks) < 0 ) { PyErr_SetString(PyExc_ValueError, "can't activate perf trampoline"); return NULL; @@ -2240,6 +2244,80 @@ sys__getframemodulename_impl(PyObject *module, int depth) } +#ifdef __cplusplus +extern "C" { +#endif + +static PerfMapState perf_map_state; + +PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void) { +#ifndef MS_WINDOWS + char filename[100]; + pid_t pid = getpid(); + // Use nofollow flag to prevent symlink attacks. + int flags = O_WRONLY | O_CREAT | O_APPEND | O_NOFOLLOW | O_CLOEXEC; + snprintf(filename, sizeof(filename) - 1, "/tmp/perf-%jd.map", + (intmax_t)pid); + int fd = open(filename, flags, 0600); + if (fd == -1) { + return -1; + } + else{ + perf_map_state.perf_map = fdopen(fd, "a"); + if (perf_map_state.perf_map == NULL) { + close(fd); + return -1; + } + } + perf_map_state.map_lock = PyThread_allocate_lock(); + if (perf_map_state.map_lock == NULL) { + fclose(perf_map_state.perf_map); + return -2; + } +#endif + return 0; +} + +PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( + const void *code_addr, + unsigned int code_size, + const char *entry_name +) { +#ifndef MS_WINDOWS + if (perf_map_state.perf_map == NULL) { + int ret = PyUnstable_PerfMapState_Init(); + if(ret != 0){ + return ret; + } + } + PyThread_acquire_lock(perf_map_state.map_lock, 1); + fprintf(perf_map_state.perf_map, "%" PRIxPTR " %x %s\n", (uintptr_t) code_addr, code_size, entry_name); + fflush(perf_map_state.perf_map); + PyThread_release_lock(perf_map_state.map_lock); +#endif + return 0; +} + +PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) { +#ifndef MS_WINDOWS + if (perf_map_state.perf_map != NULL) { + // close the file + PyThread_acquire_lock(perf_map_state.map_lock, 1); + fclose(perf_map_state.perf_map); + PyThread_release_lock(perf_map_state.map_lock); + + // clean up the lock and state + PyThread_free_lock(perf_map_state.map_lock); + perf_map_state.perf_map = NULL; + } +#endif +} + +#ifdef __cplusplus +} +#endif + + static PyMethodDef sys_methods[] = { /* Might as well keep this in alphabetic order */ SYS_ADDAUDITHOOK_METHODDEF diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 7ba116dcb17..e9056aed5dc 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -356,6 +356,7 @@ Python/pystate.c - initial - Python/specialize.c - adaptive_opcodes - Python/specialize.c - cache_requirements - Python/stdlib_module_names.h - _Py_stdlib_module_names - +Python/sysmodule.c - perf_map_state - Python/sysmodule.c - _PySys_ImplCacheTag - Python/sysmodule.c - _PySys_ImplName - Python/sysmodule.c - whatstrings -