gh-103295: expose API for writing perf map files (#103546)

Co-authored-by: Aniket Panse <aniketpanse@fb.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Co-authored-by: Carl Meyer <carl@oddbird.net>
This commit is contained in:
gsallam 2023-05-21 03:12:24 -07:00 committed by GitHub
parent 2e91c7e626
commit be0c106789
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 214 additions and 73 deletions

50
Doc/c-api/perfmaps.rst Normal file
View File

@ -0,0 +1,50 @@
.. highlight:: c
.. _perfmaps:
Support for Perf Maps
----------------------
On supported platforms (as of this writing, only Linux), the runtime can take
advantage of *perf map files* to make Python functions visible to an external
profiling tool (such as `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_).
A running process may create a file in the ``/tmp`` directory, which contains entries
that can map a section of executable code to a name. This interface is described in the
`documentation of the Linux Perf tool <https://git.kernel.org/pub/scm/linux/
kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt>`_.
In Python, these helper APIs can be used by libraries and features that rely
on generating machine code on the fly.
Note that holding the Global Interpreter Lock (GIL) is not required for these APIs.
.. c:function:: int PyUnstable_PerfMapState_Init(void)
Open the ``/tmp/perf-$pid.map`` file, unless it's already opened, and create
a lock to ensure thread-safe writes to the file (provided the writes are
done through :c:func:`PyUnstable_WritePerfMapEntry`). Normally, there's no need
to call this explicitly; just use :c:func:`PyUnstable_WritePerfMapEntry`
and it will initialize the state on first call.
Returns ``0`` on success, ``-1`` on failure to create/open the perf map file,
or ``-2`` on failure to create a lock. Check ``errno`` for more information
about the cause of a failure.
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
thread safe. Here is what an example entry looks like::
# address size name
7f3529fcf759 b py::bar:/run/t.py
Will call :c:func:`PyUnstable_PerfMapState_Init` before writing the entry, if
the perf map file is not already opened. Returns ``0`` on success, or the
same error codes as :c:func:`PyUnstable_PerfMapState_Init` on failure.
.. c:function:: void PyUnstable_PerfMapState_Fini(void)
Close the perf map file opened by :c:func:`PyUnstable_PerfMapState_Init`.
This is called by the runtime itself during interpreter shut-down. In
general, there shouldn't be a reason to explicitly call this, except to
handle specific scenarios such as forking.

View File

@ -19,3 +19,4 @@ and parsing function arguments and constructing Python values from C values.
conversion.rst conversion.rst
reflection.rst reflection.rst
codec.rst codec.rst
perfmaps.rst

View File

@ -24,7 +24,7 @@ functions to appear in the output of the ``perf`` profiler. When this mode is
enabled, the interpreter will interpose a small piece of code compiled on the enabled, the interpreter will interpose a small piece of code compiled on the
fly before the execution of every Python function and it will teach ``perf`` the fly before the execution of every Python function and it will teach ``perf`` the
relationship between this piece of code and the associated Python function using relationship between this piece of code and the associated Python function using
`perf map files`_. :doc:`perf map files <../c-api/perfmaps>`.
.. note:: .. note::
@ -206,5 +206,3 @@ You can check if your system has been compiled with this flag by running::
If you don't see any output it means that your interpreter has not been compiled with If you don't see any output it means that your interpreter has not been compiled with
frame pointers and therefore it may not be able to show Python functions in the output frame pointers and therefore it may not be able to show Python functions in the output
of ``perf``. of ``perf``.
.. _perf map files: https://github.com/torvalds/linux/blob/0513e464f9007b70b96740271a948ca5ab6e7dd7/tools/perf/Documentation/jit-interface.txt

View File

@ -29,6 +29,19 @@ Py_DEPRECATED(3.11) PyAPI_FUNC(int) PySys_HasWarnOptions(void);
Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_AddXOption(const wchar_t *); Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_AddXOption(const wchar_t *);
PyAPI_FUNC(PyObject *) PySys_GetXOptions(void); PyAPI_FUNC(PyObject *) PySys_GetXOptions(void);
#if !defined(Py_LIMITED_API)
/* State behind the perf map writing API: the output stream for the
 * /tmp/perf-$pid.map file and the lock taken around writes to it. */
typedef struct {
FILE* perf_map;  /* stream for the perf map file; NULL while not opened */
PyThread_type_lock map_lock;  /* held while an entry is written or the file is closed */
} PerfMapState;
/* Open the perf map file and allocate the write lock.
 * Returns 0 on success, -1 if the file cannot be created/opened,
 * -2 if the lock cannot be allocated. */
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
/* Append one "<address> <size> <name>" line (address and size in hex) to the
 * perf map file, initializing the state first if the file is not open yet.
 * Returns 0 on success or the error code from PyUnstable_PerfMapState_Init(). */
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name);
/* Close the perf map file and free the write lock; no-op if the file is not open. */
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
#endif
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
# define Py_CPYTHON_SYSMODULE_H # define Py_CPYTHON_SYSMODULE_H
# include "cpython/sysmodule.h" # include "cpython/sysmodule.h"

19
Lib/test/test_perfmaps.py Normal file
View File

@ -0,0 +1,19 @@
import os
import sys
import unittest
from _testinternalcapi import perf_map_state_teardown, write_perf_map_entry
if sys.platform != 'linux':
raise unittest.SkipTest('Linux only')
class TestPerfMapWriting(unittest.TestCase):
def test_write_perf_map_entry(self):
self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
with open(f"/tmp/perf-{os.getpid()}.map") as f:
perf_file_contents = f.read()
self.assertIn("1234 162e entry1", perf_file_contents)
self.assertIn("2345 1a85 entry2", perf_file_contents)
perf_map_state_teardown()

View File

@ -0,0 +1,5 @@
Introduced :c:func:`PyUnstable_WritePerfMapEntry`, :c:func:`PyUnstable_PerfMapState_Init` and
:c:func:`PyUnstable_PerfMapState_Fini`. These allow extension modules (JIT compilers in
particular) to write to perf-map files in a thread safe manner. The
:doc:`../howto/perf_profiling` also uses these APIs to write
entries in the perf-map file.

View File

@ -759,6 +759,31 @@ clear_extension(PyObject *self, PyObject *args)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
/* Test helper: write_perf_map_entry(code_addr, code_size, entry_name) -> int
 *
 * Forwards its arguments to PyUnstable_WritePerfMapEntry().
 *   code_addr   Python int holding the address to record.
 *   code_size   size of the code region (unsigned int).
 *   entry_name  name to record for the region.
 * Returns the (non-negative) result as a Python int, or raises OSError when
 * the C API reports any failure (negative return value).
 */
static PyObject *
write_perf_map_entry(PyObject *self, PyObject *args)
{
    PyObject *code_addr_obj;
    unsigned int code_size;
    const char *entry_name;

    // Parse the address as an object and convert it with PyLong_AsVoidPtr():
    // the "K" format writes an unsigned long long, which does not fit in a
    // pointer variable on platforms where pointers are narrower than 64 bits.
    if (!PyArg_ParseTuple(args, "OIs", &code_addr_obj, &code_size, &entry_name)) {
        return NULL;
    }
    const void *code_addr = PyLong_AsVoidPtr(code_addr_obj);
    // NULL is also the conversion of the integer 0, so only treat it as an
    // error when an exception is actually set.
    if (code_addr == NULL && PyErr_Occurred()) {
        return NULL;
    }

    int ret = PyUnstable_WritePerfMapEntry(code_addr, code_size, entry_name);
    // Every negative value is a failure (-1: file, -2: lock), not just -1.
    if (ret < 0) {
        PyErr_SetString(PyExc_OSError, "Failed to write performance map entry");
        return NULL;
    }
    return Py_BuildValue("i", ret);
}
/* Test helper: perf_map_state_teardown() -> None
 * Closes the perf map state via PyUnstable_PerfMapState_Fini(). */
static PyObject *
perf_map_state_teardown(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(noargs))
{
    // Takes no arguments and has no result to report back.
    PyUnstable_PerfMapState_Fini();

    Py_RETURN_NONE;
}
static PyObject * static PyObject *
iframe_getcode(PyObject *self, PyObject *frame) iframe_getcode(PyObject *self, PyObject *frame)
{ {
@ -815,6 +840,8 @@ static PyMethodDef module_functions[] = {
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF _TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF
{"get_interp_settings", get_interp_settings, METH_VARARGS, NULL}, {"get_interp_settings", get_interp_settings, METH_VARARGS, NULL},
{"clear_extension", clear_extension, METH_VARARGS, NULL}, {"clear_extension", clear_extension, METH_VARARGS, NULL},
{"write_perf_map_entry", write_perf_map_entry, METH_VARARGS},
{"perf_map_state_teardown", perf_map_state_teardown, METH_NOARGS},
{"iframe_getcode", iframe_getcode, METH_O, NULL}, {"iframe_getcode", iframe_getcode, METH_O, NULL},
{"iframe_getline", iframe_getline, METH_O, NULL}, {"iframe_getline", iframe_getline, METH_O, NULL},
{"iframe_getlasti", iframe_getlasti, METH_O, NULL}, {"iframe_getlasti", iframe_getlasti, METH_O, NULL},

View File

@ -193,75 +193,33 @@ typedef struct trampoline_api_st trampoline_api_t;
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file #define perf_map_file _PyRuntime.ceval.perf.map_file
/* Return the stream for this process's perf map file, creating the file on
 * first use.  On failure, marks the perf status as failed, sets OSError from
 * errno and returns NULL. */
static void *
perf_map_get_file(void)
{
    // Reuse the stream when the map file has already been opened.
    if (perf_map_file) {
        return perf_map_file;
    }

    // Location and file name of perf map is hard-coded in perf tool.
    char map_path[100];
    pid_t self_pid = getpid();
    snprintf(map_path, sizeof(map_path) - 1, "/tmp/perf-%jd.map",
             (intmax_t)self_pid);

    // Use exclusive create flag with nofollow to prevent symlink attacks.
    int map_fd = open(map_path,
                      O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW | O_CLOEXEC,
                      0600);
    if (map_fd != -1) {
        perf_map_file = fdopen(map_fd, "w");
        if (perf_map_file) {
            return perf_map_file;
        }
    }

    // open() or fdopen() failed: report the error before touching the
    // descriptor again so errno still describes the original failure.
    perf_status = PERF_STATUS_FAILED;
    PyErr_SetFromErrnoWithFilename(PyExc_OSError, map_path);
    if (map_fd != -1) {
        close(map_fd);
    }
    return NULL;
}
/* Close the perf map stream passed as `state` (may be NULL) and reset the
 * recorded map file and perf status.  Returns the fclose() result, or 0 when
 * there was no stream to close. */
static int
perf_map_close(void *state)
{
    FILE *map_stream = (FILE *)state;
    int close_status = (map_stream != NULL) ? fclose(map_stream) : 0;

    // Reset the bookkeeping regardless of whether a stream was closed.
    perf_map_file = NULL;
    perf_status = PERF_STATUS_NO_INIT;
    return close_status;
}
static void static void
perf_map_write_entry(void *state, const void *code_addr, perf_map_write_entry(void *state, const void *code_addr,
unsigned int code_size, PyCodeObject *co) unsigned int code_size, PyCodeObject *co)
{ {
assert(state != NULL); const char *entry = "";
FILE *method_file = (FILE *)state; if (co->co_qualname != NULL) {
const char *entry = PyUnicode_AsUTF8(co->co_qualname); entry = PyUnicode_AsUTF8(co->co_qualname);
if (entry == NULL) { }
_PyErr_WriteUnraisableMsg("Failed to get qualname from code object", const char *filename = "";
NULL); if (co->co_filename != NULL) {
filename = PyUnicode_AsUTF8(co->co_filename);
}
size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1;
char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size);
if (perf_map_entry == NULL) {
return; return;
} }
const char *filename = PyUnicode_AsUTF8(co->co_filename); snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename);
if (filename == NULL) { PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry);
_PyErr_WriteUnraisableMsg("Failed to get filename from code object", PyMem_RawFree(perf_map_entry);
NULL);
return;
}
fprintf(method_file, "%" PRIxPTR " %x py::%s:%s\n", (uintptr_t) code_addr, code_size, entry,
filename);
fflush(method_file);
} }
_PyPerf_Callbacks _Py_perfmap_callbacks = { _PyPerf_Callbacks _Py_perfmap_callbacks = {
&perf_map_get_file, NULL,
&perf_map_write_entry, &perf_map_write_entry,
&perf_map_close NULL,
}; };
static int static int
@ -465,13 +423,6 @@ _PyPerfTrampoline_Init(int activate)
if (new_code_arena() < 0) { if (new_code_arena() < 0) {
return -1; return -1;
} }
if (trampoline_api.state == NULL) {
void *state = trampoline_api.init_state();
if (state == NULL) {
return -1;
}
trampoline_api.state = state;
}
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL); extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
if (extra_code_index == -1) { if (extra_code_index == -1) {
return -1; return -1;
@ -491,10 +442,6 @@ _PyPerfTrampoline_Fini(void)
tstate->interp->eval_frame = NULL; tstate->interp->eval_frame = NULL;
} }
free_code_arenas(); free_code_arenas();
if (trampoline_api.state != NULL) {
trampoline_api.free_state(trampoline_api.state);
trampoline_api.state = NULL;
}
extra_code_index = -1; extra_code_index = -1;
#endif #endif
return 0; return 0;
@ -507,6 +454,7 @@ _PyPerfTrampoline_AfterFork_Child(void)
// Restart trampoline in file in child. // Restart trampoline in file in child.
int was_active = _PyIsPerfTrampolineActive(); int was_active = _PyIsPerfTrampolineActive();
_PyPerfTrampoline_Fini(); _PyPerfTrampoline_Fini();
PyUnstable_PerfMapState_Fini();
if (was_active) { if (was_active) {
_PyPerfTrampoline_Init(1); _PyPerfTrampoline_Init(1);
} }

View File

@ -1775,6 +1775,7 @@ Py_FinalizeEx(void)
*/ */
_PyAtExit_Call(tstate->interp); _PyAtExit_Call(tstate->interp);
PyUnstable_PerfMapState_Fini();
/* Copy the core config, PyInterpreterState_Delete() free /* Copy the core config, PyInterpreterState_Delete() free
the core config memory */ the core config memory */

View File

@ -52,6 +52,10 @@ extern const char *PyWin_DLLVersionString;
#include <emscripten.h> #include <emscripten.h>
#endif #endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
/*[clinic input] /*[clinic input]
module sys module sys
[clinic start generated code]*/ [clinic start generated code]*/
@ -2144,7 +2148,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend)
if (strcmp(backend, "perf") == 0) { if (strcmp(backend, "perf") == 0) {
_PyPerf_Callbacks cur_cb; _PyPerf_Callbacks cur_cb;
_PyPerfTrampoline_GetCallbacks(&cur_cb); _PyPerfTrampoline_GetCallbacks(&cur_cb);
if (cur_cb.init_state != _Py_perfmap_callbacks.init_state) { if (cur_cb.write_state != _Py_perfmap_callbacks.write_state) {
if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_callbacks) < 0 ) { if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_callbacks) < 0 ) {
PyErr_SetString(PyExc_ValueError, "can't activate perf trampoline"); PyErr_SetString(PyExc_ValueError, "can't activate perf trampoline");
return NULL; return NULL;
@ -2240,6 +2244,80 @@ sys__getframemodulename_impl(PyObject *module, int depth)
} }
#ifdef __cplusplus
extern "C" {
#endif
static PerfMapState perf_map_state;
/* Open /tmp/perf-$pid.map for appending and allocate the lock that guards
 * writes to it.  Does nothing if the file is already open.
 *
 * Returns 0 on success, -1 if the file cannot be created/opened, or -2 if
 * the lock cannot be allocated.  On failure the shared state is left
 * untouched.  On Windows this is a no-op that returns 0.
 */
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void) {
#ifndef MS_WINDOWS
    // Already initialized: reopening would leak the current stream and lock.
    if (perf_map_state.perf_map != NULL) {
        return 0;
    }

    char filename[100];
    pid_t pid = getpid();
    // Use nofollow flag to prevent symlink attacks.
    int flags = O_WRONLY | O_CREAT | O_APPEND | O_NOFOLLOW | O_CLOEXEC;
    snprintf(filename, sizeof(filename), "/tmp/perf-%jd.map",
             (intmax_t)pid);
    int fd = open(filename, flags, 0600);
    if (fd == -1) {
        return -1;
    }
    FILE *map_file = fdopen(fd, "a");
    if (map_file == NULL) {
        close(fd);
        return -1;
    }
    PyThread_type_lock lock = PyThread_allocate_lock();
    if (lock == NULL) {
        fclose(map_file);
        return -2;
    }
    // Publish the state only once both resources exist, so a failed call
    // never leaves a closed stream behind for later writes to pick up.
    perf_map_state.map_lock = lock;
    perf_map_state.perf_map = map_file;
#endif
    return 0;
}
/* Append one entry of the form "<hex address> <hex size> <name>" to the perf
 * map file, initializing the perf map state first when the file is not open.
 *
 * Returns 0 once the entry has been handed to the stream, or the non-zero
 * code from PyUnstable_PerfMapState_Init() when initialization fails.
 * On Windows this is a no-op that returns 0.
 */
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
    const void *code_addr,
    unsigned int code_size,
    const char *entry_name
) {
#ifndef MS_WINDOWS
    // Lazy initialization on first use.
    if (perf_map_state.perf_map == NULL) {
        int init_status = PyUnstable_PerfMapState_Init();
        if (init_status != 0) {
            return init_status;
        }
    }

    // Hold the lock for the whole write + flush so entries are not interleaved.
    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    FILE *out = perf_map_state.perf_map;
    fprintf(out, "%" PRIxPTR " %x %s\n",
            (uintptr_t) code_addr, code_size, entry_name);
    fflush(out);
    PyThread_release_lock(perf_map_state.map_lock);
#endif
    return 0;
}
/* Close the perf map file and free its lock.  Does nothing when the file is
 * not open.  On Windows this is a no-op. */
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
#ifndef MS_WINDOWS
    if (perf_map_state.perf_map == NULL) {
        // Nothing was opened, so there is nothing to release.
        return;
    }

    // Close the stream while holding the write lock.
    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    fclose(perf_map_state.perf_map);
    PyThread_release_lock(perf_map_state.map_lock);

    // Release the lock itself and mark the state as closed.
    PyThread_free_lock(perf_map_state.map_lock);
    perf_map_state.perf_map = NULL;
#endif
}
#ifdef __cplusplus
}
#endif
static PyMethodDef sys_methods[] = { static PyMethodDef sys_methods[] = {
/* Might as well keep this in alphabetic order */ /* Might as well keep this in alphabetic order */
SYS_ADDAUDITHOOK_METHODDEF SYS_ADDAUDITHOOK_METHODDEF

View File

@ -356,6 +356,7 @@ Python/pystate.c - initial -
Python/specialize.c - adaptive_opcodes - Python/specialize.c - adaptive_opcodes -
Python/specialize.c - cache_requirements - Python/specialize.c - cache_requirements -
Python/stdlib_module_names.h - _Py_stdlib_module_names - Python/stdlib_module_names.h - _Py_stdlib_module_names -
Python/sysmodule.c - perf_map_state -
Python/sysmodule.c - _PySys_ImplCacheTag - Python/sysmodule.c - _PySys_ImplCacheTag -
Python/sysmodule.c - _PySys_ImplName - Python/sysmodule.c - _PySys_ImplName -
Python/sysmodule.c - whatstrings - Python/sysmodule.c - whatstrings -

Can't render this file because it has a wrong number of fields in line 4.