mirror of https://github.com/python/cpython
gh-103295: expose API for writing perf map files (#103546)
Co-authored-by: Aniket Panse <aniketpanse@fb.com> Co-authored-by: Gregory P. Smith <greg@krypto.org> Co-authored-by: Carl Meyer <carl@oddbird.net>
This commit is contained in:
parent
2e91c7e626
commit
be0c106789
|
@ -0,0 +1,50 @@
|
|||
.. highlight:: c
|
||||
|
||||
.. _perfmaps:
|
||||
|
||||
Support for Perf Maps
|
||||
----------------------
|
||||
|
||||
On supported platforms (as of this writing, only Linux), the runtime can take
|
||||
advantage of *perf map files* to make Python functions visible to an external
|
||||
profiling tool (such as `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_).
|
||||
A running process may create a file in the ``/tmp`` directory, which contains entries
|
||||
that can map a section of executable code to a name. This interface is described in the
|
||||
`documentation of the Linux Perf tool <https://git.kernel.org/pub/scm/linux/
|
||||
kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt>`_.
|
||||
|
||||
In Python, these helper APIs can be used by libraries and features that rely
|
||||
on generating machine code on the fly.
|
||||
|
||||
Note that holding the Global Interpreter Lock (GIL) is not required for these APIs.
|
||||
|
||||
.. c:function:: int PyUnstable_PerfMapState_Init(void)
|
||||
|
||||
Open the ``/tmp/perf-$pid.map`` file, unless it's already opened, and create
|
||||
a lock to ensure thread-safe writes to the file (provided the writes are
|
||||
done through :c:func:`PyUnstable_WritePerfMapEntry`). Normally, there's no need
|
||||
to call this explicitly; just use :c:func:`PyUnstable_WritePerfMapEntry`
|
||||
and it will initialize the state on first call.
|
||||
|
||||
Returns ``0`` on success, ``-1`` on failure to create/open the perf map file,
|
||||
or ``-2`` on failure to create a lock. Check ``errno`` for more information
|
||||
about the cause of a failure.
|
||||
|
||||
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
|
||||
|
||||
Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
|
||||
thread safe. Here is what an example entry looks like::
|
||||
|
||||
# address size name
|
||||
7f3529fcf759 b py::bar:/run/t.py
|
||||
|
||||
Will call :c:func:`PyUnstable_PerfMapState_Init` before writing the entry, if
|
||||
the perf map file is not already opened. Returns ``0`` on success, or the
|
||||
same error codes as :c:func:`PyUnstable_PerfMapState_Init` on failure.
|
||||
|
||||
.. c:function:: void PyUnstable_PerfMapState_Fini(void)
|
||||
|
||||
Close the perf map file opened by :c:func:`PyUnstable_PerfMapState_Init`.
|
||||
This is called by the runtime itself during interpreter shut-down. In
|
||||
general, there shouldn't be a reason to explicitly call this, except to
|
||||
handle specific scenarios such as forking.
|
|
@ -19,3 +19,4 @@ and parsing function arguments and constructing Python values from C values.
|
|||
conversion.rst
|
||||
reflection.rst
|
||||
codec.rst
|
||||
perfmaps.rst
|
||||
|
|
|
@ -24,7 +24,7 @@ functions to appear in the output of the ``perf`` profiler. When this mode is
|
|||
enabled, the interpreter will interpose a small piece of code compiled on the
|
||||
fly before the execution of every Python function and it will teach ``perf`` the
|
||||
relationship between this piece of code and the associated Python function using
|
||||
`perf map files`_.
|
||||
:doc:`perf map files <../c-api/perfmaps>`.
|
||||
|
||||
.. note::
|
||||
|
||||
|
@ -206,5 +206,3 @@ You can check if your system has been compiled with this flag by running::
|
|||
If you don't see any output it means that your interpreter has not been compiled with
|
||||
frame pointers and therefore it may not be able to show Python functions in the output
|
||||
of ``perf``.
|
||||
|
||||
.. _perf map files: https://github.com/torvalds/linux/blob/0513e464f9007b70b96740271a948ca5ab6e7dd7/tools/perf/Documentation/jit-interface.txt
|
||||
|
|
|
@ -29,6 +29,19 @@ Py_DEPRECATED(3.11) PyAPI_FUNC(int) PySys_HasWarnOptions(void);
|
|||
Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_AddXOption(const wchar_t *);
|
||||
PyAPI_FUNC(PyObject *) PySys_GetXOptions(void);
|
||||
|
||||
#if !defined(Py_LIMITED_API)
|
||||
typedef struct {
|
||||
FILE* perf_map;
|
||||
PyThread_type_lock map_lock;
|
||||
} PerfMapState;
|
||||
|
||||
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
|
||||
|
||||
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name);
|
||||
|
||||
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
|
||||
#endif
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
# define Py_CPYTHON_SYSMODULE_H
|
||||
# include "cpython/sysmodule.h"
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from _testinternalcapi import perf_map_state_teardown, write_perf_map_entry
|
||||
|
||||
if sys.platform != 'linux':
|
||||
raise unittest.SkipTest('Linux only')
|
||||
|
||||
|
||||
class TestPerfMapWriting(unittest.TestCase):
    """Exercise the perf map writing helpers exposed by _testinternalcapi."""

    def test_write_perf_map_entry(self):
        """Entries are written as '<hex address> <hex size> <name>' lines."""
        # Register the teardown up front so the perf map state is closed even
        # when one of the assertions below fails.
        self.addCleanup(perf_map_state_teardown)
        self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
        self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
        with open(f"/tmp/perf-{os.getpid()}.map") as f:
            perf_file_contents = f.read()
        # The size is written in hexadecimal: 5678 == 0x162e, 6789 == 0x1a85.
        self.assertIn("1234 162e entry1", perf_file_contents)
        self.assertIn("2345 1a85 entry2", perf_file_contents)
|
|
@ -0,0 +1,5 @@
|
|||
Introduced :c:func:`PyUnstable_WritePerfMapEntry`, :c:func:`PyUnstable_PerfMapState_Init` and
|
||||
:c:func:`PyUnstable_PerfMapState_Fini`. These allow extension modules (JIT compilers in
|
||||
particular) to write to perf-map files in a thread-safe manner. The
|
||||
:doc:`../howto/perf_profiling` also uses these APIs to write
|
||||
entries in the perf-map file.
|
|
@ -759,6 +759,31 @@ clear_extension(PyObject *self, PyObject *args)
|
|||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/* Test helper wrapping PyUnstable_WritePerfMapEntry().
 *
 * Expects three positional arguments: the code address (integer), the code
 * size (integer) and the entry name (str).
 *
 * Raises OSError when PyUnstable_WritePerfMapEntry() returns -1; any other
 * return value is handed back to the caller as a Python int.
 */
static PyObject *
write_perf_map_entry(PyObject *self, PyObject *args)
{
    // The "K" format unit stores an unsigned long long. Parse into a variable
    // of exactly that type instead of a pointer, which may be narrower than
    // 64 bits, and convert afterwards.
    unsigned long long code_addr_value;
    unsigned int code_size;
    const char *entry_name;

    if (!PyArg_ParseTuple(args, "KIs", &code_addr_value, &code_size, &entry_name))
        return NULL;

    const void *code_addr = (const void *)(uintptr_t)code_addr_value;
    int ret = PyUnstable_WritePerfMapEntry(code_addr, code_size, entry_name);
    if (ret == -1) {
        PyErr_SetString(PyExc_OSError, "Failed to write performance map entry");
        return NULL;
    }
    return Py_BuildValue("i", ret);
}
|
||||
|
||||
/* Test helper: calls PyUnstable_PerfMapState_Fini() and returns None.
 * Both parameters are unused. */
static PyObject *
perf_map_state_teardown(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
{
    PyUnstable_PerfMapState_Fini();
    Py_RETURN_NONE;
}
|
||||
|
||||
static PyObject *
|
||||
iframe_getcode(PyObject *self, PyObject *frame)
|
||||
{
|
||||
|
@ -815,6 +840,8 @@ static PyMethodDef module_functions[] = {
|
|||
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF
|
||||
{"get_interp_settings", get_interp_settings, METH_VARARGS, NULL},
|
||||
{"clear_extension", clear_extension, METH_VARARGS, NULL},
|
||||
{"write_perf_map_entry", write_perf_map_entry, METH_VARARGS},
|
||||
{"perf_map_state_teardown", perf_map_state_teardown, METH_NOARGS},
|
||||
{"iframe_getcode", iframe_getcode, METH_O, NULL},
|
||||
{"iframe_getline", iframe_getline, METH_O, NULL},
|
||||
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},
|
||||
|
|
|
@ -193,75 +193,33 @@ typedef struct trampoline_api_st trampoline_api_t;
|
|||
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
|
||||
#define perf_map_file _PyRuntime.ceval.perf.map_file
|
||||
|
||||
static void *
|
||||
perf_map_get_file(void)
|
||||
{
|
||||
if (perf_map_file) {
|
||||
return perf_map_file;
|
||||
}
|
||||
char filename[100];
|
||||
pid_t pid = getpid();
|
||||
// Location and file name of perf map is hard-coded in perf tool.
|
||||
// Use exclusive create flag with nofollow to prevent symlink attacks.
|
||||
int flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW | O_CLOEXEC;
|
||||
snprintf(filename, sizeof(filename) - 1, "/tmp/perf-%jd.map",
|
||||
(intmax_t)pid);
|
||||
int fd = open(filename, flags, 0600);
|
||||
if (fd == -1) {
|
||||
perf_status = PERF_STATUS_FAILED;
|
||||
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
|
||||
return NULL;
|
||||
}
|
||||
perf_map_file = fdopen(fd, "w");
|
||||
if (!perf_map_file) {
|
||||
perf_status = PERF_STATUS_FAILED;
|
||||
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
|
||||
close(fd);
|
||||
return NULL;
|
||||
}
|
||||
return perf_map_file;
|
||||
}
|
||||
|
||||
static int
|
||||
perf_map_close(void *state)
|
||||
{
|
||||
FILE *fp = (FILE *)state;
|
||||
int ret = 0;
|
||||
if (fp) {
|
||||
ret = fclose(fp);
|
||||
}
|
||||
perf_map_file = NULL;
|
||||
perf_status = PERF_STATUS_NO_INIT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
perf_map_write_entry(void *state, const void *code_addr,
|
||||
unsigned int code_size, PyCodeObject *co)
|
||||
{
|
||||
assert(state != NULL);
|
||||
FILE *method_file = (FILE *)state;
|
||||
const char *entry = PyUnicode_AsUTF8(co->co_qualname);
|
||||
if (entry == NULL) {
|
||||
_PyErr_WriteUnraisableMsg("Failed to get qualname from code object",
|
||||
NULL);
|
||||
const char *entry = "";
|
||||
if (co->co_qualname != NULL) {
|
||||
entry = PyUnicode_AsUTF8(co->co_qualname);
|
||||
}
|
||||
const char *filename = "";
|
||||
if (co->co_filename != NULL) {
|
||||
filename = PyUnicode_AsUTF8(co->co_filename);
|
||||
}
|
||||
size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1;
|
||||
char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size);
|
||||
if (perf_map_entry == NULL) {
|
||||
return;
|
||||
}
|
||||
const char *filename = PyUnicode_AsUTF8(co->co_filename);
|
||||
if (filename == NULL) {
|
||||
_PyErr_WriteUnraisableMsg("Failed to get filename from code object",
|
||||
NULL);
|
||||
return;
|
||||
}
|
||||
fprintf(method_file, "%" PRIxPTR " %x py::%s:%s\n", (uintptr_t) code_addr, code_size, entry,
|
||||
filename);
|
||||
fflush(method_file);
|
||||
snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename);
|
||||
PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry);
|
||||
PyMem_RawFree(perf_map_entry);
|
||||
}
|
||||
|
||||
_PyPerf_Callbacks _Py_perfmap_callbacks = {
|
||||
&perf_map_get_file,
|
||||
NULL,
|
||||
&perf_map_write_entry,
|
||||
&perf_map_close
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int
|
||||
|
@ -465,13 +423,6 @@ _PyPerfTrampoline_Init(int activate)
|
|||
if (new_code_arena() < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (trampoline_api.state == NULL) {
|
||||
void *state = trampoline_api.init_state();
|
||||
if (state == NULL) {
|
||||
return -1;
|
||||
}
|
||||
trampoline_api.state = state;
|
||||
}
|
||||
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
|
||||
if (extra_code_index == -1) {
|
||||
return -1;
|
||||
|
@ -491,10 +442,6 @@ _PyPerfTrampoline_Fini(void)
|
|||
tstate->interp->eval_frame = NULL;
|
||||
}
|
||||
free_code_arenas();
|
||||
if (trampoline_api.state != NULL) {
|
||||
trampoline_api.free_state(trampoline_api.state);
|
||||
trampoline_api.state = NULL;
|
||||
}
|
||||
extra_code_index = -1;
|
||||
#endif
|
||||
return 0;
|
||||
|
@ -507,6 +454,7 @@ _PyPerfTrampoline_AfterFork_Child(void)
|
|||
// Restart trampoline in file in child.
|
||||
int was_active = _PyIsPerfTrampolineActive();
|
||||
_PyPerfTrampoline_Fini();
|
||||
PyUnstable_PerfMapState_Fini();
|
||||
if (was_active) {
|
||||
_PyPerfTrampoline_Init(1);
|
||||
}
|
||||
|
|
|
@ -1775,6 +1775,7 @@ Py_FinalizeEx(void)
|
|||
*/
|
||||
|
||||
_PyAtExit_Call(tstate->interp);
|
||||
PyUnstable_PerfMapState_Fini();
|
||||
|
||||
/* Copy the core config, PyInterpreterState_Delete() free
|
||||
the core config memory */
|
||||
|
|
|
@ -52,6 +52,10 @@ extern const char *PyWin_DLLVersionString;
|
|||
#include <emscripten.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
/*[clinic input]
|
||||
module sys
|
||||
[clinic start generated code]*/
|
||||
|
@ -2144,7 +2148,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend)
|
|||
if (strcmp(backend, "perf") == 0) {
|
||||
_PyPerf_Callbacks cur_cb;
|
||||
_PyPerfTrampoline_GetCallbacks(&cur_cb);
|
||||
if (cur_cb.init_state != _Py_perfmap_callbacks.init_state) {
|
||||
if (cur_cb.write_state != _Py_perfmap_callbacks.write_state) {
|
||||
if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_callbacks) < 0 ) {
|
||||
PyErr_SetString(PyExc_ValueError, "can't activate perf trampoline");
|
||||
return NULL;
|
||||
|
@ -2240,6 +2244,80 @@ sys__getframemodulename_impl(PyObject *module, int depth)
|
|||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static PerfMapState perf_map_state;
|
||||
|
||||
/* Open the /tmp/perf-<pid>.map file in append mode (creating it if needed)
 * and allocate the lock used to serialize writes to it.
 *
 * Does nothing and returns 0 if the file is already open.
 *
 * Returns 0 on success, -1 if the file cannot be created/opened, or -2 if
 * the lock cannot be allocated.  On failure perf_map_state.perf_map is left
 * NULL, so a later call starts again from scratch.
 *
 * When MS_WINDOWS is defined this is a no-op that returns 0.
 */
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void) {
#ifndef MS_WINDOWS
    if (perf_map_state.perf_map != NULL) {
        // Already initialized: re-opening would leak the current FILE* and
        // lock.
        return 0;
    }
    char filename[100];
    pid_t pid = getpid();
    // Use nofollow flag to prevent symlink attacks.
    int flags = O_WRONLY | O_CREAT | O_APPEND | O_NOFOLLOW | O_CLOEXEC;
    snprintf(filename, sizeof(filename) - 1, "/tmp/perf-%jd.map",
             (intmax_t)pid);
    int fd = open(filename, flags, 0600);
    if (fd == -1) {
        return -1;
    }
    else{
        perf_map_state.perf_map = fdopen(fd, "a");
        if (perf_map_state.perf_map == NULL) {
            close(fd);
            return -1;
        }
    }
    perf_map_state.map_lock = PyThread_allocate_lock();
    if (perf_map_state.map_lock == NULL) {
        fclose(perf_map_state.perf_map);
        // Reset the handle: a non-NULL perf_map is what
        // PyUnstable_WritePerfMapEntry() and PyUnstable_PerfMapState_Fini()
        // test before using the stream and the lock, so leaving the closed
        // FILE* in place would make them use a closed stream and a NULL lock.
        perf_map_state.perf_map = NULL;
        return -2;
    }
#endif
    return 0;
}
|
||||
|
||||
/* Append one "<address> <size> <name>" line to the perf map file, with the
 * address and size written in hexadecimal.
 *
 * If the perf map file is not open yet, PyUnstable_PerfMapState_Init() is
 * called first; a non-zero result from it is returned unchanged.  Otherwise
 * returns 0.  The results of fprintf() and fflush() are not checked.
 *
 * When MS_WINDOWS is defined nothing is written and 0 is returned.
 */
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
    const void *code_addr,
    unsigned int code_size,
    const char *entry_name
) {
#ifndef MS_WINDOWS
    // Lazily initialize the file and the lock on first use.
    if (perf_map_state.perf_map == NULL) {
        int ret = PyUnstable_PerfMapState_Init();
        if(ret != 0){
            return ret;
        }
    }
    // Hold the lock across the write and the flush so that a whole entry is
    // emitted before another thread going through this function can write.
    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    fprintf(perf_map_state.perf_map, "%" PRIxPTR " %x %s\n", (uintptr_t) code_addr, code_size, entry_name);
    fflush(perf_map_state.perf_map);
    PyThread_release_lock(perf_map_state.map_lock);
#endif
    return 0;
}
|
||||
|
||||
/* Close the perf map file and free the lock that guards it.
 *
 * Does nothing if the perf map file is not open, or when MS_WINDOWS is
 * defined.  Afterwards both fields of perf_map_state are NULL, so the state
 * can be initialized again.
 */
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
#ifndef MS_WINDOWS
    if (perf_map_state.perf_map != NULL) {
        // close the file
        PyThread_acquire_lock(perf_map_state.map_lock, 1);
        fclose(perf_map_state.perf_map);
        PyThread_release_lock(perf_map_state.map_lock);

        // clean up the lock and state; clear both pointers so that no
        // dangling reference to the freed lock remains in the global state
        PyThread_free_lock(perf_map_state.map_lock);
        perf_map_state.map_lock = NULL;
        perf_map_state.perf_map = NULL;
    }
#endif
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static PyMethodDef sys_methods[] = {
|
||||
/* Might as well keep this in alphabetic order */
|
||||
SYS_ADDAUDITHOOK_METHODDEF
|
||||
|
|
|
@ -356,6 +356,7 @@ Python/pystate.c - initial -
|
|||
Python/specialize.c - adaptive_opcodes -
|
||||
Python/specialize.c - cache_requirements -
|
||||
Python/stdlib_module_names.h - _Py_stdlib_module_names -
|
||||
Python/sysmodule.c - perf_map_state -
|
||||
Python/sysmodule.c - _PySys_ImplCacheTag -
|
||||
Python/sysmodule.c - _PySys_ImplName -
|
||||
Python/sysmodule.c - whatstrings -
|
||||
|
|
Can't render this file because it has a wrong number of fields in line 4.
|
Loading…
Reference in New Issue