Issue #28618: Mark hot functions using __attribute__((hot))
When Python is not compiled with PGO, the performance of Python on the call_simple and call_method microbenchmarks depends heavily on code placement. In the worst case, the slowdown can be up to 70%. The GCC __attribute__((hot)) attribute helps to keep hot code close together, reducing the risk of such a major slowdown. This attribute is ignored when Python is compiled with PGO. The following functions are considered hot according to statistics collected by perf record/perf report: * _PyEval_EvalFrameDefault() * call_function() * _PyFunction_FastCall() * PyFrame_New() * frame_dealloc() * PyErr_Occurred()
This commit is contained in:
parent
0cae609847
commit
c6944e7edc
|
@ -490,13 +490,36 @@ extern "C" {
|
||||||
* typedef int T1 Py_DEPRECATED(2.4);
|
* typedef int T1 Py_DEPRECATED(2.4);
|
||||||
* extern int x() Py_DEPRECATED(2.5);
|
* extern int x() Py_DEPRECATED(2.5);
|
||||||
*/
|
*/
|
||||||
#if defined(__GNUC__) && ((__GNUC__ >= 4) || \
|
#if defined(__GNUC__) \
|
||||||
(__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
|
&& ((__GNUC__ >= 4) || (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
|
||||||
#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__))
|
#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__))
|
||||||
#else
|
#else
|
||||||
#define Py_DEPRECATED(VERSION_UNUSED)
|
#define Py_DEPRECATED(VERSION_UNUSED)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* _Py_HOT_FUNCTION
|
||||||
|
* The hot attribute on a function is used to inform the compiler that the
|
||||||
|
* function is a hot spot of the compiled program. The function is optimized
|
||||||
|
* more aggressively and on many targets it is placed into a special subsection of
|
||||||
|
* the text section so all hot functions appear close together, improving
|
||||||
|
* locality.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* int _Py_HOT_FUNCTION x() { return 3; }
|
||||||
|
*
|
||||||
|
* Issue #28618: This attribute must not be abused, otherwise it can have a
|
||||||
|
* negative effect on performance. Only the functions where Python spends most of
|
||||||
|
* its time must use it. Use a profiler when running the performance benchmark
|
||||||
|
* suite to find these functions.
|
||||||
|
*/
|
||||||
|
#if defined(__GNUC__) \
|
||||||
|
&& ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))
|
||||||
|
#define _Py_HOT_FUNCTION __attribute__((hot))
|
||||||
|
#else
|
||||||
|
#define _Py_HOT_FUNCTION
|
||||||
|
#endif
|
||||||
|
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Prototypes that are missing from the standard include files on some systems
|
Prototypes that are missing from the standard include files on some systems
|
||||||
(and possibly only some versions of such systems.)
|
(and possibly only some versions of such systems.)
|
||||||
|
|
|
@ -409,7 +409,7 @@ static int numfree = 0; /* number of frames currently in free_list */
|
||||||
/* max value for numfree */
|
/* max value for numfree */
|
||||||
#define PyFrame_MAXFREELIST 200
|
#define PyFrame_MAXFREELIST 200
|
||||||
|
|
||||||
static void
|
static void _Py_HOT_FUNCTION
|
||||||
frame_dealloc(PyFrameObject *f)
|
frame_dealloc(PyFrameObject *f)
|
||||||
{
|
{
|
||||||
PyObject **p, **valuestack;
|
PyObject **p, **valuestack;
|
||||||
|
@ -605,7 +605,7 @@ int _PyFrame_Init()
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyFrameObject *
|
PyFrameObject* _Py_HOT_FUNCTION
|
||||||
PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
|
PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
|
||||||
PyObject *locals)
|
PyObject *locals)
|
||||||
{
|
{
|
||||||
|
|
|
@ -718,7 +718,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
||||||
return tstate->interp->eval_frame(f, throwflag);
|
return tstate->interp->eval_frame(f, throwflag);
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject* _Py_HOT_FUNCTION
|
||||||
_PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
|
_PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
|
||||||
{
|
{
|
||||||
#ifdef DXPAIRS
|
#ifdef DXPAIRS
|
||||||
|
@ -4771,7 +4771,7 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \
|
||||||
x = call; \
|
x = call; \
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject* _Py_HOT_FUNCTION
|
||||||
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
|
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
|
||||||
{
|
{
|
||||||
PyObject **pfunc = (*pp_stack) - oparg - 1;
|
PyObject **pfunc = (*pp_stack) - oparg - 1;
|
||||||
|
@ -4844,7 +4844,7 @@ call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
|
||||||
done before evaluating the frame.
|
done before evaluating the frame.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static PyObject*
|
static PyObject* _Py_HOT_FUNCTION
|
||||||
_PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs,
|
_PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs,
|
||||||
PyObject *globals)
|
PyObject *globals)
|
||||||
{
|
{
|
||||||
|
|
|
@ -158,7 +158,7 @@ PyErr_SetString(PyObject *exception, const char *string)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PyObject *
|
PyObject* _Py_HOT_FUNCTION
|
||||||
PyErr_Occurred(void)
|
PyErr_Occurred(void)
|
||||||
{
|
{
|
||||||
PyThreadState *tstate = PyThreadState_GET();
|
PyThreadState *tstate = PyThreadState_GET();
|
||||||
|
|
Loading…
Reference in New Issue