diff --git a/Include/ceval.h b/Include/ceval.h
index e1af801bc05..9bb145d14ea 100644
--- a/Include/ceval.h
+++ b/Include/ceval.h
@@ -48,6 +48,8 @@ PyAPI_FUNC(int) Py_GetRecursionLimit(void);
 PyAPI_FUNC(char *) PyEval_GetFuncName(PyObject *);
 PyAPI_FUNC(char *) PyEval_GetFuncDesc(PyObject *);
 
+PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *);
+
 /* this used to be handled on a per-thread basis - now just two globals */
 PyAPI_DATA(volatile int) _Py_Ticker;
 PyAPI_DATA(int) _Py_CheckInterval;
diff --git a/Include/compile.h b/Include/compile.h
index a462d77f79e..594d7df7a3a 100644
--- a/Include/compile.h
+++ b/Include/compile.h
@@ -34,6 +34,12 @@ typedef struct {
 #define CO_VARKEYWORDS 0x0008
 #define CO_NESTED      0x0010
 #define CO_GENERATOR   0x0020
+/* The CO_NOFREE flag is set if there are no free or cell variables.
+   This information is redundant, but it allows a single flag test
+   to determine whether there is any extra work to be done when the
+   call frame is set up.
+*/
+#define CO_NOFREE      0x0040
 /* XXX Temporary hack. Until generators are a permanent part of the
    language, we need a way for a code object to record that generators
    were *possible* when it was compiled. This is so code dynamically
diff --git a/Misc/SpecialBuilds.txt b/Misc/SpecialBuilds.txt
index f48817f6bc9..a4226c92ae6 100644
--- a/Misc/SpecialBuilds.txt
+++ b/Misc/SpecialBuilds.txt
@@ -199,3 +199,13 @@ sprayed to stdout, such as every opcode and opcode argument and values
 pushed onto and popped off the value stack.
 
 Not useful very often, but very useful when needed.
+
+---------------------------------------------------------------------------
+CALL_PROFILE introduced for Python 2.3
+
+Count the number of function calls executed.
+
+When this symbol is defined, the ceval mainloop and helper functions
+count the number of function calls made.  It keeps detailed statistics
+about what kind of object was called and whether the call hit any of
+the special fast paths in the code.
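As a quick illustration of what CO_NOFREE means at the Python level (this sketch is not part of the patch), the flag can be read off a code object's co_flags; the 0x0040 value comes from the definition added to Include/compile.h above, and a 2.3-era interpreter built with this change is assumed:

# CO_NOFREE is set only when a code object has no cell or free variables.
CO_NOFREE = 0x0040          # value from Include/compile.h

def plain(x):
    return x + 1            # no cell or free variables

def make_adder(n):          # 'n' becomes a cell variable here
    def adder(x):
        return x + n        # 'n' is a free variable of 'adder'
    return adder

print bool(plain.func_code.co_flags & CO_NOFREE)          # True
print bool(make_adder.func_code.co_flags & CO_NOFREE)     # False: cell variable
print bool(make_adder(1).func_code.co_flags & CO_NOFREE)  # False: free variable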
diff --git a/Python/ceval.c b/Python/ceval.c
index 8547f85c1fb..0f52a0bb814 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -87,6 +87,62 @@ static long dxp[256];
 #endif
 #endif
 
+/* Function call profile */
+#ifdef CALL_PROFILE
+#define PCALL_NUM 11
+static int pcall[PCALL_NUM];
+
+#define PCALL_ALL 0
+#define PCALL_FUNCTION 1
+#define PCALL_FAST_FUNCTION 2
+#define PCALL_FASTER_FUNCTION 3
+#define PCALL_METHOD 4
+#define PCALL_BOUND_METHOD 5
+#define PCALL_CFUNCTION 6
+#define PCALL_TYPE 7
+#define PCALL_GENERATOR 8
+#define PCALL_OTHER 9
+#define PCALL_POP 10
+
+/* Notes about the statistics
+
+   PCALL_FAST stats
+
+   FAST_FUNCTION means no argument tuple needs to be created.
+   FASTER_FUNCTION means that the fast-path frame setup code is used.
+
+   If there is a method call where the call can be optimized by changing
+   the argument tuple and calling the function directly, it gets recorded
+   twice.
+
+   As a result, the relationship among the statistics appears to be
+   PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
+                PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
+   PCALL_FUNCTION > PCALL_FAST_FUNCTION > PCALL_FASTER_FUNCTION
+   PCALL_METHOD > PCALL_BOUND_METHOD
+*/
+
+#define PCALL(POS) pcall[POS]++
+
+PyObject *
+PyEval_GetCallStats(PyObject *self)
+{
+    return Py_BuildValue("iiiiiiiiiii",
+                         pcall[0], pcall[1], pcall[2], pcall[3],
+                         pcall[4], pcall[5], pcall[6], pcall[7],
+                         pcall[8], pcall[9], pcall[10]);
+}
+#else
+#define PCALL(O)
+
+PyObject *
+PyEval_GetCallStats(PyObject *self)
+{
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+#endif
+
 static PyTypeObject gentype;
 
 typedef struct {
@@ -475,6 +531,7 @@ volatile int _Py_Ticker = 100;
 
 PyObject *
 PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
 {
+    /* XXX raise SystemError if globals is NULL */
     return PyEval_EvalCodeEx(co, globals, locals,
                              (PyObject **)NULL, 0,
@@ -1980,6 +2037,7 @@ eval_frame(PyFrameObject *f)
             continue;
 
         case CALL_FUNCTION:
+            PCALL(PCALL_ALL);
             x = call_function(&stack_pointer, oparg);
             PUSH(x);
             if (x != NULL)
@@ -1995,6 +2053,7 @@
             int flags = (opcode - CALL_FUNCTION) & 3;
             int n = na + 2 * nk;
             PyObject **pfunc, *func;
+            PCALL(PCALL_ALL);
             if (flags & CALL_FLAG_VAR)
                 n++;
             if (flags & CALL_FLAG_KW)
@@ -2317,9 +2376,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
         return NULL;
     }
 
-    f = PyFrame_New(tstate,         /*back*/
-                    co,             /*code*/
-                    globals, locals);
+    assert(globals != NULL);
+    f = PyFrame_New(tstate, co, globals, locals);
     if (f == NULL)
         return NULL;
@@ -2520,6 +2578,8 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
         Py_XDECREF(f->f_back);
         f->f_back = NULL;
 
+        PCALL(PCALL_GENERATOR);
+
         /* Create a new generator that owns the ready to run frame
          * and return that as the value. */
         return gen_new(f);
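The relationship described in the statistics notes above can be spot-checked from Python with a rough sketch like the one below. It assumes an interpreter compiled with CALL_PROFILE (on a normal build sys.callstats() simply returns None), and the equality is only approximate, as the comment itself says.

import sys

stats = sys.callstats()
if stats is None:
    print "interpreter was not built with CALL_PROFILE"
else:
    (all_calls, function, fast_function, faster_function, method,
     bound_method, cfunction, type_, generator, other) = stats[:10]
    # Bound-method calls that get unwrapped are recorded a second time as
    # plain function calls, hence the subtraction of bound_method below.
    derived = (function + method - bound_method +
               cfunction + type_ + generator + other)
    print all_calls, derived
    assert function >= fast_function >= faster_function
    assert method >= bound_method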
@@ -3198,12 +3258,12 @@
     PyObject *func = *pfunc;
     PyObject *x, *w;
 
-    /* Always dispatch PyCFunction first, because
-       these are presumed to be the most frequent
-       callable object.
+    /* Always dispatch PyCFunction first, because these are
+       presumed to be the most frequent callable object.
     */
     if (PyCFunction_Check(func) && nk == 0) {
         int flags = PyCFunction_GET_FLAGS(func);
+        PCALL(PCALL_CFUNCTION);
         if (flags & (METH_NOARGS | METH_O)) {
             PyCFunction meth = PyCFunction_GET_FUNCTION(func);
             PyObject *self = PyCFunction_GET_SELF(func);
@@ -3229,6 +3289,8 @@
     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
         /* optimize access to bound methods */
         PyObject *self = PyMethod_GET_SELF(func);
+        PCALL(PCALL_METHOD);
+        PCALL(PCALL_BOUND_METHOD);
         Py_INCREF(self);
         func = PyMethod_GET_FUNCTION(func);
         Py_INCREF(func);
@@ -3245,35 +3307,75 @@
         Py_DECREF(func);
     }
 
+    /* Pop the function and any remaining arguments off the stack. */
     while ((*pp_stack) > pfunc) {
         w = EXT_POP(*pp_stack);
         Py_DECREF(w);
+        PCALL(PCALL_POP);
     }
     return x;
 }
 
 /* The fast_function() function optimize calls for which no argument
    tuple is necessary; the objects are passed directly from the stack.
+   For the simplest case -- a function that takes only positional
+   arguments and is called with only positional arguments -- it
+   inlines the most primitive frame setup code from
+   PyEval_EvalCodeEx(), which vastly reduces the checks that must be
+   done before evaluating the frame.
 */
 
static PyObject *
 fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
 {
-    PyObject *co = PyFunction_GET_CODE(func);
+    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
     PyObject *globals = PyFunction_GET_GLOBALS(func);
     PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
-    PyObject *closure = PyFunction_GET_CLOSURE(func);
     PyObject **d = NULL;
     int nd = 0;
 
+    PCALL(PCALL_FUNCTION);
+    PCALL(PCALL_FAST_FUNCTION);
+    if (argdefs == NULL && co->co_argcount == n &&
+        co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
+        PyFrameObject *f;
+        PyObject *retval = NULL;
+        PyThreadState *tstate = PyThreadState_GET();
+        PyObject **fastlocals, **stack;
+        int i;
+
+        PCALL(PCALL_FASTER_FUNCTION);
+        assert(globals != NULL);
+        /* XXX Perhaps we should create a specialized
+           PyFrame_New() that doesn't take locals, but does
+           take builtins without sanity checking them.
+        */
+        f = PyFrame_New(tstate, co, globals, NULL);
+        if (f == NULL)
+            return NULL;
+
+        fastlocals = f->f_localsplus;
+        stack = (*pp_stack) - n;
+
+        for (i = 0; i < n; i++) {
+            Py_INCREF(*stack);
+            fastlocals[i] = *stack++;
+        }
+        retval = eval_frame(f);
+        assert(tstate != NULL);
+        ++tstate->recursion_depth;
+        Py_DECREF(f);
+        --tstate->recursion_depth;
+        return retval;
+    }
     if (argdefs != NULL) {
         d = &PyTuple_GET_ITEM(argdefs, 0);
         nd = ((PyTupleObject *)argdefs)->ob_size;
     }
-    return PyEval_EvalCodeEx((PyCodeObject *)co, globals,
-                             (PyObject *)NULL, (*pp_stack)-n, na,
-                             (*pp_stack)-2*nk, nk, d, nd,
-                             closure);
+    return PyEval_EvalCodeEx(co, globals,
+                             (PyObject *)NULL, (*pp_stack)-n, na,
+                             (*pp_stack)-2*nk, nk, d, nd,
+                             PyFunction_GET_CLOSURE(func));
 }
 
 static PyObject *
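Only the simplest calls take the inlined frame-setup path added above. The following rough predicate (illustrative only; takes_fast_path is a made-up helper, not part of the patch) mirrors the C condition on the code object to show which functions qualify.

# Mirrors the fast-path test in fast_function(): no default arguments, an
# exact positional-argument match, and a code object whose only flags are
# CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE.
CO_OPTIMIZED, CO_NEWLOCALS, CO_NOFREE = 0x0001, 0x0002, 0x0040

def takes_fast_path(func, nargs):
    code = func.func_code
    return (func.func_defaults is None and
            code.co_argcount == nargs and
            code.co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))

def simple(a, b):
    return a + b

def with_default(a, b=1):
    return a + b

def with_star(*args):
    return args

print takes_fast_path(simple, 2)        # True
print takes_fast_path(with_default, 2)  # False: has default arguments
print takes_fast_path(with_star, 1)     # False: CO_VARARGS is set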
@@ -3371,6 +3473,20 @@ do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
     callargs = load_args(pp_stack, na);
     if (callargs == NULL)
         goto call_fail;
+#ifdef CALL_PROFILE
+    /* At this point, we have to look at the type of func to
+       update the call stats properly.  Do it here so as to avoid
+       exposing the call stats machinery outside ceval.c.
+    */
+    if (PyFunction_Check(func))
+        PCALL(PCALL_FUNCTION);
+    else if (PyMethod_Check(func))
+        PCALL(PCALL_METHOD);
+    else if (PyType_Check(func))
+        PCALL(PCALL_TYPE);
+    else
+        PCALL(PCALL_OTHER);
+#endif
     result = PyObject_Call(func, callargs, kwdict);
  call_fail:
     Py_XDECREF(callargs);
@@ -3426,6 +3542,20 @@ ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk)
     callargs = update_star_args(na, nstar, stararg, pp_stack);
     if (callargs == NULL)
         goto ext_call_fail;
+#ifdef CALL_PROFILE
+    /* At this point, we have to look at the type of func to
+       update the call stats properly.  Do it here so as to avoid
+       exposing the call stats machinery outside ceval.c.
+    */
+    if (PyFunction_Check(func))
+        PCALL(PCALL_FUNCTION);
+    else if (PyMethod_Check(func))
+        PCALL(PCALL_METHOD);
+    else if (PyType_Check(func))
+        PCALL(PCALL_TYPE);
+    else
+        PCALL(PCALL_OTHER);
+#endif
     result = PyObject_Call(func, callargs, kwdict);
  ext_call_fail:
     Py_XDECREF(callargs);
diff --git a/Python/compile.c b/Python/compile.c
index 49e57d128cc..01e961b6910 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -385,6 +385,9 @@ PyCode_New(int argcount, int nlocals, int stacksize, int flags,
         co->co_firstlineno = firstlineno;
         Py_INCREF(lnotab);
         co->co_lnotab = lnotab;
+        if (PyTuple_GET_SIZE(freevars) == 0 &&
+            PyTuple_GET_SIZE(cellvars) == 0)
+            co->co_flags |= CO_NOFREE;
     }
     return co;
 }
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 2b4c6b49a07..765621ecdb5 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -562,6 +562,28 @@ sys_getframe(PyObject *self, PyObject *args)
     return (PyObject*)f;
 }
 
+PyDoc_STRVAR(callstats_doc,
+"callstats() -> tuple of integers\n\
+\n\
+Return a tuple of function call statistics, if CALL_PROFILE was defined\n\
+when Python was built.  Otherwise, return None.\n\
+\n\
+When enabled, this function returns detailed, implementation-specific\n\
+counts of the function calls executed.  The return value is an 11-tuple\n\
+whose entries are counts of:\n\
+0. all function calls\n\
+1. calls to PyFunction_Type objects\n\
+2. PyFunction calls that do not create an argument tuple\n\
+3. PyFunction calls that do not create an argument tuple\n\
+   and bypass PyEval_EvalCodeEx()\n\
+4. PyMethod calls\n\
+5. PyMethod calls on bound methods\n\
+6. PyCFunction calls\n\
+7. PyType calls\n\
+8. generator calls\n\
+9. all other calls\n\
+10. the number of stack pops performed by call_function()"
+);
 
 #ifdef Py_TRACE_REFS
 /* Defined in objects.c because it uses static globals if that file */
@@ -575,13 +597,15 @@ extern PyObject *_Py_GetDXProfile(PyObject *, PyObject *);
 static PyMethodDef sys_methods[] = {
     /* Might as well keep this in alphabetic order */
+    {"callstats", (PyCFunction)PyEval_GetCallStats, METH_NOARGS,
+     callstats_doc},
     {"displayhook", sys_displayhook, METH_O, displayhook_doc},
     {"exc_info", (PyCFunction)sys_exc_info, METH_NOARGS, exc_info_doc},
     {"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc},
     {"exit", sys_exit, METH_VARARGS, exit_doc},
 #ifdef Py_USING_UNICODE
-    {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding, METH_NOARGS,
-     getdefaultencoding_doc},
+    {"getdefaultencoding", (PyCFunction)sys_getdefaultencoding,
+     METH_NOARGS, getdefaultencoding_doc},
 #endif
 #ifdef HAVE_DLOPEN
     {"getdlopenflags", (PyCFunction)sys_getdlopenflags, METH_NOARGS,
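For completeness, a small usage sketch for the new sys.callstats() entry point, again assuming a CALL_PROFILE build; the field names below are informal labels for the tuple positions documented in callstats_doc above, not anything exported by the interpreter.

import sys

FIELDS = ("all", "function", "fast_function", "faster_function",
          "method", "bound_method", "cfunction", "type",
          "generator", "other", "pop")

def labelled_callstats():
    stats = sys.callstats()
    if stats is None:
        return None                     # built without CALL_PROFILE
    return dict(zip(FIELDS, stats))

before = labelled_callstats()
for i in range(1000):
    abs(-i)                             # exercise some PyCFunction calls
after = labelled_callstats()

if before is not None:
    for name in FIELDS:
        print name, after[name] - before[name]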