From 9a2310d1b6b80bae072892de04464d23b1e88881 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 25 Jul 2008 22:39:39 +0000 Subject: [PATCH] Merged revisions 65240-65242 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r65240 | antoine.pitrou | 2008-07-26 00:02:07 +0200 (sam., 26 juil. 2008) | 3 lines add a pybench test for complex function calls (part of #1819) ........ r65241 | antoine.pitrou | 2008-07-26 00:13:52 +0200 (sam., 26 juil. 2008) | 4 lines Raymond's patch for #1819: speedup function calls with named parameters (35% faster according to pybench) ........ r65242 | antoine.pitrou | 2008-07-26 00:22:08 +0200 (sam., 26 juil. 2008) | 3 lines add a NEWS entry ........ --- Misc/NEWS | 6 ++++ Python/ceval.c | 62 ++++++++++++++++++++++++------------------ Tools/pybench/Calls.py | 58 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 26 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 76ecbccf81f..0ef8b05ceed 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,12 @@ What's new in Python 3.0b3? *Release date: XX-XXX-2008* +Core and Builtins +----------------- + +- Issue #1819: function calls with several named parameters are now on + average 35% faster (as measured by pybench). + Library ------- diff --git a/Python/ceval.c b/Python/ceval.c index 9601de57711..af7a67a6d7a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -642,9 +642,9 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) processor's own internal branch predication has a high likelihood of success, resulting in a nearly zero-overhead transition to the next opcode. A successful prediction saves a trip through the eval-loop - including its two unpredictable branches, the HAS_ARG test and the + including its two unpredictable branches, the HAS_ARG test and the switch-case. Combined with the processor's internal branch prediction, - a successful PREDICT has the effect of making the two opcodes run as if + a successful PREDICT has the effect of making the two opcodes run as if they were a single new opcode with the bodies combined. If collecting opcode statistics, your choices are to either keep the @@ -796,7 +796,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) an argument which depends on the situation. The global trace function is also called whenever an exception is detected. */ - if (call_trace_protected(tstate->c_tracefunc, + if (call_trace_protected(tstate->c_tracefunc, tstate->c_traceobj, f, PyTrace_CALL, Py_None)) { /* Trace function raised an error */ @@ -828,10 +828,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) this wasn't always true before 2.3! PyFrame_New now sets f->f_lasti to -1 (i.e. the index *before* the first instruction) and YIELD_VALUE doesn't fiddle with f_lasti any more. So this - does work. Promise. + does work. Promise. When the PREDICT() macros are enabled, some opcode pairs follow in - direct succession without updating f->f_lasti. A successful + direct succession without updating f->f_lasti. A successful prediction effectively links the two codes together as if they were a single new opcode; accordingly,f->f_lasti will point to the first code in the pair (for instance, GET_ITER followed by @@ -1678,7 +1678,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) { int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); v = POP(); - + if (unpack_iterable(v, oparg & 0xFF, oparg >> 8, stack_pointer + totalargs)) { stack_pointer += totalargs; @@ -2071,7 +2071,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) because it prevents detection of a control-break in tight loops like "while 1: pass". Compile with this option turned-on when you need the speed-up and do not need break checking inside tight loops (ones - that contain only instructions ending with goto fast_next_opcode). + that contain only instructions ending with goto fast_next_opcode). */ goto fast_next_opcode; #else @@ -2257,7 +2257,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) break; } - case MAKE_CLOSURE: + case MAKE_CLOSURE: case MAKE_FUNCTION: { int posdefaults = oparg & 0xff; @@ -2267,7 +2267,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) v = POP(); /* code object */ x = PyFunction_New(v, f->f_globals); Py_DECREF(v); - + if (x != NULL && opcode == MAKE_CLOSURE) { v = POP(); err = PyFunction_SetClosure(x, v); @@ -2650,6 +2650,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, } } for (i = 0; i < kwcount; i++) { + PyObject **co_varnames; PyObject *keyword = kws[2*i]; PyObject *value = kws[2*i + 1]; int j; @@ -2659,16 +2660,25 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, co->co_name); goto fail; } - /* XXX slow -- speed up using dictionary? */ + /* Speed hack: do raw pointer compares. As names are + normally interned this should almost always hit. */ + co_varnames = PySequence_Fast_ITEMS(co->co_varnames); for (j = 0; j < co->co_argcount + co->co_kwonlyargcount; j++) { - PyObject *nm = PyTuple_GET_ITEM( - co->co_varnames, j); + PyObject *nm = co_varnames[j]; + if (nm == keyword) + goto kw_found; + } + /* Slow fallback, just in case */ + for (j = 0; + j < co->co_argcount + co->co_kwonlyargcount; + j++) { + PyObject *nm = co_varnames[j]; int cmp = PyObject_RichCompareBool( keyword, nm, Py_EQ); if (cmp > 0) - break; + goto kw_found; else if (cmp < 0) goto fail; } @@ -2685,20 +2695,20 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, goto fail; } PyDict_SetItem(kwdict, keyword, value); + continue; } - else { - if (GETLOCAL(j) != NULL) { - PyErr_Format(PyExc_TypeError, - "%U() got multiple " - "values for keyword " - "argument '%S'", - co->co_name, - keyword); - goto fail; - } - Py_INCREF(value); - SETLOCAL(j, value); +kw_found: + if (GETLOCAL(j) != NULL) { + PyErr_Format(PyExc_TypeError, + "%U() got multiple " + "values for keyword " + "argument '%S'", + co->co_name, + keyword); + goto fail; } + Py_INCREF(value); + SETLOCAL(j, value); } if (co->co_kwonlyargcount > 0) { for (i = co->co_argcount; @@ -2930,7 +2940,7 @@ raise_error: /* Iterate v argcnt times and store the results on the stack (via decreasing sp). Return 1 for success, 0 if error. - + If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack with a variable target. */ diff --git a/Tools/pybench/Calls.py b/Tools/pybench/Calls.py index cfe07152639..7c11867eae3 100644 --- a/Tools/pybench/Calls.py +++ b/Tools/pybench/Calls.py @@ -109,6 +109,64 @@ class PythonFunctionCalls(Test): ### +class ComplexPythonFunctionCalls(Test): + + version = 2.0 + operations = 4*5 + rounds = 100000 + + def test(self): + + # define functions + def f(a,b,c,d=1,e=2,f=3): + return f + + args = 1,2 + kwargs = dict(c=3,d=4,e=5) + + # do calls + for i in range(self.rounds): + f(a=i,b=i,c=i) + f(f=i,e=i,d=i,c=2,b=i,a=3) + f(1,b=i,**kwargs) + f(*args,**kwargs) + + f(a=i,b=i,c=i) + f(f=i,e=i,d=i,c=2,b=i,a=3) + f(1,b=i,**kwargs) + f(*args,**kwargs) + + f(a=i,b=i,c=i) + f(f=i,e=i,d=i,c=2,b=i,a=3) + f(1,b=i,**kwargs) + f(*args,**kwargs) + + f(a=i,b=i,c=i) + f(f=i,e=i,d=i,c=2,b=i,a=3) + f(1,b=i,**kwargs) + f(*args,**kwargs) + + f(a=i,b=i,c=i) + f(f=i,e=i,d=i,c=2,b=i,a=3) + f(1,b=i,**kwargs) + f(*args,**kwargs) + + + def calibrate(self): + + # define functions + def f(a,b,c,d=1,e=2,f=3): + return f + + args = 1,2 + kwargs = dict(c=3,d=4,e=5) + + # do calls + for i in range(self.rounds): + pass + +### + class BuiltinFunctionCalls(Test): version = 2.0