Merged revisions 65240-65242 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r65240 | antoine.pitrou | 2008-07-26 00:02:07 +0200 (sam., 26 juil. 2008) | 3 lines

  add a pybench test for complex function calls (part of #1819)
........
  r65241 | antoine.pitrou | 2008-07-26 00:13:52 +0200 (sam., 26 juil. 2008) | 4 lines

  Raymond's patch for #1819: speedup function calls with named parameters
  (35% faster according to pybench)
........
  r65242 | antoine.pitrou | 2008-07-26 00:22:08 +0200 (sam., 26 juil. 2008) | 3 lines

  add a NEWS entry
........
This commit is contained in:
Antoine Pitrou 2008-07-25 22:39:39 +00:00
parent 83d6a87a40
commit 9a2310d1b6
3 changed files with 100 additions and 26 deletions

View File

@ -9,6 +9,12 @@ What's new in Python 3.0b3?
*Release date: XX-XXX-2008* *Release date: XX-XXX-2008*
Core and Builtins
-----------------
- Issue #1819: function calls with several named parameters are now on
average 35% faster (as measured by pybench).
Library Library
------- -------

View File

@ -642,9 +642,9 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
processor's own internal branch predication has a high likelihood of processor's own internal branch predication has a high likelihood of
success, resulting in a nearly zero-overhead transition to the success, resulting in a nearly zero-overhead transition to the
next opcode. A successful prediction saves a trip through the eval-loop next opcode. A successful prediction saves a trip through the eval-loop
including its two unpredictable branches, the HAS_ARG test and the including its two unpredictable branches, the HAS_ARG test and the
switch-case. Combined with the processor's internal branch prediction, switch-case. Combined with the processor's internal branch prediction,
a successful PREDICT has the effect of making the two opcodes run as if a successful PREDICT has the effect of making the two opcodes run as if
they were a single new opcode with the bodies combined. they were a single new opcode with the bodies combined.
If collecting opcode statistics, your choices are to either keep the If collecting opcode statistics, your choices are to either keep the
@ -796,7 +796,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
an argument which depends on the situation. an argument which depends on the situation.
The global trace function is also called The global trace function is also called
whenever an exception is detected. */ whenever an exception is detected. */
if (call_trace_protected(tstate->c_tracefunc, if (call_trace_protected(tstate->c_tracefunc,
tstate->c_traceobj, tstate->c_traceobj,
f, PyTrace_CALL, Py_None)) { f, PyTrace_CALL, Py_None)) {
/* Trace function raised an error */ /* Trace function raised an error */
@ -828,10 +828,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
this wasn't always true before 2.3! PyFrame_New now sets this wasn't always true before 2.3! PyFrame_New now sets
f->f_lasti to -1 (i.e. the index *before* the first instruction) f->f_lasti to -1 (i.e. the index *before* the first instruction)
and YIELD_VALUE doesn't fiddle with f_lasti any more. So this and YIELD_VALUE doesn't fiddle with f_lasti any more. So this
does work. Promise. does work. Promise.
When the PREDICT() macros are enabled, some opcode pairs follow in When the PREDICT() macros are enabled, some opcode pairs follow in
direct succession without updating f->f_lasti. A successful direct succession without updating f->f_lasti. A successful
prediction effectively links the two codes together as if they prediction effectively links the two codes together as if they
were a single new opcode; accordingly,f->f_lasti will point to were a single new opcode; accordingly,f->f_lasti will point to
the first code in the pair (for instance, GET_ITER followed by the first code in the pair (for instance, GET_ITER followed by
@ -1678,7 +1678,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
{ {
int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8);
v = POP(); v = POP();
if (unpack_iterable(v, oparg & 0xFF, oparg >> 8, if (unpack_iterable(v, oparg & 0xFF, oparg >> 8,
stack_pointer + totalargs)) { stack_pointer + totalargs)) {
stack_pointer += totalargs; stack_pointer += totalargs;
@ -2071,7 +2071,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
because it prevents detection of a control-break in tight loops like because it prevents detection of a control-break in tight loops like
"while 1: pass". Compile with this option turned-on when you need "while 1: pass". Compile with this option turned-on when you need
the speed-up and do not need break checking inside tight loops (ones the speed-up and do not need break checking inside tight loops (ones
that contain only instructions ending with goto fast_next_opcode). that contain only instructions ending with goto fast_next_opcode).
*/ */
goto fast_next_opcode; goto fast_next_opcode;
#else #else
@ -2257,7 +2257,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
break; break;
} }
case MAKE_CLOSURE: case MAKE_CLOSURE:
case MAKE_FUNCTION: case MAKE_FUNCTION:
{ {
int posdefaults = oparg & 0xff; int posdefaults = oparg & 0xff;
@ -2267,7 +2267,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
v = POP(); /* code object */ v = POP(); /* code object */
x = PyFunction_New(v, f->f_globals); x = PyFunction_New(v, f->f_globals);
Py_DECREF(v); Py_DECREF(v);
if (x != NULL && opcode == MAKE_CLOSURE) { if (x != NULL && opcode == MAKE_CLOSURE) {
v = POP(); v = POP();
err = PyFunction_SetClosure(x, v); err = PyFunction_SetClosure(x, v);
@ -2650,6 +2650,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
} }
} }
for (i = 0; i < kwcount; i++) { for (i = 0; i < kwcount; i++) {
PyObject **co_varnames;
PyObject *keyword = kws[2*i]; PyObject *keyword = kws[2*i];
PyObject *value = kws[2*i + 1]; PyObject *value = kws[2*i + 1];
int j; int j;
@ -2659,16 +2660,25 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
co->co_name); co->co_name);
goto fail; goto fail;
} }
/* XXX slow -- speed up using dictionary? */ /* Speed hack: do raw pointer compares. As names are
normally interned this should almost always hit. */
co_varnames = PySequence_Fast_ITEMS(co->co_varnames);
for (j = 0; for (j = 0;
j < co->co_argcount + co->co_kwonlyargcount; j < co->co_argcount + co->co_kwonlyargcount;
j++) { j++) {
PyObject *nm = PyTuple_GET_ITEM( PyObject *nm = co_varnames[j];
co->co_varnames, j); if (nm == keyword)
goto kw_found;
}
/* Slow fallback, just in case */
for (j = 0;
j < co->co_argcount + co->co_kwonlyargcount;
j++) {
PyObject *nm = co_varnames[j];
int cmp = PyObject_RichCompareBool( int cmp = PyObject_RichCompareBool(
keyword, nm, Py_EQ); keyword, nm, Py_EQ);
if (cmp > 0) if (cmp > 0)
break; goto kw_found;
else if (cmp < 0) else if (cmp < 0)
goto fail; goto fail;
} }
@ -2685,20 +2695,20 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
goto fail; goto fail;
} }
PyDict_SetItem(kwdict, keyword, value); PyDict_SetItem(kwdict, keyword, value);
continue;
} }
else { kw_found:
if (GETLOCAL(j) != NULL) { if (GETLOCAL(j) != NULL) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"%U() got multiple " "%U() got multiple "
"values for keyword " "values for keyword "
"argument '%S'", "argument '%S'",
co->co_name, co->co_name,
keyword); keyword);
goto fail; goto fail;
}
Py_INCREF(value);
SETLOCAL(j, value);
} }
Py_INCREF(value);
SETLOCAL(j, value);
} }
if (co->co_kwonlyargcount > 0) { if (co->co_kwonlyargcount > 0) {
for (i = co->co_argcount; for (i = co->co_argcount;
@ -2930,7 +2940,7 @@ raise_error:
/* Iterate v argcnt times and store the results on the stack (via decreasing /* Iterate v argcnt times and store the results on the stack (via decreasing
sp). Return 1 for success, 0 if error. sp). Return 1 for success, 0 if error.
If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack
with a variable target. with a variable target.
*/ */

View File

@ -109,6 +109,64 @@ class PythonFunctionCalls(Test):
### ###
class ComplexPythonFunctionCalls(Test):
    """Benchmark calls to a Python function that mix keyword arguments,
    default values, ``*args`` unpacking and ``**kwargs`` unpacking.

    Each round executes 20 calls: four distinct call forms, repeated
    five times in an unrolled sequence.
    """

    version = 2.0
    # 4 call forms x 5 unrolled repetitions = 20 calls per loop iteration;
    # this must stay in sync with the body of the loop in test().
    operations = 4*5
    rounds = 100000

    def test(self):
        """Run the timed workload: ``self.rounds`` iterations of 20 calls."""
        # define functions
        # Three required parameters and three with defaults; the last
        # parameter is itself named ``f`` and is the value returned.
        def f(a,b,c,d=1,e=2,f=3):
            return f
        args = 1,2
        kwargs = dict(c=3,d=4,e=5)
        # do calls
        # The four call forms below are deliberately written out five times
        # rather than looped. Form 1: required args by keyword only.
        # Form 2: all six args by keyword, in reverse order. Form 3: one
        # positional, one keyword, rest via **kwargs. Form 4: *args plus
        # **kwargs.
        for i in range(self.rounds):
            f(a=i,b=i,c=i)
            f(f=i,e=i,d=i,c=2,b=i,a=3)
            f(1,b=i,**kwargs)
            f(*args,**kwargs)
            f(a=i,b=i,c=i)
            f(f=i,e=i,d=i,c=2,b=i,a=3)
            f(1,b=i,**kwargs)
            f(*args,**kwargs)
            f(a=i,b=i,c=i)
            f(f=i,e=i,d=i,c=2,b=i,a=3)
            f(1,b=i,**kwargs)
            f(*args,**kwargs)
            f(a=i,b=i,c=i)
            f(f=i,e=i,d=i,c=2,b=i,a=3)
            f(1,b=i,**kwargs)
            f(*args,**kwargs)
            f(a=i,b=i,c=i)
            f(f=i,e=i,d=i,c=2,b=i,a=3)
            f(1,b=i,**kwargs)
            f(*args,**kwargs)

    def calibrate(self):
        """Repeat the setup and the empty loop of test() without any calls.

        NOTE(review): presumably the harness uses this to measure and
        subtract setup/loop overhead -- confirm against the Test base class.
        """
        # define functions
        def f(a,b,c,d=1,e=2,f=3):
            return f
        args = 1,2
        kwargs = dict(c=3,d=4,e=5)
        # do calls
        # Same loop as test(), with the 20 calls removed.
        for i in range(self.rounds):
            pass
###
class BuiltinFunctionCalls(Test): class BuiltinFunctionCalls(Test):
version = 2.0 version = 2.0