From 9a2310d1b6b80bae072892de04464d23b1e88881 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Fri, 25 Jul 2008 22:39:39 +0000
Subject: [PATCH] Merged revisions 65240-65242 via svnmerge from
 svn+ssh://pythondev@svn.python.org/python/trunk

........
  r65240 | antoine.pitrou | 2008-07-26 00:02:07 +0200 (sam., 26 juil. 2008) | 3 lines

  add a pybench test for complex function calls (part of #1819)
........
  r65241 | antoine.pitrou | 2008-07-26 00:13:52 +0200 (sam., 26 juil. 2008) | 4 lines

  Raymond's patch for #1819: speedup function calls with named parameters
  (35% faster according to pybench)
........
  r65242 | antoine.pitrou | 2008-07-26 00:22:08 +0200 (sam., 26 juil. 2008) | 3 lines

  add a NEWS entry
........
---
 Misc/NEWS              |  6 ++++
 Python/ceval.c         | 62 ++++++++++++++++++++++++------------------
 Tools/pybench/Calls.py | 58 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 76ecbccf81f..0ef8b05ceed 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,12 @@ What's new in Python 3.0b3?
 
 *Release date: XX-XXX-2008*
 
+Core and Builtins
+-----------------
+
+- Issue #1819: function calls with several named parameters are now on
+  average 35% faster (as measured by pybench).
+
 Library
 -------
 
diff --git a/Python/ceval.c b/Python/ceval.c
index 9601de57711..af7a67a6d7a 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -642,9 +642,9 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 	processor's own internal branch predication has a high likelihood of
 	success, resulting in a nearly zero-overhead transition to the
 	next opcode.  A successful prediction saves a trip through the eval-loop
-	including its two unpredictable branches, the HAS_ARG test and the 
+	including its two unpredictable branches, the HAS_ARG test and the
 	switch-case.  Combined with the processor's internal branch prediction,
-	a successful PREDICT has the effect of making the two opcodes run as if 
+	a successful PREDICT has the effect of making the two opcodes run as if
 	they were a single new opcode with the bodies combined.
 
     If collecting opcode statistics, your choices are to either keep the
@@ -796,7 +796,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 			   an argument which depends on the situation.
 			   The global trace function is also called
 			   whenever an exception is detected. */
-			if (call_trace_protected(tstate->c_tracefunc, 
+			if (call_trace_protected(tstate->c_tracefunc,
 						 tstate->c_traceobj,
 						 f, PyTrace_CALL, Py_None)) {
 				/* Trace function raised an error */
@@ -828,10 +828,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 	   this wasn't always true before 2.3!  PyFrame_New now sets
 	   f->f_lasti to -1 (i.e. the index *before* the first instruction)
 	   and YIELD_VALUE doesn't fiddle with f_lasti any more.  So this
-	   does work.  Promise. 
+	   does work.  Promise.
 
 	   When the PREDICT() macros are enabled, some opcode pairs follow in
-           direct succession without updating f->f_lasti.  A successful 
+           direct succession without updating f->f_lasti.  A successful
            prediction effectively links the two codes together as if they
            were a single new opcode; accordingly,f->f_lasti will point to
            the first code in the pair (for instance, GET_ITER followed by
@@ -1678,7 +1678,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 		{
 			int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8);
 			v = POP();
-			
+
 			if (unpack_iterable(v, oparg & 0xFF, oparg >> 8,
 					    stack_pointer + totalargs)) {
 				stack_pointer += totalargs;
@@ -2071,7 +2071,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
                            because it prevents detection of a control-break in tight loops like
                            "while 1: pass".  Compile with this option turned-on when you need
                            the speed-up and do not need break checking inside tight loops (ones
-                           that contain only instructions ending with goto fast_next_opcode). 
+                           that contain only instructions ending with goto fast_next_opcode).
                         */
 			goto fast_next_opcode;
 #else
@@ -2257,7 +2257,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 		    break;
 		}
 
-		case MAKE_CLOSURE:		
+		case MAKE_CLOSURE:
 		case MAKE_FUNCTION:
 		{
 		    int posdefaults = oparg & 0xff;
@@ -2267,7 +2267,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 			v = POP(); /* code object */
 			x = PyFunction_New(v, f->f_globals);
 			Py_DECREF(v);
-			
+
 			if (x != NULL && opcode == MAKE_CLOSURE) {
 				v = POP();
 				err = PyFunction_SetClosure(x, v);
@@ -2650,6 +2650,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
 			}
 		}
 		for (i = 0; i < kwcount; i++) {
+			PyObject **co_varnames;
 			PyObject *keyword = kws[2*i];
 			PyObject *value = kws[2*i + 1];
 			int j;
@@ -2659,16 +2660,25 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
 				    co->co_name);
 				goto fail;
 			}
-			/* XXX slow -- speed up using dictionary? */
+			/* Speed hack: do raw pointer compares. As names are
+			   normally interned this should almost always hit. */
+			co_varnames = PySequence_Fast_ITEMS(co->co_varnames);
 			for (j = 0;
 			     j < co->co_argcount + co->co_kwonlyargcount;
 			     j++) {
-				PyObject *nm = PyTuple_GET_ITEM(
-					co->co_varnames, j);
+				PyObject *nm = co_varnames[j];
+				if (nm == keyword)
+					goto kw_found;
+			}
+			/* Slow fallback, just in case */
+			for (j = 0;
+			     j < co->co_argcount + co->co_kwonlyargcount;
+			     j++) {
+				PyObject *nm = co_varnames[j];
 				int cmp = PyObject_RichCompareBool(
 					keyword, nm, Py_EQ);
 				if (cmp > 0)
-					break;
+					goto kw_found;
 				else if (cmp < 0)
 					goto fail;
 			}
@@ -2685,20 +2695,20 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
 					goto fail;
 				}
 				PyDict_SetItem(kwdict, keyword, value);
+				continue;
 			}
-			else {
-				if (GETLOCAL(j) != NULL) {
-					PyErr_Format(PyExc_TypeError,
-					     "%U() got multiple "
-					     "values for keyword "
-					     "argument '%S'",
-					     co->co_name,
-					     keyword);
-					goto fail;
-				}
-				Py_INCREF(value);
-				SETLOCAL(j, value);
+kw_found:
+			if (GETLOCAL(j) != NULL) {
+				PyErr_Format(PyExc_TypeError,
+					 "%U() got multiple "
+					 "values for keyword "
+					 "argument '%S'",
+					 co->co_name,
+					 keyword);
+				goto fail;
 			}
+			Py_INCREF(value);
+			SETLOCAL(j, value);
 		}
 		if (co->co_kwonlyargcount > 0) {
 			for (i = co->co_argcount;
@@ -2930,7 +2940,7 @@ raise_error:
 
 /* Iterate v argcnt times and store the results on the stack (via decreasing
    sp).  Return 1 for success, 0 if error.
-   
+
    If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack
    with a variable target.
 */
diff --git a/Tools/pybench/Calls.py b/Tools/pybench/Calls.py
index cfe07152639..7c11867eae3 100644
--- a/Tools/pybench/Calls.py
+++ b/Tools/pybench/Calls.py
@@ -109,6 +109,64 @@ class PythonFunctionCalls(Test):
 
 ###
 
+class ComplexPythonFunctionCalls(Test):  # keyword, *args and **kwargs calls
+
+    version = 2.0
+    operations = 4*5  # 4 distinct call forms, each written 5 times per round
+    rounds = 100000
+
+    def test(self):
+
+        # callee: 3 required + 3 defaulted parameters; returns its 'f' argument
+        def f(a,b,c,d=1,e=2,f=3):
+            return f
+
+        args = 1,2
+        kwargs = dict(c=3,d=4,e=5)
+
+        # do calls: the same group of 4 call forms is repeated 5 times below
+        for i in range(self.rounds):
+            f(a=i,b=i,c=i)  # required parameters only, all by keyword
+            f(f=i,e=i,d=i,c=2,b=i,a=3)  # all six by keyword, reverse order
+            f(1,b=i,**kwargs)  # positional + keyword + ** unpacking
+            f(*args,**kwargs)  # * and ** unpacking only
+
+            f(a=i,b=i,c=i)
+            f(f=i,e=i,d=i,c=2,b=i,a=3)
+            f(1,b=i,**kwargs)
+            f(*args,**kwargs)
+
+            f(a=i,b=i,c=i)
+            f(f=i,e=i,d=i,c=2,b=i,a=3)
+            f(1,b=i,**kwargs)
+            f(*args,**kwargs)
+
+            f(a=i,b=i,c=i)
+            f(f=i,e=i,d=i,c=2,b=i,a=3)
+            f(1,b=i,**kwargs)
+            f(*args,**kwargs)
+
+            f(a=i,b=i,c=i)
+            f(f=i,e=i,d=i,c=2,b=i,a=3)
+            f(1,b=i,**kwargs)
+            f(*args,**kwargs)
+
+
+    def calibrate(self):
+
+        # same definitions as in test(); f, args and kwargs are not used here
+        def f(a,b,c,d=1,e=2,f=3):
+            return f
+
+        args = 1,2
+        kwargs = dict(c=3,d=4,e=5)
+
+        # same loop as in test() but with an empty body: no calls are made
+        for i in range(self.rounds):
+            pass
+
+###
+
 class BuiltinFunctionCalls(Test):
 
     version = 2.0