From 57640f5c575ce284b041595c238f8ac615f1618d Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Fri, 26 May 2006 11:54:04 +0000 Subject: [PATCH] needforspeed: added PY_LOCAL_AGGRESSIVE macro to enable "aggressive" LOCAL inlining; also added some missing whitespace --- Include/pyport.h | 8 ++++++++ Python/ceval.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/Include/pyport.h b/Include/pyport.h index 9d46470eb1e..07fdf284e54 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -141,6 +141,10 @@ typedef Py_intptr_t Py_ssize_t; * convention for functions that are local to a given module. It also enables * inlining, where suitable. * + * If PY_LOCAL_AGGRESSIVE is defined before Python.h is included, a more + * "aggressive" inlining is enabled. This may lead to code bloat, and may + * slow things down for those reasons. Use with care. + * * NOTE: You can only use this for functions that are entirely local to a * module; functions that are exported via method tables, callbacks, etc, * should keep using static. @@ -149,6 +153,10 @@ typedef Py_intptr_t Py_ssize_t; #undef USE_INLINE /* XXX - set via configure? */ #if defined(_MSC_VER) +#if defined(PY_LOCAL_AGGRESSIVE) +/* enable more aggressive optimization for visual studio */ +#pragma optimize("agtw", on) +#endif /* ignore warnings if the compiler decides not to inline a function */ #pragma warning(disable: 4710) /* fastest possible local call under MSVC */ diff --git a/Python/ceval.c b/Python/ceval.c index 53a263aa162..da27fff9e27 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -6,6 +6,9 @@ XXX document it! 
*/ +/* enable more aggressive local inlining (platform dependent) */ +#define PY_LOCAL_AGGRESSIVE + #include "Python.h" #include "code.h" @@ -16,6 +19,11 @@ #include <ctype.h> +#if defined(_MSC_VER) +/* enable more aggressive optimization for visual studio */ +#pragma optimize("agtw", on) +#endif + #ifndef WITH_TSC #define READ_TIMESTAMP(var) @@ -83,16 +91,16 @@ typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); /* Forward declarations */ #ifdef WITH_TSC -Py_LOCAL(PyObject *)call_function(PyObject ***, int, uint64*, uint64*); +Py_LOCAL(PyObject *) call_function(PyObject ***, int, uint64*, uint64*); #else -Py_LOCAL(PyObject *)call_function(PyObject ***, int); +Py_LOCAL(PyObject *) call_function(PyObject ***, int); #endif -Py_LOCAL(PyObject *)fast_function(PyObject *, PyObject ***, int, int, int); -Py_LOCAL(PyObject *)do_call(PyObject *, PyObject ***, int, int); -Py_LOCAL(PyObject *)ext_do_call(PyObject *, PyObject ***, int, int, int); -Py_LOCAL(PyObject *)update_keyword_args(PyObject *, int, PyObject ***,PyObject *); -Py_LOCAL(PyObject *)update_star_args(int, int, PyObject *, PyObject ***); -Py_LOCAL(PyObject *)load_args(PyObject ***, int); +Py_LOCAL(PyObject *) fast_function(PyObject *, PyObject ***, int, int, int); +Py_LOCAL(PyObject *) do_call(PyObject *, PyObject ***, int, int); +Py_LOCAL(PyObject *) ext_do_call(PyObject *, PyObject ***, int, int, int); +Py_LOCAL(PyObject *) update_keyword_args(PyObject *, int, PyObject ***,PyObject *); +Py_LOCAL(PyObject *) update_star_args(int, int, PyObject *, PyObject ***); +Py_LOCAL(PyObject *) load_args(PyObject ***, int); #define CALL_FLAG_VAR 1 #define CALL_FLAG_KW 2 @@ -108,19 +116,19 @@ Py_LOCAL(void) call_exc_trace(Py_tracefunc, PyObject *, PyFrameObject *); Py_LOCAL(int) maybe_call_line_trace(Py_tracefunc, PyObject *, PyFrameObject *, int *, int *, int *); -Py_LOCAL(PyObject *)apply_slice(PyObject *, PyObject *, PyObject *); +Py_LOCAL(PyObject *) apply_slice(PyObject *, PyObject *, PyObject *); Py_LOCAL(int) 
assign_slice(PyObject *, PyObject *, PyObject *, PyObject *); -Py_LOCAL(PyObject *)cmp_outcome(int, PyObject *, PyObject *); -Py_LOCAL(PyObject *)import_from(PyObject *, PyObject *); +Py_LOCAL(PyObject *) cmp_outcome(int, PyObject *, PyObject *); +Py_LOCAL(PyObject *) import_from(PyObject *, PyObject *); Py_LOCAL(int) import_all_from(PyObject *, PyObject *); -Py_LOCAL(PyObject *)build_class(PyObject *, PyObject *, PyObject *); +Py_LOCAL(PyObject *) build_class(PyObject *, PyObject *, PyObject *); Py_LOCAL(int) exec_statement(PyFrameObject *, PyObject *, PyObject *, PyObject *); Py_LOCAL(void) set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *); Py_LOCAL(void) reset_exc_info(PyThreadState *); Py_LOCAL(void) format_exc_check_arg(PyObject *, char *, PyObject *); -Py_LOCAL(PyObject *)string_concatenate(PyObject *, PyObject *, +Py_LOCAL(PyObject *) string_concatenate(PyObject *, PyObject *, PyFrameObject *, unsigned char *); #define NAME_ERROR_MSG \ @@ -476,7 +484,7 @@ enum why_code { WHY_YIELD = 0x0040 /* 'yield' operator */ }; -static enum why_code do_raise(PyObject *, PyObject *, PyObject *); +Py_LOCAL(enum why_code) do_raise(PyObject *, PyObject *, PyObject *); Py_LOCAL(int) unpack_iterable(PyObject *, int, PyObject **); /* for manipulating the thread switch and periodic "stuff" - used to be @@ -2971,7 +2979,7 @@ reset_exc_info(PyThreadState *tstate) /* Logic for the raise statement (too complicated for inlining). This *consumes* a reference count to each of its arguments. */ -static enum why_code +Py_LOCAL(enum why_code) do_raise(PyObject *type, PyObject *value, PyObject *tb) { if (type == NULL) {