remove ceval timestamp support

This commit is contained in:
Benjamin Peterson 2016-09-09 14:57:58 -07:00
parent ad46443e9d
commit 4fd64b9a6a
9 changed files with 1 additions and 268 deletions

View File

@ -1104,18 +1104,6 @@ always available.
thus may not be available in all Python implementations.
.. function:: settscdump(on_flag)
Activate dumping of VM measurements using the Pentium timestamp counter, if
*on_flag* is true. Deactivate these dumps if *on_flag* is false. The function is
available only if Python was compiled with ``--with-tsc``. To understand
the output of this dump, read :file:`Python/ceval.c` in the Python sources.
.. impl-detail::
This function is intimately bound to CPython implementation details and
thus not likely to be implemented elsewhere.
.. function:: set_coroutine_wrapper(wrapper)
Allows intercepting creation of :term:`coroutine` objects (only ones that

View File

@ -43,9 +43,6 @@ typedef struct _is {
#ifdef HAVE_DLOPEN
int dlopenflags;
#endif
#ifdef WITH_TSC
int tscdump;
#endif
PyObject *builtins_copy;
PyObject *import_func;

View File

@ -234,29 +234,3 @@ When this symbol is defined, the ceval mainloop and helper functions count the
number of function calls made. It keeps detailed statistics about what kind of
object was called and whether the call hit any of the special fast paths in the
code.
WITH_TSC
--------
Very low-level profiling of the interpreter. When enabled, the sys module grows
a new function:
settscdump(bool)
If true, tell the Python interpreter to dump VM measurements to stderr. If
false, turn off dump. The measurements are based on the processor's
time-stamp counter.
This build option requires a small amount of platform specific code. Currently
this code is present for linux/x86 and any PowerPC platform that uses GCC
(i.e. OS X and linux/ppc).
On the PowerPC the rate at which the time base register is incremented is not
defined by the architecture specification, so you'll need to find the manual for
your specific processor. For the 750CX, 750CXe and 750FX (all sold as the G3)
we find:
The time base counter is clocked at a frequency that is one-fourth that of
the bus clock.
This build is enabled by the --with-tsc flag to configure.

View File

@ -20,82 +20,6 @@
#include <ctype.h>
#ifndef WITH_TSC
#define READ_TIMESTAMP(var)
#else
typedef unsigned long long uint64;
/* PowerPC support.
"__ppc__" appears to be the preprocessor definition to detect on OS X, whereas
"__powerpc__" appears to be the correct one for Linux with GCC
*/
#if defined(__ppc__) || defined (__powerpc__)
#define READ_TIMESTAMP(var) ppc_getcounter(&var)
/* Read the PowerPC time base and store it in *v as a single 64-bit value
   (upper half from mftbu in bits 63..32, lower half from mftb in bits 31..0).

   The result is assembled with shifts instead of storing through a
   `long *` alias of v: the aliasing stores depended on `long` being 32 bits
   and on big-endian layout, and wrote past the end of *v when `long` is
   64 bits wide. */
static void
ppc_getcounter(uint64 *v)
{
    unsigned long tbu, tb, tbu2;

    /* The upper half is sampled before and after the lower half.  If the
       two samples differ, the lower half wrapped in between and the three
       reads do not belong together, so read again. */
    do {
        asm volatile ("mftbu %0" : "=r" (tbu) );
        asm volatile ("mftb %0" : "=r" (tb) );
        asm volatile ("mftbu %0" : "=r" (tbu2));
    } while (__builtin_expect(tbu != tbu2, 0));

    /* Mask the lower read to 32 bits so that a wider register value
       cannot disturb the upper half of the result. */
    *v = ((uint64)tbu << 32) | ((uint64)tb & 0xffffffffULL);
}
#elif defined(__i386__)
/* this is for linux/x86 (and probably any other GCC/x86 combo) */
#define READ_TIMESTAMP(val) \
__asm__ __volatile__("rdtsc" : "=A" (val))
#elif defined(__x86_64__)
/* for gcc/x86_64, the "A" constraint in DI mode means *either* rax *or* rdx;
not edx:eax as it does for i386. Since rdtsc puts its result in edx:eax
even in 64-bit mode, we need to use "a" and "d" for the lower and upper
32-bit pieces of the result. */
/* Store the 64-bit timestamp counter in val.  The temporaries carry
   unlikely names so that an argument called `h` or `l` is not captured
   by the macro's own locals. */
#define READ_TIMESTAMP(val) do { \
    unsigned int _ts_hi, _ts_lo; \
    __asm__ __volatile__("rdtsc" : "=a" (_ts_lo), "=d" (_ts_hi)); \
    (val) = ((uint64)_ts_lo) | (((uint64)_ts_hi) << 32); \
} while(0)
#else
#error "Don't know how to implement timestamp counter for this architecture"
#endif
/* Write one line of timestamp statistics for an executed opcode to stderr.

   Does nothing unless the current interpreter's `tscdump` flag is set.

   opcode        -- opcode number, printed as a 3-digit decimal
   ticked        -- printed verbatim as the `t=` field
   inst0, inst1  -- timestamps bracketing the opcode's execution
   loop0, loop1  -- timestamps bracketing the main-loop iteration
   intr0, intr1  -- timestamps bracketing an interruption whose duration is
                    subtracted from both the inst and the loop interval */
void dump_tsc(int opcode, int ticked, uint64 inst0, uint64 inst1,
              uint64 loop0, uint64 loop1, uint64 intr0, uint64 intr1)
{
    uint64 intr, inst, loop;
    PyThreadState *tstate = PyThreadState_Get();

    if (!tstate->interp->tscdump)
        return;

    intr = intr1 - intr0;
    inst = inst1 - inst0 - intr;
    loop = loop1 - loop0 - intr;

    /* %lld expects a signed long long; cast explicitly so the argument
       types match the format.  Keeping the signed conversion means a
       delta that wrapped below zero still prints as a negative number. */
    fprintf(stderr, "opcode=%03d t=%d inst=%06lld loop=%06lld\n",
            opcode, ticked, (long long)inst, (long long)loop);
}
#endif
/* Turn this on if your compiler chokes on the big switch: */
/* #define CASE_TOO_BIG 1 */
@ -108,11 +32,7 @@ void dump_tsc(int opcode, int ticked, uint64 inst0, uint64 inst1,
typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
/* Forward declarations */
#ifdef WITH_TSC
static PyObject * call_function(PyObject ***, Py_ssize_t, PyObject *, uint64*, uint64*);
#else
static PyObject * call_function(PyObject ***, Py_ssize_t, PyObject *);
#endif
static PyObject * fast_function(PyObject *, PyObject **, Py_ssize_t, PyObject *);
static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);
@ -938,46 +858,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
#define GETITEM(v, i) PyTuple_GetItem((v), (i))
#endif
#ifdef WITH_TSC
/* Use Pentium timestamp counter to mark certain events:
inst0 -- beginning of switch statement for opcode dispatch
inst1 -- end of switch statement (may be skipped)
loop0 -- the top of the mainloop
loop1 -- place where control returns again to top of mainloop
(may be skipped)
intr0 -- beginning of long interruption
intr1 -- end of long interruption
Many opcodes call out to helper C functions. In some cases, the
time in those functions should be counted towards the time for the
opcode, but not in all cases. For example, a CALL_FUNCTION opcode
calls another Python function; there's no point in charging all the
bytecode executed by the called function to the caller.
It's hard to make a useful judgement statically. In the presence
of operator overloading, it's impossible to tell if a call will
execute new Python code or not.
It's a case-by-case judgement. I'll use intr1 for the following
cases:
IMPORT_STAR
IMPORT_FROM
CALL_FUNCTION (and friends)
*/
uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0;
int ticked = 0;
READ_TIMESTAMP(inst0);
READ_TIMESTAMP(inst1);
READ_TIMESTAMP(loop0);
READ_TIMESTAMP(loop1);
/* shut up the compiler */
opcode = 0;
#endif
/* Code access macros */
#ifdef WORDS_BIGENDIAN
@ -1225,23 +1105,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
#endif
for (;;) {
#ifdef WITH_TSC
if (inst1 == 0) {
/* Almost surely, the opcode executed a break
or a continue, preventing inst1 from being set
on the way out of the loop.
*/
READ_TIMESTAMP(inst1);
loop1 = inst1;
}
dump_tsc(opcode, ticked, inst0, inst1, loop0, loop1,
intr0, intr1);
ticked = 0;
inst1 = 0;
intr0 = 0;
intr1 = 0;
READ_TIMESTAMP(loop0);
#endif
assert(stack_pointer >= f->f_valuestack); /* else underflow */
assert(STACK_LEVEL() <= co->co_stacksize); /* else overflow */
assert(!PyErr_Occurred());
@ -1260,9 +1123,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
a try: finally: block uninterruptible. */
goto fast_next_opcode;
}
#ifdef WITH_TSC
ticked = 1;
#endif
if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) {
if (Py_MakePendingCalls() < 0)
goto error;
@ -3403,11 +3263,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PyObject **sp, *res;
PCALL(PCALL_ALL);
sp = stack_pointer;
#ifdef WITH_TSC
res = call_function(&sp, oparg, NULL, &intr0, &intr1);
#else
res = call_function(&sp, oparg, NULL);
#endif
stack_pointer = sp;
PUSH(res);
if (res == NULL) {
@ -3423,11 +3279,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg);
PCALL(PCALL_ALL);
sp = stack_pointer;
#ifdef WITH_TSC
res = call_function(&sp, oparg, names, &intr0, &intr1);
#else
res = call_function(&sp, oparg, names);
#endif
stack_pointer = sp;
PUSH(res);
Py_DECREF(names);
@ -4922,11 +4774,7 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \
}
static PyObject *
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames
#ifdef WITH_TSC
, uint64* pintr0, uint64* pintr1
#endif
)
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
{
PyObject **pfunc = (*pp_stack) - oparg - 1;
PyObject *func = *pfunc;

View File

@ -98,9 +98,6 @@ PyInterpreterState_New(void)
#else
interp->dlopenflags = RTLD_LAZY;
#endif
#endif
#ifdef WITH_TSC
interp->tscdump = 0;
#endif
HEAD_LOCK();

View File

@ -609,33 +609,6 @@ PyDoc_STRVAR(getswitchinterval_doc,
#endif /* WITH_THREAD */
#ifdef WITH_TSC
/* sys.settscdump(on_flag): switch the timestamp-counter dump on or off for
   the current interpreter.

   args must hold a single integer ("i" format).  Any non-zero value sets
   the interpreter's `tscdump` flag to 1, zero clears it.

   Returns a new reference to None, or NULL if argument parsing failed. */
static PyObject *
sys_settscdump(PyObject *self, PyObject *args)
{
    /* Not named `bool`: that identifier is a macro once <stdbool.h> is
       included and a keyword in C23. */
    int on_flag;
    PyThreadState *tstate = PyThreadState_Get();

    if (!PyArg_ParseTuple(args, "i:settscdump", &on_flag))
        return NULL;

    /* Normalize to exactly 0 or 1. */
    tstate->interp->tscdump = (on_flag != 0);

    Py_INCREF(Py_None);
    return Py_None;
}
PyDoc_STRVAR(settscdump_doc,
"settscdump(bool)\n\
\n\
If true, tell the Python interpreter to dump VM measurements to\n\
stderr. If false, turn off dump. The measurements are based on the\n\
processor's time-stamp counter."
);
#endif /* TSC */
static PyObject *
sys_setrecursionlimit(PyObject *self, PyObject *args)
{
@ -1410,9 +1383,6 @@ static PyMethodDef sys_methods[] = {
{"getprofile", sys_getprofile, METH_NOARGS, getprofile_doc},
{"setrecursionlimit", sys_setrecursionlimit, METH_VARARGS,
setrecursionlimit_doc},
#ifdef WITH_TSC
{"settscdump", sys_settscdump, METH_VARARGS, settscdump_doc},
#endif
{"settrace", sys_settrace, METH_O, settrace_doc},
{"gettrace", sys_gettrace, METH_NOARGS, gettrace_doc},
{"call_tracing", sys_call_tracing, METH_VARARGS, call_tracing_doc},

25
configure vendored
View File

@ -830,7 +830,6 @@ with_threads
with_thread
enable_ipv6
with_doc_strings
with_tsc
with_pymalloc
with_valgrind
with_fpectl
@ -1534,7 +1533,6 @@ Optional Packages:
--with(out)-thread[=DIRECTORY]
deprecated; use --with(out)-threads
--with(out)-doc-strings disable/enable documentation strings
--with(out)-tsc enable/disable timestamp counter profile
--with(out)-pymalloc disable/enable specialized mallocs
--with-valgrind Enable Valgrind support
--with-fpectl enable SIGFPE catching
@ -10798,29 +10796,6 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_doc_strings" >&5
$as_echo "$with_doc_strings" >&6; }
# Check if eval loop should use timestamp counter profiling
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-tsc" >&5
$as_echo_n "checking for --with-tsc... " >&6; }
# Check whether --with-tsc was given.
if test "${with_tsc+set}" = set; then :
withval=$with_tsc;
if test "$withval" != no
then
$as_echo "#define WITH_TSC 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
# Check for Python-specific malloc support
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-pymalloc" >&5
$as_echo_n "checking for --with-pymalloc... " >&6; }

View File

@ -3198,19 +3198,6 @@ then
fi
AC_MSG_RESULT($with_doc_strings)
# Check if eval loop should use timestamp counter profiling
AC_MSG_CHECKING(for --with-tsc)
AC_ARG_WITH(tsc,
AS_HELP_STRING([--with(out)-tsc],[enable/disable timestamp counter profile]),[
if test "$withval" != no
then
AC_DEFINE(WITH_TSC, 1,
[Define to profile with the Pentium timestamp counter])
AC_MSG_RESULT(yes)
else AC_MSG_RESULT(no)
fi],
[AC_MSG_RESULT(no)])
# Check for Python-specific malloc support
AC_MSG_CHECKING(for --with-pymalloc)
AC_ARG_WITH(pymalloc,

View File

@ -1388,9 +1388,6 @@
/* Define if you want to compile in rudimentary thread support */
#undef WITH_THREAD
/* Define to profile with the Pentium timestamp counter */
#undef WITH_TSC
/* Define if you want pymalloc to be disabled when running under valgrind */
#undef WITH_VALGRIND