From 7abf8d4066e9b4dd21f9a498427ac1ec8914c0ab Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 18 Apr 2009 20:17:52 +0000 Subject: [PATCH] The SSE2 detection and enabling could potentially cause problems for binary distributions of Python in situations where the build machine has SSE2 but the target machine does not. Therefore, don't enable SSE2 instructions automatically on x86. --- Include/pyport.h | 2 +- Misc/NEWS | 7 -- Objects/floatobject.c | 14 +++- Python/pymath.c | 6 +- configure | 187 ++++-------------------------------------- configure.in | 107 +++++------------------- pyconfig.h.in | 4 - 7 files changed, 47 insertions(+), 280 deletions(-) diff --git a/Include/pyport.h b/Include/pyport.h index 9b8b1e7f82d..c1ed4a69012 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -469,7 +469,7 @@ extern "C" { the FPU is using 53-bit precision. Here are macros that force this. See Python/pystrtod.c for an example of their use. */ -#ifdef USING_X87_FPU +#ifdef HAVE_GCC_ASM_FOR_X87 #define _Py_SET_53BIT_PRECISION_HEADER \ unsigned short old_387controlword, new_387controlword #define _Py_SET_53BIT_PRECISION_START \ diff --git a/Misc/NEWS b/Misc/NEWS index 295ebd01390..e230d82ffc5 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -31,13 +31,6 @@ Core and Builtins value: str(1e11 + 0.5). (This minor issue has existed in 2.x for a long time.) -- On x86, SSE2 instructions for floating-point are automatically - detected and, where possible, enabled on platforms using the gcc - compiler. As a consequence, some arithmetic operations may have - different (more accurate!) results on some platforms, and - cross-platform consistency of Python arithmetic should be improved. - This applies particularly to Linux/x86. - - Issue #1580: On most platforms, use a 'short' float repr: for a finite float x, repr(x) now outputs a string based on the shortest sequence of decimal digits that rounds to x. 
Previous behaviour was diff --git a/Objects/floatobject.c b/Objects/floatobject.c index b7b52207e36..f78f7df88a9 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -15,6 +15,11 @@ #define MAX(x, y) ((x) < (y) ? (y) : (x)) #define MIN(x, y) ((x) < (y) ? (x) : (y)) +/* ascii character tests (as opposed to locale tests) */ +#define ISSPACE(c) ((c) == ' ' || (c) == '\f' || (c) == '\n' || \ + (c) == '\r' || (c) == '\t' || (c) == '\v') +#define ISDIGIT(c) ((c) >= '0' && (c) <= '9') + #ifdef HAVE_IEEEFP_H #include <ieeefp.h> #endif @@ -188,7 +193,7 @@ PyFloat_FromString(PyObject *v) } last = s + len; - while (*s && isspace(Py_CHARMASK(*s))) + while (*s && ISSPACE(Py_CHARMASK(*s))) s++; if (*s == '\0') { PyErr_SetString(PyExc_ValueError, "empty string for float()"); @@ -245,7 +250,7 @@ PyFloat_FromString(PyObject *v) } /* Since end != s, the platform made *some* kind of sense out of the input. Trust it. */ - while (*end && isspace(Py_CHARMASK(*end))) + while (*end && ISSPACE(Py_CHARMASK(*end))) end++; if (*end != '\0') { PyOS_snprintf(buffer, sizeof(buffer), @@ -1275,7 +1280,7 @@ float_fromhex(PyObject *cls, PyObject *arg) ********************/ /* leading whitespace and optional sign */ - while (isspace(Py_CHARMASK(*s))) + while (ISSPACE(Py_CHARMASK(*s))) s++; if (*s == '-') { s++; @@ -1299,6 +1304,7 @@ float_fromhex(PyObject *cls, PyObject *arg) s_store = s; if (*s == '0') { s++; + if (*s == 'x' || *s == 'X') if (tolower(*s) == (int)'x') s++; else @@ -1345,7 +1351,7 @@ float_fromhex(PyObject *cls, PyObject *arg) exp = 0; /* optional trailing whitespace leading to the end of the string */ - while (isspace(Py_CHARMASK(*s))) + while (ISSPACE(Py_CHARMASK(*s))) s++; if (s != s_end) goto parse_error; diff --git a/Python/pymath.c b/Python/pymath.c index a5c0dd94148..db2920ce209 100644 --- a/Python/pymath.c +++ b/Python/pymath.c @@ -13,8 +13,7 @@ double _Py_force_double(double x) } #endif -#ifdef USING_X87_FPU -# ifdef HAVE_GCC_ASM_FOR_X87 +#ifdef HAVE_GCC_ASM_FOR_X87 /* 
inline assembly for getting and setting the 387 FPU control word on gcc/x86 */ @@ -29,9 +28,6 @@ void _Py_set_387controlword(unsigned short cw) { __asm__ __volatile__ ("fldcw %0" : : "m" (cw)); } -# else -# error "Unable to get and set x87 control word" -# endif #endif diff --git a/configure b/configure index de5c3541fa0..e988b85f3eb 100755 --- a/configure +++ b/configure @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Revision: 71663 . +# From configure.in Revision: 71704 . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.61 for python 3.1. # @@ -21827,174 +21827,21 @@ _ACEOF fi -# David Gay's code in Python/dtoa.c requires that the FPU uses 53-bit +# The short float repr introduced in Python 3.1 requires the +# correctly-rounded string <-> double conversion functions in +# Python/dtoa.c, which in turn require that the FPU uses 53-bit # rounding; this is a particular problem on x86, where the x87 FPU has # a default rounding precision of 64 bits. For gcc/x86, we try to fix -# this by: -# -# (1) using the SSE2 instruction set when available (it usually is -# on modern machines) -# (2) using inline assembler to get and set the x87 FPU control word -# otherwise. -# -# On AMD64 (aka x86-64), gcc automatically enables use of SSE2 -# instructions, so we don't bother trying to detect. +# this by using inline assembler to get and set the x87 FPU control +# word. if test "$GCC" = yes && test -n "`$CC -dM -E - &5 -echo $ECHO_N "checking whether SSE2 instructions are already enabled for math... $ECHO_C" >&6; } - if test -n "`$CC -dM -E - &5 -echo "${ECHO_T}$ac_sse2_enabled" >&6; } - - # if we're not using SSE2 already, we need to either enable it - # (when available), or use inline assembler to get and set the - # 387 control word. - if test $ac_sse2_enabled = no - then - # Check cpuid for SSE2 availability. Bits 25 and 26 of edx tell - # us about SSE and SSE2 respectively. 
- { echo "$as_me:$LINENO: checking whether SSE2 instructions are available on this CPU" >&5 -echo $ECHO_N "checking whether SSE2 instructions are available on this CPU... $ECHO_C" >&6; } - if test "$cross_compiling" = yes; then - ac_cv_cpu_has_sse2=no -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - - int main() { - unsigned int ax, bx, cx, dx, func; - func = 1U; - __asm__ __volatile__ ( - "pushl %%ebx\n\t" /* don't clobber ebx */ - "cpuid\n\t" - "movl %%ebx, %1\n\t" - "popl %%ebx" - : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) - : "a" (func) - : "cc" ); - if ((dx & (1U << 25)) && (dx & (1U << 26))) - return 0; - else - return 1; - } - -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_cpu_has_sse2=yes -else - echo "$as_me: program exited with status $ac_status" >&5 -echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ac_cv_cpu_has_sse2=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - - { echo "$as_me:$LINENO: result: $ac_cv_cpu_has_sse2" >&5 -echo "${ECHO_T}$ac_cv_cpu_has_sse2" >&6; } - - # determine whether gcc accepts options to turn on SSE2 - { echo "$as_me:$LINENO: checking whether $CC accepts -msse2 -mfpmath=sse" >&5 -echo $ECHO_N "checking whether $CC accepts -msse2 -mfpmath=sse... $ECHO_C" >&6; } - ac_save_cc="$CC" - CC="$CC -msse2 -mfpmath=sse" - if test "$cross_compiling" = yes; then - ac_cv_msse2_ok=no -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -int main() { return 0; } -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_msse2_ok=yes -else - echo "$as_me: program exited with status $ac_status" >&5 -echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ac_cv_msse2_ok=no -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - - CC="$ac_save_cc" - { echo "$as_me:$LINENO: result: $ac_cv_msse2_ok" >&5 -echo "${ECHO_T}$ac_cv_msse2_ok" >&6; } - - if test $ac_cv_cpu_has_sse2 = yes && test $ac_cv_msse2_ok = yes - then - BASECFLAGS="$BASECFLAGS -msse2 -mfpmath=sse" - else - # SSE2 doesn't appear to be available. Check that it's okay - # to use gcc inline assembler to get and set x87 control word - -cat >>confdefs.h <<\_ACEOF -#define USING_X87_FPU 1 -_ACEOF - - { echo "$as_me:$LINENO: checking whether we can use gcc inline assembler to get and set x87 control word" >&5 + # Check that it's okay to use gcc inline assembler to get and set + # x87 control word + { echo "$as_me:$LINENO: checking whether we can use gcc inline assembler to get and set x87 control word" >&5 echo $ECHO_N "checking whether we can use gcc inline assembler to get and set x87 control word... $ECHO_C" >&6; } - cat >conftest.$ac_ext <<_ACEOF + cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext @@ -22005,9 +21852,9 @@ int main () { - unsigned short cw; - __asm__ __volatile__ ("fnstcw %0" : "=m" (cw)); - __asm__ __volatile__ ("fldcw %0" : : "m" (cw)); + unsigned short cw; + __asm__ __volatile__ ("fnstcw %0" : "=m" (cw)); + __asm__ __volatile__ ("fldcw %0" : : "m" (cw)); ; return 0; @@ -22039,17 +21886,15 @@ sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - { echo "$as_me:$LINENO: result: $have_gcc_asm_for_x87" >&5 + { echo "$as_me:$LINENO: result: $have_gcc_asm_for_x87" >&5 echo "${ECHO_T}$have_gcc_asm_for_x87" >&6; } - if test "$have_gcc_asm_for_x87" = yes - then + if test "$have_gcc_asm_for_x87" = yes + then cat >>confdefs.h <<\_ACEOF #define HAVE_GCC_ASM_FOR_X87 1 _ACEOF - fi - fi fi fi diff --git a/configure.in b/configure.in index 7f7dc7f0c87..618ff4eaaa9 100644 --- a/configure.in +++ b/configure.in @@ -3142,97 +3142,28 @@ then in ARM mixed-endian order (byte order 45670123)]) fi -# David Gay's code in Python/dtoa.c requires that the FPU uses 53-bit -# rounding; this is a particular problem on x86, where the x87 FPU has -# a default rounding precision of 64 bits. For gcc/x86, we try to fix -# this by: -# -# (1) using the SSE2 instruction set when available (it usually is -# on modern machines) -# (2) using inline assembler to get and set the x87 FPU control word -# otherwise. -# -# On AMD64 (aka x86-64), gcc automatically enables use of SSE2 -# instructions, so we don't bother trying to detect. - +# The short float repr introduced in Python 3.1 requires the +# correctly-rounded string <-> double conversion functions from +# Python/dtoa.c, which in turn require that the FPU uses 53-bit +# rounding; this is a problem on x86, where the x87 FPU has a default +# rounding precision of 64 bits. For gcc/x86, we try to fix this by +# using inline assembler to get and set the x87 FPU control word. if test "$GCC" = yes && test -n "`$CC -dM -E -