From cd777eaf53e438e2c3b7aab384f18d56b262bc0b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 8 Apr 2013 22:43:44 +0200 Subject: [PATCH] Issue #17615: Comparing two Unicode strings now uses wmemcmp() when possible wmemcmp() is twice as fast as a naive loop (342 usec vs 744 usec) on Fedora 18/x86_64, GCC 4.7.2. --- Objects/unicodeobject.c | 22 ++++++++++++++++++++++ PC/pyconfig.h | 3 +++ configure | 2 +- configure.ac | 2 +- pyconfig.h.in | 6 +++--- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d450b4df506..e9153c0de8c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10304,8 +10304,19 @@ unicode_compare(PyObject *str1, PyObject *str2) COMPARE(Py_UCS2, Py_UCS1); break; case PyUnicode_2BYTE_KIND: + { +#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 2 + int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len); + /* normalize result of wmemcmp() into the range [-1; 1] */ + if (cmp < 0) + return -1; + if (cmp > 0) + return 1; +#else COMPARE(Py_UCS2, Py_UCS2); +#endif break; + } case PyUnicode_4BYTE_KIND: COMPARE(Py_UCS2, Py_UCS4); break; @@ -10324,8 +10335,19 @@ unicode_compare(PyObject *str1, PyObject *str2) COMPARE(Py_UCS4, Py_UCS2); break; case PyUnicode_4BYTE_KIND: + { +#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4 + int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len); + /* normalize result of wmemcmp() into the range [-1; 1] */ + if (cmp < 0) + return -1; + if (cmp > 0) + return 1; +#else COMPARE(Py_UCS4, Py_UCS4); +#endif break; + } default: assert(0); } diff --git a/PC/pyconfig.h b/PC/pyconfig.h index c5f16e580ff..1284db8610e 100644 --- a/PC/pyconfig.h +++ b/PC/pyconfig.h @@ -645,6 +645,9 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ #define HAVE_WCSXFRM 1 #endif +/* Define to 1 if you have the `wmemcmp' function. 
*/ +#define HAVE_WMEMCMP 1 + /* Define if the zlib library has inflateCopy */ #define HAVE_ZLIB_COPY 1 diff --git a/configure b/configure index 7d8a65526ac..53d38908af4 100755 --- a/configure +++ b/configure @@ -10273,7 +10273,7 @@ for ac_func in alarm accept4 setitimer getitimer bind_textdomain_codeset chown \ sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy symlinkat sync \ sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \ truncate uname unlinkat unsetenv utimensat utimes waitid waitpid wait3 wait4 \ - wcscoll wcsftime wcsxfrm writev _getpty + wcscoll wcsftime wcsxfrm wmemcmp writev _getpty do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/configure.ac b/configure.ac index caef808348f..053e4a46b40 100644 --- a/configure.ac +++ b/configure.ac @@ -2816,7 +2816,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \ sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy symlinkat sync \ sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \ truncate uname unlinkat unsetenv utimensat utimes waitid waitpid wait3 wait4 \ - wcscoll wcsftime wcsxfrm writev _getpty) + wcscoll wcsftime wcsxfrm wmemcmp writev _getpty) AC_CHECK_DECL(dirfd, AC_DEFINE(HAVE_DIRFD, 1, diff --git a/pyconfig.h.in b/pyconfig.h.in index 231146a666e..4f252dc100c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1118,6 +1118,9 @@ /* Define to 1 if you have the `wcsxfrm' function. */ #undef HAVE_WCSXFRM +/* Define to 1 if you have the `wmemcmp' function. */ +#undef HAVE_WMEMCMP + /* Define if tzset() actually switches the local timezone in a meaningful way. */ #undef HAVE_WORKING_TZSET @@ -1190,9 +1193,6 @@ /* Define if setpgrp() must be called as setpgrp(0, 0). */ #undef SETPGRP_HAVE_ARG -/* Define this to be extension of shared libraries (including the dot!). 
*/ -#undef SHLIB_EXT - /* Define if i>>j for signed int i does not extend the sign bit when i < 0 */ #undef SIGNED_RIGHT_SHIFT_ZERO_FILLS