mirror of https://github.com/python/cpython
bpo-43179: Generalise alignment for optimised string routines (GH-24624)
* Remove m68k-specific hack from ascii_decode On m68k, alignments of primitives is more relaxed, with 4-byte and 8-byte types only requiring 2-byte alignment, thus using sizeof(size_t) does not work. Instead, use the portable alternative. Note that this is a minimal fix that only relaxes the assertion and the condition for when to use the optimised version remains overly strict. Such issues will be fixed tree-wide in the next commit. NB: In C11 we could use _Alignof(size_t) instead, but for compatibility we use autoconf. * Optimise string routines for architectures with non-natural alignment C only requires that sizeof(x) is a multiple of alignof(x), not that the two are equal. Thus anywhere where we optimise based on alignment we should be using alignof(x) not sizeof(x). This is more annoying than it would be in C11 where we could just use _Alignof(x) (and alignof(x) in C++11), but since we still require only C99 we must plumb the information all the way from autoconf through the various typedefs and defines.
This commit is contained in:
parent
cfa176685a
commit
dec0757549
|
@ -115,15 +115,14 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
|
||||||
{
|
{
|
||||||
const char *p = cptr;
|
const char *p = cptr;
|
||||||
const char *end = p + len;
|
const char *end = p + len;
|
||||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
|
||||||
|
|
||||||
while (p < end) {
|
while (p < end) {
|
||||||
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
|
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
|
||||||
for an explanation. */
|
for an explanation. */
|
||||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
||||||
/* Help allocation */
|
/* Help allocation */
|
||||||
const char *_p = p;
|
const char *_p = p;
|
||||||
while (_p < aligned_end) {
|
while (_p + SIZEOF_SIZE_T <= end) {
|
||||||
size_t value = *(const size_t *) _p;
|
size_t value = *(const size_t *) _p;
|
||||||
if (value & ASCII_CHAR_MASK) {
|
if (value & ASCII_CHAR_MASK) {
|
||||||
Py_RETURN_FALSE;
|
Py_RETURN_FALSE;
|
||||||
|
|
|
@ -26,7 +26,6 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
||||||
{
|
{
|
||||||
Py_UCS4 ch;
|
Py_UCS4 ch;
|
||||||
const char *s = *inptr;
|
const char *s = *inptr;
|
||||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
|
||||||
STRINGLIB_CHAR *p = dest + *outpos;
|
STRINGLIB_CHAR *p = dest + *outpos;
|
||||||
|
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
|
@ -40,11 +39,11 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
||||||
First, check if we can do an aligned read, as most CPUs have
|
First, check if we can do an aligned read, as most CPUs have
|
||||||
a penalty for unaligned reads.
|
a penalty for unaligned reads.
|
||||||
*/
|
*/
|
||||||
if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
|
if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) {
|
||||||
/* Help register allocation */
|
/* Help register allocation */
|
||||||
const char *_s = s;
|
const char *_s = s;
|
||||||
STRINGLIB_CHAR *_p = p;
|
STRINGLIB_CHAR *_p = p;
|
||||||
while (_s < aligned_end) {
|
while (_s + SIZEOF_SIZE_T <= end) {
|
||||||
/* Read a whole size_t at a time (either 4 or 8 bytes),
|
/* Read a whole size_t at a time (either 4 or 8 bytes),
|
||||||
and do a fast unrolled copy if it only contains ASCII
|
and do a fast unrolled copy if it only contains ASCII
|
||||||
characters. */
|
characters. */
|
||||||
|
@ -496,8 +495,6 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
|
||||||
int native_ordering)
|
int native_ordering)
|
||||||
{
|
{
|
||||||
Py_UCS4 ch;
|
Py_UCS4 ch;
|
||||||
const unsigned char *aligned_end =
|
|
||||||
(const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
|
|
||||||
const unsigned char *q = *inptr;
|
const unsigned char *q = *inptr;
|
||||||
STRINGLIB_CHAR *p = dest + *outpos;
|
STRINGLIB_CHAR *p = dest + *outpos;
|
||||||
/* Offsets from q for retrieving byte pairs in the right order. */
|
/* Offsets from q for retrieving byte pairs in the right order. */
|
||||||
|
@ -512,10 +509,10 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
|
||||||
Py_UCS4 ch2;
|
Py_UCS4 ch2;
|
||||||
/* First check for possible aligned read of a C 'long'. Unaligned
|
/* First check for possible aligned read of a C 'long'. Unaligned
|
||||||
reads are more expensive, better to defer to another iteration. */
|
reads are more expensive, better to defer to another iteration. */
|
||||||
if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
|
if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) {
|
||||||
/* Fast path for runs of in-range non-surrogate chars. */
|
/* Fast path for runs of in-range non-surrogate chars. */
|
||||||
const unsigned char *_q = q;
|
const unsigned char *_q = q;
|
||||||
while (_q < aligned_end) {
|
while (_q + SIZEOF_LONG <= e) {
|
||||||
unsigned long block = * (const unsigned long *) _q;
|
unsigned long block = * (const unsigned long *) _q;
|
||||||
if (native_ordering) {
|
if (native_ordering) {
|
||||||
/* Can use buffer directly */
|
/* Can use buffer directly */
|
||||||
|
|
|
@ -20,14 +20,12 @@ Py_LOCAL_INLINE(Py_UCS4)
|
||||||
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
||||||
{
|
{
|
||||||
const unsigned char *p = (const unsigned char *) begin;
|
const unsigned char *p = (const unsigned char *) begin;
|
||||||
const unsigned char *aligned_end =
|
|
||||||
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
|
||||||
|
|
||||||
while (p < end) {
|
while (p < end) {
|
||||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
||||||
/* Help register allocation */
|
/* Help register allocation */
|
||||||
const unsigned char *_p = p;
|
const unsigned char *_p = p;
|
||||||
while (_p < aligned_end) {
|
while (_p + SIZEOF_SIZE_T <= end) {
|
||||||
size_t value = *(const size_t *) _p;
|
size_t value = *(const size_t *) _p;
|
||||||
if (value & UCS1_ASCII_CHAR_MASK)
|
if (value & UCS1_ASCII_CHAR_MASK)
|
||||||
return 255;
|
return 255;
|
||||||
|
|
|
@ -5070,25 +5070,16 @@ static Py_ssize_t
|
||||||
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
||||||
{
|
{
|
||||||
const char *p = start;
|
const char *p = start;
|
||||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Issue #17237: m68k is a bit different from most architectures in
|
|
||||||
* that objects do not use "natural alignment" - for example, int and
|
|
||||||
* long are only aligned at 2-byte boundaries. Therefore the assert()
|
|
||||||
* won't work; also, tests have shown that skipping the "optimised
|
|
||||||
* version" will even speed up m68k.
|
|
||||||
*/
|
|
||||||
#if !defined(__m68k__)
|
|
||||||
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
|
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
|
||||||
assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
|
assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T));
|
||||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
||||||
/* Fast path, see in STRINGLIB(utf8_decode) for
|
/* Fast path, see in STRINGLIB(utf8_decode) for
|
||||||
an explanation. */
|
an explanation. */
|
||||||
/* Help allocation */
|
/* Help allocation */
|
||||||
const char *_p = p;
|
const char *_p = p;
|
||||||
Py_UCS1 * q = dest;
|
Py_UCS1 * q = dest;
|
||||||
while (_p < aligned_end) {
|
while (_p + SIZEOF_SIZE_T <= end) {
|
||||||
size_t value = *(const size_t *) _p;
|
size_t value = *(const size_t *) _p;
|
||||||
if (value & ASCII_CHAR_MASK)
|
if (value & ASCII_CHAR_MASK)
|
||||||
break;
|
break;
|
||||||
|
@ -5104,15 +5095,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
||||||
}
|
}
|
||||||
return p - start;
|
return p - start;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
while (p < end) {
|
while (p < end) {
|
||||||
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
|
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
|
||||||
for an explanation. */
|
for an explanation. */
|
||||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
||||||
/* Help allocation */
|
/* Help allocation */
|
||||||
const char *_p = p;
|
const char *_p = p;
|
||||||
while (_p < aligned_end) {
|
while (_p + SIZEOF_SIZE_T <= end) {
|
||||||
size_t value = *(const size_t *) _p;
|
size_t value = *(const size_t *) _p;
|
||||||
if (value & ASCII_CHAR_MASK)
|
if (value & ASCII_CHAR_MASK)
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -289,6 +289,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
|
||||||
# define SIZEOF_FPOS_T 8
|
# define SIZEOF_FPOS_T 8
|
||||||
# define SIZEOF_HKEY 8
|
# define SIZEOF_HKEY 8
|
||||||
# define SIZEOF_SIZE_T 8
|
# define SIZEOF_SIZE_T 8
|
||||||
|
# define ALIGNOF_SIZE_T 8
|
||||||
/* configure.ac defines HAVE_LARGEFILE_SUPPORT iff
|
/* configure.ac defines HAVE_LARGEFILE_SUPPORT iff
|
||||||
sizeof(off_t) > sizeof(long), and sizeof(long long) >= sizeof(off_t).
|
sizeof(off_t) > sizeof(long), and sizeof(long long) >= sizeof(off_t).
|
||||||
On Win64 the second condition is not true, but if fpos_t replaces off_t
|
On Win64 the second condition is not true, but if fpos_t replaces off_t
|
||||||
|
@ -303,6 +304,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
|
||||||
# define SIZEOF_FPOS_T 8
|
# define SIZEOF_FPOS_T 8
|
||||||
# define SIZEOF_HKEY 4
|
# define SIZEOF_HKEY 4
|
||||||
# define SIZEOF_SIZE_T 4
|
# define SIZEOF_SIZE_T 4
|
||||||
|
# define ALIGNOF_SIZE_T 4
|
||||||
/* MS VS2005 changes time_t to a 64-bit type on all platforms */
|
/* MS VS2005 changes time_t to a 64-bit type on all platforms */
|
||||||
# if defined(_MSC_VER) && _MSC_VER >= 1400
|
# if defined(_MSC_VER) && _MSC_VER >= 1400
|
||||||
# define SIZEOF_TIME_T 8
|
# define SIZEOF_TIME_T 8
|
||||||
|
@ -321,6 +323,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
|
||||||
#define SIZEOF_SHORT 2
|
#define SIZEOF_SHORT 2
|
||||||
#define SIZEOF_INT 4
|
#define SIZEOF_INT 4
|
||||||
#define SIZEOF_LONG 4
|
#define SIZEOF_LONG 4
|
||||||
|
#define ALIGNOF_LONG 4
|
||||||
#define SIZEOF_LONG_LONG 8
|
#define SIZEOF_LONG_LONG 8
|
||||||
#define SIZEOF_DOUBLE 8
|
#define SIZEOF_DOUBLE 8
|
||||||
#define SIZEOF_FLOAT 4
|
#define SIZEOF_FLOAT 4
|
||||||
|
|
|
@ -8644,7 +8644,7 @@ $as_echo "#define HAVE_GCC_UINT128_T 1" >>confdefs.h
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
# Sizes of various common basic types
|
# Sizes and alignments of various common basic types
|
||||||
# ANSI C requires sizeof(char) == 1, so no need to check it
|
# ANSI C requires sizeof(char) == 1, so no need to check it
|
||||||
# The cast to long int works around a bug in the HP C Compiler
|
# The cast to long int works around a bug in the HP C Compiler
|
||||||
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
|
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
|
||||||
|
@ -8712,6 +8712,41 @@ cat >>confdefs.h <<_ACEOF
|
||||||
_ACEOF
|
_ACEOF
|
||||||
|
|
||||||
|
|
||||||
|
# The cast to long int works around a bug in the HP C Compiler,
|
||||||
|
# see AC_CHECK_SIZEOF for more information.
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of long" >&5
|
||||||
|
$as_echo_n "checking alignment of long... " >&6; }
|
||||||
|
if ${ac_cv_alignof_long+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_long" "$ac_includes_default
|
||||||
|
#ifndef offsetof
|
||||||
|
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
|
||||||
|
#endif
|
||||||
|
typedef struct { char x; long y; } ac__type_alignof_;"; then :
|
||||||
|
|
||||||
|
else
|
||||||
|
if test "$ac_cv_type_long" = yes; then
|
||||||
|
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
|
||||||
|
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
|
||||||
|
as_fn_error 77 "cannot compute alignment of long
|
||||||
|
See \`config.log' for more details" "$LINENO" 5; }
|
||||||
|
else
|
||||||
|
ac_cv_alignof_long=0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_long" >&5
|
||||||
|
$as_echo "$ac_cv_alignof_long" >&6; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define ALIGNOF_LONG $ac_cv_alignof_long
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
|
||||||
# The cast to long int works around a bug in the HP C Compiler
|
# The cast to long int works around a bug in the HP C Compiler
|
||||||
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
|
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
|
||||||
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
|
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
|
||||||
|
@ -8943,6 +8978,41 @@ cat >>confdefs.h <<_ACEOF
|
||||||
_ACEOF
|
_ACEOF
|
||||||
|
|
||||||
|
|
||||||
|
# The cast to long int works around a bug in the HP C Compiler,
|
||||||
|
# see AC_CHECK_SIZEOF for more information.
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of size_t" >&5
|
||||||
|
$as_echo_n "checking alignment of size_t... " >&6; }
|
||||||
|
if ${ac_cv_alignof_size_t+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_size_t" "$ac_includes_default
|
||||||
|
#ifndef offsetof
|
||||||
|
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
|
||||||
|
#endif
|
||||||
|
typedef struct { char x; size_t y; } ac__type_alignof_;"; then :
|
||||||
|
|
||||||
|
else
|
||||||
|
if test "$ac_cv_type_size_t" = yes; then
|
||||||
|
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
|
||||||
|
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
|
||||||
|
as_fn_error 77 "cannot compute alignment of size_t
|
||||||
|
See \`config.log' for more details" "$LINENO" 5; }
|
||||||
|
else
|
||||||
|
ac_cv_alignof_size_t=0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_size_t" >&5
|
||||||
|
$as_echo "$ac_cv_alignof_size_t" >&6; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define ALIGNOF_SIZE_T $ac_cv_alignof_size_t
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
|
||||||
# The cast to long int works around a bug in the HP C Compiler
|
# The cast to long int works around a bug in the HP C Compiler
|
||||||
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
|
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
|
||||||
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
|
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
|
||||||
|
|
|
@ -2354,10 +2354,11 @@ AC_CHECK_TYPE(ssize_t,
|
||||||
AC_CHECK_TYPE(__uint128_t,
|
AC_CHECK_TYPE(__uint128_t,
|
||||||
AC_DEFINE(HAVE_GCC_UINT128_T, 1, [Define if your compiler provides __uint128_t]),,)
|
AC_DEFINE(HAVE_GCC_UINT128_T, 1, [Define if your compiler provides __uint128_t]),,)
|
||||||
|
|
||||||
# Sizes of various common basic types
|
# Sizes and alignments of various common basic types
|
||||||
# ANSI C requires sizeof(char) == 1, so no need to check it
|
# ANSI C requires sizeof(char) == 1, so no need to check it
|
||||||
AC_CHECK_SIZEOF(int, 4)
|
AC_CHECK_SIZEOF(int, 4)
|
||||||
AC_CHECK_SIZEOF(long, 4)
|
AC_CHECK_SIZEOF(long, 4)
|
||||||
|
AC_CHECK_ALIGNOF(long)
|
||||||
AC_CHECK_SIZEOF(long long, 8)
|
AC_CHECK_SIZEOF(long long, 8)
|
||||||
AC_CHECK_SIZEOF(void *, 4)
|
AC_CHECK_SIZEOF(void *, 4)
|
||||||
AC_CHECK_SIZEOF(short, 2)
|
AC_CHECK_SIZEOF(short, 2)
|
||||||
|
@ -2365,6 +2366,7 @@ AC_CHECK_SIZEOF(float, 4)
|
||||||
AC_CHECK_SIZEOF(double, 8)
|
AC_CHECK_SIZEOF(double, 8)
|
||||||
AC_CHECK_SIZEOF(fpos_t, 4)
|
AC_CHECK_SIZEOF(fpos_t, 4)
|
||||||
AC_CHECK_SIZEOF(size_t, 4)
|
AC_CHECK_SIZEOF(size_t, 4)
|
||||||
|
AC_CHECK_ALIGNOF(size_t)
|
||||||
AC_CHECK_SIZEOF(pid_t, 4)
|
AC_CHECK_SIZEOF(pid_t, 4)
|
||||||
AC_CHECK_SIZEOF(uintptr_t)
|
AC_CHECK_SIZEOF(uintptr_t)
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,12 @@
|
||||||
support for AIX C++ shared extension modules. */
|
support for AIX C++ shared extension modules. */
|
||||||
#undef AIX_GENUINE_CPLUSPLUS
|
#undef AIX_GENUINE_CPLUSPLUS
|
||||||
|
|
||||||
|
/* The normal alignment of `long', in bytes. */
|
||||||
|
#undef ALIGNOF_LONG
|
||||||
|
|
||||||
|
/* The normal alignment of `size_t', in bytes. */
|
||||||
|
#undef ALIGNOF_SIZE_T
|
||||||
|
|
||||||
/* Alternative SOABI used in debug build to load C extensions built in release
|
/* Alternative SOABI used in debug build to load C extensions built in release
|
||||||
mode */
|
mode */
|
||||||
#undef ALT_SOABI
|
#undef ALT_SOABI
|
||||||
|
|
Loading…
Reference in New Issue