bpo-38252: Use 8-byte step to detect ASCII sequence in 64-bit Windows build (GH-16334)

This commit is contained in:
Ma Lin 2020-10-18 22:48:38 +08:00 committed by GitHub
parent 3635388f52
commit a0c603cb9d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 52 deletions

View File

@ -0,0 +1 @@
Use an 8-byte step to detect ASCII sequences in 64-bit Windows builds.

View File

@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\
False otherwise."); False otherwise.");
// Optimization is copied from ascii_decode in unicodeobject.c // Optimization is copied from ascii_decode in unicodeobject.c
/* Mask to quickly check whether a C 'long' contains a /* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */ non-ASCII, UTF8-encoded char. */
#if (SIZEOF_LONG == 8) #if (SIZEOF_SIZE_T == 8)
# define ASCII_CHAR_MASK 0x8080808080808080UL # define ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_LONG == 4) #elif (SIZEOF_SIZE_T == 4)
# define ASCII_CHAR_MASK 0x80808080UL # define ASCII_CHAR_MASK 0x80808080U
#else #else
# error C 'long' size should be either 4 or 8! # error C 'size_t' size should be either 4 or 8!
#endif #endif
PyObject* PyObject*
@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{ {
const char *p = cptr; const char *p = cptr;
const char *end = p + len; const char *end = p + len;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
while (p < end) { while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */ for an explanation. */
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help allocation */ /* Help allocation */
const char *_p = p; const char *_p = p;
while (_p < aligned_end) { while (_p < aligned_end) {
unsigned long value = *(const unsigned long *) _p; size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK) { if (value & ASCII_CHAR_MASK) {
Py_RETURN_FALSE; Py_RETURN_FALSE;
} }
_p += SIZEOF_LONG; _p += SIZEOF_SIZE_T;
} }
p = _p; p = _p;
if (_p == end) if (_p == end)

View File

@ -6,14 +6,14 @@
#include "pycore_bitutils.h" // _Py_bswap32() #include "pycore_bitutils.h" // _Py_bswap32()
/* Mask to quickly check whether a C 'long' contains a /* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */ non-ASCII, UTF8-encoded char. */
#if (SIZEOF_LONG == 8) #if (SIZEOF_SIZE_T == 8)
# define ASCII_CHAR_MASK 0x8080808080808080UL # define ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_LONG == 4) #elif (SIZEOF_SIZE_T == 4)
# define ASCII_CHAR_MASK 0x80808080UL # define ASCII_CHAR_MASK 0x80808080U
#else #else
# error C 'long' size should be either 4 or 8! # error C 'size_t' size should be either 4 or 8!
#endif #endif
/* 10xxxxxx */ /* 10xxxxxx */
@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
{ {
Py_UCS4 ch; Py_UCS4 ch;
const char *s = *inptr; const char *s = *inptr;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
STRINGLIB_CHAR *p = dest + *outpos; STRINGLIB_CHAR *p = dest + *outpos;
while (s < end) { while (s < end) {
@ -36,19 +36,19 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
/* Fast path for runs of ASCII characters. Given that common UTF-8 /* Fast path for runs of ASCII characters. Given that common UTF-8
input will consist of an overwhelming majority of ASCII input will consist of an overwhelming majority of ASCII
characters, we try to optimize for this case by checking characters, we try to optimize for this case by checking
as many characters as a C 'long' can contain. as many characters as a C 'size_t' can contain.
First, check if we can do an aligned read, as most CPUs have First, check if we can do an aligned read, as most CPUs have
a penalty for unaligned reads. a penalty for unaligned reads.
*/ */
if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) { if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
/* Help register allocation */ /* Help register allocation */
const char *_s = s; const char *_s = s;
STRINGLIB_CHAR *_p = p; STRINGLIB_CHAR *_p = p;
while (_s < aligned_end) { while (_s < aligned_end) {
/* Read a whole long at a time (either 4 or 8 bytes), /* Read a whole size_t at a time (either 4 or 8 bytes),
and do a fast unrolled copy if it only contains ASCII and do a fast unrolled copy if it only contains ASCII
characters. */ characters. */
unsigned long value = *(const unsigned long *) _s; size_t value = *(const size_t *) _s;
if (value & ASCII_CHAR_MASK) if (value & ASCII_CHAR_MASK)
break; break;
#if PY_LITTLE_ENDIAN #if PY_LITTLE_ENDIAN
@ -56,14 +56,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
_p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu); _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
# if SIZEOF_LONG == 8 # if SIZEOF_SIZE_T == 8
_p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu); _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
_p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu); _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
_p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu); _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
_p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu); _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
# endif # endif
#else #else
# if SIZEOF_LONG == 8 # if SIZEOF_SIZE_T == 8
_p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu); _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
_p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu); _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu); _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
_p[3] = (STRINGLIB_CHAR)(value & 0xFFu); _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
# endif # endif
#endif #endif
_s += SIZEOF_LONG; _s += SIZEOF_SIZE_T;
_p += SIZEOF_LONG; _p += SIZEOF_SIZE_T;
} }
s = _s; s = _s;
p = _p; p = _p;

View File

@ -4,14 +4,14 @@
# error "find_max_char.h is specific to Unicode" # error "find_max_char.h is specific to Unicode"
#endif #endif
/* Mask to quickly check whether a C 'long' contains a /* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */ non-ASCII, UTF8-encoded char. */
#if (SIZEOF_LONG == 8) #if (SIZEOF_SIZE_T == 8)
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL # define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_LONG == 4) #elif (SIZEOF_SIZE_T == 4)
# define UCS1_ASCII_CHAR_MASK 0x80808080UL # define UCS1_ASCII_CHAR_MASK 0x80808080U
#else #else
# error C 'long' size should be either 4 or 8! # error C 'size_t' size should be either 4 or 8!
#endif #endif
#if STRINGLIB_SIZEOF_CHAR == 1 #if STRINGLIB_SIZEOF_CHAR == 1
@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
{ {
const unsigned char *p = (const unsigned char *) begin; const unsigned char *p = (const unsigned char *) begin;
const unsigned char *aligned_end = const unsigned char *aligned_end =
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
while (p < end) { while (p < end) {
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help register allocation */ /* Help register allocation */
const unsigned char *_p = p; const unsigned char *_p = p;
while (_p < aligned_end) { while (_p < aligned_end) {
unsigned long value = *(const unsigned long *) _p; size_t value = *(const size_t *) _p;
if (value & UCS1_ASCII_CHAR_MASK) if (value & UCS1_ASCII_CHAR_MASK)
return 255; return 255;
_p += SIZEOF_LONG; _p += SIZEOF_SIZE_T;
} }
p = _p; p = _p;
if (p == end) if (p == end)

View File

@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s,
#include "stringlib/codecs.h" #include "stringlib/codecs.h"
#include "stringlib/undef.h" #include "stringlib/undef.h"
/* Mask to quickly check whether a C 'long' contains a /* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */ non-ASCII, UTF8-encoded char. */
#if (SIZEOF_LONG == 8) #if (SIZEOF_SIZE_T == 8)
# define ASCII_CHAR_MASK 0x8080808080808080UL # define ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_LONG == 4) #elif (SIZEOF_SIZE_T == 4)
# define ASCII_CHAR_MASK 0x80808080UL # define ASCII_CHAR_MASK 0x80808080U
#else #else
# error C 'long' size should be either 4 or 8! # error C 'size_t' size should be either 4 or 8!
#endif #endif
static Py_ssize_t static Py_ssize_t
ascii_decode(const char *start, const char *end, Py_UCS1 *dest) ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
{ {
const char *p = start; const char *p = start;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
/* /*
* Issue #17237: m68k is a bit different from most architectures in * Issue #17237: m68k is a bit different from most architectures in
@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
* version" will even speed up m68k. * version" will even speed up m68k.
*/ */
#if !defined(__m68k__) #if !defined(__m68k__)
#if SIZEOF_LONG <= SIZEOF_VOID_P #if SIZEOF_SIZE_T <= SIZEOF_VOID_P
assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG)); assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Fast path, see in STRINGLIB(utf8_decode) for /* Fast path, see in STRINGLIB(utf8_decode) for
an explanation. */ an explanation. */
/* Help allocation */ /* Help allocation */
const char *_p = p; const char *_p = p;
Py_UCS1 * q = dest; Py_UCS1 * q = dest;
while (_p < aligned_end) { while (_p < aligned_end) {
unsigned long value = *(const unsigned long *) _p; size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK) if (value & ASCII_CHAR_MASK)
break; break;
*((unsigned long *)q) = value; *((size_t *)q) = value;
_p += SIZEOF_LONG; _p += SIZEOF_SIZE_T;
q += SIZEOF_LONG; q += SIZEOF_SIZE_T;
} }
p = _p; p = _p;
while (p < end) { while (p < end) {
@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
while (p < end) { while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */ for an explanation. */
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help allocation */ /* Help allocation */
const char *_p = p; const char *_p = p;
while (_p < aligned_end) { while (_p < aligned_end) {
unsigned long value = *(const unsigned long *) _p; size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK) if (value & ASCII_CHAR_MASK)
break; break;
_p += SIZEOF_LONG; _p += SIZEOF_SIZE_T;
} }
p = _p; p = _p;
if (_p == end) if (_p == end)