bpo-38252: Use 8-byte step to detect ASCII sequence in 64-bit Windows build (GH-16334)
This commit is contained in:
parent
3635388f52
commit
a0c603cb9d
|
@ -0,0 +1 @@
|
|||
Use 8-byte step to detect ASCII sequence in 64-bit Windows build.
|
|
@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\
|
|||
False otherwise.");
|
||||
|
||||
// Optimization is copied from ascii_decode in unicodeobject.c
|
||||
/* Mask to quickly check whether a C 'long' contains a
|
||||
/* Mask to quickly check whether a C 'size_t' contains a
|
||||
non-ASCII, UTF8-encoded char. */
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080UL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080UL
|
||||
#if (SIZEOF_SIZE_T == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080ULL
|
||||
#elif (SIZEOF_SIZE_T == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080U
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
# error C 'size_t' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
PyObject*
|
||||
|
@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
|
|||
{
|
||||
const char *p = cptr;
|
||||
const char *end = p + len;
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
||||
|
||||
while (p < end) {
|
||||
/* Fast path; see STRINGLIB(utf8_decode) in stringlib/codecs.h
|
||||
for an explanation. */
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
||||
/* Help register allocation */
|
||||
const char *_p = p;
|
||||
while (_p < aligned_end) {
|
||||
unsigned long value = *(const unsigned long *) _p;
|
||||
size_t value = *(const size_t *) _p;
|
||||
if (value & ASCII_CHAR_MASK) {
|
||||
Py_RETURN_FALSE;
|
||||
}
|
||||
_p += SIZEOF_LONG;
|
||||
_p += SIZEOF_SIZE_T;
|
||||
}
|
||||
p = _p;
|
||||
if (_p == end)
|
||||
|
|
|
@ -6,14 +6,14 @@
|
|||
|
||||
#include "pycore_bitutils.h" // _Py_bswap32()
|
||||
|
||||
/* Mask to quickly check whether a C 'long' contains a
|
||||
/* Mask to quickly check whether a C 'size_t' contains a
|
||||
non-ASCII, UTF8-encoded char. */
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080UL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080UL
|
||||
#if (SIZEOF_SIZE_T == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080ULL
|
||||
#elif (SIZEOF_SIZE_T == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080U
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
# error C 'size_t' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
/* 10xxxxxx */
|
||||
|
@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
|||
{
|
||||
Py_UCS4 ch;
|
||||
const char *s = *inptr;
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
||||
STRINGLIB_CHAR *p = dest + *outpos;
|
||||
|
||||
while (s < end) {
|
||||
|
@ -36,19 +36,19 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
|||
/* Fast path for runs of ASCII characters. Given that common UTF-8
|
||||
input will consist of an overwhelming majority of ASCII
|
||||
characters, we try to optimize for this case by checking
|
||||
as many characters as a C 'long' can contain.
|
||||
as many characters as a C 'size_t' can contain.
|
||||
First, check if we can do an aligned read, as most CPUs have
|
||||
a penalty for unaligned reads.
|
||||
*/
|
||||
if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
|
||||
if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
|
||||
/* Help register allocation */
|
||||
const char *_s = s;
|
||||
STRINGLIB_CHAR *_p = p;
|
||||
while (_s < aligned_end) {
|
||||
/* Read a whole long at a time (either 4 or 8 bytes),
|
||||
/* Read a whole size_t at a time (either 4 or 8 bytes),
|
||||
and do a fast unrolled copy if it only contains ASCII
|
||||
characters. */
|
||||
unsigned long value = *(const unsigned long *) _s;
|
||||
size_t value = *(const size_t *) _s;
|
||||
if (value & ASCII_CHAR_MASK)
|
||||
break;
|
||||
#if PY_LITTLE_ENDIAN
|
||||
|
@ -56,14 +56,14 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
|||
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
|
||||
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
|
||||
_p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
|
||||
# if SIZEOF_LONG == 8
|
||||
# if SIZEOF_SIZE_T == 8
|
||||
_p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
|
||||
_p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
|
||||
_p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
|
||||
_p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
|
||||
# endif
|
||||
#else
|
||||
# if SIZEOF_LONG == 8
|
||||
# if SIZEOF_SIZE_T == 8
|
||||
_p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
|
||||
_p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
|
||||
_p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
|
||||
|
@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
|
|||
_p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
|
||||
# endif
|
||||
#endif
|
||||
_s += SIZEOF_LONG;
|
||||
_p += SIZEOF_LONG;
|
||||
_s += SIZEOF_SIZE_T;
|
||||
_p += SIZEOF_SIZE_T;
|
||||
}
|
||||
s = _s;
|
||||
p = _p;
|
||||
|
|
|
@ -4,14 +4,14 @@
|
|||
# error "find_max_char.h is specific to Unicode"
|
||||
#endif
|
||||
|
||||
/* Mask to quickly check whether a C 'long' contains a
|
||||
/* Mask to quickly check whether a C 'size_t' contains a
|
||||
non-ASCII, UTF8-encoded char. */
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define UCS1_ASCII_CHAR_MASK 0x80808080UL
|
||||
#if (SIZEOF_SIZE_T == 8)
|
||||
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
|
||||
#elif (SIZEOF_SIZE_T == 4)
|
||||
# define UCS1_ASCII_CHAR_MASK 0x80808080U
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
# error C 'size_t' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
#if STRINGLIB_SIZEOF_CHAR == 1
|
||||
|
@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
|||
{
|
||||
const unsigned char *p = (const unsigned char *) begin;
|
||||
const unsigned char *aligned_end =
|
||||
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
|
||||
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
||||
|
||||
while (p < end) {
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
||||
/* Help register allocation */
|
||||
const unsigned char *_p = p;
|
||||
while (_p < aligned_end) {
|
||||
unsigned long value = *(const unsigned long *) _p;
|
||||
size_t value = *(const size_t *) _p;
|
||||
if (value & UCS1_ASCII_CHAR_MASK)
|
||||
return 255;
|
||||
_p += SIZEOF_LONG;
|
||||
_p += SIZEOF_SIZE_T;
|
||||
}
|
||||
p = _p;
|
||||
if (p == end)
|
||||
|
|
|
@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s,
|
|||
#include "stringlib/codecs.h"
|
||||
#include "stringlib/undef.h"
|
||||
|
||||
/* Mask to quickly check whether a C 'long' contains a
|
||||
/* Mask to quickly check whether a C 'size_t' contains a
|
||||
non-ASCII, UTF8-encoded char. */
|
||||
#if (SIZEOF_LONG == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080UL
|
||||
#elif (SIZEOF_LONG == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080UL
|
||||
#if (SIZEOF_SIZE_T == 8)
|
||||
# define ASCII_CHAR_MASK 0x8080808080808080ULL
|
||||
#elif (SIZEOF_SIZE_T == 4)
|
||||
# define ASCII_CHAR_MASK 0x80808080U
|
||||
#else
|
||||
# error C 'long' size should be either 4 or 8!
|
||||
# error C 'size_t' size should be either 4 or 8!
|
||||
#endif
|
||||
|
||||
static Py_ssize_t
|
||||
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
||||
{
|
||||
const char *p = start;
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
|
||||
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
|
||||
|
||||
/*
|
||||
* Issue #17237: m68k is a bit different from most architectures in
|
||||
|
@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
|||
* version" will even speed up m68k.
|
||||
*/
|
||||
#if !defined(__m68k__)
|
||||
#if SIZEOF_LONG <= SIZEOF_VOID_P
|
||||
assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG));
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
|
||||
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
|
||||
assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
||||
/* Fast path; see STRINGLIB(utf8_decode) for
|
||||
an explanation. */
|
||||
/* Help register allocation */
|
||||
const char *_p = p;
|
||||
Py_UCS1 * q = dest;
|
||||
while (_p < aligned_end) {
|
||||
unsigned long value = *(const unsigned long *) _p;
|
||||
size_t value = *(const size_t *) _p;
|
||||
if (value & ASCII_CHAR_MASK)
|
||||
break;
|
||||
*((unsigned long *)q) = value;
|
||||
_p += SIZEOF_LONG;
|
||||
q += SIZEOF_LONG;
|
||||
*((size_t *)q) = value;
|
||||
_p += SIZEOF_SIZE_T;
|
||||
q += SIZEOF_SIZE_T;
|
||||
}
|
||||
p = _p;
|
||||
while (p < end) {
|
||||
|
@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
|
|||
while (p < end) {
|
||||
/* Fast path; see STRINGLIB(utf8_decode) in stringlib/codecs.h
|
||||
for an explanation. */
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
|
||||
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
|
||||
/* Help register allocation */
|
||||
const char *_p = p;
|
||||
while (_p < aligned_end) {
|
||||
unsigned long value = *(const unsigned long *) _p;
|
||||
size_t value = *(const size_t *) _p;
|
||||
if (value & ASCII_CHAR_MASK)
|
||||
break;
|
||||
_p += SIZEOF_LONG;
|
||||
_p += SIZEOF_SIZE_T;
|
||||
}
|
||||
p = _p;
|
||||
if (_p == end)
|
||||
|
|
Loading…
Reference in New Issue