mirror of https://github.com/python/cpython
gh-120196: Reuse find_max_char() for bytes objects (#120497)
This commit is contained in:
parent
21866c8ed2
commit
945a89b48f
|
@ -92,57 +92,6 @@ _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PyDoc_STRVAR_shared(_Py_isascii__doc__,
|
|
||||||
"B.isascii() -> bool\n\
|
|
||||||
\n\
|
|
||||||
Return True if B is empty or all characters in B are ASCII,\n\
|
|
||||||
False otherwise.");
|
|
||||||
|
|
||||||
// Optimization is copied from ascii_decode in unicodeobject.c
|
|
||||||
/* Mask to quickly check whether a C 'size_t' contains a
|
|
||||||
non-ASCII, UTF8-encoded char. */
|
|
||||||
#if (SIZEOF_SIZE_T == 8)
|
|
||||||
# define ASCII_CHAR_MASK 0x8080808080808080ULL
|
|
||||||
#elif (SIZEOF_SIZE_T == 4)
|
|
||||||
# define ASCII_CHAR_MASK 0x80808080U
|
|
||||||
#else
|
|
||||||
# error C 'size_t' size should be either 4 or 8!
|
|
||||||
#endif
|
|
||||||
|
|
||||||
PyObject*
|
|
||||||
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
|
|
||||||
{
|
|
||||||
const char *p = cptr;
|
|
||||||
const char *end = p + len;
|
|
||||||
|
|
||||||
while (p < end) {
|
|
||||||
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
|
|
||||||
for an explanation. */
|
|
||||||
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
|
||||||
/* Help allocation */
|
|
||||||
const char *_p = p;
|
|
||||||
while (_p + SIZEOF_SIZE_T <= end) {
|
|
||||||
size_t value = *(const size_t *) _p;
|
|
||||||
if (value & ASCII_CHAR_MASK) {
|
|
||||||
Py_RETURN_FALSE;
|
|
||||||
}
|
|
||||||
_p += SIZEOF_SIZE_T;
|
|
||||||
}
|
|
||||||
p = _p;
|
|
||||||
if (_p == end)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if ((unsigned char)*p & 0x80) {
|
|
||||||
Py_RETURN_FALSE;
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
Py_RETURN_TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef ASCII_CHAR_MASK
|
|
||||||
|
|
||||||
|
|
||||||
PyDoc_STRVAR_shared(_Py_isdigit__doc__,
|
PyDoc_STRVAR_shared(_Py_isdigit__doc__,
|
||||||
"B.isdigit() -> bool\n\
|
"B.isdigit() -> bool\n\
|
||||||
\n\
|
\n\
|
||||||
|
@ -438,6 +387,7 @@ _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
|
||||||
#include "stringlib/fastsearch.h"
|
#include "stringlib/fastsearch.h"
|
||||||
#include "stringlib/count.h"
|
#include "stringlib/count.h"
|
||||||
#include "stringlib/find.h"
|
#include "stringlib/find.h"
|
||||||
|
#include "stringlib/find_max_char.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Wraps stringlib_parse_args_finds() and additionally checks the first
|
Wraps stringlib_parse_args_finds() and additionally checks the first
|
||||||
|
@ -765,3 +715,21 @@ _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *subobj,
|
||||||
{
|
{
|
||||||
return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1);
|
return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR_shared(_Py_isascii__doc__,
|
||||||
|
"B.isascii() -> bool\n\
|
||||||
|
\n\
|
||||||
|
Return True if B is empty or all characters in B are ASCII,\n\
|
||||||
|
False otherwise.");
|
||||||
|
|
||||||
|
PyObject*
|
||||||
|
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
const char *p = cptr;
|
||||||
|
const char *end = p + len;
|
||||||
|
Py_ssize_t max_char = stringlib_find_max_char(cptr, end);
|
||||||
|
if (max_char > 127) {
|
||||||
|
Py_RETURN_FALSE;
|
||||||
|
}
|
||||||
|
Py_RETURN_TRUE;
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
/* Finding the optimal width of unicode characters in a buffer */
|
/* Finding the optimal width of unicode characters in a buffer */
|
||||||
|
|
||||||
#if !STRINGLIB_IS_UNICODE
|
/* find_max_char for one-byte will work for bytes objects as well. */
|
||||||
|
#if !STRINGLIB_IS_UNICODE && STRINGLIB_SIZEOF_CHAR > 1
|
||||||
# error "find_max_char.h is specific to Unicode"
|
# error "find_max_char.h is specific to Unicode"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -20,19 +21,20 @@ Py_LOCAL_INLINE(Py_UCS4)
|
||||||
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
|
||||||
{
|
{
|
||||||
const unsigned char *p = (const unsigned char *) begin;
|
const unsigned char *p = (const unsigned char *) begin;
|
||||||
|
const unsigned char *_end = (const unsigned char *)end;
|
||||||
|
|
||||||
while (p < end) {
|
while (p < _end) {
|
||||||
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
||||||
/* Help register allocation */
|
/* Help register allocation */
|
||||||
const unsigned char *_p = p;
|
const unsigned char *_p = p;
|
||||||
while (_p + SIZEOF_SIZE_T <= end) {
|
while (_p + SIZEOF_SIZE_T <= _end) {
|
||||||
size_t value = *(const size_t *) _p;
|
size_t value = *(const size_t *) _p;
|
||||||
if (value & UCS1_ASCII_CHAR_MASK)
|
if (value & UCS1_ASCII_CHAR_MASK)
|
||||||
return 255;
|
return 255;
|
||||||
_p += SIZEOF_SIZE_T;
|
_p += SIZEOF_SIZE_T;
|
||||||
}
|
}
|
||||||
p = _p;
|
p = _p;
|
||||||
if (p == end)
|
if (p == _end)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (*p++ & 0x80)
|
if (*p++ & 0x80)
|
||||||
|
|
Loading…
Reference in New Issue