bpo-40130: _PyUnicode_AsKind() should not be exported. (GH-19265)

Make it a static function, and pass known attributes
(kind, data, length) instead of the PyUnicode object.
This commit is contained in:
Serhiy Storchaka 2020-04-01 15:41:49 +03:00 committed by GitHub
parent 3ef4a7e5a7
commit 17b4733f2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 55 deletions

View File

@ -726,12 +726,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
Py_ssize_t start, Py_ssize_t start,
Py_ssize_t end); Py_ssize_t end);
/* --- wchar_t support for platforms which support it --------------------- */
#ifdef HAVE_WCHAR_H
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
#endif
/* --- Manage the default encoding ---------------------------------------- */ /* --- Manage the default encoding ---------------------------------------- */
/* Returns a pointer to the default encoding (UTF-8) of the /* Returns a pointer to the default encoding (UTF-8) of the

View File

@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
void *data = PyUnicode_DATA(unicode); void *data = PyUnicode_DATA(unicode);
const char *end = str + len; const char *end = str + len;
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
switch (kind) { switch (kind) {
case PyUnicode_1BYTE_KIND: { case PyUnicode_1BYTE_KIND: {
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
#ifdef Py_DEBUG #ifdef Py_DEBUG
if (PyUnicode_IS_ASCII(unicode)) { if (PyUnicode_IS_ASCII(unicode)) {
Py_UCS4 maxchar = ucs1lib_find_max_char( Py_UCS4 maxchar = ucs1lib_find_max_char(
@ -2060,7 +2060,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
case PyUnicode_2BYTE_KIND: { case PyUnicode_2BYTE_KIND: {
Py_UCS2 *start = (Py_UCS2 *)data + index; Py_UCS2 *start = (Py_UCS2 *)data + index;
Py_UCS2 *ucs2 = start; Py_UCS2 *ucs2 = start;
assert(index <= PyUnicode_GET_LENGTH(unicode));
for (; str < end; ++ucs2, ++str) for (; str < end; ++ucs2, ++str)
*ucs2 = (Py_UCS2)*str; *ucs2 = (Py_UCS2)*str;
@ -2068,17 +2067,18 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode)); assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
break; break;
} }
default: { case PyUnicode_4BYTE_KIND: {
Py_UCS4 *start = (Py_UCS4 *)data + index; Py_UCS4 *start = (Py_UCS4 *)data + index;
Py_UCS4 *ucs4 = start; Py_UCS4 *ucs4 = start;
assert(kind == PyUnicode_4BYTE_KIND);
assert(index <= PyUnicode_GET_LENGTH(unicode));
for (; str < end; ++ucs4, ++str) for (; str < end; ++ucs4, ++str)
*ucs4 = (Py_UCS4)*str; *ucs4 = (Py_UCS4)*str;
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode)); assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
} }
default:
Py_UNREACHABLE();
} }
} }
@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
if (max_char >= 256) if (max_char >= 256)
return; return;
} }
else { else if (kind == PyUnicode_4BYTE_KIND) {
const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode); const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
assert(kind == PyUnicode_4BYTE_KIND);
max_char = ucs4lib_find_max_char(u, u + len); max_char = ucs4lib_find_max_char(u, u + len);
if (max_char >= 0x10000) if (max_char >= 0x10000)
return; return;
} }
else
Py_UNREACHABLE();
copy = PyUnicode_New(len, max_char); copy = PyUnicode_New(len, max_char);
if (copy != NULL) if (copy != NULL)
_PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len); _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
/* Widen Unicode objects to larger buffers. Don't write terminating null /* Widen Unicode objects to larger buffers. Don't write terminating null
character. Return NULL on error. */ character. Return NULL on error. */
void* static void*
_PyUnicode_AsKind(PyObject *s, unsigned int kind) unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
{ {
Py_ssize_t len;
void *result; void *result;
unsigned int skind;
if (PyUnicode_READY(s) == -1) assert(skind < kind);
return NULL;
len = PyUnicode_GET_LENGTH(s);
skind = PyUnicode_KIND(s);
if (skind >= kind) {
PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
return NULL;
}
switch (kind) { switch (kind) {
case PyUnicode_2BYTE_KIND: case PyUnicode_2BYTE_KIND:
result = PyMem_New(Py_UCS2, len); result = PyMem_New(Py_UCS2, len);
@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
assert(skind == PyUnicode_1BYTE_KIND); assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES( _PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS2, Py_UCS1, Py_UCS2,
PyUnicode_1BYTE_DATA(s), (const Py_UCS1 *)data,
PyUnicode_1BYTE_DATA(s) + len, ((const Py_UCS1 *)data) + len,
result); result);
return result; return result;
case PyUnicode_4BYTE_KIND: case PyUnicode_4BYTE_KIND:
@ -2536,24 +2528,23 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
if (skind == PyUnicode_2BYTE_KIND) { if (skind == PyUnicode_2BYTE_KIND) {
_PyUnicode_CONVERT_BYTES( _PyUnicode_CONVERT_BYTES(
Py_UCS2, Py_UCS4, Py_UCS2, Py_UCS4,
PyUnicode_2BYTE_DATA(s), (const Py_UCS2 *)data,
PyUnicode_2BYTE_DATA(s) + len, ((const Py_UCS2 *)data) + len,
result); result);
} }
else { else {
assert(skind == PyUnicode_1BYTE_KIND); assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES( _PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS4, Py_UCS1, Py_UCS4,
PyUnicode_1BYTE_DATA(s), (const Py_UCS1 *)data,
PyUnicode_1BYTE_DATA(s) + len, ((const Py_UCS1 *)data) + len,
result); result);
} }
return result; return result;
default: default:
break; Py_UNREACHABLE();
return NULL;
} }
PyErr_SetString(PyExc_SystemError, "invalid kind");
return NULL;
} }
static Py_UCS4* static Py_UCS4*
@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
} }
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(s2, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return -2; return -2;
} }
@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
buf1 = PyUnicode_DATA(str); buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(substr); buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substr, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
goto onError; goto onError;
} }
@ -10415,7 +10406,7 @@ split(PyObject *self,
buf1 = PyUnicode_DATA(self); buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring); buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return NULL; return NULL;
} }
@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
buf1 = PyUnicode_DATA(self); buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring); buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return NULL; return NULL;
} }
@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) { if (kind1 < rkind) {
/* widen substring */ /* widen substring */
buf1 = _PyUnicode_AsKind(str1, rkind); buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error; if (!buf1) goto error;
release1 = 1; release1 = 1;
} }
@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing; goto nothing;
if (rkind > kind2) { if (rkind > kind2) {
/* widen replacement */ /* widen replacement */
buf2 = _PyUnicode_AsKind(str2, rkind); buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error; if (!buf2) goto error;
release2 = 1; release2 = 1;
} }
else if (rkind < kind2) { else if (rkind < kind2) {
/* widen self and buf1 */ /* widen self and buf1 */
rkind = kind2; rkind = kind2;
if (release1) PyMem_Free(buf1); if (release1) {
release1 = 0; PyMem_Free(buf1);
sbuf = _PyUnicode_AsKind(self, rkind); buf1 = PyUnicode_DATA(str1);
release1 = 0;
}
sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error; if (!sbuf) goto error;
srelease = 1; srelease = 1;
buf1 = _PyUnicode_AsKind(str1, rkind); buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error; if (!buf1) goto error;
release1 = 1; release1 = 1;
} }
@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) { if (kind1 < rkind) {
/* widen substring */ /* widen substring */
buf1 = _PyUnicode_AsKind(str1, rkind); buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error; if (!buf1) goto error;
release1 = 1; release1 = 1;
} }
@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing; goto nothing;
if (kind2 < rkind) { if (kind2 < rkind) {
/* widen replacement */ /* widen replacement */
buf2 = _PyUnicode_AsKind(str2, rkind); buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error; if (!buf2) goto error;
release2 = 1; release2 = 1;
} }
else if (kind2 > rkind) { else if (kind2 > rkind) {
/* widen self and buf1 */ /* widen self and buf1 */
rkind = kind2; rkind = kind2;
sbuf = _PyUnicode_AsKind(self, rkind); sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error; if (!sbuf) goto error;
srelease = 1; srelease = 1;
if (release1) PyMem_Free(buf1); if (release1) {
release1 = 0; PyMem_Free(buf1);
buf1 = _PyUnicode_AsKind(str1, rkind); buf1 = PyUnicode_DATA(str1);
release1 = 0;
}
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error; if (!buf1) goto error;
release1 = 1; release1 = 1;
} }
@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
return result; return result;
} }
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substr, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return -1; return -1;
} }
@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
buf1 = PyUnicode_DATA(self); buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring); buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return NULL; return NULL;
} }
@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj); buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return NULL; return NULL;
} }
@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj); buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) { if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1); buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2) if (!buf2)
return NULL; return NULL;
} }