bpo-40130: _PyUnicode_AsKind() should not be exported. (GH-19265)

Make it a static function, and pass known attributes
(kind, data, length) instead of the PyUnicode object.
This commit is contained in:
Serhiy Storchaka 2020-04-01 15:41:49 +03:00 committed by GitHub
parent 3ef4a7e5a7
commit 17b4733f2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 55 deletions

View File

@ -726,12 +726,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
Py_ssize_t start,
Py_ssize_t end);
/* --- wchar_t support for platforms which support it --------------------- */
#ifdef HAVE_WCHAR_H
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
#endif
/* --- Manage the default encoding ---------------------------------------- */
/* Returns a pointer to the default encoding (UTF-8) of the

View File

@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
void *data = PyUnicode_DATA(unicode);
const char *end = str + len;
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
switch (kind) {
case PyUnicode_1BYTE_KIND: {
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
#ifdef Py_DEBUG
if (PyUnicode_IS_ASCII(unicode)) {
Py_UCS4 maxchar = ucs1lib_find_max_char(
@ -2060,7 +2060,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
case PyUnicode_2BYTE_KIND: {
Py_UCS2 *start = (Py_UCS2 *)data + index;
Py_UCS2 *ucs2 = start;
assert(index <= PyUnicode_GET_LENGTH(unicode));
for (; str < end; ++ucs2, ++str)
*ucs2 = (Py_UCS2)*str;
@ -2068,17 +2067,18 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
}
default: {
case PyUnicode_4BYTE_KIND: {
Py_UCS4 *start = (Py_UCS4 *)data + index;
Py_UCS4 *ucs4 = start;
assert(kind == PyUnicode_4BYTE_KIND);
assert(index <= PyUnicode_GET_LENGTH(unicode));
for (; str < end; ++ucs4, ++str)
*ucs4 = (Py_UCS4)*str;
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
}
default:
Py_UNREACHABLE();
}
}
@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
if (max_char >= 256)
return;
}
else {
else if (kind == PyUnicode_4BYTE_KIND) {
const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
assert(kind == PyUnicode_4BYTE_KIND);
max_char = ucs4lib_find_max_char(u, u + len);
if (max_char >= 0x10000)
return;
}
else
Py_UNREACHABLE();
copy = PyUnicode_New(len, max_char);
if (copy != NULL)
_PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
/* Widen Unicode objects to larger buffers. Don't write terminating null
character. Return NULL on error. */
void*
_PyUnicode_AsKind(PyObject *s, unsigned int kind)
static void*
unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
{
Py_ssize_t len;
void *result;
unsigned int skind;
if (PyUnicode_READY(s) == -1)
return NULL;
len = PyUnicode_GET_LENGTH(s);
skind = PyUnicode_KIND(s);
if (skind >= kind) {
PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
return NULL;
}
assert(skind < kind);
switch (kind) {
case PyUnicode_2BYTE_KIND:
result = PyMem_New(Py_UCS2, len);
@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS2,
PyUnicode_1BYTE_DATA(s),
PyUnicode_1BYTE_DATA(s) + len,
(const Py_UCS1 *)data,
((const Py_UCS1 *)data) + len,
result);
return result;
case PyUnicode_4BYTE_KIND:
@ -2536,25 +2528,24 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
if (skind == PyUnicode_2BYTE_KIND) {
_PyUnicode_CONVERT_BYTES(
Py_UCS2, Py_UCS4,
PyUnicode_2BYTE_DATA(s),
PyUnicode_2BYTE_DATA(s) + len,
(const Py_UCS2 *)data,
((const Py_UCS2 *)data) + len,
result);
}
else {
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS4,
PyUnicode_1BYTE_DATA(s),
PyUnicode_1BYTE_DATA(s) + len,
(const Py_UCS1 *)data,
((const Py_UCS1 *)data) + len,
result);
}
return result;
default:
break;
}
PyErr_SetString(PyExc_SystemError, "invalid kind");
Py_UNREACHABLE();
return NULL;
}
}
static Py_UCS4*
as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
}
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(s2, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -2;
}
@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substr, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
goto onError;
}
@ -10415,7 +10406,7 @@ split(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) {
/* widen substring */
buf1 = _PyUnicode_AsKind(str1, rkind);
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (rkind > kind2) {
/* widen replacement */
buf2 = _PyUnicode_AsKind(str2, rkind);
buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (rkind < kind2) {
/* widen self and buf1 */
rkind = kind2;
if (release1) PyMem_Free(buf1);
if (release1) {
PyMem_Free(buf1);
buf1 = PyUnicode_DATA(str1);
release1 = 0;
sbuf = _PyUnicode_AsKind(self, rkind);
}
sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
buf1 = _PyUnicode_AsKind(str1, rkind);
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) {
/* widen substring */
buf1 = _PyUnicode_AsKind(str1, rkind);
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (kind2 < rkind) {
/* widen replacement */
buf2 = _PyUnicode_AsKind(str2, rkind);
buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (kind2 > rkind) {
/* widen self and buf1 */
rkind = kind2;
sbuf = _PyUnicode_AsKind(self, rkind);
sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
if (release1) PyMem_Free(buf1);
if (release1) {
PyMem_Free(buf1);
buf1 = PyUnicode_DATA(str1);
release1 = 0;
buf1 = _PyUnicode_AsKind(str1, rkind);
}
buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
return result;
}
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substr, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -1;
}
@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(substring, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
buf2 = _PyUnicode_AsKind(sep_obj, kind1);
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}