Document utf8_length and wstr_length states
Ensure these states with assertions in _PyUnicode_CheckConsistency().
This commit is contained in:
parent
9566311014
commit
a41463c203
|
@ -226,9 +226,11 @@ typedef struct {
|
|||
* ready = 1
|
||||
* ascii = 0
|
||||
* utf8 != data
|
||||
* wstr is shared with data if kind=PyUnicode_2BYTE_KIND
|
||||
and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
|
||||
sizeof(wchar_t)=4
|
||||
* utf8_length = 0 if utf8 is NULL
|
||||
* wstr is shared with data and wstr_length=length
|
||||
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
|
||||
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
|
||||
* wstr_length = 0 if wstr is NULL
|
||||
|
||||
- legacy string, not ready:
|
||||
|
||||
|
@ -239,6 +241,7 @@ typedef struct {
|
|||
* wstr is not NULL
|
||||
* data.any is NULL
|
||||
* utf8 is NULL
|
||||
* utf8_length = 0
|
||||
* interned = SSTATE_NOT_INTERNED
|
||||
* ascii = 0
|
||||
|
||||
|
@ -250,10 +253,12 @@ typedef struct {
|
|||
* compact = 0
|
||||
* ready = 1
|
||||
* data.any is not NULL
|
||||
* utf8 is shared with data.any if ascii = 1
|
||||
* wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND
|
||||
and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
|
||||
sizeof(wchar_t)=4
|
||||
* utf8 is shared with data.any and utf8_length = length if ascii = 1
|
||||
* utf8_length = 0 if utf8 is NULL
|
||||
* wstr is shared with data.any and wstr_length = length
|
||||
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
|
||||
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
|
||||
* wstr_length = 0 if wstr is NULL
|
||||
|
||||
Compact strings use only one memory block (structure + characters),
|
||||
whereas legacy strings use one block for the structure and one block
|
||||
|
|
|
@ -300,50 +300,47 @@ _PyUnicode_CheckConsistency(void *op)
|
|||
assert(kind == PyUnicode_1BYTE_KIND);
|
||||
assert(ascii->state.ready == 1);
|
||||
}
|
||||
else if (ascii->state.compact == 1) {
|
||||
else {
|
||||
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
|
||||
void *data;
|
||||
assert(kind == PyUnicode_1BYTE_KIND
|
||||
|| kind == PyUnicode_2BYTE_KIND
|
||||
|| kind == PyUnicode_4BYTE_KIND);
|
||||
assert(ascii->state.ascii == 0);
|
||||
assert(ascii->state.ready == 1);
|
||||
data = compact + 1;
|
||||
assert (compact->utf8 != data);
|
||||
if (
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
kind == PyUnicode_2BYTE_KIND
|
||||
#else
|
||||
kind == PyUnicode_4BYTE_KIND
|
||||
#endif
|
||||
)
|
||||
assert(ascii->wstr == data);
|
||||
else
|
||||
assert(ascii->wstr != data);
|
||||
} else {
|
||||
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
|
||||
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
|
||||
|
||||
if (kind == PyUnicode_WCHAR_KIND) {
|
||||
assert(ascii->state.compact == 0);
|
||||
assert(ascii->state.ascii == 0);
|
||||
assert(ascii->state.ready == 0);
|
||||
assert(ascii->wstr != NULL);
|
||||
assert(unicode->data.any == NULL);
|
||||
assert(compact->utf8 == NULL);
|
||||
assert(ascii->state.interned == SSTATE_NOT_INTERNED);
|
||||
}
|
||||
else {
|
||||
if (ascii->state.compact == 1) {
|
||||
data = compact + 1;
|
||||
assert(kind == PyUnicode_1BYTE_KIND
|
||||
|| kind == PyUnicode_2BYTE_KIND
|
||||
|| kind == PyUnicode_4BYTE_KIND);
|
||||
assert(ascii->state.compact == 0);
|
||||
assert(ascii->state.ascii == 0);
|
||||
assert(ascii->state.ready == 1);
|
||||
assert(unicode->data.any != NULL);
|
||||
if (ascii->state.ascii)
|
||||
assert (compact->utf8 == unicode->data.any);
|
||||
else
|
||||
assert (compact->utf8 != unicode->data.any);
|
||||
assert (compact->utf8 != data);
|
||||
} else {
|
||||
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
|
||||
|
||||
data = unicode->data.any;
|
||||
if (kind == PyUnicode_WCHAR_KIND) {
|
||||
assert(ascii->state.compact == 0);
|
||||
assert(ascii->state.ascii == 0);
|
||||
assert(ascii->state.ready == 0);
|
||||
assert(ascii->wstr != NULL);
|
||||
assert(data == NULL);
|
||||
assert(compact->utf8 == NULL);
|
||||
assert(ascii->state.interned == SSTATE_NOT_INTERNED);
|
||||
}
|
||||
else {
|
||||
assert(kind == PyUnicode_1BYTE_KIND
|
||||
|| kind == PyUnicode_2BYTE_KIND
|
||||
|| kind == PyUnicode_4BYTE_KIND);
|
||||
assert(ascii->state.compact == 0);
|
||||
assert(ascii->state.ready == 1);
|
||||
assert(data != NULL);
|
||||
if (ascii->state.ascii) {
|
||||
assert (compact->utf8 == data);
|
||||
assert (compact->utf8_length == ascii->length);
|
||||
}
|
||||
else
|
||||
assert (compact->utf8 != data);
|
||||
}
|
||||
}
|
||||
if (kind != PyUnicode_WCHAR_KIND) {
|
||||
if (
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
kind == PyUnicode_2BYTE_KIND
|
||||
|
@ -351,10 +348,17 @@ _PyUnicode_CheckConsistency(void *op)
|
|||
kind == PyUnicode_4BYTE_KIND
|
||||
#endif
|
||||
)
|
||||
assert(ascii->wstr == unicode->data.any);
|
||||
else
|
||||
assert(ascii->wstr != unicode->data.any);
|
||||
{
|
||||
assert(ascii->wstr == data);
|
||||
assert(compact->wstr_length == ascii->length);
|
||||
} else
|
||||
assert(ascii->wstr != data);
|
||||
}
|
||||
|
||||
if (compact->utf8 == NULL)
|
||||
assert(compact->utf8_length == 0);
|
||||
if (ascii->wstr == NULL)
|
||||
assert(compact->wstr_length == 0);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue