Document utf8_length and wstr_length states

Ensure these states with assertions in _PyUnicode_CheckConsistency().
This commit is contained in:
Victor Stinner 2011-10-04 01:05:08 +02:00
parent 9566311014
commit a41463c203
2 changed files with 56 additions and 47 deletions

View File

@ -226,9 +226,11 @@ typedef struct {
* ready = 1 * ready = 1
* ascii = 0 * ascii = 0
* utf8 != data * utf8 != data
* wstr is shared with data if kind=PyUnicode_2BYTE_KIND * utf8_length = 0 if utf8 is NULL
and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and * wstr is shared with data and wstr_length=length
sizeof(wchar_t)=4 if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
* wstr_length = 0 if wstr is NULL
- legacy string, not ready: - legacy string, not ready:
@ -239,6 +241,7 @@ typedef struct {
* wstr is not NULL * wstr is not NULL
* data.any is NULL * data.any is NULL
* utf8 is NULL * utf8 is NULL
* utf8_length = 0
* interned = SSTATE_NOT_INTERNED * interned = SSTATE_NOT_INTERNED
* ascii = 0 * ascii = 0
@ -250,10 +253,12 @@ typedef struct {
* compact = 0 * compact = 0
* ready = 1 * ready = 1
* data.any is not NULL * data.any is not NULL
* utf8 is shared with data.any if ascii = 1 * utf8 is shared with data.any and utf8_length = length if ascii = 1
* wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND * utf8_length = 0 if utf8 is NULL
and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and * wstr is shared with data.any and wstr_length = length
sizeof(wchar_t)=4 if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
* wstr_length = 0 if wstr is NULL
Compact strings use only one memory block (structure + characters), Compact strings use only one memory block (structure + characters),
whereas legacy strings use one block for the structure and one block whereas legacy strings use one block for the structure and one block

View File

@ -300,36 +300,28 @@ _PyUnicode_CheckConsistency(void *op)
assert(kind == PyUnicode_1BYTE_KIND); assert(kind == PyUnicode_1BYTE_KIND);
assert(ascii->state.ready == 1); assert(ascii->state.ready == 1);
} }
else if (ascii->state.compact == 1) { else {
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
void *data; void *data;
if (ascii->state.compact == 1) {
data = compact + 1;
assert(kind == PyUnicode_1BYTE_KIND assert(kind == PyUnicode_1BYTE_KIND
|| kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_2BYTE_KIND
|| kind == PyUnicode_4BYTE_KIND); || kind == PyUnicode_4BYTE_KIND);
assert(ascii->state.ascii == 0); assert(ascii->state.ascii == 0);
assert(ascii->state.ready == 1); assert(ascii->state.ready == 1);
data = compact + 1;
assert (compact->utf8 != data); assert (compact->utf8 != data);
if (
#if SIZEOF_WCHAR_T == 2
kind == PyUnicode_2BYTE_KIND
#else
kind == PyUnicode_4BYTE_KIND
#endif
)
assert(ascii->wstr == data);
else
assert(ascii->wstr != data);
} else { } else {
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
PyUnicodeObject *unicode = (PyUnicodeObject *)op; PyUnicodeObject *unicode = (PyUnicodeObject *)op;
data = unicode->data.any;
if (kind == PyUnicode_WCHAR_KIND) { if (kind == PyUnicode_WCHAR_KIND) {
assert(ascii->state.compact == 0); assert(ascii->state.compact == 0);
assert(ascii->state.ascii == 0); assert(ascii->state.ascii == 0);
assert(ascii->state.ready == 0); assert(ascii->state.ready == 0);
assert(ascii->wstr != NULL); assert(ascii->wstr != NULL);
assert(unicode->data.any == NULL); assert(data == NULL);
assert(compact->utf8 == NULL); assert(compact->utf8 == NULL);
assert(ascii->state.interned == SSTATE_NOT_INTERNED); assert(ascii->state.interned == SSTATE_NOT_INTERNED);
} }
@ -339,11 +331,16 @@ _PyUnicode_CheckConsistency(void *op)
|| kind == PyUnicode_4BYTE_KIND); || kind == PyUnicode_4BYTE_KIND);
assert(ascii->state.compact == 0); assert(ascii->state.compact == 0);
assert(ascii->state.ready == 1); assert(ascii->state.ready == 1);
assert(unicode->data.any != NULL); assert(data != NULL);
if (ascii->state.ascii) if (ascii->state.ascii) {
assert (compact->utf8 == unicode->data.any); assert (compact->utf8 == data);
assert (compact->utf8_length == ascii->length);
}
else else
assert (compact->utf8 != unicode->data.any); assert (compact->utf8 != data);
}
}
if (kind != PyUnicode_WCHAR_KIND) {
if ( if (
#if SIZEOF_WCHAR_T == 2 #if SIZEOF_WCHAR_T == 2
kind == PyUnicode_2BYTE_KIND kind == PyUnicode_2BYTE_KIND
@ -351,10 +348,17 @@ _PyUnicode_CheckConsistency(void *op)
kind == PyUnicode_4BYTE_KIND kind == PyUnicode_4BYTE_KIND
#endif #endif
) )
assert(ascii->wstr == unicode->data.any); {
else assert(ascii->wstr == data);
assert(ascii->wstr != unicode->data.any); assert(compact->wstr_length == ascii->length);
} else
assert(ascii->wstr != data);
} }
if (compact->utf8 == NULL)
assert(compact->utf8_length == 0);
if (ascii->wstr == NULL)
assert(compact->wstr_length == 0);
} }
return 1; return 1;
} }