diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 331e8399914..8e19ebc0ad3 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -226,6 +226,9 @@ typedef struct { * ready = 1 * ascii = 0 * utf8 != data + * wstr is shared with data if kind=PyUnicode_2BYTE_KIND + and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and + sizeof(wchar_t)=4 - legacy string, not ready: @@ -247,7 +250,10 @@ typedef struct { * compact = 0 * ready = 1 * data.any is not NULL - * utf8 = data if ascii is 1 + * utf8 is shared with data.any if ascii = 1 + * wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND + and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and + sizeof(wchar_t)=4 Compact strings use only one memory block (structure + characters), whereas legacy strings use one block for the structure and one block diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 77cc0820f91..46578128bf9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -302,12 +302,24 @@ _PyUnicode_CheckConsistency(void *op) } else if (ascii->state.compact == 1) { PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; + void *data; assert(kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); assert(ascii->state.ascii == 0); assert(ascii->state.ready == 1); - assert (compact->utf8 != (void*)(compact + 1)); + data = compact + 1; + assert (compact->utf8 != data); + if ( +#if SIZEOF_WCHAR_T == 2 + kind == PyUnicode_2BYTE_KIND +#else + kind == PyUnicode_4BYTE_KIND +#endif + ) + assert(ascii->wstr == data); + else + assert(ascii->wstr != data); } else { PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; PyUnicodeObject *unicode = (PyUnicodeObject *)op; @@ -332,6 +344,16 @@ _PyUnicode_CheckConsistency(void *op) assert (compact->utf8 == unicode->data.any); else assert (compact->utf8 != unicode->data.any); + if ( +#if SIZEOF_WCHAR_T == 2 + kind == PyUnicode_2BYTE_KIND +#else + kind == 
PyUnicode_4BYTE_KIND +#endif + ) + assert(ascii->wstr == unicode->data.any); + else + assert(ascii->wstr != unicode->data.any); } } return 1;