Unicode: document when the wstr pointer is shared with data

Also add related assertions to _PyUnicode_CheckConsistency().
2011-10-04 00:00:20 +02:00 · 2011-10-04 00:00:20 +02:00 · 7f11ad4594
parent 03490918b7
commit 7f11ad4594
2 changed files with 30 additions and 2 deletions
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -226,6 +226,9 @@ typedef struct {
         * ready = 1
         * ascii = 0
         * utf8 != data
         * wstr is shared with data if kind=PyUnicode_2BYTE_KIND
           and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
           sizeof(wchar_t)=4
       - legacy string, not ready:
@ -247,7 +250,10 @@ typedef struct {
         * compact = 0
         * ready = 1
         * data.any is not NULL
-         * utf8 = data if ascii is 1
+         * utf8 is shared with data.any if ascii = 1
         * wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND
           and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
           sizeof(wchar_t)=4
       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -302,12 +302,24 @@ _PyUnicode_CheckConsistency(void *op)
    }
    else if (ascii->state.compact == 1) {
        PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
        void *data;
        assert(kind == PyUnicode_1BYTE_KIND
               || kind == PyUnicode_2BYTE_KIND
               || kind == PyUnicode_4BYTE_KIND);
        assert(ascii->state.ascii == 0);
        assert(ascii->state.ready == 1);
-        assert (compact->utf8 != (void*)(compact + 1));
+        data = compact + 1;
        assert (compact->utf8 != data);
        if (
 #if SIZEOF_WCHAR_T == 2
            kind == PyUnicode_2BYTE_KIND
 #else
            kind == PyUnicode_4BYTE_KIND
 #endif
           )
            assert(ascii->wstr == data);
        else
            assert(ascii->wstr != data);
    } else {
        PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
        PyUnicodeObject *unicode = (PyUnicodeObject *)op;
@ -332,6 +344,16 @@ _PyUnicode_CheckConsistency(void *op)
                assert (compact->utf8 == unicode->data.any);
            else
                assert (compact->utf8 != unicode->data.any);
            if (
 #if SIZEOF_WCHAR_T == 2
                kind == PyUnicode_2BYTE_KIND
 #else
                kind == PyUnicode_4BYTE_KIND
 #endif
               )
                assert(ascii->wstr == unicode->data.any);
            else
                assert(ascii->wstr != unicode->data.any);
        }
    }
    return 1;