bpo-10746: Fix ctypes PEP 3118 type codes for c_long, c_bool, c_int (#31)

Ctypes currently produces wrong PEP 3118 type codes for several types. E.g. memoryview(ctypes.c_long()).format gives "<l" on 64-bit platforms, but it should be "<q" instead when sizeof(c_long) == 8. The problem is that the '<>' endian specification in the struct syntax also turns on the "standard size" mode, which makes type characters have a platform-independent meaning that does not match the codes used internally in ctypes. The struct module format syntax also does not allow specifying native-size non-native-endian items. This commit adds a converter function that maps the internal ctypes codes to appropriate struct module standard-size codes in the PEP 3118 format strings. The tests are modified to check for this.
2017-08-28 14:08:49 +02:00 · 2017-08-28 14:08:49 +02:00 · 07f1658aa0
parent a30f6d45ac
commit 07f1658aa0
3 changed files with 123 additions and 30 deletions
--- a/Lib/ctypes/test/test_pep3118.py
+++ b/Lib/ctypes/test/test_pep3118.py
@ -112,6 +112,34 @@ Complete._fields_ = [("a", c_long)]
 # This table contains format strings as they look on little endian
 # machines.  The test replaces '<' with '>' on big endian machines.
 #
+
+# Platform-specific type codes
+s_bool = {1: '?', 2: 'H', 4: 'L', 8: 'Q'}[sizeof(c_bool)]
+s_short = {2: 'h', 4: 'l', 8: 'q'}[sizeof(c_short)]
+s_ushort = {2: 'H', 4: 'L', 8: 'Q'}[sizeof(c_ushort)]
+s_int = {2: 'h', 4: 'i', 8: 'q'}[sizeof(c_int)]
+s_uint = {2: 'H', 4: 'I', 8: 'Q'}[sizeof(c_uint)]
+s_long = {4: 'l', 8: 'q'}[sizeof(c_long)]
+s_ulong = {4: 'L', 8: 'Q'}[sizeof(c_ulong)]
+s_longlong = "q"
+s_ulonglong = "Q"
+s_float = "f"
+s_double = "d"
+s_longdouble = "g"
+
+# Alias definitions in ctypes/__init__.py
+if c_int is c_long:
+    s_int = s_long
+if c_uint is c_ulong:
+    s_uint = s_ulong
+if c_longlong is c_long:
+    s_longlong = s_long
+if c_ulonglong is c_ulong:
+    s_ulonglong = s_ulong
+if c_longdouble is c_double:
+    s_longdouble = s_double
+
+
 native_types = [
    # type                      format                  shape           calc itemsize

@ -120,52 +148,51 @@ native_types = [
    (c_char,                    "<c",                   (),           c_char),
    (c_byte,                    "<b",                   (),           c_byte),
    (c_ubyte,                   "<B",                   (),           c_ubyte),
-    (c_short,                   "<h",                   (),           c_short),
-    (c_ushort,                  "<H",                   (),           c_ushort),
+    (c_short,                   "<" + s_short,          (),           c_short),
+    (c_ushort,                  "<" + s_ushort,         (),           c_ushort),

-    # c_int and c_uint may be aliases to c_long
-    #(c_int,                     "<i",                   (),           c_int),
-    #(c_uint,                    "<I",                   (),           c_uint),
+    (c_int,                     "<" + s_int,            (),           c_int),
+    (c_uint,                    "<" + s_uint,           (),           c_uint),

-    (c_long,                    "<l",                   (),           c_long),
-    (c_ulong,                   "<L",                   (),           c_ulong),
+    (c_long,                    "<" + s_long,           (),           c_long),
+    (c_ulong,                   "<" + s_ulong,          (),           c_ulong),

-    # c_longlong and c_ulonglong are aliases on 64-bit platforms
-    #(c_longlong,                "<q",                   None,           c_longlong),
-    #(c_ulonglong,               "<Q",                   None,           c_ulonglong),
+    (c_longlong,                "<" + s_longlong,       (),           c_longlong),
+    (c_ulonglong,               "<" + s_ulonglong,      (),           c_ulonglong),

    (c_float,                   "<f",                   (),           c_float),
    (c_double,                  "<d",                   (),           c_double),
-    # c_longdouble may be an alias to c_double

-    (c_bool,                    "<?",                   (),           c_bool),
+    (c_longdouble,              "<" + s_longdouble,     (),           c_longdouble),
+
+    (c_bool,                    "<" + s_bool,           (),           c_bool),
    (py_object,                 "<O",                   (),           py_object),

    ## pointers

    (POINTER(c_byte),           "&<b",                  (),           POINTER(c_byte)),
-    (POINTER(POINTER(c_long)),  "&&<l",                 (),           POINTER(POINTER(c_long))),
+    (POINTER(POINTER(c_long)),  "&&<" + s_long,         (),           POINTER(POINTER(c_long))),

    ## arrays and pointers

    (c_double * 4,              "<d",                   (4,),           c_double),
    (c_float * 4 * 3 * 2,       "<f",                   (2,3,4),        c_float),
-    (POINTER(c_short) * 2,      "&<h",                  (2,),           POINTER(c_short)),
-    (POINTER(c_short) * 2 * 3,  "&<h",                  (3,2,),         POINTER(c_short)),
-    (POINTER(c_short * 2),      "&(2)<h",               (),           POINTER(c_short)),
+    (POINTER(c_short) * 2,      "&<" + s_short,         (2,),           POINTER(c_short)),
+    (POINTER(c_short) * 2 * 3,  "&<" + s_short,         (3,2,),         POINTER(c_short)),
+    (POINTER(c_short * 2),      "&(2)<" + s_short,      (),             POINTER(c_short)),

    ## structures and unions

-    (Point,                     "T{<l:x:<l:y:}",        (),           Point),
+    (Point,                     "T{<l:x:<l:y:}".replace('l', s_long),  (),  Point),
    # packed structures do not implement the pep
-    (PackedPoint,               "B",                    (),           PackedPoint),
-    (Point2,                    "T{<l:x:<l:y:}",        (),           Point2),
-    (EmptyStruct,               "T{}",                  (),           EmptyStruct),
+    (PackedPoint,               "B",                                   (),  PackedPoint),
+    (Point2,                    "T{<l:x:<l:y:}".replace('l', s_long),  (),  Point2),
+    (EmptyStruct,               "T{}",                                 (),  EmptyStruct),
    # the pep doesn't support unions
-    (aUnion,                    "B",                    (),           aUnion),
+    (aUnion,                    "B",                                   (),  aUnion),
    # structure with sub-arrays
-    (StructWithArrays,          "T{(2,3)<l:x:(4)T{<l:x:<l:y:}:y:}", (),  StructWithArrays),
-    (StructWithArrays * 3,      "T{(2,3)<l:x:(4)T{<l:x:<l:y:}:y:}", (3,),  StructWithArrays),
+    (StructWithArrays, "T{(2,3)<l:x:(4)T{<l:x:<l:y:}:y:}".replace('l', s_long), (), StructWithArrays),
+    (StructWithArrays * 3, "T{(2,3)<l:x:(4)T{<l:x:<l:y:}:y:}".replace('l', s_long), (3,), StructWithArrays),

    ## pointer to incomplete structure
    (Incomplete,                "B",                    (),           Incomplete),
@ -173,7 +200,7 @@ native_types = [

    # 'Complete' is a structure that starts incomplete, but is completed after the
    # pointer type to it has been created.
-    (Complete,                  "T{<l:a:}",             (),           Complete),
+    (Complete,                  "T{<l:a:}".replace('l', s_long), (), Complete),
    # Unfortunately the pointer format string is not fixed...
    (POINTER(Complete),         "&B",                   (),           POINTER(Complete)),

@ -196,10 +223,10 @@ class LEPoint(LittleEndianStructure):
 # and little endian machines.
 #
 endian_types = [
-    (BEPoint,                   "T{>l:x:>l:y:}",        (),           BEPoint),
-    (LEPoint,                   "T{<l:x:<l:y:}",        (),           LEPoint),
-    (POINTER(BEPoint),          "&T{>l:x:>l:y:}",       (),           POINTER(BEPoint)),
-    (POINTER(LEPoint),          "&T{<l:x:<l:y:}",       (),           POINTER(LEPoint)),
+    (BEPoint, "T{>l:x:>l:y:}".replace('l', s_long), (), BEPoint),
+    (LEPoint, "T{<l:x:<l:y:}".replace('l', s_long), (), LEPoint),
+    (POINTER(BEPoint), "&T{>l:x:>l:y:}".replace('l', s_long), (), POINTER(BEPoint)),
+    (POINTER(LEPoint), "&T{<l:x:<l:y:}".replace('l', s_long), (), POINTER(LEPoint)),
    ]

 if __name__ == "__main__":
--- a/Misc/NEWS.d/next/Library/2017-08-28-13-01-05.bpo-10746.nmAvfu.rst
+++ b/Misc/NEWS.d/next/Library/2017-08-28-13-01-05.bpo-10746.nmAvfu.rst
@ -0,0 +1 @@
+Fix ctypes producing wrong PEP 3118 type codes for integer types.
--- a/Modules/_ctypes/_ctypes.c
+++ b/Modules/_ctypes/_ctypes.c
@ -249,6 +249,71 @@ PyDict_GetItemProxy(PyObject *dict, PyObject *key)
 }

 /******************************************************************/
+
+/*
+  Allocate a memory block for a pep3118 format string, filled with
+  a suitable PEP 3118 type code corresponding to the given ctypes
+  type. Returns NULL on failure, with the error indicator set.
+
+  'code' is the single-character ctypes type code.  'big_endian' selects
+  the byte-order prefix: '>' when non-zero, '<' when zero.  The result is
+  a NUL-terminated two-character string allocated with PyMem_Malloc.
+
+  This produces type codes in the standard size mode (cf. struct module),
+  since the endianness may need to be swapped to a non-native one
+  later on.
+ */
+static char *
+_ctypes_alloc_format_string_for_type(char code, int big_endian)
+{
+    char *result;
+    char pep_code = '\0';
+
+    /* Map codes whose size is platform dependent onto the standard-size
+       code of the same width; the preprocessor picks the mapping that
+       matches this build. */
+    switch (code) {
+#if SIZEOF_INT == 2
+    case 'i': pep_code = 'h'; break;
+    case 'I': pep_code = 'H'; break;
+#elif SIZEOF_INT == 4
+    case 'i': pep_code = 'i'; break;
+    case 'I': pep_code = 'I'; break;
+#elif SIZEOF_INT == 8
+    case 'i': pep_code = 'q'; break;
+    case 'I': pep_code = 'Q'; break;
+#else
+# error SIZEOF_INT has an unexpected value
+#endif /* SIZEOF_INT */
+#if SIZEOF_LONG == 4
+    case 'l': pep_code = 'l'; break;
+    case 'L': pep_code = 'L'; break;
+#elif SIZEOF_LONG == 8
+    case 'l': pep_code = 'q'; break;
+    case 'L': pep_code = 'Q'; break;
+#else
+# error SIZEOF_LONG has an unexpected value
+#endif /* SIZEOF_LONG */
+#if SIZEOF__BOOL == 1
+    case '?': pep_code = '?'; break;
+#elif SIZEOF__BOOL == 2
+    case '?': pep_code = 'H'; break;
+#elif SIZEOF__BOOL == 4
+    case '?': pep_code = 'L'; break;
+#elif SIZEOF__BOOL == 8
+    case '?': pep_code = 'Q'; break;
+#else
+# error SIZEOF__BOOL has an unexpected value
+#endif /* SIZEOF__BOOL */
+    default:
+        /* The standard-size code is the same as the ctypes one */
+        pep_code = code;
+        break;
+    }
+
+    /* 3 bytes: endian prefix, type code, terminating NUL */
+    result = PyMem_Malloc(3);
+    if (result == NULL) {
+        /* PyMem_Malloc does not set an exception on failure; set one
+           here so the documented "error indicator set" contract holds. */
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    result[0] = big_endian ? '>' : '<';
+    result[1] = pep_code;
+    result[2] = '\0';
+    return result;
+}
+
 /*
  Allocate a memory block for a pep3118 format string, copy prefix (if
  non-null) and suffix into it.  Returns NULL on failure, with the error
@ -1930,9 +1995,9 @@ PyCSimpleType_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    stgdict->setfunc = fmt->setfunc;
    stgdict->getfunc = fmt->getfunc;
 #ifdef WORDS_BIGENDIAN
-    stgdict->format = _ctypes_alloc_format_string(">", proto_str);
+    stgdict->format = _ctypes_alloc_format_string_for_type(proto_str[0], 1);
 #else
-    stgdict->format = _ctypes_alloc_format_string("<", proto_str);
+    stgdict->format = _ctypes_alloc_format_string_for_type(proto_str[0], 0);
 #endif
    if (stgdict->format == NULL) {
        Py_DECREF(result);
				`@ -0,0 +1 @@`
				`Fix ctypes producing wrong PEP 3118 type codes for integer types.`