gh-76961: Fix the PEP3118 format string for ctypes.Structure (#5561)

The summary of this diff is that it:

* adds a `_ctypes_alloc_format_padding` function to append strings like `37x` to a format string to indicate 37 padding bytes
* removes the branches that amount to "give up on producing a valid format string if the struct is packed"
* combines the resulting adjacent `if (isStruct) {`s now that neither is `if (isStruct && !isPacked) {`
* invokes `_ctypes_alloc_format_padding` to add padding between structure fields, and after the last structure field. The computation used for the total size is unchanged from ctypes already used.

This patch does not affect any existing aligment computation; all it does is use subtraction to deduce the amount of paddnig introduced by the existing code.

---

Without this fix, it would never include padding bytes - an assumption that was only
valid in the case when `_pack_` was set - and this case was explicitly not implemented.

This should allow conversion from ctypes structs to numpy structs

Fixes https://github.com/numpy/numpy/issues/10528
This commit is contained in:
Eric Wieser 2023-02-05 17:10:53 +00:00 committed by GitHub
parent 0672a6c23b
commit 90d85a9b41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 111 additions and 33 deletions

View File

@ -2510,6 +2510,7 @@ fields, or any other data types containing pointer type fields.
An optional small integer that allows overriding the alignment of
structure fields in the instance. :attr:`_pack_` must already be defined
when :attr:`_fields_` is assigned, otherwise it will have no effect.
Setting this attribute to 0 is the same as not setting it at all.
.. attribute:: _anonymous_

View File

@ -86,6 +86,20 @@ class PackedPoint(Structure):
_pack_ = 2
_fields_ = [("x", c_long), ("y", c_long)]
class PointMidPad(Structure):
_fields_ = [("x", c_byte), ("y", c_uint64)]
class PackedPointMidPad(Structure):
_pack_ = 2
_fields_ = [("x", c_byte), ("y", c_uint64)]
class PointEndPad(Structure):
_fields_ = [("x", c_uint64), ("y", c_byte)]
class PackedPointEndPad(Structure):
_pack_ = 2
_fields_ = [("x", c_uint64), ("y", c_byte)]
class Point2(Structure):
pass
Point2._fields_ = [("x", c_long), ("y", c_long)]
@ -185,10 +199,13 @@ native_types = [
## structures and unions
(Point, "T{<l:x:<l:y:}".replace('l', s_long), (), Point),
# packed structures do not implement the pep
(PackedPoint, "B", (), PackedPoint),
(Point2, "T{<l:x:<l:y:}".replace('l', s_long), (), Point2),
(Point, "T{<l:x:<l:y:}".replace('l', s_long), (), Point),
(PackedPoint, "T{<l:x:<l:y:}".replace('l', s_long), (), PackedPoint),
(PointMidPad, "T{<b:x:7x<Q:y:}", (), PointMidPad),
(PackedPointMidPad, "T{<b:x:x<Q:y:}", (), PackedPointMidPad),
(PointEndPad, "T{<Q:x:<b:y:7x}", (), PointEndPad),
(PackedPointEndPad, "T{<Q:x:<b:y:x}", (), PackedPointEndPad),
(EmptyStruct, "T{}", (), EmptyStruct),
# the pep doesn't support unions
(aUnion, "B", (), aUnion),

View File

@ -0,0 +1,3 @@
Inter-field padding is now inserted into the PEP3118 format strings obtained
from :class:`ctypes.Structure` objects, reflecting their true representation in
memory.

View File

@ -337,6 +337,29 @@ MakeAnonFields(PyObject *type)
return 0;
}
/*
Allocate a memory block for a pep3118 format string, copy prefix (if
non-null) into it and append `{padding}x` to the end.
Returns NULL on failure, with the error indicator set.
*/
char *
_ctypes_alloc_format_padding(const char *prefix, Py_ssize_t padding)
{
/* int64 decimal characters + x + null */
char buf[19 + 1 + 1];
assert(padding > 0);
if (padding == 1) {
/* Use x instead of 1x, for brevity */
return _ctypes_alloc_format_string(prefix, "x");
}
int ret = PyOS_snprintf(buf, sizeof(buf), "%zdx", padding);
assert(0 <= ret && ret < sizeof(buf));
return _ctypes_alloc_format_string(prefix, buf);
}
/*
Retrieve the (optional) _pack_ attribute from a type, the _fields_ attribute,
and create an StgDictObject. Used for Structure and Union subclasses.
@ -346,11 +369,10 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
{
StgDictObject *stgdict, *basedict;
Py_ssize_t len, offset, size, align, i;
Py_ssize_t union_size, total_align;
Py_ssize_t union_size, total_align, aligned_size;
Py_ssize_t field_size = 0;
int bitofs;
PyObject *tmp;
int isPacked;
int pack;
Py_ssize_t ffi_ofs;
int big_endian;
@ -374,7 +396,6 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
return -1;
}
if (tmp) {
isPacked = 1;
pack = _PyLong_AsInt(tmp);
Py_DECREF(tmp);
if (pack < 0) {
@ -389,7 +410,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
}
}
else {
isPacked = 0;
/* Setting `_pack_ = 0` amounts to using the default alignment */
pack = 0;
}
@ -470,12 +491,10 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
}
assert(stgdict->format == NULL);
if (isStruct && !isPacked) {
if (isStruct) {
stgdict->format = _ctypes_alloc_format_string(NULL, "T{");
} else {
/* PEP3118 doesn't support union, or packed structures (well,
only standard packing, but we don't support the pep for
that). Use 'B' for bytes. */
/* PEP3118 doesn't support union. Use 'B' for bytes. */
stgdict->format = _ctypes_alloc_format_string(NULL, "B");
}
if (stgdict->format == NULL)
@ -543,12 +562,14 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
} else
bitsize = 0;
if (isStruct && !isPacked) {
if (isStruct) {
const char *fieldfmt = dict->format ? dict->format : "B";
const char *fieldname = PyUnicode_AsUTF8(name);
char *ptr;
Py_ssize_t len;
char *buf;
Py_ssize_t last_size = size;
Py_ssize_t padding;
if (fieldname == NULL)
{
@ -556,11 +577,38 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
return -1;
}
/* construct the field now, as `prop->offset` is `offset` with
corrected alignment */
prop = PyCField_FromDesc(desc, i,
&field_size, bitsize, &bitofs,
&size, &offset, &align,
pack, big_endian);
if (prop == NULL) {
Py_DECREF(pair);
return -1;
}
/* number of bytes between the end of the last field and the start
of this one */
padding = ((CFieldObject *)prop)->offset - last_size;
if (padding > 0) {
ptr = stgdict->format;
stgdict->format = _ctypes_alloc_format_padding(ptr, padding);
PyMem_Free(ptr);
if (stgdict->format == NULL) {
Py_DECREF(pair);
Py_DECREF(prop);
return -1;
}
}
len = strlen(fieldname) + strlen(fieldfmt);
buf = PyMem_Malloc(len + 2 + 1);
if (buf == NULL) {
Py_DECREF(pair);
Py_DECREF(prop);
PyErr_NoMemory();
return -1;
}
@ -578,15 +626,9 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
if (stgdict->format == NULL) {
Py_DECREF(pair);
Py_DECREF(prop);
return -1;
}
}
if (isStruct) {
prop = PyCField_FromDesc(desc, i,
&field_size, bitsize, &bitofs,
&size, &offset, &align,
pack, big_endian);
} else /* union */ {
size = 0;
offset = 0;
@ -595,14 +637,14 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
&field_size, bitsize, &bitofs,
&size, &offset, &align,
pack, big_endian);
if (prop == NULL) {
Py_DECREF(pair);
return -1;
}
union_size = max(size, union_size);
}
total_align = max(align, total_align);
if (!prop) {
Py_DECREF(pair);
return -1;
}
if (-1 == PyObject_SetAttr(type, name, prop)) {
Py_DECREF(prop);
Py_DECREF(pair);
@ -612,26 +654,41 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct
Py_DECREF(prop);
}
if (isStruct && !isPacked) {
char *ptr = stgdict->format;
if (!isStruct) {
size = union_size;
}
/* Adjust the size according to the alignment requirements */
aligned_size = ((size + total_align - 1) / total_align) * total_align;
if (isStruct) {
char *ptr;
Py_ssize_t padding;
/* Pad up to the full size of the struct */
padding = aligned_size - size;
if (padding > 0) {
ptr = stgdict->format;
stgdict->format = _ctypes_alloc_format_padding(ptr, padding);
PyMem_Free(ptr);
if (stgdict->format == NULL) {
return -1;
}
}
ptr = stgdict->format;
stgdict->format = _ctypes_alloc_format_string(stgdict->format, "}");
PyMem_Free(ptr);
if (stgdict->format == NULL)
return -1;
}
if (!isStruct)
size = union_size;
/* Adjust the size according to the alignment requirements */
size = ((size + total_align - 1) / total_align) * total_align;
stgdict->ffi_type_pointer.alignment = Py_SAFE_DOWNCAST(total_align,
Py_ssize_t,
unsigned short);
stgdict->ffi_type_pointer.size = size;
stgdict->ffi_type_pointer.size = aligned_size;
stgdict->size = size;
stgdict->size = aligned_size;
stgdict->align = total_align;
stgdict->length = len; /* ADD ffi_ofs? */