Issue #8966: ctypes: Remove implicit bytes-unicode conversion

This commit is contained in:
Victor Stinner 2010-07-28 00:15:03 +00:00
parent 42746df17b
commit cf448832eb
9 changed files with 51 additions and 245 deletions

View File

@ -259,41 +259,31 @@ class c_bool(_SimpleCData):
from _ctypes import POINTER, pointer, _pointer_type_cache
try:
from _ctypes import set_conversion_mode
except ImportError:
pass
else:
if _os.name in ("nt", "ce"):
set_conversion_mode("mbcs", "strict")
else:
set_conversion_mode("ascii", "strict")
class c_wchar_p(_SimpleCData):
_type_ = "Z"
class c_wchar_p(_SimpleCData):
_type_ = "Z"
class c_wchar(_SimpleCData):
_type_ = "u"
class c_wchar(_SimpleCData):
_type_ = "u"
POINTER(c_wchar).from_param = c_wchar_p.from_param #_SimpleCData.c_wchar_p_from_param
POINTER(c_wchar).from_param = c_wchar_p.from_param #_SimpleCData.c_wchar_p_from_param
def create_unicode_buffer(init, size=None):
"""create_unicode_buffer(aString) -> character array
create_unicode_buffer(anInteger) -> character array
create_unicode_buffer(aString, anInteger) -> character array
"""
if isinstance(init, (str, bytes)):
if size is None:
size = len(init)+1
buftype = c_wchar * size
buf = buftype()
buf.value = init
return buf
elif isinstance(init, int):
buftype = c_wchar * init
buf = buftype()
return buf
raise TypeError(init)
def create_unicode_buffer(init, size=None):
    """create_unicode_buffer(aString) -> character array
    create_unicode_buffer(anInteger) -> character array
    create_unicode_buffer(aString, anInteger) -> character array

    Build and return a new ``c_wchar`` array.

    init -- either a ``str`` whose characters initialise the buffer, or an
            ``int`` giving the number of elements of an empty buffer.
    size -- optional element count, used only when *init* is a ``str``;
            defaults to ``len(init) + 1``.

    Raises TypeError(init) when *init* is neither ``str`` nor ``int``.
    ``bytes`` is rejected here: there is no implicit bytes-to-unicode
    conversion, so it could never be stored in a wide-character array.
    """
    if isinstance(init, str):
        if size is None:
            # Reserve one extra element for the terminating NUL character.
            size = len(init) + 1
        buf = (c_wchar * size)()
        buf.value = init
        return buf
    if isinstance(init, int):
        # An integer is an element count: return a zero-filled buffer.
        return (c_wchar * init)()
    raise TypeError(init)
POINTER(c_char).from_param = c_char_p.from_param #_SimpleCData.c_char_p_from_param

View File

@ -20,18 +20,6 @@ class StringBufferTestCase(unittest.TestCase):
self.assertEqual(b[::2], b"ac")
self.assertEqual(b[::5], b"a")
def test_string_conversion(self):
b = create_string_buffer("abc")
self.assertEqual(len(b), 4) # trailing nul char
self.assertEqual(sizeof(b), 4 * sizeof(c_char))
self.assertTrue(type(b[0]) is bytes)
self.assertEqual(b[0], b"a")
self.assertEqual(b[:], b"abc\0")
self.assertEqual(b[::], b"abc\0")
self.assertEqual(b[::-1], b"\0cba")
self.assertEqual(b[::2], b"ac")
self.assertEqual(b[::5], b"a")
try:
c_wchar
except NameError:

View File

@ -17,18 +17,15 @@ class BytesTest(unittest.TestCase):
(c_wchar * 3)("a", "b", "c")
def test_c_char_p(self):
c_char_p("foo bar")
c_char_p(b"foo bar")
def test_c_wchar_p(self):
c_wchar_p("foo bar")
c_wchar_p(b"foo bar")
def test_struct(self):
class X(Structure):
_fields_ = [("a", c_char * 3)]
X("abc")
x = X(b"abc")
self.assertEqual(x.a, b"abc")
self.assertEqual(type(x.a), bytes)
@ -37,7 +34,6 @@ class BytesTest(unittest.TestCase):
class X(Structure):
_fields_ = [("a", c_wchar * 3)]
X(b"abc")
x = X("abc")
self.assertEqual(x.a, "abc")
self.assertEqual(type(x.a), str)

View File

@ -57,8 +57,8 @@ class SimpleTypesTestCase(unittest.TestCase):
self.assertTrue(c_char_p.from_param(s)._obj is s)
# new in 0.9.1: convert (encode) unicode to ascii
self.assertEqual(c_char_p.from_param("123")._obj, b"123")
self.assertRaises(UnicodeEncodeError, c_char_p.from_param, "123\377")
self.assertEqual(c_char_p.from_param(b"123")._obj, b"123")
self.assertRaises(TypeError, c_char_p.from_param, "123\377")
self.assertRaises(TypeError, c_char_p.from_param, 42)
# calling c_char_p.from_param with a c_char_p instance
@ -80,7 +80,7 @@ class SimpleTypesTestCase(unittest.TestCase):
# new in 0.9.1: convert (decode) ascii to unicode
self.assertEqual(c_wchar_p.from_param("123")._obj, "123")
self.assertRaises(UnicodeDecodeError, c_wchar_p.from_param, b"123\377")
self.assertRaises(TypeError, c_wchar_p.from_param, b"123\377")
pa = c_wchar_p.from_param(c_wchar_p("123"))
self.assertEqual(type(pa), c_wchar_p)

View File

@ -7,122 +7,53 @@ except AttributeError:
pass
else:
import _ctypes_test
dll = ctypes.CDLL(_ctypes_test.__file__)
wcslen = dll.my_wcslen
wcslen.argtypes = [ctypes.c_wchar_p]
class UnicodeTestCase(unittest.TestCase):
def setUp(self):
self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict")
def test_wcslen(self):
dll = ctypes.CDLL(_ctypes_test.__file__)
wcslen = dll.my_wcslen
wcslen.argtypes = [ctypes.c_wchar_p]
def tearDown(self):
ctypes.set_conversion_mode(*self.prev_conv_mode)
def test_ascii_strict(self):
ctypes.set_conversion_mode("ascii", "strict")
# no conversions take place with unicode arguments
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen("ab\u2070"), 3)
# string args are converted
self.assertEqual(wcslen("abc"), 3)
self.assertRaises(ctypes.ArgumentError, wcslen, b"ab\xe4")
def test_ascii_replace(self):
ctypes.set_conversion_mode("ascii", "replace")
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen("ab\u2070"), 3)
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen("ab\xe4"), 3)
def test_ascii_ignore(self):
ctypes.set_conversion_mode("ascii", "ignore")
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen("ab\u2070"), 3)
# ignore error mode skips non-ascii characters
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen(b"\xe4\xf6\xfc\xdf"), 0)
def test_latin1_strict(self):
ctypes.set_conversion_mode("latin-1", "strict")
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen("ab\u2070"), 3)
self.assertEqual(wcslen("abc"), 3)
self.assertEqual(wcslen("\xe4\xf6\xfc\xdf"), 4)
def test_buffers(self):
ctypes.set_conversion_mode("ascii", "strict")
buf = ctypes.create_unicode_buffer("abc")
self.assertEqual(len(buf), 3+1)
ctypes.set_conversion_mode("ascii", "replace")
buf = ctypes.create_unicode_buffer(b"ab\xe4\xf6\xfc")
self.assertEqual(buf[:], "ab\uFFFD\uFFFD\uFFFD\0")
self.assertEqual(buf[::], "ab\uFFFD\uFFFD\uFFFD\0")
self.assertEqual(buf[::-1], "\0\uFFFD\uFFFD\uFFFDba")
self.assertEqual(buf[::2], "a\uFFFD\uFFFD")
buf = ctypes.create_unicode_buffer("ab\xe4\xf6\xfc")
self.assertEqual(buf[:], "ab\xe4\xf6\xfc\0")
self.assertEqual(buf[::], "ab\xe4\xf6\xfc\0")
self.assertEqual(buf[::-1], '\x00\xfc\xf6\xe4ba')
self.assertEqual(buf[::2], 'a\xe4\xfc')
self.assertEqual(buf[6:5:-1], "")
ctypes.set_conversion_mode("ascii", "ignore")
buf = ctypes.create_unicode_buffer(b"ab\xe4\xf6\xfc")
# is that correct? not sure. But with 'ignore', you get what you pay for..
self.assertEqual(buf[:], "ab\0\0\0\0")
self.assertEqual(buf[::], "ab\0\0\0\0")
self.assertEqual(buf[::-1], "\0\0\0\0ba")
self.assertEqual(buf[::2], "a\0\0")
self.assertEqual(buf[6:5:-1], "")
import _ctypes_test
func = ctypes.CDLL(_ctypes_test.__file__)._testfunc_p_p
class StringTestCase(UnicodeTestCase):
def setUp(self):
self.prev_conv_mode = ctypes.set_conversion_mode("ascii", "strict")
func.argtypes = [ctypes.c_char_p]
func.restype = ctypes.c_char_p
def tearDown(self):
ctypes.set_conversion_mode(*self.prev_conv_mode)
func.argtypes = None
func.restype = ctypes.c_int
def test_ascii_replace(self):
ctypes.set_conversion_mode("ascii", "strict")
self.assertEqual(func("abc"), "abc")
self.assertEqual(func("abc"), "abc")
self.assertRaises(ctypes.ArgumentError, func, "ab\xe4")
def test_ascii_ignore(self):
ctypes.set_conversion_mode("ascii", "ignore")
self.assertEqual(func("abc"), b"abc")
self.assertEqual(func("abc"), b"abc")
self.assertEqual(func("\xe4\xf6\xfc\xdf"), b"")
def test_ascii_replace(self):
ctypes.set_conversion_mode("ascii", "replace")
self.assertEqual(func("abc"), b"abc")
self.assertEqual(func("abc"), b"abc")
self.assertEqual(func("\xe4\xf6\xfc\xdf"), b"????")
def test_func(self):
self.assertEqual(func(b"abc\xe4"), b"abc\xe4")
def test_buffers(self):
ctypes.set_conversion_mode("ascii", "strict")
buf = ctypes.create_string_buffer("abc")
buf = ctypes.create_string_buffer(b"abc")
self.assertEqual(len(buf), 3+1)
ctypes.set_conversion_mode("ascii", "replace")
buf = ctypes.create_string_buffer("ab\xe4\xf6\xfc")
self.assertEqual(buf[:], b"ab???\0")
self.assertEqual(buf[::], b"ab???\0")
self.assertEqual(buf[::-1], b"\0???ba")
self.assertEqual(buf[::2], b"a??")
buf = ctypes.create_string_buffer(b"ab\xe4\xf6\xfc")
self.assertEqual(buf[:], b"ab\xe4\xf6\xfc\0")
self.assertEqual(buf[::], b"ab\xe4\xf6\xfc\0")
self.assertEqual(buf[::-1], b'\x00\xfc\xf6\xe4ba')
self.assertEqual(buf[::2], b'a\xe4\xfc')
self.assertEqual(buf[6:5:-1], b"")
ctypes.set_conversion_mode("ascii", "ignore")
buf = ctypes.create_string_buffer("ab\xe4\xf6\xfc")
# is that correct? not sure. But with 'ignore', you get what you pay for..
self.assertEqual(buf[:], b"ab\0\0\0\0")
self.assertEqual(buf[::], b"ab\0\0\0\0")
self.assertEqual(buf[::-1], b"\0\0\0\0ba")
if __name__ == '__main__':
unittest.main()

View File

@ -473,6 +473,8 @@ C-API
Library
-------
- Issue #8966: ctypes: Remove implicit bytes-unicode conversion.
- Issue #9378: python -m pickle <pickle file> will now load and
display the first object in the pickle file.

View File

@ -132,8 +132,6 @@ static PyTypeObject Simple_Type;
/* a callable object used for unpickling */
static PyObject *_unpickle;
char *_ctypes_conversion_encoding = NULL;
char *_ctypes_conversion_errors = NULL;
/****************************************************************/
@ -1090,13 +1088,7 @@ CharArray_set_value(CDataObject *self, PyObject *value)
return -1;
}
if (PyUnicode_Check(value)) {
value = PyUnicode_AsEncodedString(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (!value)
return -1;
} else if (!PyBytes_Check(value)) {
if (!PyBytes_Check(value)) {
PyErr_Format(PyExc_TypeError,
"str/bytes expected instead of %s instance",
Py_TYPE(value)->tp_name);
@ -1150,13 +1142,7 @@ WCharArray_set_value(CDataObject *self, PyObject *value)
"can't delete attribute");
return -1;
}
if (PyBytes_Check(value)) {
value = PyUnicode_FromEncodedObject(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (!value)
return -1;
} else if (!PyUnicode_Check(value)) {
if (!PyUnicode_Check(value)) {
PyErr_Format(PyExc_TypeError,
"unicode string expected instead of %s instance",
Py_TYPE(value)->tp_name);
@ -1510,7 +1496,7 @@ c_char_p_from_param(PyObject *type, PyObject *value)
Py_INCREF(Py_None);
return Py_None;
}
if (PyBytes_Check(value) || PyUnicode_Check(value)) {
if (PyBytes_Check(value)) {
PyCArgObject *parg;
struct fielddesc *fd = _ctypes_get_fielddesc("z");

View File

@ -1641,37 +1641,6 @@ My_Py_DECREF(PyObject *self, PyObject *arg)
return arg;
}
#ifdef CTYPES_UNICODE
static char set_conversion_mode_doc[] =
"set_conversion_mode(encoding, errors) -> (previous-encoding, previous-errors)\n\
\n\
Set the encoding and error handling ctypes uses when converting\n\
between unicode and strings. Returns the previous values.\n";
/*
 * set_conversion_mode(encoding, errors) -> (previous-encoding, previous-errors)
 *
 * Replace the module-wide encoding / error-handler names stored in
 * _ctypes_conversion_encoding and _ctypes_conversion_errors and return the
 * previous pair as a 2-tuple.  `encoding` may be None (parsed with "z"), in
 * which case the stored encoding becomes NULL; `errors` must be a string.
 *
 * Returns NULL with an exception set on argument, allocation or tuple
 * construction failure; in every failure case the previous settings are
 * left untouched.
 */
static PyObject *
set_conversion_mode(PyObject *self, PyObject *args)
{
    char *coding, *mode;
    char *new_encoding = NULL;
    char *new_errors;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "zs:set_conversion_mode", &coding, &mode))
        return NULL;

    /* Allocate both copies before touching the globals, so that a failure
       cannot leave them half-updated or pointing at freed memory. */
    if (coding) {
        new_encoding = PyMem_Malloc(strlen(coding) + 1);
        if (new_encoding == NULL)
            return PyErr_NoMemory();
        strcpy(new_encoding, coding);
    }
    new_errors = PyMem_Malloc(strlen(mode) + 1);
    if (new_errors == NULL) {
        PyMem_Free(new_encoding);
        return PyErr_NoMemory();
    }
    strcpy(new_errors, mode);

    /* Build the return value while the old strings are still alive. */
    result = Py_BuildValue("(zz)", _ctypes_conversion_encoding, _ctypes_conversion_errors);
    if (result == NULL) {
        PyMem_Free(new_encoding);
        PyMem_Free(new_errors);
        return NULL;
    }

    /* Release the previous values unconditionally; this also covers the
       coding == NULL case, which would otherwise leak the old encoding. */
    PyMem_Free(_ctypes_conversion_encoding);
    _ctypes_conversion_encoding = new_encoding;
    PyMem_Free(_ctypes_conversion_errors);
    _ctypes_conversion_errors = new_errors;
    return result;
}
#endif
static PyObject *
resize(PyObject *self, PyObject *args)
{
@ -1852,9 +1821,6 @@ PyMethodDef _ctypes_module_methods[] = {
{"_unpickle", unpickle, METH_VARARGS },
{"buffer_info", buffer_info, METH_O, "Return buffer interface information"},
{"resize", resize, METH_VARARGS, "Resize the memory buffer of a ctypes instance"},
#ifdef CTYPES_UNICODE
{"set_conversion_mode", set_conversion_mode, METH_VARARGS, set_conversion_mode_doc},
#endif
#ifdef MS_WIN32
{"get_last_error", get_last_error, METH_NOARGS},
{"set_last_error", set_last_error, METH_VARARGS},

View File

@ -1168,20 +1168,6 @@ O_set(void *ptr, PyObject *value, Py_ssize_t size)
static PyObject *
c_set(void *ptr, PyObject *value, Py_ssize_t size)
{
if (PyUnicode_Check(value)) {
value = PyUnicode_AsEncodedString(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (value == NULL)
return NULL;
if (PyBytes_GET_SIZE(value) != 1) {
Py_DECREF(value);
goto error;
}
*(char *)ptr = PyBytes_AS_STRING(value)[0];
Py_DECREF(value);
_RET(value);
}
if (PyBytes_Check(value) && PyBytes_GET_SIZE(value) == 1) {
*(char *)ptr = PyBytes_AS_STRING(value)[0];
_RET(value);
@ -1217,13 +1203,7 @@ static PyObject *
u_set(void *ptr, PyObject *value, Py_ssize_t size)
{
Py_ssize_t len;
if (PyBytes_Check(value)) {
value = PyUnicode_FromEncodedObject(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (!value)
return NULL;
} else if (!PyUnicode_Check(value)) {
if (!PyUnicode_Check(value)) {
PyErr_Format(PyExc_TypeError,
"unicode string expected instead of %s instance",
value->ob_type->tp_name);
@ -1292,13 +1272,7 @@ U_set(void *ptr, PyObject *value, Py_ssize_t length)
/* It's easier to calculate in characters than in bytes */
length /= sizeof(wchar_t);
if (PyBytes_Check(value)) {
value = PyUnicode_FromEncodedObject(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (!value)
return NULL;
} else if (!PyUnicode_Check(value)) {
if (!PyUnicode_Check(value)) {
PyErr_Format(PyExc_TypeError,
"unicode string expected instead of %s instance",
value->ob_type->tp_name);
@ -1342,14 +1316,7 @@ s_set(void *ptr, PyObject *value, Py_ssize_t length)
char *data;
Py_ssize_t size;
if (PyUnicode_Check(value)) {
value = PyUnicode_AsEncodedString(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (value == NULL)
return NULL;
assert(PyBytes_Check(value));
} else if(PyBytes_Check(value)) {
if(PyBytes_Check(value)) {
Py_INCREF(value);
} else {
PyErr_Format(PyExc_TypeError,
@ -1393,14 +1360,6 @@ z_set(void *ptr, PyObject *value, Py_ssize_t size)
*(char **)ptr = PyBytes_AsString(value);
Py_INCREF(value);
return value;
} else if (PyUnicode_Check(value)) {
PyObject *str = PyUnicode_AsEncodedString(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (str == NULL)
return NULL;
*(char **)ptr = PyBytes_AS_STRING(str);
return str;
} else if (PyLong_Check(value)) {
#if SIZEOF_VOID_P == SIZEOF_LONG_LONG
*(char **)ptr = (char *)PyLong_AsUnsignedLongLongMask(value);
@ -1454,13 +1413,7 @@ Z_set(void *ptr, PyObject *value, Py_ssize_t size)
Py_INCREF(Py_None);
return Py_None;
}
if (PyBytes_Check(value)) {
value = PyUnicode_FromEncodedObject(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (!value)
return NULL;
} else if (!PyUnicode_Check(value)) {
if (!PyUnicode_Check(value)) {
PyErr_Format(PyExc_TypeError,
"unicode string or integer address expected instead of %s instance",
value->ob_type->tp_name);
@ -1540,12 +1493,6 @@ BSTR_set(void *ptr, PyObject *value, Py_ssize_t size)
/* convert value into a PyUnicodeObject or NULL */
if (Py_None == value) {
value = NULL;
} else if (PyBytes_Check(value)) {
value = PyUnicode_FromEncodedObject(value,
_ctypes_conversion_encoding,
_ctypes_conversion_errors);
if (!value)
return NULL;
} else if (PyUnicode_Check(value)) {
Py_INCREF(value); /* for the descref below */
} else {