#include // ptrdiff_t #include "parts.h" #include "util.h" /* Test PyUnicode_New() */ static PyObject * unicode_new(PyObject *self, PyObject *args) { Py_ssize_t size; unsigned int maxchar; PyObject *result; if (!PyArg_ParseTuple(args, "nI", &size, &maxchar)) { return NULL; } result = PyUnicode_New(size, (Py_UCS4)maxchar); if (!result) { return NULL; } if (size > 0 && maxchar <= 0x10ffff && PyUnicode_Fill(result, 0, size, (Py_UCS4)maxchar) < 0) { Py_DECREF(result); return NULL; } return result; } static PyObject * unicode_copy(PyObject *unicode) { PyObject *copy; if (!unicode) { return NULL; } if (!PyUnicode_Check(unicode)) { Py_INCREF(unicode); return unicode; } copy = PyUnicode_New(PyUnicode_GET_LENGTH(unicode), PyUnicode_MAX_CHAR_VALUE(unicode)); if (!copy) { return NULL; } if (PyUnicode_CopyCharacters(copy, 0, unicode, 0, PyUnicode_GET_LENGTH(unicode)) < 0) { Py_DECREF(copy); return NULL; } return copy; } /* Test PyUnicode_Fill() */ static PyObject * unicode_fill(PyObject *self, PyObject *args) { PyObject *to, *to_copy; Py_ssize_t start, length, filled; unsigned int fill_char; if (!PyArg_ParseTuple(args, "OnnI", &to, &start, &length, &fill_char)) { return NULL; } NULLABLE(to); if (!(to_copy = unicode_copy(to)) && to) { return NULL; } filled = PyUnicode_Fill(to_copy, start, length, (Py_UCS4)fill_char); if (filled == -1 && PyErr_Occurred()) { Py_DECREF(to_copy); return NULL; } return Py_BuildValue("(Nn)", to_copy, filled); } /* Test PyUnicode_FromKindAndData() */ static PyObject * unicode_fromkindanddata(PyObject *self, PyObject *args) { int kind; void *buffer; Py_ssize_t bsize; Py_ssize_t size = -100; if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &bsize, &size)) { return NULL; } if (size == -100) { size = bsize; } if (kind && size % kind) { PyErr_SetString(PyExc_AssertionError, "invalid size in unicode_fromkindanddata()"); return NULL; } return PyUnicode_FromKindAndData(kind, buffer, kind ? size / kind : 0); } // Test PyUnicode_AsUCS4(). // Part of the limited C API, but the test needs PyUnicode_FromKindAndData(). static PyObject * unicode_asucs4(PyObject *self, PyObject *args) { PyObject *unicode, *result; Py_UCS4 *buffer; int copy_null; Py_ssize_t str_len, buf_len; if (!PyArg_ParseTuple(args, "Onp:unicode_asucs4", &unicode, &str_len, ©_null)) { return NULL; } NULLABLE(unicode); buf_len = str_len + 1; buffer = PyMem_NEW(Py_UCS4, buf_len); if (buffer == NULL) { return PyErr_NoMemory(); } memset(buffer, 0, sizeof(Py_UCS4)*buf_len); buffer[str_len] = 0xffffU; if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { PyMem_Free(buffer); return NULL; } result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len); PyMem_Free(buffer); return result; } // Test PyUnicode_AsUCS4Copy(). // Part of the limited C API, but the test needs PyUnicode_FromKindAndData(). static PyObject * unicode_asucs4copy(PyObject *self, PyObject *args) { PyObject *unicode; Py_UCS4 *buffer; PyObject *result; if (!PyArg_ParseTuple(args, "O", &unicode)) { return NULL; } NULLABLE(unicode); buffer = PyUnicode_AsUCS4Copy(unicode); if (buffer == NULL) { return NULL; } result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, PyUnicode_GET_LENGTH(unicode) + 1); PyMem_FREE(buffer); return result; } /* Test PyUnicode_AsUTF8() */ static PyObject * unicode_asutf8(PyObject *self, PyObject *args) { PyObject *unicode; Py_ssize_t buflen; const char *s; if (!PyArg_ParseTuple(args, "On", &unicode, &buflen)) return NULL; NULLABLE(unicode); s = PyUnicode_AsUTF8(unicode); if (s == NULL) return NULL; return PyBytes_FromStringAndSize(s, buflen); } /* Test PyUnicode_CopyCharacters() */ static PyObject * unicode_copycharacters(PyObject *self, PyObject *args) { PyObject *from, *to, *to_copy; Py_ssize_t from_start, to_start, how_many, copied; if (!PyArg_ParseTuple(args, "UnOnn", &to, &to_start, &from, &from_start, &how_many)) { return NULL; } NULLABLE(from); if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to), PyUnicode_MAX_CHAR_VALUE(to)))) { return NULL; } if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) { Py_DECREF(to_copy); return NULL; } copied = PyUnicode_CopyCharacters(to_copy, to_start, from, from_start, how_many); if (copied == -1 && PyErr_Occurred()) { Py_DECREF(to_copy); return NULL; } return Py_BuildValue("(Nn)", to_copy, copied); } // --- PyUnicodeWriter type ------------------------------------------------- typedef struct { PyObject_HEAD PyUnicodeWriter *writer; } WriterObject; static PyObject * writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { WriterObject *self = (WriterObject *)type->tp_alloc(type, 0); if (!self) { return NULL; } self->writer = NULL; return (PyObject*)self; } static int writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs) { WriterObject *self = (WriterObject *)self_raw; Py_ssize_t size; if (!PyArg_ParseTuple(args, "n", &size)) { return -1; } if (self->writer) { PyUnicodeWriter_Discard(self->writer); } self->writer = PyUnicodeWriter_Create(size); if (self->writer == NULL) { return -1; } return 0; } static void writer_dealloc(PyObject *self_raw) { WriterObject *self = (WriterObject *)self_raw; PyTypeObject *tp = Py_TYPE(self); if (self->writer) { PyUnicodeWriter_Discard(self->writer); } tp->tp_free(self); Py_DECREF(tp); } static inline int writer_check(WriterObject *self) { if (self->writer == NULL) { PyErr_SetString(PyExc_ValueError, "operation on finished writer"); return -1; } return 0; } static PyObject* writer_write_char(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *str; if (!PyArg_ParseTuple(args, "U", &str)) { return NULL; } if (PyUnicode_GET_LENGTH(str) != 1) { PyErr_SetString(PyExc_ValueError, "expect a single character"); } Py_UCS4 ch = PyUnicode_READ_CHAR(str, 0); if (PyUnicodeWriter_WriteChar(self->writer, ch) < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_write_utf8(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } char *str; Py_ssize_t size; if (!PyArg_ParseTuple(args, "yn", &str, &size)) { return NULL; } if (PyUnicodeWriter_WriteUTF8(self->writer, str, size) < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_write_widechar(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *str; if (!PyArg_ParseTuple(args, "U", &str)) { return NULL; } Py_ssize_t size; wchar_t *wstr = PyUnicode_AsWideCharString(str, &size); if (wstr == NULL) { return NULL; } int res = PyUnicodeWriter_WriteWideChar(self->writer, wstr, size); PyMem_Free(wstr); if (res < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_write_ucs4(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *str; Py_ssize_t size; if (!PyArg_ParseTuple(args, "Un", &str, &size)) { return NULL; } Py_ssize_t len = PyUnicode_GET_LENGTH(str); size = Py_MIN(size, len); Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(str); if (ucs4 == NULL) { return NULL; } int res = PyUnicodeWriter_WriteUCS4(self->writer, ucs4, size); PyMem_Free(ucs4); if (res < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_write_str(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *obj; if (!PyArg_ParseTuple(args, "O", &obj)) { return NULL; } if (PyUnicodeWriter_WriteStr(self->writer, obj) < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_write_repr(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *obj; if (!PyArg_ParseTuple(args, "O", &obj)) { return NULL; } if (PyUnicodeWriter_WriteRepr(self->writer, obj) < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_write_substring(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *str; Py_ssize_t start, end; if (!PyArg_ParseTuple(args, "Unn", &str, &start, &end)) { return NULL; } if (PyUnicodeWriter_WriteSubstring(self->writer, str, start, end) < 0) { return NULL; } Py_RETURN_NONE; } static PyObject* writer_decodeutf8stateful(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } const char *str; Py_ssize_t len; const char *errors; int use_consumed = 0; if (!PyArg_ParseTuple(args, "yny|i", &str, &len, &errors, &use_consumed)) { return NULL; } Py_ssize_t consumed = 12345; Py_ssize_t *pconsumed = use_consumed ? &consumed : NULL; if (PyUnicodeWriter_DecodeUTF8Stateful(self->writer, str, len, errors, pconsumed) < 0) { if (use_consumed) { assert(consumed == 0); } return NULL; } if (use_consumed) { return PyLong_FromSsize_t(consumed); } Py_RETURN_NONE; } static PyObject* writer_get_pointer(PyObject *self_raw, PyObject *args) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } return PyLong_FromVoidPtr(self->writer); } static PyObject* writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) { WriterObject *self = (WriterObject *)self_raw; if (writer_check(self) < 0) { return NULL; } PyObject *str = PyUnicodeWriter_Finish(self->writer); self->writer = NULL; return str; } static PyMethodDef writer_methods[] = { {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS}, {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS}, {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS}, {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS}, {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS}, {"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS}, {"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS}, {"decodeutf8stateful", _PyCFunction_CAST(writer_decodeutf8stateful), METH_VARARGS}, {"get_pointer", _PyCFunction_CAST(writer_get_pointer), METH_VARARGS}, {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS}, {NULL, NULL} /* sentinel */ }; static PyType_Slot Writer_Type_slots[] = { {Py_tp_new, writer_new}, {Py_tp_init, writer_init}, {Py_tp_dealloc, writer_dealloc}, {Py_tp_methods, writer_methods}, {0, 0}, /* sentinel */ }; static PyType_Spec Writer_spec = { .name = "_testcapi.PyUnicodeWriter", .basicsize = sizeof(WriterObject), .flags = Py_TPFLAGS_DEFAULT, .slots = Writer_Type_slots, }; static PyMethodDef TestMethods[] = { {"unicode_new", unicode_new, METH_VARARGS}, {"unicode_fill", unicode_fill, METH_VARARGS}, {"unicode_fromkindanddata", unicode_fromkindanddata, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {NULL}, }; int _PyTestCapi_Init_Unicode(PyObject *m) { if (PyModule_AddFunctions(m, TestMethods) < 0) { return -1; } PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec); if (writer_type == NULL) { return -1; } if (PyModule_AddType(m, writer_type) < 0) { Py_DECREF(writer_type); return -1; } Py_DECREF(writer_type); return 0; }