mirror of https://github.com/python/cpython
gh-119182: Add PyUnicodeWriter_WriteUCS4() function (#120849)
This commit is contained in:
parent
a47abdb45d
commit
2e157851e3
|
@ -1563,6 +1563,15 @@ object.
|
||||||
On success, return ``0``.
|
On success, return ``0``.
|
||||||
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *writer, Py_UCS4 *str, Py_ssize_t size)
|
||||||
|
|
||||||
|
Write the UCS4 string *str* into *writer*.
|
||||||
|
|
||||||
|
*size* is the number of UCS4 characters to write from *str*.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
|
.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
|
||||||
Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.
|
Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.
|
||||||
|
|
|
@ -314,6 +314,7 @@ New Features
|
||||||
* :c:func:`PyUnicodeWriter_Finish`.
|
* :c:func:`PyUnicodeWriter_Finish`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteChar`.
|
* :c:func:`PyUnicodeWriter_WriteChar`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteUTF8`.
|
* :c:func:`PyUnicodeWriter_WriteUTF8`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteUCS4`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteWideChar`.
|
* :c:func:`PyUnicodeWriter_WriteWideChar`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteStr`.
|
* :c:func:`PyUnicodeWriter_WriteStr`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteRepr`.
|
* :c:func:`PyUnicodeWriter_WriteRepr`.
|
||||||
|
|
|
@ -463,6 +463,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
|
||||||
PyUnicodeWriter *writer,
|
PyUnicodeWriter *writer,
|
||||||
const wchar_t *str,
|
const wchar_t *str,
|
||||||
Py_ssize_t size);
|
Py_ssize_t size);
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteUCS4(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
Py_UCS4 *str,
|
||||||
|
Py_ssize_t size);
|
||||||
|
|
||||||
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
|
||||||
PyUnicodeWriter *writer,
|
PyUnicodeWriter *writer,
|
||||||
|
|
|
@ -1826,8 +1826,42 @@ class PyUnicodeWriterTest(unittest.TestCase):
|
||||||
writer.write_widechar("latin1=\xE9")
|
writer.write_widechar("latin1=\xE9")
|
||||||
writer.write_widechar("-")
|
writer.write_widechar("-")
|
||||||
writer.write_widechar("euro=\u20AC")
|
writer.write_widechar("euro=\u20AC")
|
||||||
|
writer.write_char("-")
|
||||||
|
writer.write_widechar("max=\U0010ffff")
|
||||||
writer.write_char('.')
|
writer.write_char('.')
|
||||||
self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
|
self.assertEqual(writer.finish(),
|
||||||
|
"latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
|
||||||
|
|
||||||
|
def test_ucs4(self):
|
||||||
|
writer = self.create_writer(0)
|
||||||
|
writer.write_ucs4("ascii IGNORED", 5)
|
||||||
|
writer.write_char("-")
|
||||||
|
writer.write_ucs4("latin1=\xe9", 8)
|
||||||
|
writer.write_char("-")
|
||||||
|
writer.write_ucs4("euro=\u20ac", 6)
|
||||||
|
writer.write_char("-")
|
||||||
|
writer.write_ucs4("max=\U0010ffff", 5)
|
||||||
|
writer.write_char(".")
|
||||||
|
self.assertEqual(writer.finish(),
|
||||||
|
"ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
|
||||||
|
|
||||||
|
# Test some special characters
|
||||||
|
writer = self.create_writer(0)
|
||||||
|
# Lone surrogate character
|
||||||
|
writer.write_ucs4("lone\uDC80", 5)
|
||||||
|
writer.write_char("-")
|
||||||
|
# Surrogate pair
|
||||||
|
writer.write_ucs4("pair\uDBFF\uDFFF", 5)
|
||||||
|
writer.write_char("-")
|
||||||
|
writer.write_ucs4("null[\0]", 7)
|
||||||
|
self.assertEqual(writer.finish(),
|
||||||
|
"lone\udc80-pair\udbff-null[\0]")
|
||||||
|
|
||||||
|
# invalid size
|
||||||
|
writer = self.create_writer(0)
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
writer.write_ucs4("text", -1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipIf(ctypes is None, 'need ctypes')
|
@unittest.skipIf(ctypes is None, 'need ctypes')
|
||||||
|
|
|
@ -5,9 +5,12 @@ Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` object:
|
||||||
* :c:func:`PyUnicodeWriter_Finish`.
|
* :c:func:`PyUnicodeWriter_Finish`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteChar`.
|
* :c:func:`PyUnicodeWriter_WriteChar`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteUTF8`.
|
* :c:func:`PyUnicodeWriter_WriteUTF8`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteUCS4`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteWideChar`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteStr`.
|
* :c:func:`PyUnicodeWriter_WriteStr`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteRepr`.
|
* :c:func:`PyUnicodeWriter_WriteRepr`.
|
||||||
* :c:func:`PyUnicodeWriter_WriteSubstring`.
|
* :c:func:`PyUnicodeWriter_WriteSubstring`.
|
||||||
* :c:func:`PyUnicodeWriter_Format`.
|
* :c:func:`PyUnicodeWriter_Format`.
|
||||||
|
* :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
|
||||||
|
|
||||||
Patch by Victor Stinner.
|
Patch by Victor Stinner.
|
||||||
|
|
|
@ -360,6 +360,36 @@ writer_write_widechar(PyObject *self_raw, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
writer_write_ucs4(PyObject *self_raw, PyObject *args)
|
||||||
|
{
|
||||||
|
WriterObject *self = (WriterObject *)self_raw;
|
||||||
|
if (writer_check(self) < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *str;
|
||||||
|
Py_ssize_t size;
|
||||||
|
if (!PyArg_ParseTuple(args, "Un", &str, &size)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
|
||||||
|
size = Py_MIN(size, len);
|
||||||
|
|
||||||
|
Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(str);
|
||||||
|
if (ucs4 == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = PyUnicodeWriter_WriteUCS4(self->writer, ucs4, size);
|
||||||
|
PyMem_Free(ucs4);
|
||||||
|
if (res < 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
writer_write_str(PyObject *self_raw, PyObject *args)
|
writer_write_str(PyObject *self_raw, PyObject *args)
|
||||||
{
|
{
|
||||||
|
@ -484,6 +514,7 @@ static PyMethodDef writer_methods[] = {
|
||||||
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
|
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
|
||||||
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
|
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
|
||||||
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
|
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
|
||||||
|
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
|
||||||
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
|
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
|
||||||
{"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
|
{"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
|
||||||
{"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},
|
{"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},
|
||||||
|
|
|
@ -2035,11 +2035,9 @@ PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *pub_writer,
|
||||||
if (!converted) {
|
if (!converted) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
|
|
||||||
PyMem_Free(converted);
|
|
||||||
|
|
||||||
int res = _PyUnicodeWriter_WriteStr(writer, unicode);
|
int res = PyUnicodeWriter_WriteUCS4(pub_writer, converted, size);
|
||||||
Py_DECREF(unicode);
|
PyMem_Free(converted);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2289,6 +2287,51 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
|
||||||
|
Py_UCS4 *str,
|
||||||
|
Py_ssize_t size)
|
||||||
|
{
|
||||||
|
_PyUnicodeWriter *writer = (_PyUnicodeWriter*)pub_writer;
|
||||||
|
|
||||||
|
if (size < 0) {
|
||||||
|
PyErr_SetString(PyExc_ValueError,
|
||||||
|
"size must be positive");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_UCS4 max_char = ucs4lib_find_max_char(str, str + size);
|
||||||
|
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, size, max_char) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kind = writer->kind;
|
||||||
|
void *data = (Py_UCS1*)writer->data + writer->pos * kind;
|
||||||
|
if (kind == PyUnicode_1BYTE_KIND) {
|
||||||
|
_PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1,
|
||||||
|
str, str + size,
|
||||||
|
data);
|
||||||
|
}
|
||||||
|
else if (kind == PyUnicode_2BYTE_KIND) {
|
||||||
|
_PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2,
|
||||||
|
str, str + size,
|
||||||
|
data);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
memcpy(data, str, size * sizeof(Py_UCS4));
|
||||||
|
}
|
||||||
|
writer->pos += size;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
PyObject*
|
PyObject*
|
||||||
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
|
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
|
||||||
{
|
{
|
||||||
|
@ -13357,7 +13400,7 @@ PyUnicodeWriter*
|
||||||
PyUnicodeWriter_Create(Py_ssize_t length)
|
PyUnicodeWriter_Create(Py_ssize_t length)
|
||||||
{
|
{
|
||||||
if (length < 0) {
|
if (length < 0) {
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_ValueError,
|
||||||
"length must be positive");
|
"length must be positive");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue