mirror of https://github.com/python/cpython
gh-119182: Add PyUnicodeWriter C API (#119184)
This commit is contained in:
parent
2c7209a3bd
commit
5c4235cd8c
|
@ -1502,3 +1502,87 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
|
||||||
:c:func:`PyUnicode_InternInPlace`, returning either a new Unicode string
|
:c:func:`PyUnicode_InternInPlace`, returning either a new Unicode string
|
||||||
object that has been interned, or a new ("owned") reference to an earlier
|
object that has been interned, or a new ("owned") reference to an earlier
|
||||||
interned string object with the same value.
|
interned string object with the same value.
|
||||||
|
|
||||||
|
PyUnicodeWriter
|
||||||
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The :c:type:`PyUnicodeWriter` API can be used to create a Python :class:`str`
|
||||||
|
object.
|
||||||
|
|
||||||
|
.. versionadded:: 3.14
|
||||||
|
|
||||||
|
.. c:type:: PyUnicodeWriter
|
||||||
|
|
||||||
|
A Unicode writer instance.
|
||||||
|
|
||||||
|
The instance must be destroyed by :c:func:`PyUnicodeWriter_Finish` on
|
||||||
|
success, or :c:func:`PyUnicodeWriter_Discard` on error.
|
||||||
|
|
||||||
|
.. c:function:: PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
|
||||||
|
|
||||||
|
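Create a Unicode writer instance. *length* is the number of characters to preallocate in the internal buffer; pass ``0`` if the final length is not known in advance.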
|
||||||
|
|
||||||
|
Set an exception and return ``NULL`` on error.
|
||||||
|
|
||||||
|
.. c:function:: PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
|
||||||
|
|
||||||
|
Return the final Python :class:`str` object and destroy the writer instance.
|
||||||
|
|
||||||
|
Set an exception and return ``NULL`` on error.
|
||||||
|
|
||||||
|
.. c:function:: void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
|
||||||
|
|
||||||
|
Discard the internal Unicode buffer and destroy the writer instance.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
|
||||||
|
|
||||||
|
Write the single Unicode character *ch* into *writer*.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
|
||||||
|
|
||||||
|
Decode the string *str* from UTF-8 in strict mode and write the output into *writer*.
|
||||||
|
|
||||||
|
*size* is the string length in bytes. If *size* is equal to ``-1``, call
|
||||||
|
``strlen(str)`` to get the string length.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
|
To use a different error handler than ``strict``,
|
||||||
|
:c:func:`PyUnicode_DecodeUTF8` can be used with
|
||||||
|
:c:func:`PyUnicodeWriter_WriteStr`.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
|
||||||
|
Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
|
||||||
|
Call :c:func:`PyObject_Repr` on *obj* and write the output into *writer*.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str, Py_ssize_t start, Py_ssize_t end)
|
||||||
|
|
||||||
|
Write the substring ``str[start:end]`` into *writer*.
|
||||||
|
|
||||||
|
*str* must be a Python :class:`str` object. *start* must be greater than or
|
||||||
|
equal to 0, and less than or equal to *end*. *end* must be less than or
|
||||||
|
equal to *str* length.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
|
||||||
|
|
||||||
|
Similar to :c:func:`PyUnicode_FromFormat`, but write the output directly into *writer*.
|
||||||
|
|
||||||
|
On success, return ``0``.
|
||||||
|
On error, set an exception, leave the writer unchanged, and return ``-1``.
|
||||||
|
|
|
@ -283,6 +283,21 @@ New Features
|
||||||
* Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects.
|
* Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects.
|
||||||
(Contributed by Sergey B Kirpichev in :gh:`116560`.)
|
(Contributed by Sergey B Kirpichev in :gh:`116560`.)
|
||||||
|
|
||||||
|
* Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str`
|
||||||
|
object:
|
||||||
|
|
||||||
|
* :c:func:`PyUnicodeWriter_Create`.
|
||||||
|
* :c:func:`PyUnicodeWriter_Discard`.
|
||||||
|
* :c:func:`PyUnicodeWriter_Finish`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteChar`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteUTF8`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteStr`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteRepr`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteSubstring`.
|
||||||
|
* :c:func:`PyUnicodeWriter_Format`.
|
||||||
|
|
||||||
|
(Contributed by Victor Stinner in :gh:`119182`.)
|
||||||
|
|
||||||
Porting to Python 3.14
|
Porting to Python 3.14
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -444,7 +444,40 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
|
||||||
Py_ssize_t size);
|
Py_ssize_t size);
|
||||||
|
|
||||||
|
|
||||||
/* --- _PyUnicodeWriter API ----------------------------------------------- */
|
/* --- Public PyUnicodeWriter API ----------------------------------------- */
|
||||||
|
|
||||||
|
typedef struct PyUnicodeWriter PyUnicodeWriter;
|
||||||
|
|
||||||
|
PyAPI_FUNC(PyUnicodeWriter*) PyUnicodeWriter_Create(Py_ssize_t length);
|
||||||
|
PyAPI_FUNC(void) PyUnicodeWriter_Discard(PyUnicodeWriter *writer);
|
||||||
|
PyAPI_FUNC(PyObject*) PyUnicodeWriter_Finish(PyUnicodeWriter *writer);
|
||||||
|
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteChar(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
Py_UCS4 ch);
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
const char *str,
|
||||||
|
Py_ssize_t size);
|
||||||
|
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
PyObject *obj);
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteRepr(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
PyObject *obj);
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_WriteSubstring(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
PyObject *str,
|
||||||
|
Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
PyAPI_FUNC(int) PyUnicodeWriter_Format(
|
||||||
|
PyUnicodeWriter *writer,
|
||||||
|
const char *format,
|
||||||
|
...);
|
||||||
|
|
||||||
|
|
||||||
|
/* --- Private _PyUnicodeWriter API --------------------------------------- */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject *buffer;
|
PyObject *buffer;
|
||||||
|
@ -466,7 +499,7 @@ typedef struct {
|
||||||
/* If readonly is 1, buffer is a shared string (cannot be modified)
|
/* If readonly is 1, buffer is a shared string (cannot be modified)
|
||||||
and size is set to 0. */
|
and size is set to 0. */
|
||||||
unsigned char readonly;
|
unsigned char readonly;
|
||||||
} _PyUnicodeWriter ;
|
} _PyUnicodeWriter;
|
||||||
|
|
||||||
// Initialize a Unicode writer.
|
// Initialize a Unicode writer.
|
||||||
//
|
//
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` object:
|
||||||
|
|
||||||
|
* :c:func:`PyUnicodeWriter_Create`.
|
||||||
|
* :c:func:`PyUnicodeWriter_Discard`.
|
||||||
|
* :c:func:`PyUnicodeWriter_Finish`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteChar`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteUTF8`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteStr`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteRepr`.
|
||||||
|
* :c:func:`PyUnicodeWriter_WriteSubstring`.
|
||||||
|
* :c:func:`PyUnicodeWriter_Format`.
|
||||||
|
|
||||||
|
Patch by Victor Stinner.
|
|
@ -221,6 +221,221 @@ unicode_copycharacters(PyObject *self, PyObject *args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Drive WriteUTF8, WriteChar, WriteSubstring, WriteStr and WriteRepr on one
// writer and check the string returned by PyUnicodeWriter_Finish().
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyObject *piece = NULL;
    PyObject *text = NULL;
    int status;

    PyUnicodeWriter *writer = PyUnicodeWriter_Create(100);
    if (!writer) {
        return NULL;
    }

    // PyUnicodeWriter_WriteUTF8(): a size of -1 means "use strlen()"
    status = PyUnicodeWriter_WriteUTF8(writer, "var", -1);
    if (status < 0) {
        goto discard;
    }

    // PyUnicodeWriter_WriteChar()
    status = PyUnicodeWriter_WriteChar(writer, '=');
    if (status < 0) {
        goto discard;
    }

    // PyUnicodeWriter_WriteSubstring(): [1:5] strips the brackets
    piece = PyUnicode_FromString("[long]");
    if (!piece) {
        goto discard;
    }
    status = PyUnicodeWriter_WriteSubstring(writer, piece, 1, 5);
    Py_DECREF(piece);
    if (status < 0) {
        goto discard;
    }

    // PyUnicodeWriter_WriteStr()
    piece = PyUnicode_FromString(" value ");
    if (!piece) {
        goto discard;
    }
    status = PyUnicodeWriter_WriteStr(writer, piece);
    Py_DECREF(piece);
    if (status < 0) {
        goto discard;
    }

    // PyUnicodeWriter_WriteRepr(): the repr adds the quotes
    piece = PyUnicode_FromString("repr");
    if (!piece) {
        goto discard;
    }
    status = PyUnicodeWriter_WriteRepr(writer, piece);
    Py_DECREF(piece);
    if (status < 0) {
        goto discard;
    }

    // Finish() destroys the writer whether or not it succeeds, so the
    // failure path below must not be used from here on.
    text = PyUnicodeWriter_Finish(writer);
    if (!text) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "var=long value 'repr'"));
    Py_DECREF(text);

    Py_RETURN_NONE;

discard:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Write ASCII, 2-byte and 3-byte UTF-8 sequences through
// PyUnicodeWriter_WriteUTF8(), each followed by one separator character, and
// check the resulting string.
static PyObject *
test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    // Written in order: the UTF-8 chunk first, then its separator.
    static const struct {
        const char *utf8;
        Py_UCS4 sep;
    } steps[] = {
        {"ascii", '-'},
        {"latin1=\xC3\xA9", '-'},      // U+00E9
        {"euro=\xE2\x82\xAC", '.'},    // U+20AC
    };
    const size_t nsteps = sizeof(steps) / sizeof(steps[0]);

    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (!writer) {
        return NULL;
    }

    for (size_t i = 0; i < nsteps; i++) {
        if (PyUnicodeWriter_WriteUTF8(writer, steps[i].utf8, -1) < 0
            || PyUnicodeWriter_WriteChar(writer, steps[i].sep) < 0)
        {
            PyUnicodeWriter_Discard(writer);
            return NULL;
        }
    }

    // Finish() destroys the writer; nothing to discard after this point.
    PyObject *text = PyUnicodeWriter_Finish(writer);
    if (!text) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text,
                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
    Py_DECREF(text);

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check that PyUnicodeWriter_WriteUTF8() rejects an invalid UTF-8 sequence
// with UnicodeDecodeError.
//
// The call under test is made outside of assert() so that it still runs
// when assertions are compiled out (NDEBUG); only the check itself is an
// assertion.
static PyObject *
test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // 0xFF can never appear in well-formed UTF-8
    int res = PyUnicodeWriter_WriteUTF8(writer, "invalid=\xFF", -1);
    assert(res < 0);
    (void)res;  // silence "unused variable" when assert() expands to nothing
    PyUnicodeWriter_Discard(writer);

    assert(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
    PyErr_Clear();

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check that a writer remains usable after PyUnicodeWriter_WriteUTF8()
// fails: the failed write must leave no partial output behind.
//
// Every write is performed outside of assert() so the sequence still runs
// when assertions are compiled out (NDEBUG).
static PyObject *
test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    int res = PyUnicodeWriter_WriteUTF8(writer, "value=", -1);
    assert(res == 0);

    // write fails with an invalid string
    res = PyUnicodeWriter_WriteUTF8(writer, "invalid\xFF", -1);
    assert(res < 0);
    PyErr_Clear();

    // retry write with a valid string
    res = PyUnicodeWriter_WriteUTF8(writer, "valid", -1);
    assert(res == 0);
    (void)res;  // silence "unused variable" when assert() expands to nothing

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    // "invalid" must not show up: the failed write was rolled back
    assert(PyUnicode_EqualToUTF8(result, "value=valid"));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar() on
// the same writer.
static PyObject *
test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (!writer) {
        return NULL;
    }

    // Format first, then the trailing dot; the second call is skipped if the
    // first one fails.
    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() destroys the writer; nothing to discard after this point.
    PyObject *text = PyUnicodeWriter_Finish(writer);
    if (!text) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "Hello 123."));
    Py_DECREF(text);

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check that a writer remains usable after PyUnicodeWriter_Format() fails:
// the failed call must leave no partial output behind.
//
// Every call is performed outside of assert() so the sequence still runs
// when assertions are compiled out (NDEBUG).
static PyObject *
test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    int res = PyUnicodeWriter_Format(writer, "%s ", "Hello");
    assert(res == 0);

    // PyUnicodeWriter_Format() fails with an invalid format string
    res = PyUnicodeWriter_Format(writer, "%s\xff", "World");
    assert(res < 0);
    PyErr_Clear();

    // Retry PyUnicodeWriter_Format() with a valid format string
    res = PyUnicodeWriter_Format(writer, "%s.", "World");
    assert(res == 0);
    (void)res;  // silence "unused variable" when assert() expands to nothing

    PyObject *result = PyUnicodeWriter_Finish(writer);
    if (result == NULL) {
        return NULL;
    }
    // Only one "World": the output of the failed call was rolled back
    assert(PyUnicode_EqualToUTF8(result, "Hello World."));
    Py_DECREF(result);

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef TestMethods[] = {
|
static PyMethodDef TestMethods[] = {
|
||||||
{"unicode_new", unicode_new, METH_VARARGS},
|
{"unicode_new", unicode_new, METH_VARARGS},
|
||||||
{"unicode_fill", unicode_fill, METH_VARARGS},
|
{"unicode_fill", unicode_fill, METH_VARARGS},
|
||||||
|
@ -229,6 +444,12 @@ static PyMethodDef TestMethods[] = {
|
||||||
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
|
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
|
||||||
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
||||||
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
|
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
|
||||||
|
{"test_unicodewriter", test_unicodewriter, METH_NOARGS},
|
||||||
|
{"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS},
|
||||||
|
{"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
|
||||||
|
{"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
|
||||||
|
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS},
|
||||||
|
{"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
|
||||||
{NULL},
|
{NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -2872,23 +2872,21 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
static int
|
||||||
PyUnicode_FromFormatV(const char *format, va_list vargs)
|
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
|
||||||
{
|
{
|
||||||
|
writer->min_length += strlen(format) + 100;
|
||||||
|
writer->overallocate = 1;
|
||||||
|
|
||||||
va_list vargs2;
|
va_list vargs2;
|
||||||
const char *f;
|
const char *f;
|
||||||
_PyUnicodeWriter writer;
|
|
||||||
|
|
||||||
_PyUnicodeWriter_Init(&writer);
|
|
||||||
writer.min_length = strlen(format) + 100;
|
|
||||||
writer.overallocate = 1;
|
|
||||||
|
|
||||||
// Copy varags to be able to pass a reference to a subfunction.
|
// Copy varags to be able to pass a reference to a subfunction.
|
||||||
va_copy(vargs2, vargs);
|
va_copy(vargs2, vargs);
|
||||||
|
|
||||||
for (f = format; *f; ) {
|
for (f = format; *f; ) {
|
||||||
if (*f == '%') {
|
if (*f == '%') {
|
||||||
f = unicode_fromformat_arg(&writer, f, &vargs2);
|
f = unicode_fromformat_arg(writer, f, &vargs2);
|
||||||
if (f == NULL)
|
if (f == NULL)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -2912,21 +2910,33 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
len = p - f;
|
len = p - f;
|
||||||
|
|
||||||
if (*p == '\0')
|
if (*p == '\0')
|
||||||
writer.overallocate = 0;
|
writer->overallocate = 0;
|
||||||
|
|
||||||
if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
|
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
f = p;
|
f = p;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
va_end(vargs2);
|
va_end(vargs2);
|
||||||
return _PyUnicodeWriter_Finish(&writer);
|
return 0;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
va_end(vargs2);
|
va_end(vargs2);
|
||||||
_PyUnicodeWriter_Dealloc(&writer);
|
return -1;
|
||||||
return NULL;
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
|
{
|
||||||
|
_PyUnicodeWriter writer;
|
||||||
|
_PyUnicodeWriter_Init(&writer);
|
||||||
|
|
||||||
|
if (unicode_from_format(&writer, format, vargs) < 0) {
|
||||||
|
_PyUnicodeWriter_Dealloc(&writer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return _PyUnicodeWriter_Finish(&writer);
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
|
@ -2941,6 +2951,23 @@ PyUnicode_FromFormat(const char *format, ...)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
|
||||||
|
{
|
||||||
|
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
|
||||||
|
Py_ssize_t old_pos = _writer->pos;
|
||||||
|
|
||||||
|
va_list vargs;
|
||||||
|
va_start(vargs, format);
|
||||||
|
int res = unicode_from_format(_writer, format, vargs);
|
||||||
|
va_end(vargs);
|
||||||
|
|
||||||
|
if (res < 0) {
|
||||||
|
_writer->pos = old_pos;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
static Py_ssize_t
|
static Py_ssize_t
|
||||||
unicode_get_widechar_size(PyObject *unicode)
|
unicode_get_widechar_size(PyObject *unicode)
|
||||||
{
|
{
|
||||||
|
@ -4927,6 +4954,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Used by PyUnicodeWriter_WriteUTF8() implementation
|
||||||
static int
|
static int
|
||||||
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
|
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
|
||||||
const char *s, Py_ssize_t size,
|
const char *s, Py_ssize_t size,
|
||||||
|
@ -13080,6 +13108,7 @@ unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start,
|
||||||
return PyBool_FromLong(result);
|
return PyBool_FromLong(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
|
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
|
||||||
{
|
{
|
||||||
|
@ -13103,6 +13132,7 @@ _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
|
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
|
||||||
{
|
{
|
||||||
|
@ -13111,12 +13141,41 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
|
||||||
/* ASCII is the bare minimum */
|
/* ASCII is the bare minimum */
|
||||||
writer->min_char = 127;
|
writer->min_char = 127;
|
||||||
|
|
||||||
/* use a value smaller than PyUnicode_1BYTE_KIND() so
|
/* use a kind value smaller than PyUnicode_1BYTE_KIND so
|
||||||
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
|
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
|
||||||
writer->kind = 0;
|
assert(writer->kind == 0);
|
||||||
assert(writer->kind <= PyUnicode_1BYTE_KIND);
|
assert(writer->kind < PyUnicode_1BYTE_KIND);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PyUnicodeWriter*
|
||||||
|
PyUnicodeWriter_Create(Py_ssize_t length)
|
||||||
|
{
|
||||||
|
const size_t size = sizeof(_PyUnicodeWriter);
|
||||||
|
PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
|
||||||
|
if (pub_writer == NULL) {
|
||||||
|
return (PyUnicodeWriter *)PyErr_NoMemory();
|
||||||
|
}
|
||||||
|
_PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
|
||||||
|
|
||||||
|
_PyUnicodeWriter_Init(writer);
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
|
||||||
|
PyUnicodeWriter_Discard(pub_writer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
writer->overallocate = 1;
|
||||||
|
|
||||||
|
return pub_writer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Drop everything written so far and free the writer.
//
// A NULL *writer* is accepted and ignored, so cleanup code can call this
// unconditionally.  The writer must not be used after this call.
void
PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
    if (writer == NULL) {
        return;
    }
    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
    PyMem_Free(writer);
}
|
||||||
|
|
||||||
|
|
||||||
// Initialize _PyUnicodeWriter with initial buffer
|
// Initialize _PyUnicodeWriter with initial buffer
|
||||||
static inline void
|
static inline void
|
||||||
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
|
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
|
||||||
|
@ -13127,6 +13186,7 @@ _PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
|
||||||
writer->min_length = writer->size;
|
writer->min_length = writer->size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
|
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
|
||||||
Py_ssize_t length, Py_UCS4 maxchar)
|
Py_ssize_t length, Py_UCS4 maxchar)
|
||||||
|
@ -13242,9 +13302,17 @@ _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
|
||||||
return _PyUnicodeWriter_WriteCharInline(writer, ch);
|
return _PyUnicodeWriter_WriteCharInline(writer, ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
|
||||||
|
{
|
||||||
|
return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
||||||
{
|
{
|
||||||
|
assert(PyUnicode_Check(str));
|
||||||
|
|
||||||
Py_UCS4 maxchar;
|
Py_UCS4 maxchar;
|
||||||
Py_ssize_t len;
|
Py_ssize_t len;
|
||||||
|
|
||||||
|
@ -13270,6 +13338,34 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
{
|
||||||
|
PyObject *str = PyObject_Str(obj);
|
||||||
|
if (str == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
|
||||||
|
Py_DECREF(str);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
{
|
||||||
|
PyObject *repr = PyObject_Repr(obj);
|
||||||
|
if (repr == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
|
||||||
|
Py_DECREF(repr);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
||||||
Py_ssize_t start, Py_ssize_t end)
|
Py_ssize_t start, Py_ssize_t end)
|
||||||
|
@ -13302,6 +13398,29 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
|
||||||
|
Py_ssize_t start, Py_ssize_t end)
|
||||||
|
{
|
||||||
|
if (!PyUnicode_Check(str)) {
|
||||||
|
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (start < 0 || start > end) {
|
||||||
|
PyErr_Format(PyExc_ValueError, "invalid start argument");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (end > PyUnicode_GET_LENGTH(str)) {
|
||||||
|
PyErr_Format(PyExc_ValueError, "invalid end argument");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
|
||||||
|
start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
||||||
const char *ascii, Py_ssize_t len)
|
const char *ascii, Py_ssize_t len)
|
||||||
|
@ -13362,6 +13481,25 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
|
||||||
|
const char *str,
|
||||||
|
Py_ssize_t size)
|
||||||
|
{
|
||||||
|
if (size < 0) {
|
||||||
|
size = strlen(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
|
||||||
|
Py_ssize_t old_pos = _writer->pos;
|
||||||
|
int res = unicode_decode_utf8_writer(_writer, str, size,
|
||||||
|
_Py_ERROR_STRICT, NULL, NULL);
|
||||||
|
if (res < 0) {
|
||||||
|
_writer->pos = old_pos;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
|
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
|
||||||
const char *str, Py_ssize_t len)
|
const char *str, Py_ssize_t len)
|
||||||
|
@ -13408,6 +13546,17 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
|
||||||
return unicode_result(str);
|
return unicode_result(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PyObject*
|
||||||
|
PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
|
||||||
|
assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
|
||||||
|
PyMem_Free(writer);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
|
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue