From ca9381ea01211e79f5bc6078b95e177f1c04f52b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 22 Sep 2015 00:58:32 +0200 Subject: [PATCH] Issue #24870: Add _PyUnicodeWriter_PrepareKind() macro Add a macro which ensures that the writer has at least the requested kind. --- Include/unicodeobject.h | 17 +++++++++++++++++ Objects/unicodeobject.c | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 33e8f19af08..d0e01426146 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -942,6 +942,23 @@ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar); +/* Prepare the buffer to have at least the kind KIND. + For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will + support characters in range U+0000-U+FFFF. + + Return 0 on success, raise an exception and return -1 on error. Evaluates to 0 without calling _PyUnicodeWriter_PrepareKindInternal() when the writer kind is already at least KIND. KIND must not be PyUnicode_WCHAR_KIND (asserted). WRITER and KIND are evaluated more than once, so pass expressions without side effects. */ +#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ + (assert((KIND) != PyUnicode_WCHAR_KIND), \ + (KIND) <= (WRITER)->kind \ + ? 0 \ + : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) + +/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() + macro instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + enum PyUnicode_Kind kind); + /* Append a Unicode character. Return 0 on success, raise an exception and return -1 on error. 
*/ PyAPI_FUNC(int) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f5f2d48e504..7c079e0799e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6722,14 +6722,11 @@ PyUnicode_DecodeASCII(const char *s, case _Py_ERROR_REPLACE: case _Py_ERROR_SURROGATEESCAPE: /* Fast-path: the error handler only writes one character, - but we must switch to UCS2 at the first write */ - if (kind < PyUnicode_2BYTE_KIND) { - if (_PyUnicodeWriter_Prepare(&writer, size - writer.pos, - 0xffff) < 0) - return NULL; - kind = writer.kind; - data = writer.data; - } + but we may switch to UCS2 at the first write */ + if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0) + goto onError; + kind = writer.kind; + data = writer.data; if (error_handler == _Py_ERROR_REPLACE) PyUnicode_WRITE(kind, data, writer.pos, 0xfffd); @@ -13309,7 +13306,8 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t newlen; PyObject *newbuffer; - assert(length > 0); + /* ensure that the _PyUnicodeWriter_Prepare macro was used: length may be 0 only when maxchar exceeds writer->maxchar (_PyUnicodeWriter_PrepareKindInternal() passes length 0) */ + assert(maxchar > writer->maxchar || length > 0); if (length > PY_SSIZE_T_MAX - writer->pos) { PyErr_NoMemory(); @@ -13375,6 +13373,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, #undef OVERALLOCATE_FACTOR } +int +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + enum PyUnicode_Kind kind) +{ + Py_UCS4 maxchar; + + /* ensure that the _PyUnicodeWriter_PrepareKind macro was used: this function must only be called with a kind greater than writer->kind */ + assert(writer->kind < kind); + + switch (kind) /* select the maxchar forwarded to _PyUnicodeWriter_PrepareInternal() for the requested kind */ + { + case PyUnicode_1BYTE_KIND: maxchar = 0xff; break; + case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break; + case PyUnicode_4BYTE_KIND: maxchar = 0x10ffff; break; + default: + assert(0 && "invalid kind"); + return -1; /* NOTE(review): no exception-setting call is visible on this path; when assert() is compiled out (NDEBUG) this returns -1 as is - confirm it cannot be reached in release builds */ + } + + return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar); /* passes length 0 together with the maxchar selected above */ +} + Py_LOCAL_INLINE(int) _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch) {