From 4a58707a340cacea6f8e6a82adfcc10a230e1185 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 19 Nov 2013 12:54:53 +0100 Subject: [PATCH] Add _PyUnicodeWriter_WriteASCIIString() function --- Include/unicodeobject.h | 17 +++++-- Objects/listobject.c | 9 +--- Objects/unicodeobject.c | 90 ++++++++++++++++++++++++++++++-------- Python/formatter_unicode.c | 18 ++++---- 4 files changed, 96 insertions(+), 38 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 20ce41d8310..b4891e4690f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -962,12 +962,20 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, Py_ssize_t end ); +/* Append a ASCII-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *str, /* ASCII-encoded byte string */ + Py_ssize_t len /* number of bytes, or -1 if unknown */ + ); + /* Append a latin1-encoded byte string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) -_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, - const char *str, /* latin1-encoded byte string */ - Py_ssize_t len /* length in bytes */ +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, /* latin1-encoded byte string */ + Py_ssize_t len /* length in bytes */ ); /* Get the value of the writer as an Unicode string. Clear the @@ -979,6 +987,9 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); /* Deallocate memory of a writer (clear its internal buffer). */ PyAPI_FUNC(void) _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); + +PyAPI_FUNC(int) _PyObject_ReprWriter(_PyUnicodeWriter *writer, + PyObject *v); #endif #ifndef Py_LIMITED_API diff --git a/Objects/listobject.c b/Objects/listobject.c index 45666fddfa9..7d5674c9f48 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -339,19 +339,12 @@ list_repr(PyListObject *v) { Py_ssize_t i; PyObject *s; - static PyObject *sep = NULL; _PyUnicodeWriter writer; if (Py_SIZE(v) == 0) { return PyUnicode_FromString("[]"); } - if (sep == NULL) { - sep = PyUnicode_FromString(", "); - if (sep == NULL) - return NULL; - } - i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; @@ -369,7 +362,7 @@ list_repr(PyListObject *v) so must refetch the list size on each iteration. */ for (i = 0; i < Py_SIZE(v); ++i) { if (i > 0) { - if (_PyUnicodeWriter_WriteStr(&writer, sep) < 0) + if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) goto error; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7114006d35b..880889e8a29 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -140,9 +140,9 @@ extern "C" { buffer where the result characters are written to. */ #define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \ do { \ - to_type *_to = (to_type *) to; \ - const from_type *_iter = (begin); \ - const from_type *_end = (end); \ + to_type *_to = (to_type *)(to); \ + const from_type *_iter = (from_type *)(begin); \ + const from_type *_end = (from_type *)(end); \ Py_ssize_t n = (_end) - (_iter); \ const from_type *_unrolled_end = \ _iter + _Py_SIZE_ROUND_DOWN(n, 4); \ @@ -2562,7 +2562,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, precision = len; arglen = Py_MAX(precision, width); - assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127); if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1) return NULL; @@ -2581,8 +2580,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, writer->pos += fill; } - unicode_write_cstr(writer->buffer, writer->pos, buffer, len); - writer->pos += len; + if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0) + return NULL; break; } @@ -2604,11 +2603,8 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, len += 2; } - assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127); - if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0) return NULL; - unicode_write_cstr(writer->buffer, writer->pos, number, len); - writer->pos += len; break; } @@ -2707,7 +2703,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, skip the code, since there's no way to know what's in the argument list) */ len = strlen(p); - if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1) + if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1) return NULL; f = p+len; return f; @@ -2759,10 +2755,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) if (*p == '\0') writer.overallocate = 0; - if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1) + + if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0) goto fail; - unicode_write_cstr(writer.buffer, writer.pos, f, len); - writer.pos += len; f = p; } @@ -13461,7 +13456,68 @@ _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, } int -_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len) +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *ascii, Py_ssize_t len) +{ + if (len == -1) + len = strlen(ascii); + + assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128); + + if (writer->buffer == NULL && !writer->overallocate) { + PyObject *str; + + str = _PyUnicode_FromASCII(ascii, len); + if (str == NULL) + return -1; + + writer->readonly = 1; + writer->buffer = str; + _PyUnicodeWriter_Update(writer); + writer->pos += len; + return 0; + } + + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return -1; + + switch (writer->kind) + { + case PyUnicode_1BYTE_KIND: + { + const Py_UCS1 *str = (const Py_UCS1 *)ascii; + Py_UCS1 *data = writer->data; + + Py_MEMCPY(data + writer->pos, str, len); + break; + } + case PyUnicode_2BYTE_KIND: + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS2, + ascii, ascii + len, + (Py_UCS2 *)writer->data + writer->pos); + break; + } + case PyUnicode_4BYTE_KIND: + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS4, + ascii, ascii + len, + (Py_UCS4 *)writer->data + writer->pos); + break; + } + default: + assert(0); + } + + writer->pos += len; + return 0; +} + +int +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, Py_ssize_t len) { Py_UCS4 maxchar; @@ -13828,12 +13884,10 @@ formatfloat(PyObject *v, struct unicode_format_arg_t *arg, return -1; len = strlen(p); if (writer) { - if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) { + if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) { PyMem_Free(p); return -1; } - unicode_write_cstr(writer->buffer, writer->pos, p, len); - writer->pos += len; } else *p_output = _PyUnicode_FromASCII(p, len); diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index e68087fd5a4..0a3cc593d64 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1053,24 +1053,24 @@ format_float_internal(PyObject *value, n_digits += 1; } - /* Since there is no unicode version of PyOS_double_to_string, - just use the 8 bit version and then convert to unicode. */ - unicode_tmp = _PyUnicode_FromASCII(buf, n_digits); - PyMem_Free(buf); - if (unicode_tmp == NULL) - goto done; - if (format->sign != '+' && format->sign != ' ' && format->width == -1 && format->type != 'n' && !format->thousands_separators) { /* Fast path */ - result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp); - Py_DECREF(unicode_tmp); + result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits); + PyMem_Free(buf); return result; } + /* Since there is no unicode version of PyOS_double_to_string, + just use the 8 bit version and then convert to unicode. */ + unicode_tmp = _PyUnicode_FromASCII(buf, n_digits); + PyMem_Free(buf); + if (unicode_tmp == NULL) + goto done; + /* Is a sign character present in the output? If so, remember it and skip it */ index = 0;