From d3f0882dfb3a15d604de1b1620b2bf8de9d643bb Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 29 May 2012 12:57:52 +0200 Subject: [PATCH] Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args) * Formatting string, int, float and complex use the _PyUnicodeWriter API. It avoids a temporary buffer in most cases. * Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just keep a reference to the string if the output is only composed of one string * Disable overallocation when formatting the last argument of str%args and str.format(args) * Overallocation allocates at least 100 characters: add min_length attribute to the _PyUnicodeWriter structure * Add new private functions: _PyUnicode_FastCopyCharacters(), _PyUnicode_FastFill() and _PyUnicode_FromASCII() The speedup is around 20% on average. --- Include/complexobject.h | 10 +- Include/floatobject.h | 10 +- Include/longobject.h | 18 +- Include/unicodeobject.h | 95 ++++++- Misc/NEWS | 3 + Objects/complexobject.c | 17 +- Objects/floatobject.c | 27 +- Objects/longobject.c | 311 +++++++++++++++------- Objects/stringlib/asciilib.h | 2 +- Objects/stringlib/unicode_format.h | 46 ++-- Objects/unicodeobject.c | 362 ++++++++++++++++++------- Python/formatter_unicode.c | 414 +++++++++++++++-------------- 12 files changed, 878 insertions(+), 437 deletions(-) diff --git a/Include/complexobject.h b/Include/complexobject.h index 3e4ecff248d..1934f3b3807 100644 --- a/Include/complexobject.h +++ b/Include/complexobject.h @@ -63,10 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ #ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif #ifdef __cplusplus diff --git a/Include/floatobject.h b/Include/floatobject.h index 0ca4881a327..46ef6e60620 100644 --- a/Include/floatobject.h +++ b/Include/floatobject.h @@ -112,10 +112,12 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff --git a/Include/longobject.h b/Include/longobject.h index c58ddf44009..d741f1b4c01 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -151,14 +151,22 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, /* _PyLong_Format: Convert the long to a string object with given base, appending a base prefix of 0[box] if base is 2, 8 or 16. */ -PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); +PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base); + +PyAPI_FUNC(int) _PyLong_FormatWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + int base, + int alternate); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif /* Py_LIMITED_API */ /* These aren't really part of the long object, but they're handy. The diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 486d4fae6df..99ea48ba069 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( Py_ssize_t from_start, Py_ssize_t how_many ); + +/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so + may crash if parameters are invalid (e.g. if the output string + is too short). */ +PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); #endif +#ifndef Py_LIMITED_API /* Fill a string with a character: write fill_char into unicode[start:start+length]. @@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( Return the number of written character, or return -1 and raise an exception on error. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char ); + +/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash + if parameters are invalid (e.g. if length is longer than the string). */ +PyAPI_FUNC(void) _PyUnicode_FastFill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); #endif /* Create a Unicode Object from the Py_UNICODE buffer u of the given @@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString( const char *u /* UTF-8 encoded string */ ); +#ifndef Py_LIMITED_API /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. 
Scan the string to find the maximum character. */ -#ifndef Py_LIMITED_API PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( int kind, const void *buffer, Py_ssize_t size); + +/* Create a new string from a buffer of ASCII characters. + WARNING: Don't check if the string contains any non-ASCII character. */ +PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( + const char *buffer, + Py_ssize_t size); #endif PyAPI_FUNC(PyObject*) PyUnicode_Substring( @@ -864,13 +890,70 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( ... ); +#ifndef Py_LIMITED_API +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + /* minimum length of the buffer when overallocation is enabled, + see _PyUnicodeWriter_Init() */ + Py_ssize_t min_length; + struct { + unsigned char overallocate:1; + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly:1; + } flags; +} _PyUnicodeWriter ; + +/* Initialize a Unicode writer. + + If min_length is greater than zero, _PyUnicodeWriter_Prepare() + overallocates the buffer and min_length is the minimum length in characters + of the buffer. */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. 
*/ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str); + +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); +#endif + #ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); #endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); diff --git a/Misc/NEWS b/Misc/NEWS index 0d36966e02f..6a01d3eccd2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,9 @@ Core and Builtins - Issue #14835: Make plistlib output empty arrays & dicts like OS X. Patch by Sidney San Martín. +- Issue #14744: Use the new _PyUnicodeWriter internal API to speed up + str%args and str.format(args). + - Issue #14930: Make memoryview objects weakrefable. 
- Issue #14775: Fix a potential quadratic dict build-up due to the garbage diff --git a/Objects/complexobject.c b/Objects/complexobject.c index b73dc4b9112..403c60c917d 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -699,11 +699,22 @@ static PyObject * complex__format__(PyObject* self, PyObject* args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - return _PyComplex_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + return NULL; + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyComplex_FormatAdvancedWriter( + &writer, + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } #if 0 diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 07d31b296ac..3c742c3a038 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -267,13 +267,15 @@ static PyObject * float_repr(PyFloatObject *v) { PyObject *result; - char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), - 'r', 0, - Py_DTSF_ADD_DOT_0, - NULL); + char *buf; + + buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), + 'r', 0, + Py_DTSF_ADD_DOT_0, + NULL); if (!buf) return PyErr_NoMemory(); - result = PyUnicode_FromString(buf); + result = _PyUnicode_FromASCII(buf, strlen(buf)); PyMem_Free(buf); return result; } @@ -1703,11 +1705,22 @@ static PyObject * float__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyFloat_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyFloat_FormatAdvancedWriter( + &writer, + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + 
} + return _PyUnicodeWriter_Finish(&writer); } PyDoc_STRVAR(float__format__doc, diff --git a/Objects/longobject.c b/Objects/longobject.c index 74c59c79743..1369dacca71 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1550,20 +1550,22 @@ divrem1(PyLongObject *a, digit n, digit *prem) string. (Return value is non-shared so that callers can modify the returned value if necessary.) */ -static PyObject * -long_to_decimal_string(PyObject *aa) +static int +long_to_decimal_string_internal(PyObject *aa, + PyObject **p_output, + _PyUnicodeWriter *writer) { PyLongObject *scratch, *a; PyObject *str; Py_ssize_t size, strlen, size_a, i, j; digit *pout, *pin, rem, tenpow; - unsigned char *p; int negative; + enum PyUnicode_Kind kind; a = (PyLongObject *)aa; if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1580,13 +1582,13 @@ long_to_decimal_string(PyObject *aa) if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "long is too large to format"); - return NULL; + return -1; } /* the expression size_a * PyLong_SHIFT is now safe from overflow */ size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); scratch = _PyLong_New(size); if (scratch == NULL) - return NULL; + return -1; /* convert array of base _PyLong_BASE digits in pin to an array of base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, @@ -1609,7 +1611,7 @@ long_to_decimal_string(PyObject *aa) /* check for keyboard interrupt */ SIGCHECK({ Py_DECREF(scratch); - return NULL; + return -1; }); } /* pout should have at least one digit, so that the case when a = 0 @@ -1625,65 +1627,113 @@ long_to_decimal_string(PyObject *aa) tenpow *= 10; strlen++; } - str = PyUnicode_New(strlen, '9'); - if (str == NULL) { - Py_DECREF(scratch); - return NULL; + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) + return -1; + kind = writer->kind; + str = NULL; } 
+ else { + str = PyUnicode_New(strlen, '9'); + if (str == NULL) { + Py_DECREF(scratch); + return -1; + } + kind = PyUnicode_KIND(str); + } + +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \ + else \ + p = (TYPE*)PyUnicode_DATA(str) + strlen; \ + \ + *p = '\0'; \ + /* pout[0] through pout[size-2] contribute exactly \ + _PyLong_DECIMAL_SHIFT digits each */ \ + for (i=0; i < size - 1; i++) { \ + rem = pout[i]; \ + for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } \ + } \ + /* pout[size-1]: always produce at least one decimal digit */ \ + rem = pout[i]; \ + do { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } while (rem != 0); \ + \ + /* and sign */ \ + if (negative) \ + *--p = '-'; \ + \ + /* check we've counted correctly */ \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(str)); \ + } while (0) /* fill the string right-to-left */ - assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); - p = PyUnicode_1BYTE_DATA(str) + strlen; - *p = '\0'; - /* pout[0] through pout[size-2] contribute exactly - _PyLong_DECIMAL_SHIFT digits each */ - for (i=0; i < size - 1; i++) { - rem = pout[i]; - for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { - *--p = '0' + rem % 10; - rem /= 10; - } + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); } - /* pout[size-1]: always produce at least one decimal digit */ - rem = pout[i]; - do { - *--p = '0' + rem % 10; - rem /= 10; - } while (rem != 0); + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); + } + else { + assert (kind == PyUnicode_4BYTE_KIND); + Py_UCS4 *p; + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS - /* and sign */ - if (negative) - *--p = '-'; - - /* check we've counted correctly */ - assert(p == PyUnicode_1BYTE_DATA(str)); - assert(_PyUnicode_CheckConsistency(str, 1)); 
Py_DECREF(scratch); - return (PyObject *)str; + if (writer) { + writer->pos += strlen; + } + else { + assert(_PyUnicode_CheckConsistency(str, 1)); + *p_output = (PyObject *)str; + } + return 0; +} + +static PyObject * +long_to_decimal_string(PyObject *aa) +{ + PyObject *v; + if (long_to_decimal_string_internal(aa, &v, NULL) == -1) + return NULL; + return v; } /* Convert a long int object to a string, using a given conversion base, - which should be one of 2, 8, 10 or 16. Return a string object. - If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ + which should be one of 2, 8 or 16. Return a string object. + If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x' + if alternate is nonzero. */ -PyObject * -_PyLong_Format(PyObject *aa, int base) +static int +long_format_binary(PyObject *aa, int base, int alternate, + PyObject **p_output, _PyUnicodeWriter *writer) { register PyLongObject *a = (PyLongObject *)aa; PyObject *v; Py_ssize_t sz; Py_ssize_t size_a; - Py_UCS1 *p; + enum PyUnicode_Kind kind; int negative; int bits; - assert(base == 2 || base == 8 || base == 10 || base == 16); - if (base == 10) - return long_to_decimal_string((PyObject *)a); - + assert(base == 2 || base == 8 || base == 16); if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1706,7 +1756,7 @@ _PyLong_Format(PyObject *aa, int base) /* Compute exact length 'sz' of output string. */ if (size_a == 0) { - sz = 3; + sz = 1; } else { Py_ssize_t size_a_in_bits; @@ -1714,56 +1764,126 @@ _PyLong_Format(PyObject *aa, int base) if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "int is too large to format"); - return NULL; + return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + bits_in_digit(a->ob_digit[size_a - 1]); - /* Allow 2 characters for prefix and 1 for a '-' sign. 
*/ - sz = 2 + negative + (size_a_in_bits + (bits - 1)) / bits; + /* Allow 1 character for a '-' sign. */ + sz = negative + (size_a_in_bits + (bits - 1)) / bits; + } + if (alternate) { + /* 2 characters for prefix */ + sz += 2; } - v = PyUnicode_New(sz, 'x'); - if (v == NULL) { - return NULL; - } - assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - - p = PyUnicode_1BYTE_DATA(v) + sz; - if (size_a == 0) { - *--p = '0'; + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1) + return -1; + kind = writer->kind; + v = NULL; } else { - /* JRH: special case for power-of-2 bases */ - twodigits accum = 0; - int accumbits = 0; /* # of bits in accum */ - Py_ssize_t i; - for (i = 0; i < size_a; ++i) { - accum |= (twodigits)a->ob_digit[i] << accumbits; - accumbits += PyLong_SHIFT; - assert(accumbits >= bits); - do { - char cdigit; - cdigit = (char)(accum & (base - 1)); - cdigit += (cdigit < 10) ? '0' : 'a'-10; - *--p = cdigit; - accumbits -= bits; - accum >>= bits; - } while (i < size_a-1 ? accumbits >= bits : accum > 0); - } + v = PyUnicode_New(sz, 'x'); + if (v == NULL) + return -1; + kind = PyUnicode_KIND(v); } - if (base == 16) - *--p = 'x'; - else if (base == 8) - *--p = 'o'; - else /* (base == 2) */ - *--p = 'b'; - *--p = '0'; - if (negative) - *--p = '-'; - assert(p == PyUnicode_1BYTE_DATA(v)); - assert(_PyUnicode_CheckConsistency(v, 1)); - return v; +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \ + else \ + p = (TYPE*)PyUnicode_DATA(v) + sz; \ + \ + if (size_a == 0) { \ + *--p = '0'; \ + } \ + else { \ + /* JRH: special case for power-of-2 bases */ \ + twodigits accum = 0; \ + int accumbits = 0; /* # of bits in accum */ \ + Py_ssize_t i; \ + for (i = 0; i < size_a; ++i) { \ + accum |= (twodigits)a->ob_digit[i] << accumbits; \ + accumbits += PyLong_SHIFT; \ + assert(accumbits >= bits); \ + do { \ + char cdigit; \ + cdigit = (char)(accum & (base - 1)); \ + cdigit += (cdigit < 10) ? 
'0' : 'a'-10; \ + *--p = cdigit; \ + accumbits -= bits; \ + accum >>= bits; \ + } while (i < size_a-1 ? accumbits >= bits : accum > 0); \ + } \ + } \ + \ + if (alternate) { \ + if (base == 16) \ + *--p = 'x'; \ + else if (base == 8) \ + *--p = 'o'; \ + else /* (base == 2) */ \ + *--p = 'b'; \ + *--p = '0'; \ + } \ + if (negative) \ + *--p = '-'; \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(v)); \ + } while (0) + + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); + } + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); + } + else { + assert (kind == PyUnicode_4BYTE_KIND); + Py_UCS4 *p; + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS + + if (writer) { + writer->pos += sz; + } + else { + assert(_PyUnicode_CheckConsistency(v, 1)); + *p_output = v; + } + return 0; +} + +PyObject * +_PyLong_Format(PyObject *obj, int base) +{ + PyObject *str; + int err; + if (base == 10) + err = long_to_decimal_string_internal(obj, &str, NULL); + else + err = long_format_binary(obj, base, 1, &str, NULL); + if (err == -1) + return NULL; + return str; +} + +int +_PyLong_FormatWriter(_PyUnicodeWriter *writer, + PyObject *obj, + int base, int alternate) +{ + if (base == 10) + return long_to_decimal_string_internal(obj, NULL, writer); + else + return long_format_binary(obj, base, alternate, NULL, writer); } /* Table of digit values for 8-bit string -> integer conversion. 
@@ -4232,11 +4352,22 @@ static PyObject * long__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyLong_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyLong_FormatAdvancedWriter( + &writer, + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } /* Return a pair (q, r) such that a = b * q + r, and diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h index ab5bae75344..f62813d2fd1 100644 --- a/Objects/stringlib/asciilib.h +++ b/Objects/stringlib/asciilib.h @@ -18,7 +18,7 @@ #define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL #define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH -#define STRINGLIB_NEW unicode_fromascii +#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN)) #define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index 9c0b0cf04ee..d71cf44a34b 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write int ok = 0; PyObject *result = NULL; PyObject *format_spec_object = NULL; - PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; - Py_ssize_t len; + int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; + int err; /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. 
*/ if (PyUnicode_CheckExact(fieldobj)) - formatter = _PyUnicode_FormatAdvanced; + formatter = _PyUnicode_FormatAdvancedWriter; else if (PyLong_CheckExact(fieldobj)) - formatter =_PyLong_FormatAdvanced; + formatter = _PyLong_FormatAdvancedWriter; else if (PyFloat_CheckExact(fieldobj)) - formatter = _PyFloat_FormatAdvanced; - - /* XXX: for 2.6, convert format_spec to the appropriate type - (unicode, str) */ + formatter = _PyFloat_FormatAdvancedWriter; + else if (PyComplex_CheckExact(fieldobj)) + formatter = _PyComplex_FormatAdvancedWriter; if (formatter) { /* we know exactly which formatter will be called when __format__ is looked up, so call it directly, instead. */ - result = formatter(fieldobj, format_spec->str, - format_spec->start, format_spec->end); + err = formatter(writer, fieldobj, format_spec->str, + format_spec->start, format_spec->end); + return (err == 0); } else { /* We need to create an object out of the pointers we have, because @@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write } if (result == NULL) goto done; - if (PyUnicode_READY(result) == -1) - goto done; - len = PyUnicode_GET_LENGTH(result); - if (_PyUnicodeWriter_Prepare(writer, - len, PyUnicode_MAX_CHAR_VALUE(result)) == -1) + if (_PyUnicodeWriter_WriteStr(writer, result) == -1) goto done; - copy_characters(writer->buffer, writer->pos, - result, 0, len); - writer->pos += len; ok = 1; + done: Py_XDECREF(format_spec_object); Py_XDECREF(result); @@ -897,16 +891,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); if (err == -1) return 0; - copy_characters(writer->buffer, writer->pos, - literal.str, literal.start, sublen); + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + literal.str, literal.start, sublen); writer->pos += sublen; } - if (field_present) + if (field_present) { + if (iter.str.start == iter.str.end) + writer->flags.overallocate = 0; if 
(!output_markup(&field_name, &format_spec, format_spec_needs_expanding, conversion, writer, args, kwargs, recursion_depth, auto_number)) return 0; + } } return result; } @@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { _PyUnicodeWriter writer; - Py_ssize_t initlen; + Py_ssize_t minlen; /* check the recursion level */ if (recursion_depth <= 0) { @@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, return NULL; } - initlen = PyUnicode_GET_LENGTH(input->str) + 100; - if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1) - return NULL; + minlen = PyUnicode_GET_LENGTH(input->str) + 100; + _PyUnicodeWriter_Init(&writer, minlen); if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8fbc203c530..00bfff2c05b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -225,15 +225,9 @@ const unsigned char _Py_ascii_whitespace[] = { /* forward */ static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyObject* get_latin1_char(unsigned char ch); -static void copy_characters( - PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many); static int unicode_modifiable(PyObject *unicode); -static PyObject * -unicode_fromascii(const unsigned char *s, Py_ssize_t size); static PyObject * _PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size); static PyObject * @@ -783,7 +777,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length) return NULL; copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - copy_characters(copy, 0, unicode, 0, copy_length); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); return copy; } else { @@ -1154,15 +1148,16 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, assert(0 <= from_start); assert(0 <= to_start); assert(PyUnicode_Check(from)); - 
assert(PyUnicode_Check(to)); assert(PyUnicode_IS_READY(from)); - assert(PyUnicode_IS_READY(to)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); - assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); if (how_many == 0) return 0; + assert(PyUnicode_Check(to)); + assert(PyUnicode_IS_READY(to)); + assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); + from_kind = PyUnicode_KIND(from); from_data = PyUnicode_DATA(from); to_kind = PyUnicode_KIND(to); @@ -1267,10 +1262,10 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, return 0; } -static void -copy_characters(PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many) +void +_PyUnicode_FastCopyCharacters( + PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many) { (void)_copy_characters(to, to_start, from, from_start, how_many, 0); } @@ -1292,6 +1287,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, if (PyUnicode_READY(to) == -1) return -1; + if (from_start < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } + if (to_start < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { PyErr_Format(PyExc_SystemError, @@ -1641,7 +1644,7 @@ unicode_widen(PyObject **p_unicode, Py_ssize_t length, maxchar); if (result == NULL) return -1; - PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, length); + _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length); Py_DECREF(*p_unicode); *p_unicode = result; return 0; @@ -1841,9 +1844,10 @@ _PyUnicode_ClearStaticStrings() /* Internal function, doesn't check maximum character */ -static PyObject* -unicode_fromascii(const unsigned char* s, Py_ssize_t size) +PyObject* +_PyUnicode_FromASCII(const char *buffer, Py_ssize_t size) { + const unsigned char *s = (const unsigned char *)buffer; 
PyObject *unicode; if (size == 1) { #ifdef Py_DEBUG @@ -2085,7 +2089,7 @@ unicode_adjust_maxchar(PyObject **p_unicode) return; } copy = PyUnicode_New(len, max_char); - copy_characters(copy, 0, unicode, 0, len); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len); Py_DECREF(unicode); *p_unicode = copy; } @@ -2753,7 +2757,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) (void) va_arg(vargs, char *); size = PyUnicode_GET_LENGTH(*callresult); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; /* We're done with the unicode()/repr() => forget it */ Py_DECREF(*callresult); @@ -2767,7 +2771,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) Py_ssize_t size; assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); size = PyUnicode_GET_LENGTH(obj); - copy_characters(string, i, obj, 0, size); + _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); i += size; break; } @@ -2779,13 +2783,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) if (obj) { size = PyUnicode_GET_LENGTH(obj); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - copy_characters(string, i, obj, 0, size); + _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); i += size; } else { size = PyUnicode_GET_LENGTH(*callresult); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; Py_DECREF(*callresult); } @@ -2800,7 +2804,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) /* unused, since we already have the result */ (void) va_arg(vargs, PyObject *); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; /* We're done with the unicode()/repr() => 
forget it */ Py_DECREF(*callresult); @@ -4171,7 +4175,7 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, if (unicode_widen(output, *outpos, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) goto onError; - copy_characters(*output, *outpos, repunicode, 0, replen); + _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen); *outpos += replen; } else { @@ -9216,12 +9220,14 @@ fixup(PyObject *self, /* If the maxchar increased so that the kind changed, not all characters are representable anymore and we need to fix the string again. This only happens in very few cases. */ - copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self)); + _PyUnicode_FastCopyCharacters(v, 0, + self, 0, PyUnicode_GET_LENGTH(self)); maxchar_old = fixfct(v); assert(maxchar_old > 0 && maxchar_old <= maxchar_new); } else { - copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self)); + _PyUnicode_FastCopyCharacters(v, 0, + u, 0, PyUnicode_GET_LENGTH(self)); } Py_DECREF(u); assert(_PyUnicode_CheckConsistency(v, 1)); @@ -9603,7 +9609,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) res_data += kind * seplen; } else { - copy_characters(res, res_offset, sep, 0, seplen); + _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen); res_offset += seplen; } } @@ -9616,7 +9622,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) res_data += kind * itemlen; } else { - copy_characters(res, res_offset, item, 0, itemlen); + _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen); res_offset += itemlen; } } @@ -9663,13 +9669,25 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) } \ } while (0) +void +_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, + Py_UCS4 fill_char) +{ + const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + const void *data = PyUnicode_DATA(unicode); + assert(PyUnicode_IS_READY(unicode)); + assert(unicode_modifiable(unicode)); + assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode)); + 
assert(start >= 0); + assert(start + length <= PyUnicode_GET_LENGTH(unicode)); + FILL(kind, data, fill_char, start, length); +} + Py_ssize_t PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char) { Py_ssize_t maxlen; - enum PyUnicode_Kind kind; - void *data; if (!PyUnicode_Check(unicode)) { PyErr_BadInternalCall(); @@ -9680,6 +9698,10 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, if (unicode_check_modifiable(unicode)) return -1; + if (start < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } if (fill_char > PyUnicode_MAX_CHAR_VALUE(unicode)) { PyErr_SetString(PyExc_ValueError, "fill character is bigger than " @@ -9692,9 +9714,7 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, if (length <= 0) return 0; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - FILL(kind, data, fill_char, start, length); + _PyUnicode_FastFill(unicode, start, length, fill_char); return length; } @@ -9734,7 +9754,7 @@ pad(PyObject *self, FILL(kind, data, fill, 0, left); if (right) FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); - copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self)); + _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self)); assert(_PyUnicode_CheckConsistency(u, 1)); return u; } @@ -10058,7 +10078,7 @@ replace(PyObject *self, PyObject *str1, u = PyUnicode_New(slen, maxchar); if (!u) goto error; - copy_characters(u, 0, self, 0, slen); + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); rkind = PyUnicode_KIND(u); PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2); @@ -10626,8 +10646,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right) w = PyUnicode_New(new_len, maxchar); if (w == NULL) goto onError; - copy_characters(w, 0, u, 0, u_len); - copy_characters(w, u_len, v, 0, v_len); + _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len); + _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len); 
Py_DECREF(u); Py_DECREF(v); assert(_PyUnicode_CheckConsistency(w, 1)); @@ -10702,7 +10722,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) goto error; } /* copy 'right' into the newly allocated area of 'left' */ - copy_characters(*p_left, left_len, right, 0, right_len); + _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); } else { maxchar = PyUnicode_MAX_CHAR_VALUE(left); @@ -10713,8 +10733,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) res = PyUnicode_New(new_len, maxchar); if (res == NULL) goto error; - copy_characters(res, 0, left, 0, left_len); - copy_characters(res, left_len, right, 0, right_len); + _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len); + _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); Py_DECREF(left); *p_left = res; } @@ -11650,7 +11670,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) length = end - start; if (PyUnicode_IS_ASCII(self)) { data = PyUnicode_1BYTE_DATA(self); - return unicode_fromascii(data + start, length); + return _PyUnicode_FromASCII((char*)(data + start), length); } else { kind = PyUnicode_KIND(self); @@ -12769,60 +12789,74 @@ unicode_endswith(PyObject *self, return PyBool_FromLong(result); } -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t pos; -} _PyUnicodeWriter ; - Py_LOCAL_INLINE(void) _PyUnicodeWriter_Update(_PyUnicodeWriter *writer) { + writer->size = PyUnicode_GET_LENGTH(writer->buffer); writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer); } -Py_LOCAL(int) -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) +void +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length) { - writer->pos = 0; - writer->buffer = PyUnicode_New(length, maxchar); - if (writer->buffer == NULL) - return -1; - _PyUnicodeWriter_Update(writer); - 
return 0; + memset(writer, 0, sizeof(*writer)); +#ifdef Py_DEBUG + writer->kind = 5; /* invalid kind */ +#endif + writer->min_length = Py_MAX(min_length, 100); + writer->flags.overallocate = (min_length > 0); } -Py_LOCAL_INLINE(int) -_PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) +int +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) { Py_ssize_t newlen; PyObject *newbuffer; + assert(length > 0); + if (length > PY_SSIZE_T_MAX - writer->pos) { PyErr_NoMemory(); return -1; } newlen = writer->pos + length; - if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { - /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; + if (writer->buffer == NULL) { + if (writer->flags.overallocate) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + if (newlen < writer->min_length) + newlen = writer->min_length; + } + writer->buffer = PyUnicode_New(newlen, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_Update(writer); + return 0; + } - if (maxchar > writer->maxchar) { + if (newlen > writer->size) { + if (writer->flags.overallocate) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + if (newlen < writer->min_length) + newlen = writer->min_length; + } + + if (maxchar > writer->maxchar || writer->flags.readonly) { /* resize + widen */ newbuffer = PyUnicode_New(newlen, maxchar); if (newbuffer == NULL) return -1; - PyUnicode_CopyCharacters(newbuffer, 0, - writer->buffer, 0, writer->pos); + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); Py_DECREF(writer->buffer); + writer->flags.readonly = 0; } else { newbuffer = resize_compact(writer->buffer, newlen); @@ -12833,25 +12867,76 @@ _PyUnicodeWriter_Prepare(_PyUnicodeWriter 
*writer, _PyUnicodeWriter_Update(writer); } else if (maxchar > writer->maxchar) { - if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) + assert(!writer->flags.readonly); + newbuffer = PyUnicode_New(writer->size, maxchar); + if (newbuffer == NULL) return -1; + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); + Py_DECREF(writer->buffer); + writer->buffer = newbuffer; _PyUnicodeWriter_Update(writer); } return 0; } -Py_LOCAL(PyObject *) +int +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) +{ + Py_UCS4 maxchar; + Py_ssize_t len; + + if (PyUnicode_READY(str) == -1) + return -1; + len = PyUnicode_GET_LENGTH(str); + if (len == 0) + return 0; + maxchar = PyUnicode_MAX_CHAR_VALUE(str); + if (maxchar > writer->maxchar || len > writer->size - writer->pos) { + if (writer->buffer == NULL && !writer->flags.overallocate) { + Py_INCREF(str); + writer->buffer = str; + _PyUnicodeWriter_Update(writer); + writer->flags.readonly = 1; + writer->size = 0; + writer->pos += len; + return 0; + } + if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1) + return -1; + } + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, 0, len); + writer->pos += len; + return 0; +} + +PyObject * _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { - if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { - Py_DECREF(writer->buffer); - return NULL; + if (writer->pos == 0) { + Py_XDECREF(writer->buffer); + Py_INCREF(unicode_empty); + return unicode_empty; + } + if (writer->flags.readonly) { + assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos); + return writer->buffer; + } + if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) { + PyObject *newbuffer; + newbuffer = resize_compact(writer->buffer, writer->pos); + if (newbuffer == NULL) { + Py_DECREF(writer->buffer); + return NULL; + } + writer->buffer = newbuffer; } assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); return writer->buffer; } -Py_LOCAL(void) 
+void _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) { Py_CLEAR(writer->buffer); @@ -12874,14 +12959,24 @@ The substitutions are identified by braces ('{' and '}')."); static PyObject * unicode__format__(PyObject* self, PyObject* args) { - PyObject *format_spec, *out; + PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - out = _PyUnicode_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); - return out; + if (PyUnicode_READY(self) == -1) + return NULL; + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyUnicode_FormatAdvancedWriter(&writer, + self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } PyDoc_STRVAR(p_format__doc__, @@ -13111,16 +13206,17 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) /* Returns a new reference to a PyUnicode object, or NULL on failure. */ -static PyObject * -formatfloat(PyObject *v, int flags, int prec, int type) +static int +formatfloat(PyObject *v, int flags, int prec, int type, + PyObject **p_output, _PyUnicodeWriter *writer) { char *p; - PyObject *result; double x; + Py_ssize_t len; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) - return NULL; + return -1; if (prec < 0) prec = 6; @@ -13128,10 +13224,20 @@ formatfloat(PyObject *v, int flags, int prec, int type) p = PyOS_double_to_string(x, type, prec, (flags & F_ALT) ? 
Py_DTSF_ALT : 0, NULL); if (p == NULL) - return NULL; - result = unicode_fromascii((unsigned char*)p, strlen(p)); + return -1; + len = strlen(p); + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return -1; + memcpy(writer->data + writer->pos * writer->kind, + p, + len); + writer->pos += len; + } + else + *p_output = _PyUnicode_FromASCII(p, len); PyMem_Free(p); - return result; + return 0; } /* formatlong() emulates the format codes d, u, o, x and X, and @@ -13267,7 +13373,7 @@ formatlong(PyObject *val, int flags, int prec, int type) } if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { PyObject *unicode; - unicode = unicode_fromascii((unsigned char *)buf, len); + unicode = _PyUnicode_FromASCII(buf, len); Py_DECREF(result); result = unicode; } @@ -13336,8 +13442,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) fmtcnt = PyUnicode_GET_LENGTH(uformat); fmtpos = 0; - if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, 127) < 0) - goto onError; + _PyUnicodeWriter_Init(&writer, fmtcnt + 100); if (PyTuple_Check(args)) { arglen = PyTuple_Size(args); @@ -13368,8 +13473,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1) goto onError; - copy_characters(writer.buffer, writer.pos, - uformat, nonfmtpos, sublen); + _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, + uformat, nonfmtpos, sublen); writer.pos += sublen; } else { @@ -13530,6 +13635,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) "incomplete format"); goto onError; } + if (fmtcnt == 0) + writer.flags.overallocate = 0; if (c == '%') { if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1) @@ -13539,7 +13646,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) continue; } - v = getnextarg(args, arglen, &argidx); if (v == NULL) goto onError; @@ -13552,6 +13658,13 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 's': case 'r': case 'a': + if (PyLong_CheckExact(v) && width == -1 && prec 
== -1) { + /* Fast path */ + if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) + goto onError; + goto nextarg; + } + if (PyUnicode_CheckExact(v) && c == 's') { temp = v; Py_INCREF(temp); @@ -13572,6 +13685,32 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 'o': case 'x': case 'X': + if (PyLong_CheckExact(v) + && width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + switch(c) + { + case 'd': + case 'i': + case 'u': + if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) + goto onError; + goto nextarg; + case 'x': + if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1) + goto onError; + goto nextarg; + case 'o': + if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1) + goto onError; + goto nextarg; + default: + break; + } + } + isnumok = 0; if (PyNumber_Check(v)) { PyObject *iobj=NULL; @@ -13611,10 +13750,20 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 'F': case 'g': case 'G': + if (width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (formatfloat(v, flags, prec, c, NULL, &writer) == -1) + goto onError; + goto nextarg; + } + sign = 1; if (flags & F_ZERO) fill = '0'; - temp = formatfloat(v, flags, prec, c); + if (formatfloat(v, flags, prec, c, &temp, NULL) == -1) + temp = NULL; break; case 'c': @@ -13622,6 +13771,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) Py_UCS4 ch = formatchar(v); if (ch == (Py_UCS4) -1) goto onError; + if (width == -1 && prec == -1) { + /* Fast path */ + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos += 1; + goto nextarg; + } temp = PyUnicode_FromOrdinal(ch); break; } @@ -13638,6 +13795,16 @@ PyUnicode_Format(PyObject *format, PyObject *args) if (temp == NULL) goto onError; assert (PyUnicode_Check(temp)); + + if (width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if 
(_PyUnicodeWriter_WriteStr(&writer, temp) == -1) + goto onError; + goto nextarg; + } + if (PyUnicode_READY(temp) == -1) { Py_CLEAR(temp); goto onError; @@ -13676,15 +13843,15 @@ PyUnicode_Format(PyObject *format, PyObject *args) if (!(flags & F_LJUST)) { if (sign) { if ((width-1) > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); } else { if (width > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); } } maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); - bufmaxchar = Py_MAX(bufmaxchar, maxchar); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); buflen = width; if (sign && len == width) @@ -13737,8 +13904,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) } } - copy_characters(writer.buffer, writer.pos, - temp, pindex, len); + _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, + temp, pindex, len); writer.pos += len; if (width > len) { sublen = width - len; @@ -13746,6 +13913,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) writer.pos += sublen; } +nextarg: if (dict && (argidx < arglen) && c != '%') { PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting"); diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index e1c00df9e6b..fdb587d073c 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -316,21 +316,28 @@ calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align, /* Do the padding, and return a pointer to where the caller-supplied content goes. */ static Py_ssize_t -fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars, +fill_padding(_PyUnicodeWriter *writer, + Py_ssize_t nchars, Py_UCS4 fill_char, Py_ssize_t n_lpadding, Py_ssize_t n_rpadding) { + Py_ssize_t pos; + /* Pad on left. 
*/ - if (n_lpadding) - PyUnicode_Fill(s, start, start + n_lpadding, fill_char); + if (n_lpadding) { + pos = writer->pos; + _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char); + } /* Pad on right. */ - if (n_rpadding) - PyUnicode_Fill(s, start + nchars + n_lpadding, - start + nchars + n_lpadding + n_rpadding, fill_char); + if (n_rpadding) { + pos = writer->pos + nchars + n_lpadding; + _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char); + } /* Pointer to the user content. */ - return start + n_lpadding; + writer->pos += n_lpadding; + return 0; } /************************************************************************/ @@ -541,7 +548,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, as determined in calc_number_widths(). Return -1 on error, or 0 on success. */ static int -fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, +fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char, @@ -549,36 +556,38 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, { /* Used to keep track of digits, decimal, and remainder. 
*/ Py_ssize_t d_pos = d_start; - unsigned int kind = PyUnicode_KIND(out); - void *data = PyUnicode_DATA(out); + const enum PyUnicode_Kind kind = writer->kind; + const void *data = writer->data; Py_ssize_t r; if (spec->n_lpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char); - pos += spec->n_lpadding; + _PyUnicode_FastFill(writer->buffer, + writer->pos, spec->n_lpadding, fill_char); + writer->pos += spec->n_lpadding; } if (spec->n_sign == 1) { - PyUnicode_WRITE(kind, data, pos++, spec->sign); + PyUnicode_WRITE(kind, data, writer->pos, spec->sign); + writer->pos++; } if (spec->n_prefix) { - if (PyUnicode_CopyCharacters(out, pos, - prefix, p_start, - spec->n_prefix) < 0) - return -1; + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + prefix, p_start, + spec->n_prefix); if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_prefix; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); assert (c <= 127); - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_prefix; + writer->pos += spec->n_prefix; } if (spec->n_spadding) { - PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char); - pos += spec->n_spadding; + _PyUnicode_FastFill(writer->buffer, + writer->pos, spec->n_spadding, fill_char); + writer->pos += spec->n_spadding; } /* Only for type 'c' special case, it has no digits. 
*/ @@ -594,7 +603,7 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, return -1; } r = _PyUnicode_InsertThousandsGrouping( - out, pos, + writer->buffer, writer->pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, @@ -609,34 +618,38 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_grouped_digits; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); if (c > 127) { PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); return -1; } - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_grouped_digits; + writer->pos += spec->n_grouped_digits; if (spec->n_decimal) { - if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0) - return -1; - pos += spec->n_decimal; + _PyUnicode_FastCopyCharacters( + writer->buffer, writer->pos, + locale->decimal_point, 0, spec->n_decimal); + writer->pos += spec->n_decimal; d_pos += 1; } if (spec->n_remainder) { - if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0) - return -1; - pos += spec->n_remainder; + _PyUnicode_FastCopyCharacters( + writer->buffer, writer->pos, + digits, d_pos, spec->n_remainder); + writer->pos += spec->n_remainder; d_pos += spec->n_remainder; } if (spec->n_rpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char); - pos += spec->n_rpadding; + _PyUnicode_FastFill(writer->buffer, + writer->pos, spec->n_rpadding, + fill_char); + writer->pos += spec->n_rpadding; } return 0; } @@ -707,17 +720,20 @@ free_locale_info(LocaleInfo *locale_info) /*********** string formatting ******************************************/ /************************************************************************/ -static PyObject * -format_string_internal(PyObject *value, const 
InternalFormatSpec *format) +static int +format_string_internal(PyObject *value, const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { Py_ssize_t lpad; Py_ssize_t rpad; Py_ssize_t total; - Py_ssize_t pos; - Py_ssize_t len = PyUnicode_GET_LENGTH(value); - PyObject *result = NULL; + Py_ssize_t len; + int result = -1; Py_UCS4 maxchar; + assert(PyUnicode_IS_READY(value)); + len = PyUnicode_GET_LENGTH(value); + /* sign is not allowed on strings */ if (format->sign != '\0') { PyErr_SetString(PyExc_ValueError, @@ -741,6 +757,11 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) goto done; } + if (format->width == -1 && format->precision == -1) { + /* Fast path */ + return _PyUnicodeWriter_WriteStr(writer, value); + } + /* if precision is specified, output no more that format.precision characters */ if (format->precision >= 0 && len >= format->precision) { @@ -754,21 +775,23 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format) maxchar = Py_MAX(maxchar, format->fill_char); /* allocate the resulting string */ - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) goto done; /* Write into that space. First the padding. */ - pos = fill_padding(result, 0, len, - format->fill_char=='\0'?' ':format->fill_char, - lpad, rpad); + result = fill_padding(writer, len, + format->fill_char=='\0'?' ':format->fill_char, + lpad, rpad); + if (result == -1) + goto done; /* Then the source string. 
*/ - if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0) - Py_CLEAR(result); + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + value, 0, len); + writer->pos += (len + rpad); + result = 0; done: - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -780,11 +803,11 @@ done: typedef PyObject* (*IntOrLongToString)(PyObject *value, int base); -static PyObject * -format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, - IntOrLongToString tostring) +static int +format_long_internal(PyObject *value, const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; PyObject *tmp = NULL; Py_ssize_t inumeric_chars; @@ -798,7 +821,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, Py_ssize_t prefix = 0; NumberFieldWidths spec; long x; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -872,13 +894,23 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, break; } + if (format->sign != '+' && format->sign != ' ' + && format->width == -1 + && format->type != 'X' && format->type != 'n' + && !format->thousands_separators + && PyLong_CheckExact(value)) + { + /* Fast path */ + return _PyLong_FormatWriter(writer, value, base, format->alternate); + } + /* The number of prefix chars is the same as the leading chars to skip */ if (format->alternate) n_prefix = leading_chars_to_skip; /* Do the hard part, converting to a string in a given base */ - tmp = tostring(value, base); + tmp = _PyLong_Format(value, base); if (tmp == NULL || PyUnicode_READY(tmp) == -1) goto done; @@ -914,23 +946,19 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, &locale, format, &maxchar); /* Allocate the memory. 
*/ - result = PyUnicode_New(n_total, maxchar); - if (!result) + if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - tmp, inumeric_chars, inumeric_chars + n_digits, - tmp, prefix, - format->fill_char == '\0' ? ' ' : format->fill_char, - &locale, format->type == 'X'); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + tmp, inumeric_chars, inumeric_chars + n_digits, + tmp, prefix, + format->fill_char == '\0' ? ' ' : format->fill_char, + &locale, format->type == 'X'); done: Py_XDECREF(tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -938,16 +966,11 @@ done: /*********** float formatting *******************************************/ /************************************************************************/ -static PyObject* -strtounicode(char *charbuffer, Py_ssize_t len) -{ - return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len); -} - /* much of this is taken from unicodeobject.c */ -static PyObject * +static int format_float_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; @@ -962,12 +985,11 @@ format_float_internal(PyObject *value, Py_ssize_t index; NumberFieldWidths spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; Py_UCS4 sign_char = '\0'; int float_type; /* Used to see if we have a nan, inf, or regular float. */ PyObject *unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1024,13 +1046,25 @@ format_float_internal(PyObject *value, /* Since there is no unicode version of PyOS_double_to_string, just use the 8 bit version and then convert to unicode. 
*/ - unicode_tmp = strtounicode(buf, n_digits); + unicode_tmp = _PyUnicode_FromASCII(buf, n_digits); + PyMem_Free(buf); if (unicode_tmp == NULL) goto done; - index = 0; + + if (format->sign != '+' && format->sign != ' ' + && format->width == -1 + && format->type != 'n' + && !format->thousands_separators) + { + /* Fast path */ + result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp); + Py_DECREF(unicode_tmp); + return result; + } /* Is a sign character present in the output? If so, remember it and skip it */ + index = 0; if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') { sign_char = '-'; ++index; @@ -1055,24 +1089,19 @@ format_float_internal(PyObject *value, &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - unicode_tmp, index, index + n_digits, - NULL, 0, - format->fill_char == '\0' ? ' ' : format->fill_char, - &locale, 0); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + unicode_tmp, index, index + n_digits, + NULL, 0, + format->fill_char == '\0' ? 
' ' : format->fill_char, + &locale, 0); done: - PyMem_Free(buf); Py_DECREF(unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -1080,9 +1109,10 @@ done: /*********** complex formatting *****************************************/ /************************************************************************/ -static PyObject * +static int format_complex_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { double re; double im; @@ -1106,11 +1136,10 @@ format_complex_internal(PyObject *value, NumberFieldWidths re_spec; NumberFieldWidths im_spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; - int rkind; + enum PyUnicode_Kind rkind; void *rdata; - Py_ssize_t index; Py_UCS4 re_sign_char = '\0'; Py_UCS4 im_sign_char = '\0'; int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ @@ -1122,7 +1151,6 @@ format_complex_internal(PyObject *value, Py_ssize_t total; PyObject *re_unicode_tmp = NULL; PyObject *im_unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1191,12 +1219,12 @@ format_complex_internal(PyObject *value, /* Since there is no unicode version of PyOS_double_to_string, just use the 8 bit version and then convert to unicode. 
*/ - re_unicode_tmp = strtounicode(re_buf, n_re_digits); + re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits); if (re_unicode_tmp == NULL) goto done; i_re = 0; - im_unicode_tmp = strtounicode(im_buf, n_im_digits); + im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits); if (im_unicode_tmp == NULL) goto done; i_im = 0; @@ -1261,47 +1289,49 @@ format_complex_internal(PyObject *value, if (lpad || rpad) maxchar = Py_MAX(maxchar, format->fill_char); - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) goto done; - rkind = PyUnicode_KIND(result); - rdata = PyUnicode_DATA(result); + rkind = writer->kind; + rdata = writer->data; /* Populate the memory. First, the padding. */ - index = fill_padding(result, 0, - n_re_total + n_im_total + 1 + add_parens * 2, - format->fill_char=='\0' ? ' ' : format->fill_char, - lpad, rpad); + result = fill_padding(writer, + n_re_total + n_im_total + 1 + add_parens * 2, + format->fill_char=='\0' ? 
' ' : format->fill_char, + lpad, rpad); + if (result == -1) + goto done; - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, '('); + if (add_parens) { + PyUnicode_WRITE(rkind, rdata, writer->pos, '('); + writer->pos++; + } if (!skip_re) { - err = fill_number(result, index, &re_spec, - re_unicode_tmp, i_re, i_re + n_re_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &re_spec, + re_unicode_tmp, i_re, i_re + n_re_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; - } - index += n_re_total; } - err = fill_number(result, index, &im_spec, - im_unicode_tmp, i_im, i_im + n_im_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &im_spec, + im_unicode_tmp, i_im, i_im + n_im_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; - } - index += n_im_total; - PyUnicode_WRITE(rkind, rdata, index++, 'j'); + PyUnicode_WRITE(rkind, rdata, writer->pos, 'j'); + writer->pos++; - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, ')'); + if (add_parens) { + PyUnicode_WRITE(rkind, rdata, writer->pos, ')'); + writer->pos++; + } + + writer->pos += rpad; done: PyMem_Free(re_buf); @@ -1309,61 +1339,79 @@ done: Py_XDECREF(re_unicode_tmp); Py_XDECREF(im_unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } /************************************************************************/ /*********** built in formatters ****************************************/ /************************************************************************/ -PyObject * -_PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +format_obj(PyObject *obj, _PyUnicodeWriter *writer) +{ + PyObject *str; + int err; + + str = PyObject_Str(obj); + if (str == NULL) + return -1; + err = _PyUnicodeWriter_WriteStr(writer, str); + Py_DECREF(str); + return err; +} + +int 
+_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end) { InternalFormatSpec format; - PyObject *result; + + assert(PyUnicode_Check(obj)); /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) - return PyObject_Str(obj); + if (start == end) { + if (PyUnicode_CheckExact(obj)) + return _PyUnicodeWriter_WriteStr(writer, obj); + else + return format_obj(obj, writer); + } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, 's', '<')) - return NULL; + return -1; /* type conversion? */ switch (format.type) { case 's': /* no type conversion needed, already a string. do the formatting */ - result = format_string_internal(obj, &format); - if (result != NULL) - assert(_PyUnicode_CheckConsistency(result, 1)); - break; + return format_string_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - result = NULL; + return -1; } - return result; } -static PyObject* -format_int_or_long(PyObject* obj, PyObject* format_spec, - Py_ssize_t start, Py_ssize_t end, - IntOrLongToString tostring) +int +_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end) { - PyObject *result = NULL; - PyObject *tmp = NULL; + PyObject *tmp = NULL, *str = NULL; InternalFormatSpec format; + int result = -1; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); - goto done; + if (PyLong_CheckExact(obj)) + return _PyLong_FormatWriter(writer, obj, 10, 0); + else + return format_obj(obj, writer); } /* parse the format_spec */ @@ -1382,7 +1430,7 @@ format_int_or_long(PyObject* obj, PyObject* format_spec, case 'n': /* no type conversion needed, already an int (or long). 
do the formatting */ - result = format_int_or_long_internal(obj, &format, tostring); + result = format_long_internal(obj, &format, writer); break; case 'e': @@ -1396,7 +1444,7 @@ format_int_or_long(PyObject* obj, PyObject* format_spec, tmp = PyNumber_Float(obj); if (tmp == NULL) goto done; - result = format_float_internal(tmp, &format); + result = format_float_internal(tmp, &format, writer); break; default: @@ -1407,41 +1455,27 @@ format_int_or_long(PyObject* obj, PyObject* format_spec, done: Py_XDECREF(tmp); + Py_XDECREF(str); return result; } -/* Need to define long_format as a function that will convert a long - to a string. In 3.0, _PyLong_Format has the correct signature. */ -#define long_format _PyLong_Format - -PyObject * -_PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end) { - return format_int_or_long(obj, format_spec, start, end, - long_format); -} - -PyObject * -_PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) -{ - PyObject *result = NULL; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) { - result = PyObject_Str(obj); - goto done; - } + if (start == end) + return format_obj(obj, writer); /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1455,38 +1489,32 @@ _PyFloat_FormatAdvanced(PyObject *obj, case 'n': case '%': /* no conversion, already a float. 
do the formatting */ - result = format_float_internal(obj, &format); - break; + return format_float_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; } -PyObject * -_PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end) { - PyObject *result = NULL; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) { - result = PyObject_Str(obj); - goto done; - } + if (start == end) + return format_obj(obj, writer); /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1499,15 +1527,11 @@ _PyComplex_FormatAdvanced(PyObject *obj, case 'G': case 'n': /* no conversion, already a complex. do the formatting */ - result = format_complex_internal(obj, &format); - break; + return format_complex_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; }