Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)

* Formatting string, int, float and complex use the _PyUnicodeWriter API. It avoids a temporary buffer in most cases. * Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just keep a reference to the string if the output is only composed of one string * Disable overallocation when formatting the last argument of str%args and str.format(args) * Overallocation allocates at least 100 characters: add min_length attribute to the _PyUnicodeWriter structure * Add new private functions: _PyUnicode_FastCopyCharacters(), _PyUnicode_FastFill() and _PyUnicode_FromASCII() The speed up is around 20% in average.
2012-05-29 12:57:52 +02:00 · 2012-05-29 12:57:52 +02:00 · d3f0882dfb
parent a1b0c9fc4d
commit d3f0882dfb
12 changed files with 878 additions and 437 deletions
--- a/Include/complexobject.h
+++ b/Include/complexobject.h
@ -63,7 +63,9 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op);
 /* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting). */
 #ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj,
+PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
    PyObject *format_spec,
    Py_ssize_t start,
    Py_ssize_t end);
--- a/Include/floatobject.h
+++ b/Include/floatobject.h
@ -112,7 +112,9 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void);

 /* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
+PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
    PyObject *format_spec,
    Py_ssize_t start,
    Py_ssize_t end);
--- a/Include/longobject.h
+++ b/Include/longobject.h
@ -151,11 +151,19 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,

 /* _PyLong_Format: Convert the long to a string object with given base,
   appending a base prefix of 0[box] if base is 2, 8 or 16. */
-PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base);
+PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base);
+
+PyAPI_FUNC(int) _PyLong_FormatWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
+    int base,
+    int alternate);

 /* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj,
+PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
    PyObject *format_spec,
    Py_ssize_t start,
    Py_ssize_t end);
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );
+
+/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
+   may crash if parameters are invalid (e.g. if the output string
+   is too short). */
+PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
+    PyObject *to,
+    Py_ssize_t to_start,
+    PyObject *from,
+    Py_ssize_t from_start,
+    Py_ssize_t how_many
+    );
 #endif

+#ifndef Py_LIMITED_API
 /* Fill a string with a character: write fill_char into
   unicode[start:start+length].

@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(

   Return the number of written character, or return -1 and raise an exception
   on error. */
-#ifndef Py_LIMITED_API
 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );
+
+/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
+   if parameters are invalid (e.g. if length is longer than the string). */
+PyAPI_FUNC(void) _PyUnicode_FastFill(
+    PyObject *unicode,
+    Py_ssize_t start,
+    Py_ssize_t length,
+    Py_UCS4 fill_char
+    );
 #endif

 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
    const char *u              /* UTF-8 encoded string */
    );

+#ifndef Py_LIMITED_API
 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
   Scan the string to find the maximum character. */
-#ifndef Py_LIMITED_API
 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
    int kind,
    const void *buffer,
    Py_ssize_t size);
+
+/* Create a new string from a buffer of ASCII characters.
+   WARNING: Don't check if the string contains any non-ASCII character. */
+PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
+    const char *buffer,
+    Py_ssize_t size);
 #endif

 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
@ -864,10 +890,67 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
    ...
    );

+#ifndef Py_LIMITED_API
+typedef struct {
+    PyObject *buffer;
+    void *data;
+    enum PyUnicode_Kind kind;
+    Py_UCS4 maxchar;
+    Py_ssize_t size;
+    Py_ssize_t pos;
+    /* minimum length of the buffer when overallocation is enabled,
+       see _PyUnicodeWriter_Init() */
+    Py_ssize_t min_length;
+    struct {
+        unsigned char overallocate:1;
+        /* If readonly is 1, buffer is a shared string (cannot be modified)
+           and size is set to 0. */
+        unsigned char readonly:1;
+    } flags;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+
+   If min_length is greater than zero, _PyUnicodeWriter_Prepare()
+   overallocates the buffer and min_length is the minimum length in characters
+   of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
+
+/* Prepare the buffer to write 'length' characters
+   with the specified maximum character.
+
+   Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
+    (((MAXCHAR) <= (WRITER)->maxchar                                  \
+      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
+     ? 0                                                              \
+     : (((LENGTH) == 0)                                               \
+        ? 0                                                           \
+        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+   instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+                                 Py_ssize_t length, Py_UCS4 maxchar);
+
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
+
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+#endif
+
 #ifndef Py_LIMITED_API
 /* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
    PyObject *format_spec,
    Py_ssize_t start,
    Py_ssize_t end);
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -13,6 +13,9 @@ Core and Builtins
 - Issue #14835: Make plistlib output empty arrays & dicts like OS X.
  Patch by Sidney San Martín.

+- Issue #14744: Use the new _PyUnicodeWriter internal API to speed up
+  str%args and str.format(args).
+
 - Issue #14930: Make memoryview objects weakrefable.

 - Issue #14775: Fix a potential quadratic dict build-up due to the garbage
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@ -699,11 +699,22 @@ static PyObject *
 complex__format__(PyObject* self, PyObject* args)
 {
    PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;

    if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
        return NULL;
-    return _PyComplex_FormatAdvanced(self, format_spec, 0,
-                                     PyUnicode_GET_LENGTH(format_spec));
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyComplex_FormatAdvancedWriter(
+        &writer,
+        self,
+        format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }

 #if 0
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@ -267,13 +267,15 @@ static PyObject *
 float_repr(PyFloatObject *v)
 {
    PyObject *result;
-    char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
+    char *buf;
+
+    buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
                                'r', 0,
                                Py_DTSF_ADD_DOT_0,
                                NULL);
    if (!buf)
        return PyErr_NoMemory();
-    result = PyUnicode_FromString(buf);
+    result = _PyUnicode_FromASCII(buf, strlen(buf));
    PyMem_Free(buf);
    return result;
 }
@ -1703,11 +1705,22 @@ static PyObject *
 float__format__(PyObject *self, PyObject *args)
 {
    PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;

    if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
        return NULL;
-    return _PyFloat_FormatAdvanced(self, format_spec, 0,
-                                   PyUnicode_GET_LENGTH(format_spec));
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyFloat_FormatAdvancedWriter(
+        &writer,
+        self,
+        format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }

 PyDoc_STRVAR(float__format__doc,
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@ -1550,20 +1550,22 @@ divrem1(PyLongObject *a, digit n, digit *prem)
   string.  (Return value is non-shared so that callers can modify the
   returned value if necessary.) */

-static PyObject *
-long_to_decimal_string(PyObject *aa)
+static int
+long_to_decimal_string_internal(PyObject *aa,
+                                PyObject **p_output,
+                                _PyUnicodeWriter *writer)
 {
    PyLongObject *scratch, *a;
    PyObject *str;
    Py_ssize_t size, strlen, size_a, i, j;
    digit *pout, *pin, rem, tenpow;
-    unsigned char *p;
    int negative;
+    enum PyUnicode_Kind kind;

    a = (PyLongObject *)aa;
    if (a == NULL || !PyLong_Check(a)) {
        PyErr_BadInternalCall();
-        return NULL;
+        return -1;
    }
    size_a = ABS(Py_SIZE(a));
    negative = Py_SIZE(a) < 0;
@ -1580,13 +1582,13 @@ long_to_decimal_string(PyObject *aa)
    if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
        PyErr_SetString(PyExc_OverflowError,
                        "long is too large to format");
-        return NULL;
+        return -1;
    }
    /* the expression size_a * PyLong_SHIFT is now safe from overflow */
    size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT);
    scratch = _PyLong_New(size);
    if (scratch == NULL)
-        return NULL;
+        return -1;

    /* convert array of base _PyLong_BASE digits in pin to an array of
       base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP,
@ -1609,7 +1611,7 @@ long_to_decimal_string(PyObject *aa)
        /* check for keyboard interrupt */
        SIGCHECK({
                Py_DECREF(scratch);
-                return NULL;
+                return -1;
            });
    }
    /* pout should have at least one digit, so that the case when a = 0
@ -1625,65 +1627,113 @@ long_to_decimal_string(PyObject *aa)
        tenpow *= 10;
        strlen++;
    }
+    if (writer) {
+        if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1)
+            return -1;
+        kind = writer->kind;
+        str = NULL;
+    }
+    else {
        str = PyUnicode_New(strlen, '9');
        if (str == NULL) {
            Py_DECREF(scratch);
-        return NULL;
+            return -1;
        }
+        kind = PyUnicode_KIND(str);
+    }
+
+#define WRITE_DIGITS(TYPE)                                            \
+    do {                                                              \
+        if (writer)                                                   \
+            p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
+        else                                                          \
+            p = (TYPE*)PyUnicode_DATA(str) + strlen;                  \
+                                                                      \
+        *p = '\0';                                                    \
+        /* pout[0] through pout[size-2] contribute exactly            \
+           _PyLong_DECIMAL_SHIFT digits each */                       \
+        for (i=0; i < size - 1; i++) {                                \
+            rem = pout[i];                                            \
+            for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) {             \
+                *--p = '0' + rem % 10;                                \
+                rem /= 10;                                            \
+            }                                                         \
+        }                                                             \
+        /* pout[size-1]: always produce at least one decimal digit */ \
+        rem = pout[i];                                                \
+        do {                                                          \
+            *--p = '0' + rem % 10;                                    \
+            rem /= 10;                                                \
+        } while (rem != 0);                                           \
+                                                                      \
+        /* and sign */                                                \
+        if (negative)                                                 \
+            *--p = '-';                                               \
+                                                                      \
+        /* check we've counted correctly */                           \
+        if (writer)                                                   \
+            assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
+        else                                                          \
+            assert(p == (TYPE*)PyUnicode_DATA(str));                  \
+    } while (0)

    /* fill the string right-to-left */
-    assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND);
-    p = PyUnicode_1BYTE_DATA(str) + strlen;
-    *p = '\0';
-    /* pout[0] through pout[size-2] contribute exactly
-       _PyLong_DECIMAL_SHIFT digits each */
-    for (i=0; i < size - 1; i++) {
-        rem = pout[i];
-        for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) {
-            *--p = '0' + rem % 10;
-            rem /= 10;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        Py_UCS1 *p;
+        WRITE_DIGITS(Py_UCS1);
    }
+    else if (kind == PyUnicode_2BYTE_KIND) {
+        Py_UCS2 *p;
+        WRITE_DIGITS(Py_UCS2);
    }
-    /* pout[size-1]: always produce at least one decimal digit */
-    rem = pout[i];
-    do {
-        *--p = '0' + rem % 10;
-        rem /= 10;
-    } while (rem != 0);
+    else {
+        assert (kind == PyUnicode_4BYTE_KIND);
+        Py_UCS4 *p;
+        WRITE_DIGITS(Py_UCS4);
+    }
+#undef WRITE_DIGITS

-    /* and sign */
-    if (negative)
-        *--p = '-';
-
-    /* check we've counted correctly */
-    assert(p == PyUnicode_1BYTE_DATA(str));
-    assert(_PyUnicode_CheckConsistency(str, 1));
    Py_DECREF(scratch);
-    return (PyObject *)str;
+    if (writer) {
+        writer->pos += strlen;
+    }
+    else {
+        assert(_PyUnicode_CheckConsistency(str, 1));
+        *p_output = (PyObject *)str;
+    }
+    return 0;
+}
+
+static PyObject *
+long_to_decimal_string(PyObject *aa)
+{
+    PyObject *v;
+    if (long_to_decimal_string_internal(aa, &v, NULL) == -1)
+        return NULL;
+    return v;
 }

 /* Convert a long int object to a string, using a given conversion base,
-   which should be one of 2, 8, 10 or 16.  Return a string object.
-   If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */
+   which should be one of 2, 8 or 16.  Return a string object.
+   If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'
+   if alternate is nonzero. */

-PyObject *
-_PyLong_Format(PyObject *aa, int base)
+static int
+long_format_binary(PyObject *aa, int base, int alternate,
+                   PyObject **p_output, _PyUnicodeWriter *writer)
 {
    register PyLongObject *a = (PyLongObject *)aa;
    PyObject *v;
    Py_ssize_t sz;
    Py_ssize_t size_a;
-    Py_UCS1 *p;
+    enum PyUnicode_Kind kind;
    int negative;
    int bits;

-    assert(base == 2 || base == 8 || base == 10 || base == 16);
-    if (base == 10)
-        return long_to_decimal_string((PyObject *)a);
-
+    assert(base == 2 || base == 8 || base == 16);
    if (a == NULL || !PyLong_Check(a)) {
        PyErr_BadInternalCall();
-        return NULL;
+        return -1;
    }
    size_a = ABS(Py_SIZE(a));
    negative = Py_SIZE(a) < 0;
@ -1706,7 +1756,7 @@ _PyLong_Format(PyObject *aa, int base)

    /* Compute exact length 'sz' of output string. */
    if (size_a == 0) {
-        sz = 3;
+        sz = 1;
    }
    else {
        Py_ssize_t size_a_in_bits;
@ -1714,56 +1764,126 @@ _PyLong_Format(PyObject *aa, int base)
        if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
            PyErr_SetString(PyExc_OverflowError,
                            "int is too large to format");
-            return NULL;
+            return -1;
        }
        size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
                         bits_in_digit(a->ob_digit[size_a - 1]);
-        /* Allow 2 characters for prefix and 1 for a '-' sign. */
-        sz = 2 + negative + (size_a_in_bits + (bits - 1)) / bits;
+        /* Allow 1 character for a '-' sign. */
+        sz = negative + (size_a_in_bits + (bits - 1)) / bits;
+    }
+    if (alternate) {
+        /* 2 characters for prefix  */
+        sz += 2;
    }

-    v = PyUnicode_New(sz, 'x');
-    if (v == NULL) {
-        return NULL;
-    }
-    assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-
-    p = PyUnicode_1BYTE_DATA(v) + sz;
-    if (size_a == 0) {
-        *--p = '0';
+    if (writer) {
+        if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1)
+            return -1;
+        kind = writer->kind;
+        v = NULL;
    }
    else {
-        /* JRH: special case for power-of-2 bases */
-        twodigits accum = 0;
-        int accumbits = 0;              /* # of bits in accum */
-        Py_ssize_t i;
-        for (i = 0; i < size_a; ++i) {
-            accum |= (twodigits)a->ob_digit[i] << accumbits;
-            accumbits += PyLong_SHIFT;
-            assert(accumbits >= bits);
-            do {
-                char cdigit;
-                cdigit = (char)(accum & (base - 1));
-                cdigit += (cdigit < 10) ? '0' : 'a'-10;
-                *--p = cdigit;
-                accumbits -= bits;
-                accum >>= bits;
-            } while (i < size_a-1 ? accumbits >= bits : accum > 0);
-        }
+        v = PyUnicode_New(sz, 'x');
+        if (v == NULL)
+            return -1;
+        kind = PyUnicode_KIND(v);
    }

-    if (base == 16)
-        *--p = 'x';
-    else if (base == 8)
-        *--p = 'o';
-    else /* (base == 2) */
-        *--p = 'b';
-    *--p = '0';
-    if (negative)
-        *--p = '-';
-    assert(p == PyUnicode_1BYTE_DATA(v));
+#define WRITE_DIGITS(TYPE)                                              \
+    do {                                                                \
+        if (writer)                                                     \
+            p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
+        else                                                            \
+            p = (TYPE*)PyUnicode_DATA(v) + sz;                          \
+                                                                        \
+        if (size_a == 0) {                                              \
+            *--p = '0';                                                 \
+        }                                                               \
+        else {                                                          \
+            /* JRH: special case for power-of-2 bases */                \
+            twodigits accum = 0;                                        \
+            int accumbits = 0;   /* # of bits in accum */               \
+            Py_ssize_t i;                                               \
+            for (i = 0; i < size_a; ++i) {                              \
+                accum |= (twodigits)a->ob_digit[i] << accumbits;        \
+                accumbits += PyLong_SHIFT;                              \
+                assert(accumbits >= bits);                              \
+                do {                                                    \
+                    char cdigit;                                        \
+                    cdigit = (char)(accum & (base - 1));                \
+                    cdigit += (cdigit < 10) ? '0' : 'a'-10;             \
+                    *--p = cdigit;                                      \
+                    accumbits -= bits;                                  \
+                    accum >>= bits;                                     \
+                } while (i < size_a-1 ? accumbits >= bits : accum > 0); \
+            }                                                           \
+        }                                                               \
+                                                                        \
+        if (alternate) {                                                \
+            if (base == 16)                                             \
+                *--p = 'x';                                             \
+            else if (base == 8)                                         \
+                *--p = 'o';                                             \
+            else /* (base == 2) */                                      \
+                *--p = 'b';                                             \
+            *--p = '0';                                                 \
+        }                                                               \
+        if (negative)                                                   \
+            *--p = '-';                                                 \
+        if (writer)                                                     \
+            assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
+        else                                                            \
+            assert(p == (TYPE*)PyUnicode_DATA(v));                      \
+    } while (0)
+
+    if (kind == PyUnicode_1BYTE_KIND) {
+        Py_UCS1 *p;
+        WRITE_DIGITS(Py_UCS1);
+    }
+    else if (kind == PyUnicode_2BYTE_KIND) {
+        Py_UCS2 *p;
+        WRITE_DIGITS(Py_UCS2);
+    }
+    else {
+        assert (kind == PyUnicode_4BYTE_KIND);
+        Py_UCS4 *p;
+        WRITE_DIGITS(Py_UCS4);
+    }
+#undef WRITE_DIGITS
+
+    if (writer) {
+        writer->pos += sz;
+    }
+    else {
        assert(_PyUnicode_CheckConsistency(v, 1));
-    return v;
+        *p_output = v;
+    }
+    return 0;
+}
+
+PyObject *
+_PyLong_Format(PyObject *obj, int base)
+{
+    PyObject *str;
+    int err;
+    if (base == 10)
+        err = long_to_decimal_string_internal(obj, &str, NULL);
+    else
+        err = long_format_binary(obj, base, 1, &str, NULL);
+    if (err == -1)
+        return NULL;
+    return str;
+}
+
+int
+_PyLong_FormatWriter(_PyUnicodeWriter *writer,
+                     PyObject *obj,
+                     int base, int alternate)
+{
+    if (base == 10)
+        return long_to_decimal_string_internal(obj, NULL, writer);
+    else
+        return long_format_binary(obj, base, alternate, NULL, writer);
 }

 /* Table of digit values for 8-bit string -> integer conversion.
@ -4232,11 +4352,22 @@ static PyObject *
 long__format__(PyObject *self, PyObject *args)
 {
    PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;

    if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
        return NULL;
-    return _PyLong_FormatAdvanced(self, format_spec, 0,
-                                  PyUnicode_GET_LENGTH(format_spec));
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyLong_FormatAdvancedWriter(
+        &writer,
+        self,
+        format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }

 /* Return a pair (q, r) such that a = b * q + r, and
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@ -18,7 +18,7 @@
 #define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
 #define STRINGLIB_STR            PyUnicode_1BYTE_DATA
 #define STRINGLIB_LEN            PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW            unicode_fromascii
+#define STRINGLIB_NEW(STR,LEN)   _PyUnicode_FromASCII((char*)(STR),(LEN))
 #define STRINGLIB_RESIZE         not_supported
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
    int ok = 0;
    PyObject *result = NULL;
    PyObject *format_spec_object = NULL;
-    PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
-    Py_ssize_t len;
+    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
+    int err;

    /* If we know the type exactly, skip the lookup of __format__ and just
       call the formatter directly. */
    if (PyUnicode_CheckExact(fieldobj))
-        formatter = _PyUnicode_FormatAdvanced;
+        formatter = _PyUnicode_FormatAdvancedWriter;
    else if (PyLong_CheckExact(fieldobj))
-        formatter =_PyLong_FormatAdvanced;
+        formatter = _PyLong_FormatAdvancedWriter;
    else if (PyFloat_CheckExact(fieldobj))
-        formatter = _PyFloat_FormatAdvanced;
-
-    /* XXX: for 2.6, convert format_spec to the appropriate type
-       (unicode, str) */
+        formatter = _PyFloat_FormatAdvancedWriter;
+    else if (PyComplex_CheckExact(fieldobj))
+        formatter = _PyComplex_FormatAdvancedWriter;

    if (formatter) {
        /* we know exactly which formatter will be called when __format__ is
           looked up, so call it directly, instead. */
-        result = formatter(fieldobj, format_spec->str,
+        err = formatter(writer, fieldobj, format_spec->str,
                        format_spec->start, format_spec->end);
+        return (err == 0);
    }
    else {
        /* We need to create an object out of the pointers we have, because
@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
    }
    if (result == NULL)
        goto done;
-    if (PyUnicode_READY(result) == -1)
-        goto done;

-    len = PyUnicode_GET_LENGTH(result);
-    if (_PyUnicodeWriter_Prepare(writer,
-                               len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
+    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
        goto done;
-    copy_characters(writer->buffer, writer->pos,
-                    result, 0, len);
-    writer->pos += len;
    ok = 1;
+
 done:
    Py_XDECREF(format_spec_object);
    Py_XDECREF(result);
@ -897,17 +891,20 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
            err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
            if (err == -1)
                return 0;
-            copy_characters(writer->buffer, writer->pos,
+            _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                          literal.str, literal.start, sublen);
            writer->pos += sublen;
        }

-        if (field_present)
+        if (field_present) {
+            if (iter.str.start == iter.str.end)
+                writer->flags.overallocate = 0;
            if (!output_markup(&field_name, &format_spec,
                               format_spec_needs_expanding, conversion, writer,
                               args, kwargs, recursion_depth, auto_number))
                return 0;
        }
+    }
    return result;
 }

@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number)
 {
    _PyUnicodeWriter writer;
-    Py_ssize_t initlen;
+    Py_ssize_t minlen;

    /* check the recursion level */
    if (recursion_depth <= 0) {
@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
        return NULL;
    }

-    initlen = PyUnicode_GET_LENGTH(input->str) + 100;
-    if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1)
-        return NULL;
+    minlen = PyUnicode_GET_LENGTH(input->str) + 100;
+    _PyUnicodeWriter_Init(&writer, minlen);

    if (!do_markup(input, args, kwargs, &writer, recursion_depth,
                   auto_number)) {
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -225,15 +225,9 @@ const unsigned char _Py_ascii_whitespace[] = {
 /* forward */
 static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
 static PyObject* get_latin1_char(unsigned char ch);
-static void copy_characters(
-    PyObject *to, Py_ssize_t to_start,
-    PyObject *from, Py_ssize_t from_start,
-    Py_ssize_t how_many);
 static int unicode_modifiable(PyObject *unicode);


-static PyObject *
-unicode_fromascii(const unsigned char *s, Py_ssize_t size);
 static PyObject *
 _PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size);
 static PyObject *
@ -783,7 +777,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
            return NULL;

        copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
-        copy_characters(copy, 0, unicode, 0, copy_length);
+        _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length);
        return copy;
    }
    else {
@ -1154,15 +1148,16 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
    assert(0 <= from_start);
    assert(0 <= to_start);
    assert(PyUnicode_Check(from));
-    assert(PyUnicode_Check(to));
    assert(PyUnicode_IS_READY(from));
-    assert(PyUnicode_IS_READY(to));
    assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
-    assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));

    if (how_many == 0)
        return 0;

+    assert(PyUnicode_Check(to));
+    assert(PyUnicode_IS_READY(to));
+    assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
+
    from_kind = PyUnicode_KIND(from);
    from_data = PyUnicode_DATA(from);
    to_kind = PyUnicode_KIND(to);
@ -1267,10 +1262,10 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
    return 0;
 }

-static void
-copy_characters(PyObject *to, Py_ssize_t to_start,
-                       PyObject *from, Py_ssize_t from_start,
-                       Py_ssize_t how_many)
+void
+_PyUnicode_FastCopyCharacters(
+    PyObject *to, Py_ssize_t to_start,
+    PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many)
 {
    (void)_copy_characters(to, to_start, from, from_start, how_many, 0);
 }
@ -1292,6 +1287,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
    if (PyUnicode_READY(to) == -1)
        return -1;

+    if (from_start < 0) {
+        PyErr_SetString(PyExc_IndexError, "string index out of range");
+        return -1;
+    }
+    if (to_start < 0) {
+        PyErr_SetString(PyExc_IndexError, "string index out of range");
+        return -1;
+    }
    how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
    if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
        PyErr_Format(PyExc_SystemError,
@ -1641,7 +1644,7 @@ unicode_widen(PyObject **p_unicode, Py_ssize_t length,
                           maxchar);
    if (result == NULL)
        return -1;
-    PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, length);
+    _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length);
    Py_DECREF(*p_unicode);
    *p_unicode = result;
    return 0;
@ -1841,9 +1844,10 @@ _PyUnicode_ClearStaticStrings()

 /* Internal function, doesn't check maximum character */

-static PyObject*
-unicode_fromascii(const unsigned char* s, Py_ssize_t size)
+PyObject*
+_PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
 {
+    const unsigned char *s = (const unsigned char *)buffer;
    PyObject *unicode;
    if (size == 1) {
 #ifdef Py_DEBUG
@ -2085,7 +2089,7 @@ unicode_adjust_maxchar(PyObject **p_unicode)
            return;
    }
    copy = PyUnicode_New(len, max_char);
-    copy_characters(copy, 0, unicode, 0, len);
+    _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
    Py_DECREF(unicode);
    *p_unicode = copy;
 }
@ -2753,7 +2757,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                (void) va_arg(vargs, char *);
                size = PyUnicode_GET_LENGTH(*callresult);
                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
-                copy_characters(string, i, *callresult, 0, size);
+                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
                i += size;
                /* We're done with the unicode()/repr() => forget it */
                Py_DECREF(*callresult);
@ -2767,7 +2771,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                Py_ssize_t size;
                assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
                size = PyUnicode_GET_LENGTH(obj);
-                copy_characters(string, i, obj, 0, size);
+                _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
                i += size;
                break;
            }
@ -2779,13 +2783,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                if (obj) {
                    size = PyUnicode_GET_LENGTH(obj);
                    assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
-                    copy_characters(string, i, obj, 0, size);
+                    _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
                    i += size;
                } else {
                    size = PyUnicode_GET_LENGTH(*callresult);
                    assert(PyUnicode_KIND(*callresult) <=
                           PyUnicode_KIND(string));
-                    copy_characters(string, i, *callresult, 0, size);
+                    _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
                    i += size;
                    Py_DECREF(*callresult);
                }
@ -2800,7 +2804,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                /* unused, since we already have the result */
                (void) va_arg(vargs, PyObject *);
                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
-                copy_characters(string, i, *callresult, 0,  size);
+                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0,  size);
                i += size;
                /* We're done with the unicode()/repr() => forget it */
                Py_DECREF(*callresult);
@ -4171,7 +4175,7 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
        if (unicode_widen(output, *outpos,
                          PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
            goto onError;
-        copy_characters(*output, *outpos, repunicode, 0, replen);
+        _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
        *outpos += replen;
    }
    else {
@ -9216,12 +9220,14 @@ fixup(PyObject *self,
        /* If the maxchar increased so that the kind changed, not all
           characters are representable anymore and we need to fix the
           string again. This only happens in very few cases. */
-        copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self));
+        _PyUnicode_FastCopyCharacters(v, 0,
+                                      self, 0, PyUnicode_GET_LENGTH(self));
        maxchar_old = fixfct(v);
        assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
    }
    else {
-        copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self));
+        _PyUnicode_FastCopyCharacters(v, 0,
+                                      u, 0, PyUnicode_GET_LENGTH(self));
    }
    Py_DECREF(u);
    assert(_PyUnicode_CheckConsistency(v, 1));
@ -9603,7 +9609,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
                res_data += kind * seplen;
            }
            else {
-                copy_characters(res, res_offset, sep, 0, seplen);
+                _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
                res_offset += seplen;
            }
        }
@ -9616,7 +9622,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
                res_data += kind * itemlen;
            }
            else {
-                copy_characters(res, res_offset, item, 0, itemlen);
+                _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
                res_offset += itemlen;
            }
        }
@ -9663,13 +9669,25 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
        } \
    } while (0)

+void
+_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
+                    Py_UCS4 fill_char)
+{
+    const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    const void *data = PyUnicode_DATA(unicode);
+    assert(PyUnicode_IS_READY(unicode));
+    assert(unicode_modifiable(unicode));
+    assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
+    assert(start >= 0);
+    assert(start + length <= PyUnicode_GET_LENGTH(unicode));
+    FILL(kind, data, fill_char, start, length);
+}
+
 Py_ssize_t
 PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
               Py_UCS4 fill_char)
 {
    Py_ssize_t maxlen;
-    enum PyUnicode_Kind kind;
-    void *data;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadInternalCall();
@ -9680,6 +9698,10 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
    if (unicode_check_modifiable(unicode))
        return -1;

+    if (start < 0) {
+        PyErr_SetString(PyExc_IndexError, "string index out of range");
+        return -1;
+    }
    if (fill_char > PyUnicode_MAX_CHAR_VALUE(unicode)) {
        PyErr_SetString(PyExc_ValueError,
                         "fill character is bigger than "
@ -9692,9 +9714,7 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
    if (length <= 0)
        return 0;

-    kind = PyUnicode_KIND(unicode);
-    data = PyUnicode_DATA(unicode);
-    FILL(kind, data, fill_char, start, length);
+    _PyUnicode_FastFill(unicode, start, length, fill_char);
    return length;
 }

@ -9734,7 +9754,7 @@ pad(PyObject *self,
        FILL(kind, data, fill, 0, left);
    if (right)
        FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
-    copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self));
+    _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
    assert(_PyUnicode_CheckConsistency(u, 1));
    return u;
 }
@ -10058,7 +10078,7 @@ replace(PyObject *self, PyObject *str1,
            u = PyUnicode_New(slen, maxchar);
            if (!u)
                goto error;
-            copy_characters(u, 0, self, 0, slen);
+            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
            rkind = PyUnicode_KIND(u);

            PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
@ -10626,8 +10646,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
    w = PyUnicode_New(new_len, maxchar);
    if (w == NULL)
        goto onError;
-    copy_characters(w, 0, u, 0, u_len);
-    copy_characters(w, u_len, v, 0, v_len);
+    _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len);
+    _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len);
    Py_DECREF(u);
    Py_DECREF(v);
    assert(_PyUnicode_CheckConsistency(w, 1));
@ -10702,7 +10722,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
            goto error;
        }
        /* copy 'right' into the newly allocated area of 'left' */
-        copy_characters(*p_left, left_len, right, 0, right_len);
+        _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
    }
    else {
        maxchar = PyUnicode_MAX_CHAR_VALUE(left);
@ -10713,8 +10733,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
        res = PyUnicode_New(new_len, maxchar);
        if (res == NULL)
            goto error;
-        copy_characters(res, 0, left, 0, left_len);
-        copy_characters(res, left_len, right, 0, right_len);
+        _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len);
+        _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len);
        Py_DECREF(left);
        *p_left = res;
    }
@ -11650,7 +11670,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
    length = end - start;
    if (PyUnicode_IS_ASCII(self)) {
        data = PyUnicode_1BYTE_DATA(self);
-        return unicode_fromascii(data + start, length);
+        return _PyUnicode_FromASCII((char*)(data + start), length);
    }
    else {
        kind = PyUnicode_KIND(self);
@ -12769,60 +12789,74 @@ unicode_endswith(PyObject *self,
    return PyBool_FromLong(result);
 }

-typedef struct {
-    PyObject *buffer;
-    void *data;
-    enum PyUnicode_Kind kind;
-    Py_UCS4 maxchar;
-    Py_ssize_t pos;
-} _PyUnicodeWriter ;
-
 Py_LOCAL_INLINE(void)
 _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
 {
+    writer->size = PyUnicode_GET_LENGTH(writer->buffer);
    writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
    writer->data = PyUnicode_DATA(writer->buffer);
    writer->kind = PyUnicode_KIND(writer->buffer);
 }

-Py_LOCAL(int)
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer,
-                    Py_ssize_t length, Py_UCS4 maxchar)
+void
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
 {
-    writer->pos = 0;
-    writer->buffer = PyUnicode_New(length, maxchar);
-    if (writer->buffer == NULL)
-        return -1;
-    _PyUnicodeWriter_Update(writer);
-    return 0;
+    memset(writer, 0, sizeof(*writer));
+#ifdef Py_DEBUG
+    writer->kind = 5;    /* invalid kind */
+#endif
+    writer->min_length = Py_MAX(min_length, 100);
+    writer->flags.overallocate = (min_length > 0);
 }

-Py_LOCAL_INLINE(int)
-_PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer,
+int
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                 Py_ssize_t length, Py_UCS4 maxchar)
 {
    Py_ssize_t newlen;
    PyObject *newbuffer;

+    assert(length > 0);
+
    if (length > PY_SSIZE_T_MAX - writer->pos) {
        PyErr_NoMemory();
        return -1;
    }
    newlen = writer->pos + length;

-    if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
+    if (writer->buffer == NULL) {
+        if (writer->flags.overallocate) {
            /* overallocate 25% to limit the number of resize */
            if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
                newlen += newlen / 4;
+            if (newlen < writer->min_length)
+                newlen = writer->min_length;
+        }
+        writer->buffer = PyUnicode_New(newlen, maxchar);
+        if (writer->buffer == NULL)
+            return -1;
+        _PyUnicodeWriter_Update(writer);
+        return 0;
+    }

-        if (maxchar > writer->maxchar) {
+    if (newlen > writer->size) {
+        if (writer->flags.overallocate) {
+            /* overallocate 25% to limit the number of resize */
+            if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
+                newlen += newlen / 4;
+            if (newlen < writer->min_length)
+                newlen = writer->min_length;
+        }
+
+        if (maxchar > writer->maxchar || writer->flags.readonly) {
            /* resize + widen */
            newbuffer = PyUnicode_New(newlen, maxchar);
            if (newbuffer == NULL)
                return -1;
-            PyUnicode_CopyCharacters(newbuffer, 0,
+            _PyUnicode_FastCopyCharacters(newbuffer, 0,
                                          writer->buffer, 0, writer->pos);
            Py_DECREF(writer->buffer);
+            writer->flags.readonly = 0;
        }
        else {
            newbuffer = resize_compact(writer->buffer, newlen);
@ -12833,25 +12867,76 @@ _PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer,
        _PyUnicodeWriter_Update(writer);
    }
    else if (maxchar > writer->maxchar) {
-        if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
+        assert(!writer->flags.readonly);
+        newbuffer = PyUnicode_New(writer->size, maxchar);
+        if (newbuffer == NULL)
            return -1;
+        _PyUnicode_FastCopyCharacters(newbuffer, 0,
+                                      writer->buffer, 0, writer->pos);
+        Py_DECREF(writer->buffer);
+        writer->buffer = newbuffer;
        _PyUnicodeWriter_Update(writer);
    }
    return 0;
 }

-Py_LOCAL(PyObject *)
+int
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
+{
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+    len = PyUnicode_GET_LENGTH(str);
+    if (len == 0)
+        return 0;
+    maxchar = PyUnicode_MAX_CHAR_VALUE(str);
+    if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
+        if (writer->buffer == NULL && !writer->flags.overallocate) {
+            Py_INCREF(str);
+            writer->buffer = str;
+            _PyUnicodeWriter_Update(writer);
+            writer->flags.readonly = 1;
+            writer->size = 0;
+            writer->pos += len;
+            return 0;
+        }
+        if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
+            return -1;
+    }
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, 0, len);
+    writer->pos += len;
+    return 0;
+}
+
+PyObject *
 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
 {
-    if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
+    if (writer->pos == 0) {
+        Py_XDECREF(writer->buffer);
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
+    }
+    if (writer->flags.readonly) {
+        assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
+        return writer->buffer;
+    }
+    if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
+        PyObject *newbuffer;
+        newbuffer = resize_compact(writer->buffer, writer->pos);
+        if (newbuffer == NULL) {
            Py_DECREF(writer->buffer);
            return NULL;
        }
+        writer->buffer = newbuffer;
+    }
    assert(_PyUnicode_CheckConsistency(writer->buffer, 1));
    return writer->buffer;
 }

-Py_LOCAL(void)
+void
 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
 {
    Py_CLEAR(writer->buffer);
@ -12874,14 +12959,24 @@ The substitutions are identified by braces ('{' and '}').");
 static PyObject *
 unicode__format__(PyObject* self, PyObject* args)
 {
-    PyObject *format_spec, *out;
+    PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;

    if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
        return NULL;

-    out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyUnicode_FormatAdvancedWriter(&writer,
+                                          self, format_spec, 0,
                                          PyUnicode_GET_LENGTH(format_spec));
-    return out;
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }

 PyDoc_STRVAR(p_format__doc__,
@ -13111,16 +13206,17 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)

 /* Returns a new reference to a PyUnicode object, or NULL on failure. */

-static PyObject *
-formatfloat(PyObject *v, int flags, int prec, int type)
+static int
+formatfloat(PyObject *v, int flags, int prec, int type,
+            PyObject **p_output, _PyUnicodeWriter *writer)
 {
    char *p;
-    PyObject *result;
    double x;
+    Py_ssize_t len;

    x = PyFloat_AsDouble(v);
    if (x == -1.0 && PyErr_Occurred())
-        return NULL;
+        return -1;

    if (prec < 0)
        prec = 6;
@ -13128,10 +13224,20 @@ formatfloat(PyObject *v, int flags, int prec, int type)
    p = PyOS_double_to_string(x, type, prec,
                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
    if (p == NULL)
-        return NULL;
-    result = unicode_fromascii((unsigned char*)p, strlen(p));
+        return -1;
+    len = strlen(p);
+    if (writer) {
+        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+            return -1;
+        memcpy(writer->data + writer->pos * writer->kind,
+               p,
+               len);
+        writer->pos += len;
+    }
+    else
+        *p_output = _PyUnicode_FromASCII(p, len);
    PyMem_Free(p);
-    return result;
+    return 0;
 }

 /* formatlong() emulates the format codes d, u, o, x and X, and
@ -13267,7 +13373,7 @@ formatlong(PyObject *val, int flags, int prec, int type)
    }
    if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
        PyObject *unicode;
-        unicode = unicode_fromascii((unsigned char *)buf, len);
+        unicode = _PyUnicode_FromASCII(buf, len);
        Py_DECREF(result);
        result = unicode;
    }
@ -13336,8 +13442,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
    fmtcnt = PyUnicode_GET_LENGTH(uformat);
    fmtpos = 0;

-    if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, 127) < 0)
-        goto onError;
+    _PyUnicodeWriter_Init(&writer, fmtcnt + 100);

    if (PyTuple_Check(args)) {
        arglen = PyTuple_Size(args);
@ -13368,7 +13473,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1)
                goto onError;

-            copy_characters(writer.buffer, writer.pos,
+            _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
                                          uformat, nonfmtpos, sublen);
            writer.pos += sublen;
        }
@ -13530,6 +13635,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                                "incomplete format");
                goto onError;
            }
+            if (fmtcnt == 0)
+                writer.flags.overallocate = 0;

            if (c == '%') {
                if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
@ -13539,7 +13646,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                continue;
            }

-
            v = getnextarg(args, arglen, &argidx);
            if (v == NULL)
                goto onError;
@ -13552,6 +13658,13 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            case 's':
            case 'r':
            case 'a':
+                if (PyLong_CheckExact(v) && width == -1 && prec == -1) {
+                    /* Fast path */
+                    if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
+                        goto onError;
+                    goto nextarg;
+                }
+
                if (PyUnicode_CheckExact(v) && c == 's') {
                    temp = v;
                    Py_INCREF(temp);
@ -13572,6 +13685,32 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            case 'o':
            case 'x':
            case 'X':
+                if (PyLong_CheckExact(v)
+                    && width == -1 && prec == -1
+                    && !(flags & (F_SIGN | F_BLANK)))
+                {
+                    /* Fast path */
+                    switch(c)
+                    {
+                    case 'd':
+                    case 'i':
+                    case 'u':
+                        if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
+                            goto onError;
+                        goto nextarg;
+                    case 'x':
+                        if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
+                            goto onError;
+                        goto nextarg;
+                    case 'o':
+                        if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
+                            goto onError;
+                        goto nextarg;
+                    default:
+                        break;
+                    }
+                }
+
                isnumok = 0;
                if (PyNumber_Check(v)) {
                    PyObject *iobj=NULL;
@ -13611,10 +13750,20 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            case 'F':
            case 'g':
            case 'G':
+                if (width == -1 && prec == -1
+                    && !(flags & (F_SIGN | F_BLANK)))
+                {
+                    /* Fast path */
+                    if (formatfloat(v, flags, prec, c, NULL, &writer) == -1)
+                        goto onError;
+                    goto nextarg;
+                }
+
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
-                temp = formatfloat(v, flags, prec, c);
+                if (formatfloat(v, flags, prec, c, &temp, NULL) == -1)
+                    temp = NULL;
                break;

            case 'c':
@ -13622,6 +13771,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                Py_UCS4 ch = formatchar(v);
                if (ch == (Py_UCS4) -1)
                    goto onError;
+                if (width == -1 && prec == -1) {
+                    /* Fast path */
+                    if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1)
+                        goto onError;
+                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
+                    writer.pos += 1;
+                    goto nextarg;
+                }
                temp = PyUnicode_FromOrdinal(ch);
                break;
            }
@ -13638,6 +13795,16 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            if (temp == NULL)
                goto onError;
            assert (PyUnicode_Check(temp));
+
+            if (width == -1 && prec == -1
+                && !(flags & (F_SIGN | F_BLANK)))
+            {
+                /* Fast path */
+                if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1)
+                    goto onError;
+                goto nextarg;
+            }
+
            if (PyUnicode_READY(temp) == -1) {
                Py_CLEAR(temp);
                goto onError;
@ -13676,15 +13843,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
            if (!(flags & F_LJUST)) {
                if (sign) {
                    if ((width-1) > len)
-                        bufmaxchar = Py_MAX(bufmaxchar, fill);
+                        bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
                }
                else {
                    if (width > len)
-                        bufmaxchar = Py_MAX(bufmaxchar, fill);
+                        bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
                }
            }
            maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
-            bufmaxchar = Py_MAX(bufmaxchar, maxchar);
+            bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar);

            buflen = width;
            if (sign && len == width)
@ -13737,7 +13904,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                }
            }

-            copy_characters(writer.buffer, writer.pos,
+            _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
                                          temp, pindex, len);
            writer.pos += len;
            if (width > len) {
@ -13746,6 +13913,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
                writer.pos += sublen;
            }

+nextarg:
            if (dict && (argidx < arglen) && c != '%') {
                PyErr_SetString(PyExc_TypeError,
                                "not all arguments converted during string formatting");
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@ -316,21 +316,28 @@ calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
 /* Do the padding, and return a pointer to where the caller-supplied
   content goes. */
 static Py_ssize_t
-fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars,
+fill_padding(_PyUnicodeWriter *writer,
+             Py_ssize_t nchars,
             Py_UCS4 fill_char, Py_ssize_t n_lpadding,
             Py_ssize_t n_rpadding)
 {
+    Py_ssize_t pos;
+
    /* Pad on left. */
-    if (n_lpadding)
-        PyUnicode_Fill(s, start, start + n_lpadding, fill_char);
+    if (n_lpadding) {
+        pos = writer->pos;
+        _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
+    }

    /* Pad on right. */
-    if (n_rpadding)
-        PyUnicode_Fill(s, start + nchars + n_lpadding,
-                       start + nchars + n_lpadding + n_rpadding, fill_char);
+    if (n_rpadding) {
+        pos = writer->pos + nchars + n_lpadding;
+        _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
+    }

    /* Pointer to the user content. */
-    return start + n_lpadding;
+    writer->pos += n_lpadding;
+    return 0;
 }

 /************************************************************************/
@ -541,7 +548,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
   as determined in calc_number_widths().
   Return -1 on error, or 0 on success. */
 static int
-fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
+fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
            PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
            PyObject *prefix, Py_ssize_t p_start,
            Py_UCS4 fill_char,
@ -549,36 +556,38 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
 {
    /* Used to keep track of digits, decimal, and remainder. */
    Py_ssize_t d_pos = d_start;
-    unsigned int kind = PyUnicode_KIND(out);
-    void *data = PyUnicode_DATA(out);
+    const enum PyUnicode_Kind kind = writer->kind;
+    const void *data = writer->data;
    Py_ssize_t r;

    if (spec->n_lpadding) {
-        PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char);
-        pos += spec->n_lpadding;
+        _PyUnicode_FastFill(writer->buffer,
+                            writer->pos, spec->n_lpadding, fill_char);
+        writer->pos += spec->n_lpadding;
    }
    if (spec->n_sign == 1) {
-        PyUnicode_WRITE(kind, data, pos++, spec->sign);
+        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
+        writer->pos++;
    }
    if (spec->n_prefix) {
-        if (PyUnicode_CopyCharacters(out, pos,
+        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                      prefix, p_start,
-                                     spec->n_prefix) < 0)
-            return -1;
+                                      spec->n_prefix);
        if (toupper) {
            Py_ssize_t t;
            for (t = 0; t < spec->n_prefix; t++) {
-                Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
+                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
                c = Py_TOUPPER(c);
                assert (c <= 127);
-                PyUnicode_WRITE(kind, data, pos + t, c);
+                PyUnicode_WRITE(kind, data, writer->pos + t, c);
            }
        }
-        pos += spec->n_prefix;
+        writer->pos += spec->n_prefix;
    }
    if (spec->n_spadding) {
-        PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char);
-        pos += spec->n_spadding;
+        _PyUnicode_FastFill(writer->buffer,
+                            writer->pos, spec->n_spadding, fill_char);
+        writer->pos += spec->n_spadding;
    }

    /* Only for type 'c' special case, it has no digits. */
@ -594,7 +603,7 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
                return -1;
        }
        r = _PyUnicode_InsertThousandsGrouping(
-                out, pos,
+                writer->buffer, writer->pos,
                spec->n_grouped_digits,
                pdigits + kind * d_pos,
                spec->n_digits, spec->n_min_width,
@ -609,34 +618,38 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
    if (toupper) {
        Py_ssize_t t;
        for (t = 0; t < spec->n_grouped_digits; t++) {
-            Py_UCS4 c = PyUnicode_READ(kind, data, pos + t);
+            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
            c = Py_TOUPPER(c);
            if (c > 127) {
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
                return -1;
            }
-            PyUnicode_WRITE(kind, data, pos + t, c);
+            PyUnicode_WRITE(kind, data, writer->pos + t, c);
        }
    }
-    pos += spec->n_grouped_digits;
+    writer->pos += spec->n_grouped_digits;

    if (spec->n_decimal) {
-        if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0)
-            return -1;
-        pos += spec->n_decimal;
+        _PyUnicode_FastCopyCharacters(
+            writer->buffer, writer->pos,
+            locale->decimal_point, 0, spec->n_decimal);
+        writer->pos += spec->n_decimal;
        d_pos += 1;
    }

    if (spec->n_remainder) {
-        if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0)
-            return -1;
-        pos += spec->n_remainder;
+        _PyUnicode_FastCopyCharacters(
+            writer->buffer, writer->pos,
+            digits, d_pos, spec->n_remainder);
+        writer->pos += spec->n_remainder;
        d_pos += spec->n_remainder;
    }

    if (spec->n_rpadding) {
-        PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char);
-        pos += spec->n_rpadding;
+        _PyUnicode_FastFill(writer->buffer,
+                            writer->pos, spec->n_rpadding,
+                            fill_char);
+        writer->pos += spec->n_rpadding;
    }
    return 0;
 }
@ -707,17 +720,20 @@ free_locale_info(LocaleInfo *locale_info)
 /*********** string formatting ******************************************/
 /************************************************************************/

-static PyObject *
-format_string_internal(PyObject *value, const InternalFormatSpec *format)
+static int
+format_string_internal(PyObject *value, const InternalFormatSpec *format,
+                       _PyUnicodeWriter *writer)
 {
    Py_ssize_t lpad;
    Py_ssize_t rpad;
    Py_ssize_t total;
-    Py_ssize_t pos;
-    Py_ssize_t len = PyUnicode_GET_LENGTH(value);
-    PyObject *result = NULL;
+    Py_ssize_t len;
+    int result = -1;
    Py_UCS4 maxchar;

+    assert(PyUnicode_IS_READY(value));
+    len = PyUnicode_GET_LENGTH(value);
+
    /* sign is not allowed on strings */
    if (format->sign != '\0') {
        PyErr_SetString(PyExc_ValueError,
@ -741,6 +757,11 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
        goto done;
    }

+    if (format->width == -1 && format->precision == -1) {
+        /* Fast path */
+        return _PyUnicodeWriter_WriteStr(writer, value);
+    }
+
    /* if precision is specified, output no more that format.precision
       characters */
    if (format->precision >= 0 && len >= format->precision) {
@ -754,21 +775,23 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
        maxchar = Py_MAX(maxchar, format->fill_char);

    /* allocate the resulting string */
-    result = PyUnicode_New(total, maxchar);
-    if (result == NULL)
+    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;

    /* Write into that space. First the padding. */
-    pos = fill_padding(result, 0, len,
+    result = fill_padding(writer, len,
                          format->fill_char=='\0'?' ':format->fill_char,
                          lpad, rpad);
+    if (result == -1)
+        goto done;

    /* Then the source string. */
-    if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0)
-        Py_CLEAR(result);
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  value, 0, len);
+    writer->pos += (len + rpad);
+    result = 0;

 done:
-    assert(!result || _PyUnicode_CheckConsistency(result, 1));
    return result;
 }

@ -780,11 +803,11 @@ done:
 typedef PyObject*
 (*IntOrLongToString)(PyObject *value, int base);

-static PyObject *
-format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
-                            IntOrLongToString tostring)
+static int
+format_long_internal(PyObject *value, const InternalFormatSpec *format,
+                     _PyUnicodeWriter *writer)
 {
-    PyObject *result = NULL;
+    int result = -1;
    Py_UCS4 maxchar = 127;
    PyObject *tmp = NULL;
    Py_ssize_t inumeric_chars;
@ -798,7 +821,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
    Py_ssize_t prefix = 0;
    NumberFieldWidths spec;
    long x;
-    int err;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
@ -872,13 +894,23 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
            break;
        }

+        if (format->sign != '+' && format->sign != ' '
+            && format->width == -1
+            && format->type != 'X' && format->type != 'n'
+            && !format->thousands_separators
+            && PyLong_CheckExact(value))
+        {
+            /* Fast path */
+            return _PyLong_FormatWriter(writer, value, base, format->alternate);
+        }
+
        /* The number of prefix chars is the same as the leading
           chars to skip */
        if (format->alternate)
            n_prefix = leading_chars_to_skip;

        /* Do the hard part, converting to a string in a given base */
-        tmp = tostring(value, base);
+        tmp = _PyLong_Format(value, base);
        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
            goto done;

@ -914,23 +946,19 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                                 &locale, format, &maxchar);

    /* Allocate the memory. */
-    result = PyUnicode_New(n_total, maxchar);
-    if (!result)
+    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
        goto done;

    /* Populate the memory. */
-    err = fill_number(result, 0, &spec,
+    result = fill_number(writer, &spec,
                         tmp, inumeric_chars, inumeric_chars + n_digits,
                         tmp, prefix,
                         format->fill_char == '\0' ? ' ' : format->fill_char,
                         &locale, format->type == 'X');
-    if (err)
-        Py_CLEAR(result);

 done:
    Py_XDECREF(tmp);
    free_locale_info(&locale);
-    assert(!result || _PyUnicode_CheckConsistency(result, 1));
    return result;
 }

@ -938,16 +966,11 @@ done:
 /*********** float formatting *******************************************/
 /************************************************************************/

-static PyObject*
-strtounicode(char *charbuffer, Py_ssize_t len)
-{
-    return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
-}
-
 /* much of this is taken from unicodeobject.c */
-static PyObject *
+static int
 format_float_internal(PyObject *value,
-                      const InternalFormatSpec *format)
+                      const InternalFormatSpec *format,
+                      _PyUnicodeWriter *writer)
 {
    char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
    Py_ssize_t n_digits;
@ -962,12 +985,11 @@ format_float_internal(PyObject *value,
    Py_ssize_t index;
    NumberFieldWidths spec;
    int flags = 0;
-    PyObject *result = NULL;
+    int result = -1;
    Py_UCS4 maxchar = 127;
    Py_UCS4 sign_char = '\0';
    int float_type; /* Used to see if we have a nan, inf, or regular float. */
    PyObject *unicode_tmp = NULL;
-    int err;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
@ -1024,13 +1046,25 @@ format_float_internal(PyObject *value,

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
-    unicode_tmp = strtounicode(buf, n_digits);
+    unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
+    PyMem_Free(buf);
    if (unicode_tmp == NULL)
        goto done;
-    index = 0;
+
+    if (format->sign != '+' && format->sign != ' '
+        && format->width == -1
+        && format->type != 'n'
+        && !format->thousands_separators)
+    {
+        /* Fast path */
+        result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp);
+        Py_DECREF(unicode_tmp);
+        return result;
+    }

    /* Is a sign character present in the output?  If so, remember it
       and skip it */
+    index = 0;
    if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
        sign_char = '-';
        ++index;
@ -1055,24 +1089,19 @@ format_float_internal(PyObject *value,
                                 &locale, format, &maxchar);

    /* Allocate the memory. */
-    result = PyUnicode_New(n_total, maxchar);
-    if (result == NULL)
+    if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
        goto done;

    /* Populate the memory. */
-    err = fill_number(result, 0, &spec,
+    result = fill_number(writer, &spec,
                         unicode_tmp, index, index + n_digits,
                         NULL, 0,
                         format->fill_char == '\0' ? ' ' : format->fill_char,
                         &locale, 0);
-    if (err)
-        Py_CLEAR(result);

 done:
-    PyMem_Free(buf);
    Py_DECREF(unicode_tmp);
    free_locale_info(&locale);
-    assert(!result || _PyUnicode_CheckConsistency(result, 1));
    return result;
 }

@ -1080,9 +1109,10 @@ done:
 /*********** complex formatting *****************************************/
 /************************************************************************/

-static PyObject *
+static int
 format_complex_internal(PyObject *value,
-                        const InternalFormatSpec *format)
+                        const InternalFormatSpec *format,
+                        _PyUnicodeWriter *writer)
 {
    double re;
    double im;
@ -1106,11 +1136,10 @@ format_complex_internal(PyObject *value,
    NumberFieldWidths re_spec;
    NumberFieldWidths im_spec;
    int flags = 0;
-    PyObject *result = NULL;
+    int result = -1;
    Py_UCS4 maxchar = 127;
-    int rkind;
+    enum PyUnicode_Kind rkind;
    void *rdata;
-    Py_ssize_t index;
    Py_UCS4 re_sign_char = '\0';
    Py_UCS4 im_sign_char = '\0';
    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
@ -1122,7 +1151,6 @@ format_complex_internal(PyObject *value,
    Py_ssize_t total;
    PyObject *re_unicode_tmp = NULL;
    PyObject *im_unicode_tmp = NULL;
-    int err;

    /* Locale settings, either from the actual locale or
       from a hard-code pseudo-locale */
@ -1191,12 +1219,12 @@ format_complex_internal(PyObject *value,

    /* Since there is no unicode version of PyOS_double_to_string,
       just use the 8 bit version and then convert to unicode. */
-    re_unicode_tmp = strtounicode(re_buf, n_re_digits);
+    re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    if (re_unicode_tmp == NULL)
        goto done;
    i_re = 0;

-    im_unicode_tmp = strtounicode(im_buf, n_im_digits);
+    im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    if (im_unicode_tmp == NULL)
        goto done;
    i_im = 0;
@ -1261,47 +1289,49 @@ format_complex_internal(PyObject *value,
    if (lpad || rpad)
        maxchar = Py_MAX(maxchar, format->fill_char);

-    result = PyUnicode_New(total, maxchar);
-    if (result == NULL)
+    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
        goto done;
-    rkind = PyUnicode_KIND(result);
-    rdata = PyUnicode_DATA(result);
+    rkind = writer->kind;
+    rdata = writer->data;

    /* Populate the memory. First, the padding. */
-    index = fill_padding(result, 0,
+    result = fill_padding(writer,
                          n_re_total + n_im_total + 1 + add_parens * 2,
                          format->fill_char=='\0' ? ' ' : format->fill_char,
                          lpad, rpad);
+    if (result == -1)
+        goto done;

-    if (add_parens)
-        PyUnicode_WRITE(rkind, rdata, index++, '(');
+    if (add_parens) {
+        PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
+        writer->pos++;
+    }

    if (!skip_re) {
-        err = fill_number(result, index, &re_spec,
+        result = fill_number(writer, &re_spec,
                             re_unicode_tmp, i_re, i_re + n_re_digits,
                             NULL, 0,
                             0,
                             &locale, 0);
-        if (err) {
-            Py_CLEAR(result);
+        if (result == -1)
            goto done;
    }
-        index += n_re_total;
-    }
-    err = fill_number(result, index, &im_spec,
+    result = fill_number(writer, &im_spec,
                         im_unicode_tmp, i_im, i_im + n_im_digits,
                         NULL, 0,
                         0,
                         &locale, 0);
-    if (err) {
-        Py_CLEAR(result);
+    if (result == -1)
        goto done;
-    }
-    index += n_im_total;
-    PyUnicode_WRITE(rkind, rdata, index++, 'j');
+    PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
+    writer->pos++;

-    if (add_parens)
-        PyUnicode_WRITE(rkind, rdata, index++, ')');
+    if (add_parens) {
+        PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
+        writer->pos++;
+    }
+
+    writer->pos += rpad;

 done:
    PyMem_Free(re_buf);
@ -1309,61 +1339,79 @@ done:
    Py_XDECREF(re_unicode_tmp);
    Py_XDECREF(im_unicode_tmp);
    free_locale_info(&locale);
-    assert(!result || _PyUnicode_CheckConsistency(result, 1));
    return result;
 }

 /************************************************************************/
 /*********** built in formatters ****************************************/
 /************************************************************************/
-PyObject *
-_PyUnicode_FormatAdvanced(PyObject *obj,
+int
+format_obj(PyObject *obj, _PyUnicodeWriter *writer)
+{
+    PyObject *str;
+    int err;
+
+    str = PyObject_Str(obj);
+    if (str == NULL)
+        return -1;
+    err = _PyUnicodeWriter_WriteStr(writer, str);
+    Py_DECREF(str);
+    return err;
+}
+
+int
+_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
+                                PyObject *obj,
                                PyObject *format_spec,
                                Py_ssize_t start, Py_ssize_t end)
 {
    InternalFormatSpec format;
-    PyObject *result;
+
+    assert(PyUnicode_Check(obj));

    /* check for the special case of zero length format spec, make
       it equivalent to str(obj) */
-    if (start == end)
-        return PyObject_Str(obj);
+    if (start == end) {
+        if (PyUnicode_CheckExact(obj))
+            return _PyUnicodeWriter_WriteStr(writer, obj);
+        else
+            return format_obj(obj, writer);
+    }

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec, start, end,
                                           &format, 's', '<'))
-        return NULL;
+        return -1;

    /* type conversion? */
    switch (format.type) {
    case 's':
        /* no type conversion needed, already a string.  do the formatting */
-        result = format_string_internal(obj, &format);
-        if (result != NULL)
-            assert(_PyUnicode_CheckConsistency(result, 1));
-        break;
+        return format_string_internal(obj, &format, writer);
    default:
        /* unknown */
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
-        result = NULL;
+        return -1;
    }
-    return result;
 }

-static PyObject*
-format_int_or_long(PyObject* obj, PyObject* format_spec,
-                   Py_ssize_t start, Py_ssize_t end,
-                   IntOrLongToString tostring)
+int
+_PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
+                             PyObject *obj,
+                             PyObject *format_spec,
+                             Py_ssize_t start, Py_ssize_t end)
 {
-    PyObject *result = NULL;
-    PyObject *tmp = NULL;
+    PyObject *tmp = NULL, *str = NULL;
    InternalFormatSpec format;
+    int result = -1;

    /* check for the special case of zero length format spec, make
       it equivalent to str(obj) */
    if (start == end) {
-        result = PyObject_Str(obj);
-        goto done;
+        if (PyLong_CheckExact(obj))
+            return _PyLong_FormatWriter(writer, obj, 10, 0);
+        else
+            return format_obj(obj, writer);
    }

    /* parse the format_spec */
@ -1382,7 +1430,7 @@ format_int_or_long(PyObject* obj, PyObject* format_spec,
    case 'n':
        /* no type conversion needed, already an int (or long).  do
           the formatting */
-            result = format_int_or_long_internal(obj, &format, tostring);
+        result = format_long_internal(obj, &format, writer);
        break;

    case 'e':
@ -1396,7 +1444,7 @@ format_int_or_long(PyObject* obj, PyObject* format_spec,
        tmp = PyNumber_Float(obj);
        if (tmp == NULL)
            goto done;
-        result = format_float_internal(tmp, &format);
+        result = format_float_internal(tmp, &format, writer);
        break;

    default:
@ -1407,41 +1455,27 @@ format_int_or_long(PyObject* obj, PyObject* format_spec,

 done:
    Py_XDECREF(tmp);
+    Py_XDECREF(str);
    return result;
 }

-/* Need to define long_format as a function that will convert a long
-   to a string.  In 3.0, _PyLong_Format has the correct signature. */
-#define long_format _PyLong_Format
-
-PyObject *
-_PyLong_FormatAdvanced(PyObject *obj,
+int
+_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
+                              PyObject *obj,
                              PyObject *format_spec,
                              Py_ssize_t start, Py_ssize_t end)
 {
-    return format_int_or_long(obj, format_spec, start, end,
-                              long_format);
-}
-
-PyObject *
-_PyFloat_FormatAdvanced(PyObject *obj,
-                        PyObject *format_spec,
-                        Py_ssize_t start, Py_ssize_t end)
-{
-    PyObject *result = NULL;
    InternalFormatSpec format;

    /* check for the special case of zero length format spec, make
       it equivalent to str(obj) */
-    if (start == end) {
-        result = PyObject_Str(obj);
-        goto done;
-    }
+    if (start == end)
+        return format_obj(obj, writer);

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec, start, end,
                                           &format, '\0', '>'))
-        goto done;
+        return -1;

    /* type conversion? */
    switch (format.type) {
@ -1455,38 +1489,32 @@ _PyFloat_FormatAdvanced(PyObject *obj,
    case 'n':
    case '%':
        /* no conversion, already a float.  do the formatting */
-        result = format_float_internal(obj, &format);
-        break;
+        return format_float_internal(obj, &format, writer);

    default:
        /* unknown */
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
-        goto done;
+        return -1;
    }
-
-done:
-    return result;
 }

-PyObject *
-_PyComplex_FormatAdvanced(PyObject *obj,
+int
+_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
+                                PyObject *obj,
                                PyObject *format_spec,
                                Py_ssize_t start, Py_ssize_t end)
 {
-    PyObject *result = NULL;
    InternalFormatSpec format;

    /* check for the special case of zero length format spec, make
       it equivalent to str(obj) */
-    if (start == end) {
-        result = PyObject_Str(obj);
-        goto done;
-    }
+    if (start == end)
+        return format_obj(obj, writer);

    /* parse the format_spec */
    if (!parse_internal_render_format_spec(format_spec, start, end,
                                           &format, '\0', '>'))
-        goto done;
+        return -1;

    /* type conversion? */
    switch (format.type) {
@ -1499,15 +1527,11 @@ _PyComplex_FormatAdvanced(PyObject *obj,
    case 'G':
    case 'n':
        /* no conversion, already a complex.  do the formatting */
-        result = format_complex_internal(obj, &format);
-        break;
+        return format_complex_internal(obj, &format, writer);

    default:
        /* unknown */
        unknown_presentation_type(format.type, obj->ob_type->tp_name);
-        goto done;
+        return -1;
    }
-
-done:
-    return result;
 }