Merged upstream changes.

This commit is contained in:
Vinay Sajip 2012-05-29 12:53:00 +01:00
commit aa88d32574
17 changed files with 896 additions and 456 deletions

View File

@ -953,7 +953,7 @@ Shared :mod:`ctypes` Objects
It is possible to create shared objects using shared memory which can be It is possible to create shared objects using shared memory which can be
inherited by child processes. inherited by child processes.
.. function:: Value(typecode_or_type, *args[, lock]) .. function:: Value(typecode_or_type, *args, lock=True)
Return a :mod:`ctypes` object allocated from shared memory. By default the Return a :mod:`ctypes` object allocated from shared memory. By default the
return value is actually a synchronized wrapper for the object. return value is actually a synchronized wrapper for the object.
@ -1045,7 +1045,7 @@ processes.
attributes which allow one to use it to store and retrieve strings -- see attributes which allow one to use it to store and retrieve strings -- see
documentation for :mod:`ctypes`. documentation for :mod:`ctypes`.
.. function:: Array(typecode_or_type, size_or_initializer, *args[, lock]) .. function:: Array(typecode_or_type, size_or_initializer, *, lock=True)
The same as :func:`RawArray` except that depending on the value of *lock* a The same as :func:`RawArray` except that depending on the value of *lock* a
process-safe synchronization wrapper may be returned instead of a raw ctypes process-safe synchronization wrapper may be returned instead of a raw ctypes
@ -1060,7 +1060,7 @@ processes.
Note that *lock* is a keyword-only argument. Note that *lock* is a keyword-only argument.
.. function:: Value(typecode_or_type, *args[, lock]) .. function:: Value(typecode_or_type, *args, lock=True)
The same as :func:`RawValue` except that depending on the value of *lock* a The same as :func:`RawValue` except that depending on the value of *lock* a
process-safe synchronization wrapper may be returned instead of a raw ctypes process-safe synchronization wrapper may be returned instead of a raw ctypes

View File

@ -63,10 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op);
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter(
PyObject *format_spec, _PyUnicodeWriter *writer,
Py_ssize_t start, PyObject *obj,
Py_ssize_t end); PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -112,10 +112,12 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(void);
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter(
PyObject *format_spec, _PyUnicodeWriter *writer,
Py_ssize_t start, PyObject *obj,
Py_ssize_t end); PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif /* Py_LIMITED_API */ #endif /* Py_LIMITED_API */
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -151,14 +151,22 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,
/* _PyLong_Format: Convert the long to a string object with given base, /* _PyLong_Format: Convert the long to a string object with given base,
appending a base prefix of 0[box] if base is 2, 8 or 16. */ appending a base prefix of 0[box] if base is 2, 8 or 16. */
PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base);
PyAPI_FUNC(int) _PyLong_FormatWriter(
_PyUnicodeWriter *writer,
PyObject *obj,
int base,
int alternate);
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(
PyObject *format_spec, _PyUnicodeWriter *writer,
Py_ssize_t start, PyObject *obj,
Py_ssize_t end); PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif /* Py_LIMITED_API */ #endif /* Py_LIMITED_API */
/* These aren't really part of the long object, but they're handy. The /* These aren't really part of the long object, but they're handy. The

View File

@ -648,8 +648,20 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Py_ssize_t from_start, Py_ssize_t from_start,
Py_ssize_t how_many Py_ssize_t how_many
); );
/* Unsafe version of PyUnicode_CopyCharacters(): arguments are not checked, so
   the call may crash if they are invalid (e.g. if the output string is too
   short).

   The parameters mirror PyUnicode_CopyCharacters(): 'how_many' characters are
   taken from 'from' starting at index 'from_start' and stored into 'to'
   starting at index 'to_start'.  The function returns void, so it cannot
   report an error to the caller; validating the arguments is entirely the
   caller's responsibility. */
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
PyObject *to,
Py_ssize_t to_start,
PyObject *from,
Py_ssize_t from_start,
Py_ssize_t how_many
);
#endif #endif
#ifndef Py_LIMITED_API
/* Fill a string with a character: write fill_char into /* Fill a string with a character: write fill_char into
unicode[start:start+length]. unicode[start:start+length].
@ -658,13 +670,21 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
Return the number of written character, or return -1 and raise an exception Return the number of written character, or return -1 and raise an exception
on error. */ on error. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
PyObject *unicode, PyObject *unicode,
Py_ssize_t start, Py_ssize_t start,
Py_ssize_t length, Py_ssize_t length,
Py_UCS4 fill_char Py_UCS4 fill_char
); );
/* Unsafe version of PyUnicode_Fill(): arguments are not checked, so the call
   may crash if they are invalid (e.g. if length is longer than the string).

   The parameters are the same as for PyUnicode_Fill(): 'fill_char' is
   written into unicode[start:start+length].  Unlike PyUnicode_Fill(), which
   returns the number of characters written or -1, this function returns
   void and has no way to signal an error. */
PyAPI_FUNC(void) _PyUnicode_FastFill(
PyObject *unicode,
Py_ssize_t start,
Py_ssize_t length,
Py_UCS4 fill_char
);
#endif #endif
/* Create a Unicode Object from the Py_UNICODE buffer u of the given /* Create a Unicode Object from the Py_UNICODE buffer u of the given
@ -696,13 +716,19 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
const char *u /* UTF-8 encoded string */ const char *u /* UTF-8 encoded string */
); );
#ifndef Py_LIMITED_API
/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
Scan the string to find the maximum character. */ Scan the string to find the maximum character. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
int kind, int kind,
const void *buffer, const void *buffer,
Py_ssize_t size); Py_ssize_t size);
/* Create a new string from a buffer of ASCII characters.
   'size' is the number of characters to take from 'buffer'; callers holding
   a NUL-terminated C string pass strlen(buffer).
   WARNING: the buffer is not checked for non-ASCII characters; the caller
   must guarantee that every character is ASCII.
   NOTE(review): presumably returns NULL with an exception set on failure,
   like the other string constructors -- confirm in the implementation. */
PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
const char *buffer,
Py_ssize_t size);
#endif #endif
PyAPI_FUNC(PyObject*) PyUnicode_Substring( PyAPI_FUNC(PyObject*) PyUnicode_Substring(
@ -864,13 +890,70 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
... ...
); );
#ifndef Py_LIMITED_API
/* State of a Unicode writer: an output string that is built incrementally.
   Set it up with _PyUnicodeWriter_Init(), reserve room with
   _PyUnicodeWriter_Prepare() before storing characters, and obtain the
   result with _PyUnicodeWriter_Finish(). */
typedef struct {
/* string object being written to; formatting code stores characters
   directly through PyUnicode_DATA(buffer) */
PyObject *buffer;
/* NOTE(review): presumably a cached pointer to the character data of
   'buffer' -- confirm against the implementation */
void *data;
/* character width of 'buffer'; writers branch on it to choose between
   Py_UCS1, Py_UCS2 and Py_UCS4 stores */
enum PyUnicode_Kind kind;
/* _PyUnicodeWriter_Prepare() takes its fast path only when the requested
   maximum character is <= maxchar */
Py_UCS4 maxchar;
/* capacity of 'buffer' in characters: size - pos is the room still free */
Py_ssize_t size;
/* index in 'buffer' of the next character to write; code that stores
   characters directly must advance it afterwards */
Py_ssize_t pos;
/* minimum length of the buffer when overallocation is enabled,
see _PyUnicodeWriter_Init() */
Py_ssize_t min_length;
struct {
/* when set, the buffer may be allocated larger than requested; callers
   clear it before writing the last piece of output */
unsigned char overallocate:1;
/* If readonly is 1, buffer is a shared string (cannot be modified)
and size is set to 0. */
unsigned char readonly:1;
} flags;
} _PyUnicodeWriter ;
/* Initialize a Unicode writer.

   If min_length is greater than zero, _PyUnicodeWriter_Prepare()
   overallocates the buffer and min_length is the minimum length in characters
   of the buffer.  Pass 0 when no overallocation is wanted.

   The function returns void, so there is no error result to check. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
/* Prepare the buffer to write 'length' characters
   with the specified maximum character.
   Return 0 on success, raise an exception and return -1 on error.

   Fast path: when MAXCHAR fits in the writer's current maxchar and LENGTH
   fits in the remaining room (size - pos), the macro evaluates to 0 without
   calling any function.  A LENGTH of 0 also evaluates to 0 without calling
   _PyUnicodeWriter_PrepareInternal(), even if MAXCHAR is larger than the
   writer's maxchar.

   WARNING: this is a macro and WRITER, LENGTH and MAXCHAR are each evaluated
   more than once; do not pass expressions with side effects.

   After a successful call the caller stores the characters itself and then
   advances writer->pos by the number of characters written. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
(((MAXCHAR) <= (WRITER)->maxchar \
&& (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
? 0 \
: (((LENGTH) == 0) \
? 0 \
: _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
/* Slow path of _PyUnicodeWriter_Prepare(): called by the macro only when the
   requested length is non-zero and the current buffer is too small or its
   maximum character is too low.  The macro forwards this function's result,
   i.e. 0 on success and -1 with an exception set on error.

   Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
   instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar);
/* Append the string 'str' to the writer.
   Callers treat a return value of -1 as failure.
   NOTE(review): presumably returns 0 on success and sets an exception on
   error, like _PyUnicodeWriter_Prepare() -- confirm in the implementation. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
/* Return the string built by the writer.  The __format__ implementations
   return this value directly as their result once formatting succeeded.
   NOTE(review): presumably the result is a new reference (NULL on error) and
   the writer must not be used afterwards -- confirm in the implementation. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
/* Discard a writer without producing a result.  Callers use it on error
   paths, in place of _PyUnicodeWriter_Finish(), before returning NULL.
   NOTE(review): presumably releases the writer's buffer -- confirm in the
   implementation. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
#endif
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101 /* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */ (Advanced String Formatting). */
PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
PyObject *format_spec, _PyUnicodeWriter *writer,
Py_ssize_t start, PyObject *obj,
Py_ssize_t end); PyObject *format_spec,
Py_ssize_t start,
Py_ssize_t end);
#endif #endif
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);

View File

@ -228,19 +228,19 @@ def RawArray(typecode_or_type, size_or_initializer):
from multiprocessing.sharedctypes import RawArray from multiprocessing.sharedctypes import RawArray
return RawArray(typecode_or_type, size_or_initializer) return RawArray(typecode_or_type, size_or_initializer)
def Value(typecode_or_type, *args, **kwds): def Value(typecode_or_type, *args, lock=True):
''' '''
Returns a synchronized shared object Returns a synchronized shared object
''' '''
from multiprocessing.sharedctypes import Value from multiprocessing.sharedctypes import Value
return Value(typecode_or_type, *args, **kwds) return Value(typecode_or_type, *args, lock=lock)
def Array(typecode_or_type, size_or_initializer, **kwds): def Array(typecode_or_type, size_or_initializer, *, lock=True):
''' '''
Returns a synchronized shared array Returns a synchronized shared array
''' '''
from multiprocessing.sharedctypes import Array from multiprocessing.sharedctypes import Array
return Array(typecode_or_type, size_or_initializer, **kwds) return Array(typecode_or_type, size_or_initializer, lock=lock)
# #
# #

View File

@ -1035,12 +1035,11 @@ class ValueProxy(BaseProxy):
BaseListProxy = MakeProxyType('BaseListProxy', ( BaseListProxy = MakeProxyType('BaseListProxy', (
'__add__', '__contains__', '__delitem__', '__delslice__', '__add__', '__contains__', '__delitem__', '__getitem__', '__len__',
'__getitem__', '__getslice__', '__len__', '__mul__', '__mul__', '__reversed__', '__rmul__', '__setitem__',
'__reversed__', '__rmul__', '__setitem__', '__setslice__',
'append', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'append', 'count', 'extend', 'index', 'insert', 'pop', 'remove',
'reverse', 'sort', '__imul__' 'reverse', 'sort', '__imul__'
)) # XXX __getslice__ and __setslice__ unneeded in Py3.0 ))
class ListProxy(BaseListProxy): class ListProxy(BaseListProxy):
def __iadd__(self, value): def __iadd__(self, value):
self._callmethod('extend', (value,)) self._callmethod('extend', (value,))
@ -1058,8 +1057,8 @@ DictProxy = MakeProxyType('DictProxy', (
ArrayProxy = MakeProxyType('ArrayProxy', ( ArrayProxy = MakeProxyType('ArrayProxy', (
'__len__', '__getitem__', '__setitem__', '__getslice__', '__setslice__' '__len__', '__getitem__', '__setitem__'
)) # XXX __getslice__ and __setslice__ unneeded in Py3.0 ))
PoolProxy = MakeProxyType('PoolProxy', ( PoolProxy = MakeProxyType('PoolProxy', (

View File

@ -63,7 +63,7 @@ def RawArray(typecode_or_type, size_or_initializer):
result.__init__(*size_or_initializer) result.__init__(*size_or_initializer)
return result return result
def Value(typecode_or_type, *args, lock=None): def Value(typecode_or_type, *args, lock=True):
''' '''
Return a synchronization wrapper for a Value Return a synchronization wrapper for a Value
''' '''
@ -76,13 +76,10 @@ def Value(typecode_or_type, *args, lock=None):
raise AttributeError("'%r' has no method 'acquire'" % lock) raise AttributeError("'%r' has no method 'acquire'" % lock)
return synchronized(obj, lock) return synchronized(obj, lock)
def Array(typecode_or_type, size_or_initializer, **kwds): def Array(typecode_or_type, size_or_initializer, *, lock=True):
''' '''
Return a synchronization wrapper for a RawArray Return a synchronization wrapper for a RawArray
''' '''
lock = kwds.pop('lock', None)
if kwds:
raise ValueError('unrecognized keyword argument(s): %s' % list(kwds.keys()))
obj = RawArray(typecode_or_type, size_or_initializer) obj = RawArray(typecode_or_type, size_or_initializer)
if lock is False: if lock is False:
return obj return obj

View File

@ -13,6 +13,9 @@ Core and Builtins
- Issue #14835: Make plistlib output empty arrays & dicts like OS X. - Issue #14835: Make plistlib output empty arrays & dicts like OS X.
Patch by Sidney San Martín. Patch by Sidney San Martín.
- Issue #14744: Use the new _PyUnicodeWriter internal API to speed up
str%args and str.format(args).
- Issue #14930: Make memoryview objects weakrefable. - Issue #14930: Make memoryview objects weakrefable.
- Issue #14775: Fix a potential quadratic dict build-up due to the garbage - Issue #14775: Fix a potential quadratic dict build-up due to the garbage

View File

@ -699,11 +699,22 @@ static PyObject *
complex__format__(PyObject* self, PyObject* args) complex__format__(PyObject* self, PyObject* args)
{ {
PyObject *format_spec; PyObject *format_spec;
_PyUnicodeWriter writer;
int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
return _PyComplex_FormatAdvanced(self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec)); _PyUnicodeWriter_Init(&writer, 0);
ret = _PyComplex_FormatAdvancedWriter(
&writer,
self,
format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
if (ret == -1) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
return _PyUnicodeWriter_Finish(&writer);
} }
#if 0 #if 0

View File

@ -267,13 +267,15 @@ static PyObject *
float_repr(PyFloatObject *v) float_repr(PyFloatObject *v)
{ {
PyObject *result; PyObject *result;
char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), char *buf;
'r', 0,
Py_DTSF_ADD_DOT_0, buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
NULL); 'r', 0,
Py_DTSF_ADD_DOT_0,
NULL);
if (!buf) if (!buf)
return PyErr_NoMemory(); return PyErr_NoMemory();
result = PyUnicode_FromString(buf); result = _PyUnicode_FromASCII(buf, strlen(buf));
PyMem_Free(buf); PyMem_Free(buf);
return result; return result;
} }
@ -1703,11 +1705,22 @@ static PyObject *
float__format__(PyObject *self, PyObject *args) float__format__(PyObject *self, PyObject *args)
{ {
PyObject *format_spec; PyObject *format_spec;
_PyUnicodeWriter writer;
int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
return _PyFloat_FormatAdvanced(self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec)); _PyUnicodeWriter_Init(&writer, 0);
ret = _PyFloat_FormatAdvancedWriter(
&writer,
self,
format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
if (ret == -1) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
return _PyUnicodeWriter_Finish(&writer);
} }
PyDoc_STRVAR(float__format__doc, PyDoc_STRVAR(float__format__doc,

View File

@ -1550,20 +1550,22 @@ divrem1(PyLongObject *a, digit n, digit *prem)
string. (Return value is non-shared so that callers can modify the string. (Return value is non-shared so that callers can modify the
returned value if necessary.) */ returned value if necessary.) */
static PyObject * static int
long_to_decimal_string(PyObject *aa) long_to_decimal_string_internal(PyObject *aa,
PyObject **p_output,
_PyUnicodeWriter *writer)
{ {
PyLongObject *scratch, *a; PyLongObject *scratch, *a;
PyObject *str; PyObject *str;
Py_ssize_t size, strlen, size_a, i, j; Py_ssize_t size, strlen, size_a, i, j;
digit *pout, *pin, rem, tenpow; digit *pout, *pin, rem, tenpow;
unsigned char *p;
int negative; int negative;
enum PyUnicode_Kind kind;
a = (PyLongObject *)aa; a = (PyLongObject *)aa;
if (a == NULL || !PyLong_Check(a)) { if (a == NULL || !PyLong_Check(a)) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return -1;
} }
size_a = ABS(Py_SIZE(a)); size_a = ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0; negative = Py_SIZE(a) < 0;
@ -1580,13 +1582,13 @@ long_to_decimal_string(PyObject *aa)
if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"long is too large to format"); "long is too large to format");
return NULL; return -1;
} }
/* the expression size_a * PyLong_SHIFT is now safe from overflow */ /* the expression size_a * PyLong_SHIFT is now safe from overflow */
size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT);
scratch = _PyLong_New(size); scratch = _PyLong_New(size);
if (scratch == NULL) if (scratch == NULL)
return NULL; return -1;
/* convert array of base _PyLong_BASE digits in pin to an array of /* convert array of base _PyLong_BASE digits in pin to an array of
base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP,
@ -1609,7 +1611,7 @@ long_to_decimal_string(PyObject *aa)
/* check for keyboard interrupt */ /* check for keyboard interrupt */
SIGCHECK({ SIGCHECK({
Py_DECREF(scratch); Py_DECREF(scratch);
return NULL; return -1;
}); });
} }
/* pout should have at least one digit, so that the case when a = 0 /* pout should have at least one digit, so that the case when a = 0
@ -1625,65 +1627,113 @@ long_to_decimal_string(PyObject *aa)
tenpow *= 10; tenpow *= 10;
strlen++; strlen++;
} }
str = PyUnicode_New(strlen, '9'); if (writer) {
if (str == NULL) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1)
Py_DECREF(scratch); return -1;
return NULL; kind = writer->kind;
str = NULL;
} }
else {
str = PyUnicode_New(strlen, '9');
if (str == NULL) {
Py_DECREF(scratch);
return -1;
}
kind = PyUnicode_KIND(str);
}
/* WRITE_DIGITS(TYPE): store the decimal digits held in pout[0..size-1], plus
   a leading '-' when 'negative' is set, as characters of type TYPE
   (Py_UCS1, Py_UCS2 or Py_UCS4).

   The macro works on the enclosing function's locals: the caller must
   declare 'p' as TYPE*, and writer, str, strlen, pout, size, i, j, rem and
   negative are read or modified.  When 'writer' is non-NULL the text is
   placed at writer->pos inside writer->buffer, otherwise at the start of
   'str'.  Digits are produced least-significant first, so 'p' starts
   'strlen' characters past the output position and walks backwards; the
   final asserts check that it ends exactly on the output position.

   The macro does not advance writer->pos; the caller does that. */
#define WRITE_DIGITS(TYPE) \
do { \
if (writer) \
p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
else \
p = (TYPE*)PyUnicode_DATA(str) + strlen; \
\
*p = '\0'; \
/* pout[0] through pout[size-2] contribute exactly \
_PyLong_DECIMAL_SHIFT digits each */ \
for (i=0; i < size - 1; i++) { \
rem = pout[i]; \
for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { \
*--p = '0' + rem % 10; \
rem /= 10; \
} \
} \
/* pout[size-1]: always produce at least one decimal digit */ \
rem = pout[i]; \
do { \
*--p = '0' + rem % 10; \
rem /= 10; \
} while (rem != 0); \
\
/* and sign */ \
if (negative) \
*--p = '-'; \
\
/* check we've counted correctly */ \
if (writer) \
assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
else \
assert(p == (TYPE*)PyUnicode_DATA(str)); \
} while (0)
/* fill the string right-to-left */ /* fill the string right-to-left */
assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); if (kind == PyUnicode_1BYTE_KIND) {
p = PyUnicode_1BYTE_DATA(str) + strlen; Py_UCS1 *p;
*p = '\0'; WRITE_DIGITS(Py_UCS1);
/* pout[0] through pout[size-2] contribute exactly
_PyLong_DECIMAL_SHIFT digits each */
for (i=0; i < size - 1; i++) {
rem = pout[i];
for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) {
*--p = '0' + rem % 10;
rem /= 10;
}
} }
/* pout[size-1]: always produce at least one decimal digit */ else if (kind == PyUnicode_2BYTE_KIND) {
rem = pout[i]; Py_UCS2 *p;
do { WRITE_DIGITS(Py_UCS2);
*--p = '0' + rem % 10; }
rem /= 10; else {
} while (rem != 0); assert (kind == PyUnicode_4BYTE_KIND);
Py_UCS4 *p;
WRITE_DIGITS(Py_UCS4);
}
#undef WRITE_DIGITS
/* and sign */
if (negative)
*--p = '-';
/* check we've counted correctly */
assert(p == PyUnicode_1BYTE_DATA(str));
assert(_PyUnicode_CheckConsistency(str, 1));
Py_DECREF(scratch); Py_DECREF(scratch);
return (PyObject *)str; if (writer) {
writer->pos += strlen;
}
else {
assert(_PyUnicode_CheckConsistency(str, 1));
*p_output = (PyObject *)str;
}
return 0;
}
/* Convert the int object 'aa' to its base-10 text and return it as a string
   object.  This is the "no writer" front end of
   long_to_decimal_string_internal(): the helper is asked to build a fresh
   string and hand it back through its output pointer.  Returns NULL when the
   helper reports failure (-1). */
static PyObject *
long_to_decimal_string(PyObject *aa)
{
    PyObject *result;
    int status = long_to_decimal_string_internal(aa, &result, NULL);

    /* 'result' is only meaningful when the helper succeeded */
    return (status == -1) ? NULL : result;
}
/* Convert a long int object to a string, using a given conversion base, /* Convert a long int object to a string, using a given conversion base,
which should be one of 2, 8, 10 or 16. Return a string object. which should be one of 2, 8 or 16. Return a string object.
If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'
if alternate is nonzero. */
PyObject * static int
_PyLong_Format(PyObject *aa, int base) long_format_binary(PyObject *aa, int base, int alternate,
PyObject **p_output, _PyUnicodeWriter *writer)
{ {
register PyLongObject *a = (PyLongObject *)aa; register PyLongObject *a = (PyLongObject *)aa;
PyObject *v; PyObject *v;
Py_ssize_t sz; Py_ssize_t sz;
Py_ssize_t size_a; Py_ssize_t size_a;
Py_UCS1 *p; enum PyUnicode_Kind kind;
int negative; int negative;
int bits; int bits;
assert(base == 2 || base == 8 || base == 10 || base == 16); assert(base == 2 || base == 8 || base == 16);
if (base == 10)
return long_to_decimal_string((PyObject *)a);
if (a == NULL || !PyLong_Check(a)) { if (a == NULL || !PyLong_Check(a)) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return -1;
} }
size_a = ABS(Py_SIZE(a)); size_a = ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0; negative = Py_SIZE(a) < 0;
@ -1706,7 +1756,7 @@ _PyLong_Format(PyObject *aa, int base)
/* Compute exact length 'sz' of output string. */ /* Compute exact length 'sz' of output string. */
if (size_a == 0) { if (size_a == 0) {
sz = 3; sz = 1;
} }
else { else {
Py_ssize_t size_a_in_bits; Py_ssize_t size_a_in_bits;
@ -1714,56 +1764,126 @@ _PyLong_Format(PyObject *aa, int base)
if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) { if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"int is too large to format"); "int is too large to format");
return NULL; return -1;
} }
size_a_in_bits = (size_a - 1) * PyLong_SHIFT + size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
bits_in_digit(a->ob_digit[size_a - 1]); bits_in_digit(a->ob_digit[size_a - 1]);
/* Allow 2 characters for prefix and 1 for a '-' sign. */ /* Allow 1 character for a '-' sign. */
sz = 2 + negative + (size_a_in_bits + (bits - 1)) / bits; sz = negative + (size_a_in_bits + (bits - 1)) / bits;
}
if (alternate) {
/* 2 characters for prefix */
sz += 2;
} }
v = PyUnicode_New(sz, 'x'); if (writer) {
if (v == NULL) { if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1)
return NULL; return -1;
} kind = writer->kind;
assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); v = NULL;
p = PyUnicode_1BYTE_DATA(v) + sz;
if (size_a == 0) {
*--p = '0';
} }
else { else {
/* JRH: special case for power-of-2 bases */ v = PyUnicode_New(sz, 'x');
twodigits accum = 0; if (v == NULL)
int accumbits = 0; /* # of bits in accum */ return -1;
Py_ssize_t i; kind = PyUnicode_KIND(v);
for (i = 0; i < size_a; ++i) {
accum |= (twodigits)a->ob_digit[i] << accumbits;
accumbits += PyLong_SHIFT;
assert(accumbits >= bits);
do {
char cdigit;
cdigit = (char)(accum & (base - 1));
cdigit += (cdigit < 10) ? '0' : 'a'-10;
*--p = cdigit;
accumbits -= bits;
accum >>= bits;
} while (i < size_a-1 ? accumbits >= bits : accum > 0);
}
} }
if (base == 16) #define WRITE_DIGITS(TYPE) \
*--p = 'x'; do { \
else if (base == 8) if (writer) \
*--p = 'o'; p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
else /* (base == 2) */ else \
*--p = 'b'; p = (TYPE*)PyUnicode_DATA(v) + sz; \
*--p = '0'; \
if (negative) if (size_a == 0) { \
*--p = '-'; *--p = '0'; \
assert(p == PyUnicode_1BYTE_DATA(v)); } \
assert(_PyUnicode_CheckConsistency(v, 1)); else { \
return v; /* JRH: special case for power-of-2 bases */ \
twodigits accum = 0; \
int accumbits = 0; /* # of bits in accum */ \
Py_ssize_t i; \
for (i = 0; i < size_a; ++i) { \
accum |= (twodigits)a->ob_digit[i] << accumbits; \
accumbits += PyLong_SHIFT; \
assert(accumbits >= bits); \
do { \
char cdigit; \
cdigit = (char)(accum & (base - 1)); \
cdigit += (cdigit < 10) ? '0' : 'a'-10; \
*--p = cdigit; \
accumbits -= bits; \
accum >>= bits; \
} while (i < size_a-1 ? accumbits >= bits : accum > 0); \
} \
} \
\
if (alternate) { \
if (base == 16) \
*--p = 'x'; \
else if (base == 8) \
*--p = 'o'; \
else /* (base == 2) */ \
*--p = 'b'; \
*--p = '0'; \
} \
if (negative) \
*--p = '-'; \
if (writer) \
assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
else \
assert(p == (TYPE*)PyUnicode_DATA(v)); \
} while (0)
if (kind == PyUnicode_1BYTE_KIND) {
Py_UCS1 *p;
WRITE_DIGITS(Py_UCS1);
}
else if (kind == PyUnicode_2BYTE_KIND) {
Py_UCS2 *p;
WRITE_DIGITS(Py_UCS2);
}
else {
assert (kind == PyUnicode_4BYTE_KIND);
Py_UCS4 *p;
WRITE_DIGITS(Py_UCS4);
}
#undef WRITE_DIGITS
if (writer) {
writer->pos += sz;
}
else {
assert(_PyUnicode_CheckConsistency(v, 1));
*p_output = v;
}
return 0;
}
/* Convert the int object 'obj' to a string object in the given base.
   Base 10 goes through the decimal converter; any other base is handed to
   long_format_binary() with 'alternate' set to 1, so the 0b/0o/0x prefix is
   always included.  Returns NULL when the selected converter reports
   failure (-1). */
PyObject *
_PyLong_Format(PyObject *obj, int base)
{
    PyObject *result;
    int status;

    status = (base == 10)
        ? long_to_decimal_string_internal(obj, &result, NULL)
        : long_format_binary(obj, base, 1, &result, NULL);

    /* 'result' is only meaningful when the converter succeeded */
    return (status == -1) ? NULL : result;
}
/* Write the text of the int object 'obj' in the given base into 'writer'.
   Returns whatever the selected converter returns: 0 on success, -1 on
   failure.

   'alternate' requests the 0b/0o/0x prefix and is only consulted for
   non-decimal bases; base 10 ignores it.  'writer' must not be NULL: the
   converters are given a NULL output pointer here, and they store through
   that pointer whenever no writer is supplied. */
int
_PyLong_FormatWriter(_PyUnicodeWriter *writer,
                     PyObject *obj,
                     int base, int alternate)
{
    if (base != 10) {
        return long_format_binary(obj, base, alternate, NULL, writer);
    }
    return long_to_decimal_string_internal(obj, NULL, writer);
}
/* Table of digit values for 8-bit string -> integer conversion. /* Table of digit values for 8-bit string -> integer conversion.
@ -4232,11 +4352,22 @@ static PyObject *
long__format__(PyObject *self, PyObject *args) long__format__(PyObject *self, PyObject *args)
{ {
PyObject *format_spec; PyObject *format_spec;
_PyUnicodeWriter writer;
int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
return _PyLong_FormatAdvanced(self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec)); _PyUnicodeWriter_Init(&writer, 0);
ret = _PyLong_FormatAdvancedWriter(
&writer,
self,
format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
if (ret == -1) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
return _PyUnicodeWriter_Finish(&writer);
} }
/* Return a pair (q, r) such that a = b * q + r, and /* Return a pair (q, r) such that a = b * q + r, and

View File

@ -18,7 +18,7 @@
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL #define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW unicode_fromascii #define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
#define STRINGLIB_RESIZE not_supported #define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact

View File

@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
int ok = 0; int ok = 0;
PyObject *result = NULL; PyObject *result = NULL;
PyObject *format_spec_object = NULL; PyObject *format_spec_object = NULL;
PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
Py_ssize_t len; int err;
/* If we know the type exactly, skip the lookup of __format__ and just /* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */ call the formatter directly. */
if (PyUnicode_CheckExact(fieldobj)) if (PyUnicode_CheckExact(fieldobj))
formatter = _PyUnicode_FormatAdvanced; formatter = _PyUnicode_FormatAdvancedWriter;
else if (PyLong_CheckExact(fieldobj)) else if (PyLong_CheckExact(fieldobj))
formatter =_PyLong_FormatAdvanced; formatter = _PyLong_FormatAdvancedWriter;
else if (PyFloat_CheckExact(fieldobj)) else if (PyFloat_CheckExact(fieldobj))
formatter = _PyFloat_FormatAdvanced; formatter = _PyFloat_FormatAdvancedWriter;
else if (PyComplex_CheckExact(fieldobj))
/* XXX: for 2.6, convert format_spec to the appropriate type formatter = _PyComplex_FormatAdvancedWriter;
(unicode, str) */
if (formatter) { if (formatter) {
/* we know exactly which formatter will be called when __format__ is /* we know exactly which formatter will be called when __format__ is
looked up, so call it directly, instead. */ looked up, so call it directly, instead. */
result = formatter(fieldobj, format_spec->str, err = formatter(writer, fieldobj, format_spec->str,
format_spec->start, format_spec->end); format_spec->start, format_spec->end);
return (err == 0);
} }
else { else {
/* We need to create an object out of the pointers we have, because /* We need to create an object out of the pointers we have, because
@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
} }
if (result == NULL) if (result == NULL)
goto done; goto done;
if (PyUnicode_READY(result) == -1)
goto done;
len = PyUnicode_GET_LENGTH(result); if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
if (_PyUnicodeWriter_Prepare(writer,
len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
goto done; goto done;
copy_characters(writer->buffer, writer->pos,
result, 0, len);
writer->pos += len;
ok = 1; ok = 1;
done: done:
Py_XDECREF(format_spec_object); Py_XDECREF(format_spec_object);
Py_XDECREF(result); Py_XDECREF(result);
@ -897,16 +891,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
if (err == -1) if (err == -1)
return 0; return 0;
copy_characters(writer->buffer, writer->pos, _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
literal.str, literal.start, sublen); literal.str, literal.start, sublen);
writer->pos += sublen; writer->pos += sublen;
} }
if (field_present) if (field_present) {
if (iter.str.start == iter.str.end)
writer->flags.overallocate = 0;
if (!output_markup(&field_name, &format_spec, if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, writer, format_spec_needs_expanding, conversion, writer,
args, kwargs, recursion_depth, auto_number)) args, kwargs, recursion_depth, auto_number))
return 0; return 0;
}
} }
return result; return result;
} }
@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number) int recursion_depth, AutoNumber *auto_number)
{ {
_PyUnicodeWriter writer; _PyUnicodeWriter writer;
Py_ssize_t initlen; Py_ssize_t minlen;
/* check the recursion level */ /* check the recursion level */
if (recursion_depth <= 0) { if (recursion_depth <= 0) {
@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL; return NULL;
} }
initlen = PyUnicode_GET_LENGTH(input->str) + 100; minlen = PyUnicode_GET_LENGTH(input->str) + 100;
if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1) _PyUnicodeWriter_Init(&writer, minlen);
return NULL;
if (!do_markup(input, args, kwargs, &writer, recursion_depth, if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) { auto_number)) {

View File

@ -225,15 +225,9 @@ const unsigned char _Py_ascii_whitespace[] = {
/* forward */ /* forward */
static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
static PyObject* get_latin1_char(unsigned char ch); static PyObject* get_latin1_char(unsigned char ch);
static void copy_characters(
PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many);
static int unicode_modifiable(PyObject *unicode); static int unicode_modifiable(PyObject *unicode);
static PyObject *
unicode_fromascii(const unsigned char *s, Py_ssize_t size);
static PyObject * static PyObject *
_PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size); _PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size);
static PyObject * static PyObject *
@ -783,7 +777,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
return NULL; return NULL;
copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
copy_characters(copy, 0, unicode, 0, copy_length); _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length);
return copy; return copy;
} }
else { else {
@ -1154,15 +1148,16 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
assert(0 <= from_start); assert(0 <= from_start);
assert(0 <= to_start); assert(0 <= to_start);
assert(PyUnicode_Check(from)); assert(PyUnicode_Check(from));
assert(PyUnicode_Check(to));
assert(PyUnicode_IS_READY(from)); assert(PyUnicode_IS_READY(from));
assert(PyUnicode_IS_READY(to));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
if (how_many == 0) if (how_many == 0)
return 0; return 0;
assert(PyUnicode_Check(to));
assert(PyUnicode_IS_READY(to));
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
from_kind = PyUnicode_KIND(from); from_kind = PyUnicode_KIND(from);
from_data = PyUnicode_DATA(from); from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to); to_kind = PyUnicode_KIND(to);
@ -1267,10 +1262,10 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
return 0; return 0;
} }
static void void
copy_characters(PyObject *to, Py_ssize_t to_start, _PyUnicode_FastCopyCharacters(
PyObject *from, Py_ssize_t from_start, PyObject *to, Py_ssize_t to_start,
Py_ssize_t how_many) PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many)
{ {
(void)_copy_characters(to, to_start, from, from_start, how_many, 0); (void)_copy_characters(to, to_start, from, from_start, how_many, 0);
} }
@ -1292,6 +1287,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
if (PyUnicode_READY(to) == -1) if (PyUnicode_READY(to) == -1)
return -1; return -1;
if (from_start < 0) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return -1;
}
if (to_start < 0) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return -1;
}
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
PyErr_Format(PyExc_SystemError, PyErr_Format(PyExc_SystemError,
@ -1641,7 +1644,7 @@ unicode_widen(PyObject **p_unicode, Py_ssize_t length,
maxchar); maxchar);
if (result == NULL) if (result == NULL)
return -1; return -1;
PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, length); _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length);
Py_DECREF(*p_unicode); Py_DECREF(*p_unicode);
*p_unicode = result; *p_unicode = result;
return 0; return 0;
@ -1841,9 +1844,10 @@ _PyUnicode_ClearStaticStrings()
/* Internal function, doesn't check maximum character */ /* Internal function, doesn't check maximum character */
static PyObject* PyObject*
unicode_fromascii(const unsigned char* s, Py_ssize_t size) _PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
{ {
const unsigned char *s = (const unsigned char *)buffer;
PyObject *unicode; PyObject *unicode;
if (size == 1) { if (size == 1) {
#ifdef Py_DEBUG #ifdef Py_DEBUG
@ -2085,7 +2089,7 @@ unicode_adjust_maxchar(PyObject **p_unicode)
return; return;
} }
copy = PyUnicode_New(len, max_char); copy = PyUnicode_New(len, max_char);
copy_characters(copy, 0, unicode, 0, len); _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
Py_DECREF(unicode); Py_DECREF(unicode);
*p_unicode = copy; *p_unicode = copy;
} }
@ -2753,7 +2757,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
(void) va_arg(vargs, char *); (void) va_arg(vargs, char *);
size = PyUnicode_GET_LENGTH(*callresult); size = PyUnicode_GET_LENGTH(*callresult);
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
copy_characters(string, i, *callresult, 0, size); _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
i += size; i += size;
/* We're done with the unicode()/repr() => forget it */ /* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult); Py_DECREF(*callresult);
@ -2767,7 +2771,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
Py_ssize_t size; Py_ssize_t size;
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
size = PyUnicode_GET_LENGTH(obj); size = PyUnicode_GET_LENGTH(obj);
copy_characters(string, i, obj, 0, size); _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
i += size; i += size;
break; break;
} }
@ -2779,13 +2783,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (obj) { if (obj) {
size = PyUnicode_GET_LENGTH(obj); size = PyUnicode_GET_LENGTH(obj);
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
copy_characters(string, i, obj, 0, size); _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
i += size; i += size;
} else { } else {
size = PyUnicode_GET_LENGTH(*callresult); size = PyUnicode_GET_LENGTH(*callresult);
assert(PyUnicode_KIND(*callresult) <= assert(PyUnicode_KIND(*callresult) <=
PyUnicode_KIND(string)); PyUnicode_KIND(string));
copy_characters(string, i, *callresult, 0, size); _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
i += size; i += size;
Py_DECREF(*callresult); Py_DECREF(*callresult);
} }
@ -2800,7 +2804,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
/* unused, since we already have the result */ /* unused, since we already have the result */
(void) va_arg(vargs, PyObject *); (void) va_arg(vargs, PyObject *);
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
copy_characters(string, i, *callresult, 0, size); _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
i += size; i += size;
/* We're done with the unicode()/repr() => forget it */ /* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult); Py_DECREF(*callresult);
@ -4171,7 +4175,7 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
if (unicode_widen(output, *outpos, if (unicode_widen(output, *outpos,
PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
goto onError; goto onError;
copy_characters(*output, *outpos, repunicode, 0, replen); _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
*outpos += replen; *outpos += replen;
} }
else { else {
@ -9216,12 +9220,14 @@ fixup(PyObject *self,
/* If the maxchar increased so that the kind changed, not all /* If the maxchar increased so that the kind changed, not all
characters are representable anymore and we need to fix the characters are representable anymore and we need to fix the
string again. This only happens in very few cases. */ string again. This only happens in very few cases. */
copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self)); _PyUnicode_FastCopyCharacters(v, 0,
self, 0, PyUnicode_GET_LENGTH(self));
maxchar_old = fixfct(v); maxchar_old = fixfct(v);
assert(maxchar_old > 0 && maxchar_old <= maxchar_new); assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
} }
else { else {
copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self)); _PyUnicode_FastCopyCharacters(v, 0,
u, 0, PyUnicode_GET_LENGTH(self));
} }
Py_DECREF(u); Py_DECREF(u);
assert(_PyUnicode_CheckConsistency(v, 1)); assert(_PyUnicode_CheckConsistency(v, 1));
@ -9603,7 +9609,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
res_data += kind * seplen; res_data += kind * seplen;
} }
else { else {
copy_characters(res, res_offset, sep, 0, seplen); _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
res_offset += seplen; res_offset += seplen;
} }
} }
@ -9616,7 +9622,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
res_data += kind * itemlen; res_data += kind * itemlen;
} }
else { else {
copy_characters(res, res_offset, item, 0, itemlen); _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
res_offset += itemlen; res_offset += itemlen;
} }
} }
@ -9663,13 +9669,25 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
} \ } \
} while (0) } while (0)
void
_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char)
{
const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
const void *data = PyUnicode_DATA(unicode);
assert(PyUnicode_IS_READY(unicode));
assert(unicode_modifiable(unicode));
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
assert(start >= 0);
assert(start + length <= PyUnicode_GET_LENGTH(unicode));
FILL(kind, data, fill_char, start, length);
}
Py_ssize_t Py_ssize_t
PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char) Py_UCS4 fill_char)
{ {
Py_ssize_t maxlen; Py_ssize_t maxlen;
enum PyUnicode_Kind kind;
void *data;
if (!PyUnicode_Check(unicode)) { if (!PyUnicode_Check(unicode)) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
@ -9680,6 +9698,10 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
if (unicode_check_modifiable(unicode)) if (unicode_check_modifiable(unicode))
return -1; return -1;
if (start < 0) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return -1;
}
if (fill_char > PyUnicode_MAX_CHAR_VALUE(unicode)) { if (fill_char > PyUnicode_MAX_CHAR_VALUE(unicode)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"fill character is bigger than " "fill character is bigger than "
@ -9692,9 +9714,7 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
if (length <= 0) if (length <= 0)
return 0; return 0;
kind = PyUnicode_KIND(unicode); _PyUnicode_FastFill(unicode, start, length, fill_char);
data = PyUnicode_DATA(unicode);
FILL(kind, data, fill_char, start, length);
return length; return length;
} }
@ -9734,7 +9754,7 @@ pad(PyObject *self,
FILL(kind, data, fill, 0, left); FILL(kind, data, fill, 0, left);
if (right) if (right)
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self)); _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
assert(_PyUnicode_CheckConsistency(u, 1)); assert(_PyUnicode_CheckConsistency(u, 1));
return u; return u;
} }
@ -10058,7 +10078,7 @@ replace(PyObject *self, PyObject *str1,
u = PyUnicode_New(slen, maxchar); u = PyUnicode_New(slen, maxchar);
if (!u) if (!u)
goto error; goto error;
copy_characters(u, 0, self, 0, slen); _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
rkind = PyUnicode_KIND(u); rkind = PyUnicode_KIND(u);
PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2); PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
@ -10626,8 +10646,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
w = PyUnicode_New(new_len, maxchar); w = PyUnicode_New(new_len, maxchar);
if (w == NULL) if (w == NULL)
goto onError; goto onError;
copy_characters(w, 0, u, 0, u_len); _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len);
copy_characters(w, u_len, v, 0, v_len); _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len);
Py_DECREF(u); Py_DECREF(u);
Py_DECREF(v); Py_DECREF(v);
assert(_PyUnicode_CheckConsistency(w, 1)); assert(_PyUnicode_CheckConsistency(w, 1));
@ -10702,7 +10722,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
goto error; goto error;
} }
/* copy 'right' into the newly allocated area of 'left' */ /* copy 'right' into the newly allocated area of 'left' */
copy_characters(*p_left, left_len, right, 0, right_len); _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
} }
else { else {
maxchar = PyUnicode_MAX_CHAR_VALUE(left); maxchar = PyUnicode_MAX_CHAR_VALUE(left);
@ -10713,8 +10733,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
res = PyUnicode_New(new_len, maxchar); res = PyUnicode_New(new_len, maxchar);
if (res == NULL) if (res == NULL)
goto error; goto error;
copy_characters(res, 0, left, 0, left_len); _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len);
copy_characters(res, left_len, right, 0, right_len); _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len);
Py_DECREF(left); Py_DECREF(left);
*p_left = res; *p_left = res;
} }
@ -11650,7 +11670,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
length = end - start; length = end - start;
if (PyUnicode_IS_ASCII(self)) { if (PyUnicode_IS_ASCII(self)) {
data = PyUnicode_1BYTE_DATA(self); data = PyUnicode_1BYTE_DATA(self);
return unicode_fromascii(data + start, length); return _PyUnicode_FromASCII((char*)(data + start), length);
} }
else { else {
kind = PyUnicode_KIND(self); kind = PyUnicode_KIND(self);
@ -12769,60 +12789,74 @@ unicode_endswith(PyObject *self,
return PyBool_FromLong(result); return PyBool_FromLong(result);
} }
typedef struct {
PyObject *buffer;
void *data;
enum PyUnicode_Kind kind;
Py_UCS4 maxchar;
Py_ssize_t pos;
} _PyUnicodeWriter ;
Py_LOCAL_INLINE(void) Py_LOCAL_INLINE(void)
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer) _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
{ {
writer->size = PyUnicode_GET_LENGTH(writer->buffer);
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
writer->data = PyUnicode_DATA(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer);
writer->kind = PyUnicode_KIND(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer);
} }
Py_LOCAL(int) void
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, _PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
Py_ssize_t length, Py_UCS4 maxchar)
{ {
writer->pos = 0; memset(writer, 0, sizeof(*writer));
writer->buffer = PyUnicode_New(length, maxchar); #ifdef Py_DEBUG
if (writer->buffer == NULL) writer->kind = 5; /* invalid kind */
return -1; #endif
_PyUnicodeWriter_Update(writer); writer->min_length = Py_MAX(min_length, 100);
return 0; writer->flags.overallocate = (min_length > 0);
} }
Py_LOCAL_INLINE(int) int
_PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer, _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar) Py_ssize_t length, Py_UCS4 maxchar)
{ {
Py_ssize_t newlen; Py_ssize_t newlen;
PyObject *newbuffer; PyObject *newbuffer;
assert(length > 0);
if (length > PY_SSIZE_T_MAX - writer->pos) { if (length > PY_SSIZE_T_MAX - writer->pos) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
newlen = writer->pos + length; newlen = writer->pos + length;
if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { if (writer->buffer == NULL) {
/* overallocate 25% to limit the number of resize */ if (writer->flags.overallocate) {
if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) /* overallocate 25% to limit the number of resize */
newlen += newlen / 4; if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
newlen += newlen / 4;
if (newlen < writer->min_length)
newlen = writer->min_length;
}
writer->buffer = PyUnicode_New(newlen, maxchar);
if (writer->buffer == NULL)
return -1;
_PyUnicodeWriter_Update(writer);
return 0;
}
if (maxchar > writer->maxchar) { if (newlen > writer->size) {
if (writer->flags.overallocate) {
/* overallocate 25% to limit the number of resize */
if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
newlen += newlen / 4;
if (newlen < writer->min_length)
newlen = writer->min_length;
}
if (maxchar > writer->maxchar || writer->flags.readonly) {
/* resize + widen */ /* resize + widen */
newbuffer = PyUnicode_New(newlen, maxchar); newbuffer = PyUnicode_New(newlen, maxchar);
if (newbuffer == NULL) if (newbuffer == NULL)
return -1; return -1;
PyUnicode_CopyCharacters(newbuffer, 0, _PyUnicode_FastCopyCharacters(newbuffer, 0,
writer->buffer, 0, writer->pos); writer->buffer, 0, writer->pos);
Py_DECREF(writer->buffer); Py_DECREF(writer->buffer);
writer->flags.readonly = 0;
} }
else { else {
newbuffer = resize_compact(writer->buffer, newlen); newbuffer = resize_compact(writer->buffer, newlen);
@ -12833,25 +12867,76 @@ _PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer,
_PyUnicodeWriter_Update(writer); _PyUnicodeWriter_Update(writer);
} }
else if (maxchar > writer->maxchar) { else if (maxchar > writer->maxchar) {
if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) assert(!writer->flags.readonly);
newbuffer = PyUnicode_New(writer->size, maxchar);
if (newbuffer == NULL)
return -1; return -1;
_PyUnicode_FastCopyCharacters(newbuffer, 0,
writer->buffer, 0, writer->pos);
Py_DECREF(writer->buffer);
writer->buffer = newbuffer;
_PyUnicodeWriter_Update(writer); _PyUnicodeWriter_Update(writer);
} }
return 0; return 0;
} }
Py_LOCAL(PyObject *) int
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
{
Py_UCS4 maxchar;
Py_ssize_t len;
if (PyUnicode_READY(str) == -1)
return -1;
len = PyUnicode_GET_LENGTH(str);
if (len == 0)
return 0;
maxchar = PyUnicode_MAX_CHAR_VALUE(str);
if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
if (writer->buffer == NULL && !writer->flags.overallocate) {
Py_INCREF(str);
writer->buffer = str;
_PyUnicodeWriter_Update(writer);
writer->flags.readonly = 1;
writer->size = 0;
writer->pos += len;
return 0;
}
if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
return -1;
}
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
str, 0, len);
writer->pos += len;
return 0;
}
PyObject *
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
{ {
if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { if (writer->pos == 0) {
Py_DECREF(writer->buffer); Py_XDECREF(writer->buffer);
return NULL; Py_INCREF(unicode_empty);
return unicode_empty;
}
if (writer->flags.readonly) {
assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
return writer->buffer;
}
if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
PyObject *newbuffer;
newbuffer = resize_compact(writer->buffer, writer->pos);
if (newbuffer == NULL) {
Py_DECREF(writer->buffer);
return NULL;
}
writer->buffer = newbuffer;
} }
assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); assert(_PyUnicode_CheckConsistency(writer->buffer, 1));
return writer->buffer; return writer->buffer;
} }
Py_LOCAL(void) void
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
{ {
Py_CLEAR(writer->buffer); Py_CLEAR(writer->buffer);
@ -12874,14 +12959,24 @@ The substitutions are identified by braces ('{' and '}').");
static PyObject * static PyObject *
unicode__format__(PyObject* self, PyObject* args) unicode__format__(PyObject* self, PyObject* args)
{ {
PyObject *format_spec, *out; PyObject *format_spec;
_PyUnicodeWriter writer;
int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL; return NULL;
out = _PyUnicode_FormatAdvanced(self, format_spec, 0, if (PyUnicode_READY(self) == -1)
PyUnicode_GET_LENGTH(format_spec)); return NULL;
return out; _PyUnicodeWriter_Init(&writer, 0);
ret = _PyUnicode_FormatAdvancedWriter(&writer,
self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec));
if (ret == -1) {
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
return _PyUnicodeWriter_Finish(&writer);
} }
PyDoc_STRVAR(p_format__doc__, PyDoc_STRVAR(p_format__doc__,
@ -13111,16 +13206,17 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
/* Returns a new reference to a PyUnicode object, or NULL on failure. */ /* Returns a new reference to a PyUnicode object, or NULL on failure. */
static PyObject * static int
formatfloat(PyObject *v, int flags, int prec, int type) formatfloat(PyObject *v, int flags, int prec, int type,
PyObject **p_output, _PyUnicodeWriter *writer)
{ {
char *p; char *p;
PyObject *result;
double x; double x;
Py_ssize_t len;
x = PyFloat_AsDouble(v); x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) if (x == -1.0 && PyErr_Occurred())
return NULL; return -1;
if (prec < 0) if (prec < 0)
prec = 6; prec = 6;
@ -13128,10 +13224,20 @@ formatfloat(PyObject *v, int flags, int prec, int type)
p = PyOS_double_to_string(x, type, prec, p = PyOS_double_to_string(x, type, prec,
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
if (p == NULL) if (p == NULL)
return NULL; return -1;
result = unicode_fromascii((unsigned char*)p, strlen(p)); len = strlen(p);
if (writer) {
if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
return -1;
memcpy(writer->data + writer->pos * writer->kind,
p,
len);
writer->pos += len;
}
else
*p_output = _PyUnicode_FromASCII(p, len);
PyMem_Free(p); PyMem_Free(p);
return result; return 0;
} }
/* formatlong() emulates the format codes d, u, o, x and X, and /* formatlong() emulates the format codes d, u, o, x and X, and
@ -13267,7 +13373,7 @@ formatlong(PyObject *val, int flags, int prec, int type)
} }
if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
PyObject *unicode; PyObject *unicode;
unicode = unicode_fromascii((unsigned char *)buf, len); unicode = _PyUnicode_FromASCII(buf, len);
Py_DECREF(result); Py_DECREF(result);
result = unicode; result = unicode;
} }
@ -13336,8 +13442,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
fmtcnt = PyUnicode_GET_LENGTH(uformat); fmtcnt = PyUnicode_GET_LENGTH(uformat);
fmtpos = 0; fmtpos = 0;
if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, 127) < 0) _PyUnicodeWriter_Init(&writer, fmtcnt + 100);
goto onError;
if (PyTuple_Check(args)) { if (PyTuple_Check(args)) {
arglen = PyTuple_Size(args); arglen = PyTuple_Size(args);
@ -13368,8 +13473,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1) if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1)
goto onError; goto onError;
copy_characters(writer.buffer, writer.pos, _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
uformat, nonfmtpos, sublen); uformat, nonfmtpos, sublen);
writer.pos += sublen; writer.pos += sublen;
} }
else { else {
@ -13530,6 +13635,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
"incomplete format"); "incomplete format");
goto onError; goto onError;
} }
if (fmtcnt == 0)
writer.flags.overallocate = 0;
if (c == '%') { if (c == '%') {
if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1) if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
@ -13539,7 +13646,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
continue; continue;
} }
v = getnextarg(args, arglen, &argidx); v = getnextarg(args, arglen, &argidx);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
@ -13552,6 +13658,13 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 's': case 's':
case 'r': case 'r':
case 'a': case 'a':
if (PyLong_CheckExact(v) && width == -1 && prec == -1) {
/* Fast path */
if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
goto onError;
goto nextarg;
}
if (PyUnicode_CheckExact(v) && c == 's') { if (PyUnicode_CheckExact(v) && c == 's') {
temp = v; temp = v;
Py_INCREF(temp); Py_INCREF(temp);
@ -13572,6 +13685,32 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 'o': case 'o':
case 'x': case 'x':
case 'X': case 'X':
if (PyLong_CheckExact(v)
&& width == -1 && prec == -1
&& !(flags & (F_SIGN | F_BLANK)))
{
/* Fast path */
switch(c)
{
case 'd':
case 'i':
case 'u':
if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
goto onError;
goto nextarg;
case 'x':
if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
goto onError;
goto nextarg;
case 'o':
if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
goto onError;
goto nextarg;
default:
break;
}
}
isnumok = 0; isnumok = 0;
if (PyNumber_Check(v)) { if (PyNumber_Check(v)) {
PyObject *iobj=NULL; PyObject *iobj=NULL;
@ -13611,10 +13750,20 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 'F': case 'F':
case 'g': case 'g':
case 'G': case 'G':
if (width == -1 && prec == -1
&& !(flags & (F_SIGN | F_BLANK)))
{
/* Fast path */
if (formatfloat(v, flags, prec, c, NULL, &writer) == -1)
goto onError;
goto nextarg;
}
sign = 1; sign = 1;
if (flags & F_ZERO) if (flags & F_ZERO)
fill = '0'; fill = '0';
temp = formatfloat(v, flags, prec, c); if (formatfloat(v, flags, prec, c, &temp, NULL) == -1)
temp = NULL;
break; break;
case 'c': case 'c':
@ -13622,6 +13771,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
Py_UCS4 ch = formatchar(v); Py_UCS4 ch = formatchar(v);
if (ch == (Py_UCS4) -1) if (ch == (Py_UCS4) -1)
goto onError; goto onError;
if (width == -1 && prec == -1) {
/* Fast path */
if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1)
goto onError;
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
writer.pos += 1;
goto nextarg;
}
temp = PyUnicode_FromOrdinal(ch); temp = PyUnicode_FromOrdinal(ch);
break; break;
} }
@ -13638,6 +13795,16 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (temp == NULL) if (temp == NULL)
goto onError; goto onError;
assert (PyUnicode_Check(temp)); assert (PyUnicode_Check(temp));
if (width == -1 && prec == -1
&& !(flags & (F_SIGN | F_BLANK)))
{
/* Fast path */
if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1)
goto onError;
goto nextarg;
}
if (PyUnicode_READY(temp) == -1) { if (PyUnicode_READY(temp) == -1) {
Py_CLEAR(temp); Py_CLEAR(temp);
goto onError; goto onError;
@ -13676,15 +13843,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (!(flags & F_LJUST)) { if (!(flags & F_LJUST)) {
if (sign) { if (sign) {
if ((width-1) > len) if ((width-1) > len)
bufmaxchar = Py_MAX(bufmaxchar, fill); bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
} }
else { else {
if (width > len) if (width > len)
bufmaxchar = Py_MAX(bufmaxchar, fill); bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
} }
} }
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
bufmaxchar = Py_MAX(bufmaxchar, maxchar); bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar);
buflen = width; buflen = width;
if (sign && len == width) if (sign && len == width)
@ -13737,8 +13904,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
} }
} }
copy_characters(writer.buffer, writer.pos, _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
temp, pindex, len); temp, pindex, len);
writer.pos += len; writer.pos += len;
if (width > len) { if (width > len) {
sublen = width - len; sublen = width - len;
@ -13746,6 +13913,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
writer.pos += sublen; writer.pos += sublen;
} }
nextarg:
if (dict && (argidx < arglen) && c != '%') { if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"not all arguments converted during string formatting"); "not all arguments converted during string formatting");

View File

@ -316,21 +316,28 @@ calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
/* Do the padding, and return a pointer to where the caller-supplied /* Do the padding, and return a pointer to where the caller-supplied
content goes. */ content goes. */
static Py_ssize_t static Py_ssize_t
fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars, fill_padding(_PyUnicodeWriter *writer,
Py_ssize_t nchars,
Py_UCS4 fill_char, Py_ssize_t n_lpadding, Py_UCS4 fill_char, Py_ssize_t n_lpadding,
Py_ssize_t n_rpadding) Py_ssize_t n_rpadding)
{ {
Py_ssize_t pos;
/* Pad on left. */ /* Pad on left. */
if (n_lpadding) if (n_lpadding) {
PyUnicode_Fill(s, start, start + n_lpadding, fill_char); pos = writer->pos;
_PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
}
/* Pad on right. */ /* Pad on right. */
if (n_rpadding) if (n_rpadding) {
PyUnicode_Fill(s, start + nchars + n_lpadding, pos = writer->pos + nchars + n_lpadding;
start + nchars + n_lpadding + n_rpadding, fill_char); _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
}
/* Pointer to the user content. */ /* Pointer to the user content. */
return start + n_lpadding; writer->pos += n_lpadding;
return 0;
} }
/************************************************************************/ /************************************************************************/
@ -541,7 +548,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
as determined in calc_number_widths(). as determined in calc_number_widths().
Return -1 on error, or 0 on success. */ Return -1 on error, or 0 on success. */
static int static int
fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
PyObject *prefix, Py_ssize_t p_start, PyObject *prefix, Py_ssize_t p_start,
Py_UCS4 fill_char, Py_UCS4 fill_char,
@ -549,36 +556,38 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
{ {
/* Used to keep track of digits, decimal, and remainder. */ /* Used to keep track of digits, decimal, and remainder. */
Py_ssize_t d_pos = d_start; Py_ssize_t d_pos = d_start;
unsigned int kind = PyUnicode_KIND(out); const enum PyUnicode_Kind kind = writer->kind;
void *data = PyUnicode_DATA(out); const void *data = writer->data;
Py_ssize_t r; Py_ssize_t r;
if (spec->n_lpadding) { if (spec->n_lpadding) {
PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char); _PyUnicode_FastFill(writer->buffer,
pos += spec->n_lpadding; writer->pos, spec->n_lpadding, fill_char);
writer->pos += spec->n_lpadding;
} }
if (spec->n_sign == 1) { if (spec->n_sign == 1) {
PyUnicode_WRITE(kind, data, pos++, spec->sign); PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
writer->pos++;
} }
if (spec->n_prefix) { if (spec->n_prefix) {
if (PyUnicode_CopyCharacters(out, pos, _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
prefix, p_start, prefix, p_start,
spec->n_prefix) < 0) spec->n_prefix);
return -1;
if (toupper) { if (toupper) {
Py_ssize_t t; Py_ssize_t t;
for (t = 0; t < spec->n_prefix; t++) { for (t = 0; t < spec->n_prefix; t++) {
Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
c = Py_TOUPPER(c); c = Py_TOUPPER(c);
assert (c <= 127); assert (c <= 127);
PyUnicode_WRITE(kind, data, pos + t, c); PyUnicode_WRITE(kind, data, writer->pos + t, c);
} }
} }
pos += spec->n_prefix; writer->pos += spec->n_prefix;
} }
if (spec->n_spadding) { if (spec->n_spadding) {
PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char); _PyUnicode_FastFill(writer->buffer,
pos += spec->n_spadding; writer->pos, spec->n_spadding, fill_char);
writer->pos += spec->n_spadding;
} }
/* Only for type 'c' special case, it has no digits. */ /* Only for type 'c' special case, it has no digits. */
@ -594,7 +603,7 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
return -1; return -1;
} }
r = _PyUnicode_InsertThousandsGrouping( r = _PyUnicode_InsertThousandsGrouping(
out, pos, writer->buffer, writer->pos,
spec->n_grouped_digits, spec->n_grouped_digits,
pdigits + kind * d_pos, pdigits + kind * d_pos,
spec->n_digits, spec->n_min_width, spec->n_digits, spec->n_min_width,
@ -609,34 +618,38 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
if (toupper) { if (toupper) {
Py_ssize_t t; Py_ssize_t t;
for (t = 0; t < spec->n_grouped_digits; t++) { for (t = 0; t < spec->n_grouped_digits; t++) {
Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
c = Py_TOUPPER(c); c = Py_TOUPPER(c);
if (c > 127) { if (c > 127) {
PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
return -1; return -1;
} }
PyUnicode_WRITE(kind, data, pos + t, c); PyUnicode_WRITE(kind, data, writer->pos + t, c);
} }
} }
pos += spec->n_grouped_digits; writer->pos += spec->n_grouped_digits;
if (spec->n_decimal) { if (spec->n_decimal) {
if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0) _PyUnicode_FastCopyCharacters(
return -1; writer->buffer, writer->pos,
pos += spec->n_decimal; locale->decimal_point, 0, spec->n_decimal);
writer->pos += spec->n_decimal;
d_pos += 1; d_pos += 1;
} }
if (spec->n_remainder) { if (spec->n_remainder) {
if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0) _PyUnicode_FastCopyCharacters(
return -1; writer->buffer, writer->pos,
pos += spec->n_remainder; digits, d_pos, spec->n_remainder);
writer->pos += spec->n_remainder;
d_pos += spec->n_remainder; d_pos += spec->n_remainder;
} }
if (spec->n_rpadding) { if (spec->n_rpadding) {
PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char); _PyUnicode_FastFill(writer->buffer,
pos += spec->n_rpadding; writer->pos, spec->n_rpadding,
fill_char);
writer->pos += spec->n_rpadding;
} }
return 0; return 0;
} }
@ -707,17 +720,20 @@ free_locale_info(LocaleInfo *locale_info)
/*********** string formatting ******************************************/ /*********** string formatting ******************************************/
/************************************************************************/ /************************************************************************/
static PyObject * static int
format_string_internal(PyObject *value, const InternalFormatSpec *format) format_string_internal(PyObject *value, const InternalFormatSpec *format,
_PyUnicodeWriter *writer)
{ {
Py_ssize_t lpad; Py_ssize_t lpad;
Py_ssize_t rpad; Py_ssize_t rpad;
Py_ssize_t total; Py_ssize_t total;
Py_ssize_t pos; Py_ssize_t len;
Py_ssize_t len = PyUnicode_GET_LENGTH(value); int result = -1;
PyObject *result = NULL;
Py_UCS4 maxchar; Py_UCS4 maxchar;
assert(PyUnicode_IS_READY(value));
len = PyUnicode_GET_LENGTH(value);
/* sign is not allowed on strings */ /* sign is not allowed on strings */
if (format->sign != '\0') { if (format->sign != '\0') {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
@ -741,6 +757,11 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
goto done; goto done;
} }
if (format->width == -1 && format->precision == -1) {
/* Fast path */
return _PyUnicodeWriter_WriteStr(writer, value);
}
/* if precision is specified, output no more that format.precision /* if precision is specified, output no more that format.precision
characters */ characters */
if (format->precision >= 0 && len >= format->precision) { if (format->precision >= 0 && len >= format->precision) {
@ -754,21 +775,23 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
maxchar = Py_MAX(maxchar, format->fill_char); maxchar = Py_MAX(maxchar, format->fill_char);
/* allocate the resulting string */ /* allocate the resulting string */
result = PyUnicode_New(total, maxchar); if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
if (result == NULL)
goto done; goto done;
/* Write into that space. First the padding. */ /* Write into that space. First the padding. */
pos = fill_padding(result, 0, len, result = fill_padding(writer, len,
format->fill_char=='\0'?' ':format->fill_char, format->fill_char=='\0'?' ':format->fill_char,
lpad, rpad); lpad, rpad);
if (result == -1)
goto done;
/* Then the source string. */ /* Then the source string. */
if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0) _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
Py_CLEAR(result); value, 0, len);
writer->pos += (len + rpad);
result = 0;
done: done:
assert(!result || _PyUnicode_CheckConsistency(result, 1));
return result; return result;
} }
@ -780,11 +803,11 @@ done:
typedef PyObject* typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base); (*IntOrLongToString)(PyObject *value, int base);
static PyObject * static int
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, format_long_internal(PyObject *value, const InternalFormatSpec *format,
IntOrLongToString tostring) _PyUnicodeWriter *writer)
{ {
PyObject *result = NULL; int result = -1;
Py_UCS4 maxchar = 127; Py_UCS4 maxchar = 127;
PyObject *tmp = NULL; PyObject *tmp = NULL;
Py_ssize_t inumeric_chars; Py_ssize_t inumeric_chars;
@ -798,7 +821,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
Py_ssize_t prefix = 0; Py_ssize_t prefix = 0;
NumberFieldWidths spec; NumberFieldWidths spec;
long x; long x;
int err;
/* Locale settings, either from the actual locale or /* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */ from a hard-code pseudo-locale */
@ -872,13 +894,23 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
break; break;
} }
if (format->sign != '+' && format->sign != ' '
&& format->width == -1
&& format->type != 'X' && format->type != 'n'
&& !format->thousands_separators
&& PyLong_CheckExact(value))
{
/* Fast path */
return _PyLong_FormatWriter(writer, value, base, format->alternate);
}
/* The number of prefix chars is the same as the leading /* The number of prefix chars is the same as the leading
chars to skip */ chars to skip */
if (format->alternate) if (format->alternate)
n_prefix = leading_chars_to_skip; n_prefix = leading_chars_to_skip;
/* Do the hard part, converting to a string in a given base */ /* Do the hard part, converting to a string in a given base */
tmp = tostring(value, base); tmp = _PyLong_Format(value, base);
if (tmp == NULL || PyUnicode_READY(tmp) == -1) if (tmp == NULL || PyUnicode_READY(tmp) == -1)
goto done; goto done;
@ -914,23 +946,19 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
&locale, format, &maxchar); &locale, format, &maxchar);
/* Allocate the memory. */ /* Allocate the memory. */
result = PyUnicode_New(n_total, maxchar); if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
if (!result)
goto done; goto done;
/* Populate the memory. */ /* Populate the memory. */
err = fill_number(result, 0, &spec, result = fill_number(writer, &spec,
tmp, inumeric_chars, inumeric_chars + n_digits, tmp, inumeric_chars, inumeric_chars + n_digits,
tmp, prefix, tmp, prefix,
format->fill_char == '\0' ? ' ' : format->fill_char, format->fill_char == '\0' ? ' ' : format->fill_char,
&locale, format->type == 'X'); &locale, format->type == 'X');
if (err)
Py_CLEAR(result);
done: done:
Py_XDECREF(tmp); Py_XDECREF(tmp);
free_locale_info(&locale); free_locale_info(&locale);
assert(!result || _PyUnicode_CheckConsistency(result, 1));
return result; return result;
} }
@ -938,16 +966,11 @@ done:
/*********** float formatting *******************************************/ /*********** float formatting *******************************************/
/************************************************************************/ /************************************************************************/
static PyObject*
strtounicode(char *charbuffer, Py_ssize_t len)
{
return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, charbuffer, len);
}
/* much of this is taken from unicodeobject.c */ /* much of this is taken from unicodeobject.c */
static PyObject * static int
format_float_internal(PyObject *value, format_float_internal(PyObject *value,
const InternalFormatSpec *format) const InternalFormatSpec *format,
_PyUnicodeWriter *writer)
{ {
char *buf = NULL; /* buffer returned from PyOS_double_to_string */ char *buf = NULL; /* buffer returned from PyOS_double_to_string */
Py_ssize_t n_digits; Py_ssize_t n_digits;
@ -962,12 +985,11 @@ format_float_internal(PyObject *value,
Py_ssize_t index; Py_ssize_t index;
NumberFieldWidths spec; NumberFieldWidths spec;
int flags = 0; int flags = 0;
PyObject *result = NULL; int result = -1;
Py_UCS4 maxchar = 127; Py_UCS4 maxchar = 127;
Py_UCS4 sign_char = '\0'; Py_UCS4 sign_char = '\0';
int float_type; /* Used to see if we have a nan, inf, or regular float. */ int float_type; /* Used to see if we have a nan, inf, or regular float. */
PyObject *unicode_tmp = NULL; PyObject *unicode_tmp = NULL;
int err;
/* Locale settings, either from the actual locale or /* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */ from a hard-code pseudo-locale */
@ -1024,13 +1046,25 @@ format_float_internal(PyObject *value,
/* Since there is no unicode version of PyOS_double_to_string, /* Since there is no unicode version of PyOS_double_to_string,
just use the 8 bit version and then convert to unicode. */ just use the 8 bit version and then convert to unicode. */
unicode_tmp = strtounicode(buf, n_digits); unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
PyMem_Free(buf);
if (unicode_tmp == NULL) if (unicode_tmp == NULL)
goto done; goto done;
index = 0;
if (format->sign != '+' && format->sign != ' '
&& format->width == -1
&& format->type != 'n'
&& !format->thousands_separators)
{
/* Fast path */
result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp);
Py_DECREF(unicode_tmp);
return result;
}
/* Is a sign character present in the output? If so, remember it /* Is a sign character present in the output? If so, remember it
and skip it */ and skip it */
index = 0;
if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') { if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
sign_char = '-'; sign_char = '-';
++index; ++index;
@ -1055,24 +1089,19 @@ format_float_internal(PyObject *value,
&locale, format, &maxchar); &locale, format, &maxchar);
/* Allocate the memory. */ /* Allocate the memory. */
result = PyUnicode_New(n_total, maxchar); if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
if (result == NULL)
goto done; goto done;
/* Populate the memory. */ /* Populate the memory. */
err = fill_number(result, 0, &spec, result = fill_number(writer, &spec,
unicode_tmp, index, index + n_digits, unicode_tmp, index, index + n_digits,
NULL, 0, NULL, 0,
format->fill_char == '\0' ? ' ' : format->fill_char, format->fill_char == '\0' ? ' ' : format->fill_char,
&locale, 0); &locale, 0);
if (err)
Py_CLEAR(result);
done: done:
PyMem_Free(buf);
Py_DECREF(unicode_tmp); Py_DECREF(unicode_tmp);
free_locale_info(&locale); free_locale_info(&locale);
assert(!result || _PyUnicode_CheckConsistency(result, 1));
return result; return result;
} }
@ -1080,9 +1109,10 @@ done:
/*********** complex formatting *****************************************/ /*********** complex formatting *****************************************/
/************************************************************************/ /************************************************************************/
static PyObject * static int
format_complex_internal(PyObject *value, format_complex_internal(PyObject *value,
const InternalFormatSpec *format) const InternalFormatSpec *format,
_PyUnicodeWriter *writer)
{ {
double re; double re;
double im; double im;
@ -1106,11 +1136,10 @@ format_complex_internal(PyObject *value,
NumberFieldWidths re_spec; NumberFieldWidths re_spec;
NumberFieldWidths im_spec; NumberFieldWidths im_spec;
int flags = 0; int flags = 0;
PyObject *result = NULL; int result = -1;
Py_UCS4 maxchar = 127; Py_UCS4 maxchar = 127;
int rkind; enum PyUnicode_Kind rkind;
void *rdata; void *rdata;
Py_ssize_t index;
Py_UCS4 re_sign_char = '\0'; Py_UCS4 re_sign_char = '\0';
Py_UCS4 im_sign_char = '\0'; Py_UCS4 im_sign_char = '\0';
int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
@ -1122,7 +1151,6 @@ format_complex_internal(PyObject *value,
Py_ssize_t total; Py_ssize_t total;
PyObject *re_unicode_tmp = NULL; PyObject *re_unicode_tmp = NULL;
PyObject *im_unicode_tmp = NULL; PyObject *im_unicode_tmp = NULL;
int err;
/* Locale settings, either from the actual locale or /* Locale settings, either from the actual locale or
from a hard-code pseudo-locale */ from a hard-code pseudo-locale */
@ -1191,12 +1219,12 @@ format_complex_internal(PyObject *value,
/* Since there is no unicode version of PyOS_double_to_string, /* Since there is no unicode version of PyOS_double_to_string,
just use the 8 bit version and then convert to unicode. */ just use the 8 bit version and then convert to unicode. */
re_unicode_tmp = strtounicode(re_buf, n_re_digits); re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
if (re_unicode_tmp == NULL) if (re_unicode_tmp == NULL)
goto done; goto done;
i_re = 0; i_re = 0;
im_unicode_tmp = strtounicode(im_buf, n_im_digits); im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
if (im_unicode_tmp == NULL) if (im_unicode_tmp == NULL)
goto done; goto done;
i_im = 0; i_im = 0;
@ -1261,47 +1289,49 @@ format_complex_internal(PyObject *value,
if (lpad || rpad) if (lpad || rpad)
maxchar = Py_MAX(maxchar, format->fill_char); maxchar = Py_MAX(maxchar, format->fill_char);
result = PyUnicode_New(total, maxchar); if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
if (result == NULL)
goto done; goto done;
rkind = PyUnicode_KIND(result); rkind = writer->kind;
rdata = PyUnicode_DATA(result); rdata = writer->data;
/* Populate the memory. First, the padding. */ /* Populate the memory. First, the padding. */
index = fill_padding(result, 0, result = fill_padding(writer,
n_re_total + n_im_total + 1 + add_parens * 2, n_re_total + n_im_total + 1 + add_parens * 2,
format->fill_char=='\0' ? ' ' : format->fill_char, format->fill_char=='\0' ? ' ' : format->fill_char,
lpad, rpad); lpad, rpad);
if (result == -1)
goto done;
if (add_parens) if (add_parens) {
PyUnicode_WRITE(rkind, rdata, index++, '('); PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
writer->pos++;
}
if (!skip_re) { if (!skip_re) {
err = fill_number(result, index, &re_spec, result = fill_number(writer, &re_spec,
re_unicode_tmp, i_re, i_re + n_re_digits, re_unicode_tmp, i_re, i_re + n_re_digits,
NULL, 0, NULL, 0,
0, 0,
&locale, 0); &locale, 0);
if (err) { if (result == -1)
Py_CLEAR(result);
goto done; goto done;
}
index += n_re_total;
} }
err = fill_number(result, index, &im_spec, result = fill_number(writer, &im_spec,
im_unicode_tmp, i_im, i_im + n_im_digits, im_unicode_tmp, i_im, i_im + n_im_digits,
NULL, 0, NULL, 0,
0, 0,
&locale, 0); &locale, 0);
if (err) { if (result == -1)
Py_CLEAR(result);
goto done; goto done;
} PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
index += n_im_total; writer->pos++;
PyUnicode_WRITE(rkind, rdata, index++, 'j');
if (add_parens) if (add_parens) {
PyUnicode_WRITE(rkind, rdata, index++, ')'); PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
writer->pos++;
}
writer->pos += rpad;
done: done:
PyMem_Free(re_buf); PyMem_Free(re_buf);
@ -1309,61 +1339,79 @@ done:
Py_XDECREF(re_unicode_tmp); Py_XDECREF(re_unicode_tmp);
Py_XDECREF(im_unicode_tmp); Py_XDECREF(im_unicode_tmp);
free_locale_info(&locale); free_locale_info(&locale);
assert(!result || _PyUnicode_CheckConsistency(result, 1));
return result; return result;
} }
/************************************************************************/ /************************************************************************/
/*********** built in formatters ****************************************/ /*********** built in formatters ****************************************/
/************************************************************************/ /************************************************************************/
PyObject * int
_PyUnicode_FormatAdvanced(PyObject *obj, format_obj(PyObject *obj, _PyUnicodeWriter *writer)
PyObject *format_spec, {
Py_ssize_t start, Py_ssize_t end) PyObject *str;
int err;
str = PyObject_Str(obj);
if (str == NULL)
return -1;
err = _PyUnicodeWriter_WriteStr(writer, str);
Py_DECREF(str);
return err;
}
int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
PyObject *obj,
PyObject *format_spec,
Py_ssize_t start, Py_ssize_t end)
{ {
InternalFormatSpec format; InternalFormatSpec format;
PyObject *result;
assert(PyUnicode_Check(obj));
/* check for the special case of zero length format spec, make /* check for the special case of zero length format spec, make
it equivalent to str(obj) */ it equivalent to str(obj) */
if (start == end) if (start == end) {
return PyObject_Str(obj); if (PyUnicode_CheckExact(obj))
return _PyUnicodeWriter_WriteStr(writer, obj);
else
return format_obj(obj, writer);
}
/* parse the format_spec */ /* parse the format_spec */
if (!parse_internal_render_format_spec(format_spec, start, end, if (!parse_internal_render_format_spec(format_spec, start, end,
&format, 's', '<')) &format, 's', '<'))
return NULL; return -1;
/* type conversion? */ /* type conversion? */
switch (format.type) { switch (format.type) {
case 's': case 's':
/* no type conversion needed, already a string. do the formatting */ /* no type conversion needed, already a string. do the formatting */
result = format_string_internal(obj, &format); return format_string_internal(obj, &format, writer);
if (result != NULL)
assert(_PyUnicode_CheckConsistency(result, 1));
break;
default: default:
/* unknown */ /* unknown */
unknown_presentation_type(format.type, obj->ob_type->tp_name); unknown_presentation_type(format.type, obj->ob_type->tp_name);
result = NULL; return -1;
} }
return result;
} }
static PyObject* int
format_int_or_long(PyObject* obj, PyObject* format_spec, _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
Py_ssize_t start, Py_ssize_t end, PyObject *obj,
IntOrLongToString tostring) PyObject *format_spec,
Py_ssize_t start, Py_ssize_t end)
{ {
PyObject *result = NULL; PyObject *tmp = NULL, *str = NULL;
PyObject *tmp = NULL;
InternalFormatSpec format; InternalFormatSpec format;
int result = -1;
/* check for the special case of zero length format spec, make /* check for the special case of zero length format spec, make
it equivalent to str(obj) */ it equivalent to str(obj) */
if (start == end) { if (start == end) {
result = PyObject_Str(obj); if (PyLong_CheckExact(obj))
goto done; return _PyLong_FormatWriter(writer, obj, 10, 0);
else
return format_obj(obj, writer);
} }
/* parse the format_spec */ /* parse the format_spec */
@ -1382,7 +1430,7 @@ format_int_or_long(PyObject* obj, PyObject* format_spec,
case 'n': case 'n':
/* no type conversion needed, already an int (or long). do /* no type conversion needed, already an int (or long). do
the formatting */ the formatting */
result = format_int_or_long_internal(obj, &format, tostring); result = format_long_internal(obj, &format, writer);
break; break;
case 'e': case 'e':
@ -1396,7 +1444,7 @@ format_int_or_long(PyObject* obj, PyObject* format_spec,
tmp = PyNumber_Float(obj); tmp = PyNumber_Float(obj);
if (tmp == NULL) if (tmp == NULL)
goto done; goto done;
result = format_float_internal(tmp, &format); result = format_float_internal(tmp, &format, writer);
break; break;
default: default:
@ -1407,41 +1455,27 @@ format_int_or_long(PyObject* obj, PyObject* format_spec,
done: done:
Py_XDECREF(tmp); Py_XDECREF(tmp);
Py_XDECREF(str);
return result; return result;
} }
/* Need to define long_format as a function that will convert a long int
to a string. In 3.0, _PyLong_Format has the correct signature. */ _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
#define long_format _PyLong_Format PyObject *obj,
PyObject *format_spec,
PyObject * Py_ssize_t start, Py_ssize_t end)
_PyLong_FormatAdvanced(PyObject *obj,
PyObject *format_spec,
Py_ssize_t start, Py_ssize_t end)
{ {
return format_int_or_long(obj, format_spec, start, end,
long_format);
}
PyObject *
_PyFloat_FormatAdvanced(PyObject *obj,
PyObject *format_spec,
Py_ssize_t start, Py_ssize_t end)
{
PyObject *result = NULL;
InternalFormatSpec format; InternalFormatSpec format;
/* check for the special case of zero length format spec, make /* check for the special case of zero length format spec, make
it equivalent to str(obj) */ it equivalent to str(obj) */
if (start == end) { if (start == end)
result = PyObject_Str(obj); return format_obj(obj, writer);
goto done;
}
/* parse the format_spec */ /* parse the format_spec */
if (!parse_internal_render_format_spec(format_spec, start, end, if (!parse_internal_render_format_spec(format_spec, start, end,
&format, '\0', '>')) &format, '\0', '>'))
goto done; return -1;
/* type conversion? */ /* type conversion? */
switch (format.type) { switch (format.type) {
@ -1455,38 +1489,32 @@ _PyFloat_FormatAdvanced(PyObject *obj,
case 'n': case 'n':
case '%': case '%':
/* no conversion, already a float. do the formatting */ /* no conversion, already a float. do the formatting */
result = format_float_internal(obj, &format); return format_float_internal(obj, &format, writer);
break;
default: default:
/* unknown */ /* unknown */
unknown_presentation_type(format.type, obj->ob_type->tp_name); unknown_presentation_type(format.type, obj->ob_type->tp_name);
goto done; return -1;
} }
done:
return result;
} }
PyObject * int
_PyComplex_FormatAdvanced(PyObject *obj, _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
PyObject *format_spec, PyObject *obj,
Py_ssize_t start, Py_ssize_t end) PyObject *format_spec,
Py_ssize_t start, Py_ssize_t end)
{ {
PyObject *result = NULL;
InternalFormatSpec format; InternalFormatSpec format;
/* check for the special case of zero length format spec, make /* check for the special case of zero length format spec, make
it equivalent to str(obj) */ it equivalent to str(obj) */
if (start == end) { if (start == end)
result = PyObject_Str(obj); return format_obj(obj, writer);
goto done;
}
/* parse the format_spec */ /* parse the format_spec */
if (!parse_internal_render_format_spec(format_spec, start, end, if (!parse_internal_render_format_spec(format_spec, start, end,
&format, '\0', '>')) &format, '\0', '>'))
goto done; return -1;
/* type conversion? */ /* type conversion? */
switch (format.type) { switch (format.type) {
@ -1499,15 +1527,11 @@ _PyComplex_FormatAdvanced(PyObject *obj,
case 'G': case 'G':
case 'n': case 'n':
/* no conversion, already a complex. do the formatting */ /* no conversion, already a complex. do the formatting */
result = format_complex_internal(obj, &format); return format_complex_internal(obj, &format, writer);
break;
default: default:
/* unknown */ /* unknown */
unknown_presentation_type(format.type, obj->ob_type->tp_name); unknown_presentation_type(format.type, obj->ob_type->tp_name);
goto done; return -1;
} }
done:
return result;
} }

View File

@ -1167,8 +1167,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
case 'U': { /* PyUnicode object */ case 'U': { /* PyUnicode object */
PyObject **p = va_arg(*p_va, PyObject **); PyObject **p = va_arg(*p_va, PyObject **);
if (PyUnicode_Check(arg)) if (PyUnicode_Check(arg)) {
if (PyUnicode_READY(arg) == -1)
RETURN_ERR_OCCURRED;
*p = arg; *p = arg;
}
else else
return converterr("str", arg, msgbuf, bufsize); return converterr("str", arg, msgbuf, bufsize);
break; break;