bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623)

Fix str.format(), float.__format__() and complex.__format__() methods
for non-ASCII decimal point when using the "n" formatter.

Changes:

* Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires
  a _PyUnicodeWriter object for the buffer and a Python str object
  for digits.
* Rename FILL() macro to unicode_fill(), convert it to static inline function,
  add "assert(0 <= start);" and rework its code.
This commit is contained in:
Victor Stinner 2018-11-26 13:40:01 +01:00 committed by GitHub
parent df108dc661
commit 59423e3ddd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 228 additions and 252 deletions

View File

@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
see Objects/stringlib/localeutil.h */ see Objects/stringlib/localeutil.h */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
PyObject *unicode, _PyUnicodeWriter *writer,
Py_ssize_t index,
Py_ssize_t n_buffer, Py_ssize_t n_buffer,
void *digits, PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits, Py_ssize_t n_digits,
Py_ssize_t min_width, Py_ssize_t min_width,
const char *grouping, const char *grouping,

View File

@ -0,0 +1,3 @@
Fix :meth:`str.format`, :meth:`float.__format__` and
:meth:`complex.__format__` methods for non-ASCII decimal point when using
the "n" formatter.

View File

@ -1,28 +1,24 @@
/* stringlib: locale related helpers implementation */ /* _PyUnicode_InsertThousandsGrouping() helper functions */
#include <locale.h>
#if !STRINGLIB_IS_UNICODE
# error "localeutil.h is specific to Unicode"
#endif
typedef struct { typedef struct {
const char *grouping; const char *grouping;
char previous; char previous;
Py_ssize_t i; /* Where we're currently pointing in grouping. */ Py_ssize_t i; /* Where we're currently pointing in grouping. */
} STRINGLIB(GroupGenerator); } GroupGenerator;
static void static void
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping) GroupGenerator_init(GroupGenerator *self, const char *grouping)
{ {
self->grouping = grouping; self->grouping = grouping;
self->i = 0; self->i = 0;
self->previous = 0; self->previous = 0;
} }
/* Returns the next grouping, or 0 to signify end. */ /* Returns the next grouping, or 0 to signify end. */
static Py_ssize_t static Py_ssize_t
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) GroupGenerator_next(GroupGenerator *self)
{ {
/* Note that we don't really do much error checking here. If a /* Note that we don't really do much error checking here. If a
grouping string contains just CHAR_MAX, for example, then just grouping string contains just CHAR_MAX, for example, then just
@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
} }
} }
/* Fill in some digits, leading zeros, and thousands separator. All /* Fill in some digits, leading zeros, and thousands separator. All
are optional, depending on when we're called. */ are optional, depending on when we're called. */
static void static void
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep, PyObject *digits, Py_ssize_t *digits_pos,
Py_ssize_t thousands_sep_len) Py_ssize_t n_chars, Py_ssize_t n_zeros,
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
Py_UCS4 *maxchar)
{ {
Py_ssize_t i; if (!writer) {
/* if maxchar > 127, maxchar is already set */
if (*maxchar == 127 && thousands_sep) {
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
*maxchar = Py_MAX(*maxchar, maxchar2);
}
return;
}
if (thousands_sep) { if (thousands_sep) {
*buffer_end -= thousands_sep_len; *buffer_pos -= thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */ /* Copy the thousands_sep chars into the buffer. */
memcpy(*buffer_end, thousands_sep, _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
thousands_sep_len * STRINGLIB_SIZEOF_CHAR); thousands_sep, 0,
thousands_sep_len);
} }
*buffer_end -= n_chars; *buffer_pos -= n_chars;
*digits_end -= n_chars; *digits_pos -= n_chars;
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR)); _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
digits, *digits_pos,
n_chars);
*buffer_end -= n_zeros; if (n_zeros) {
for (i = 0; i < n_zeros; i++) *buffer_pos -= n_zeros;
(*buffer_end)[i] = '0'; enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
void *data = PyUnicode_DATA(writer->buffer);
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
}
} }
/**
* InsertThousandsGrouping:
* @buffer: A pointer to the start of the output buffer, or NULL to
* only count the characters that would be written.
* @n_buffer: Number of characters in @buffer. Output is written from
* right to left, ending at @buffer + @n_buffer.
* @digits: A pointer to the digits we're reading from. If @buffer
* is NULL, this is unused.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @min_width: The minimum width of the digits in the output string.
* Output will be zero-padded on the left to fill.
* @grouping: see definition in localeconv().
* @thousands_sep: see definition in localeconv().
* @thousands_sep_len: Number of characters in @thousands_sep.
*
* There are 2 modes: counting and filling. If @buffer is NULL,
* we are in counting mode, else filling mode.
* If counting, nothing is written and only the required size is computed.
* If filling, the digits, the zero padding and the thousands separators
* (as defined by grouping and thousands_sep) are copied into @buffer;
* the buffer is assumed to be large enough, no bounds check is done here.
*
* Return value: the number of characters of the grouped output (digits,
* zero padding and separators), in both modes. This function has no
* error path.
*
* This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string.
*
* As closely as possible, this code mimics the logic in decimal.py's
_insert_thousands_sep().
**/
static Py_ssize_t
STRINGLIB(InsertThousandsGrouping)(
STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
STRINGLIB_CHAR *thousands_sep,
Py_ssize_t thousands_sep_len)
{
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
int use_separator = 0; /* First time through, don't append the
separator. They only go between
groups. */
/* Both cursors start one past the end and are moved backwards by
STRINGLIB(fill); they stay NULL in counting mode. */
STRINGLIB_CHAR *buffer_end = NULL;
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
returns 0. */
STRINGLIB(GroupGenerator) groupgen;
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
if (buffer) {
buffer_end = buffer + n_buffer;
digits_end = digits + n_digits;
}
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
/* Clamp the group width to what is still needed (at least 1). */
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
/* Count only, don't do anything. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
/* Use a separator next time. */
use_separator = 1;
remaining -= n_chars;
min_width -= l;
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
/* The separator written before the next group also counts towards
the minimum width. */
min_width -= thousands_sep_len;
}
if (!loop_broken) {
/* We left the loop without using a break statement. */
/* The generator ran out of group widths: emit everything that is
left as one final group. */
l = Py_MAX(Py_MAX(remaining, min_width), 1);
n_zeros = Py_MAX(0, l - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
}
return count;
}

View File

@ -220,6 +220,38 @@ static PyObject *unicode_empty = NULL;
return unicode_empty; \ return unicode_empty; \
} while (0) } while (0)
/* Fill `length` code units of `data`, starting at index `start`, with
   `value`.

   kind:   storage width of `data` (1, 2 or 4 byte code units); must not be
           PyUnicode_WCHAR_KIND.
   data:   start of the character buffer, interpreted according to `kind`.
   value:  code point to store; must be representable in `kind`.
   start:  index of the first code unit to overwrite (>= 0).
   length: number of code units to overwrite (>= 0).

   No bounds checking is done against the size of `data`.  The assertions
   catch, in debug builds, a negative `length` (which would otherwise be
   converted to a huge size_t for memset()) and a `value` that would be
   silently truncated by the narrowing casts below. */
static inline void
unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
             Py_ssize_t start, Py_ssize_t length)
{
    assert(0 <= start);
    assert(0 <= length);
    assert(kind != PyUnicode_WCHAR_KIND);
    switch (kind) {
    case PyUnicode_1BYTE_KIND: {
        assert(value <= 0xff);
        Py_UCS1 ch = (unsigned char)value;
        Py_UCS1 *to = (Py_UCS1 *)data + start;
        memset(to, ch, (size_t)length);
        break;
    }
    case PyUnicode_2BYTE_KIND: {
        assert(value <= 0xffff);
        Py_UCS2 ch = (Py_UCS2)value;
        Py_UCS2 *to = (Py_UCS2 *)data + start;
        const Py_UCS2 *end = to + length;
        for (; to < end; ++to) {
            *to = ch;
        }
        break;
    }
    case PyUnicode_4BYTE_KIND: {
        Py_UCS4 ch = value;
        Py_UCS4 *to = (Py_UCS4 *)data + start;
        const Py_UCS4 *end = to + length;
        for (; to < end; ++to) {
            *to = ch;
        }
        break;
    }
    default: Py_UNREACHABLE();
    }
}
/* Forward declaration */ /* Forward declaration */
static inline int static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch); _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
@ -790,7 +822,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/count.h" #include "stringlib/count.h"
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/find_max_char.h" #include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h" #include "stringlib/undef.h"
#include "stringlib/ucs1lib.h" #include "stringlib/ucs1lib.h"
@ -801,7 +832,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/replace.h" #include "stringlib/replace.h"
#include "stringlib/find_max_char.h" #include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h" #include "stringlib/undef.h"
#include "stringlib/ucs2lib.h" #include "stringlib/ucs2lib.h"
@ -812,7 +842,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/replace.h" #include "stringlib/replace.h"
#include "stringlib/find_max_char.h" #include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h" #include "stringlib/undef.h"
#include "stringlib/ucs4lib.h" #include "stringlib/ucs4lib.h"
@ -823,7 +852,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find.h" #include "stringlib/find.h"
#include "stringlib/replace.h" #include "stringlib/replace.h"
#include "stringlib/find_max_char.h" #include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h" #include "stringlib/undef.h"
#include "stringlib/unicodedefs.h" #include "stringlib/unicodedefs.h"
@ -9323,86 +9351,149 @@ any_find_slice(PyObject* s1, PyObject* s2,
return result; return result;
} }
/* _PyUnicode_InsertThousandsGrouping() helper functions */
#include "stringlib/localeutil.h"
/**
* InsertThousandsGrouping:
* @writer: Unicode writer, or NULL to only count (see below).
* @n_buffer: Number of characters reserved for the output. In filling
* mode the output is written from right to left into the
* writer's buffer, ending at writer->pos + @n_buffer.
* @digits: String holding the digits we're reading from. Must be NULL
* if @writer is NULL.
* @d_pos: Start of the digits in the @digits string.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @min_width: The minimum width of the digits in the output string.
* Output will be zero-padded on the left to fill.
* @grouping: see definition in localeconv().
* @thousands_sep: see definition in localeconv().
* @maxchar: In counting mode, set to 127, or to the maximum character
* of @thousands_sep if that is larger and a separator is
* inserted. Must be NULL in filling mode.
*
* There are 2 modes: counting and filling. If @writer is NULL,
* we are in counting mode, else filling mode.
* If counting, the required buffer size is returned and nothing is written.
* If filling, the writer's buffer is assumed to be large enough; the
* digits, the zero padding and the thousands grouping characters (as
* defined by grouping and thousands_sep) are copied into @writer.
*
* Return value: -1 on error, number of characters otherwise.
**/
Py_ssize_t Py_ssize_t
_PyUnicode_InsertThousandsGrouping( _PyUnicode_InsertThousandsGrouping(
PyObject *unicode, Py_ssize_t index, _PyUnicodeWriter *writer,
Py_ssize_t n_buffer, Py_ssize_t n_buffer,
void *digits, Py_ssize_t n_digits, PyObject *digits,
Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width, Py_ssize_t min_width,
const char *grouping, PyObject *thousands_sep, const char *grouping,
PyObject *thousands_sep,
Py_UCS4 *maxchar) Py_UCS4 *maxchar)
{ {
unsigned int kind, thousands_sep_kind; if (writer) {
char *data, *thousands_sep_data; assert(digits != NULL);
Py_ssize_t thousands_sep_len; assert(maxchar == NULL);
Py_ssize_t len;
if (unicode != NULL) {
kind = PyUnicode_KIND(unicode);
data = (char *) PyUnicode_DATA(unicode) + index * kind;
} }
else { else {
kind = PyUnicode_1BYTE_KIND; assert(digits == NULL);
data = NULL; assert(maxchar != NULL);
} }
thousands_sep_kind = PyUnicode_KIND(thousands_sep); assert(0 <= d_pos);
thousands_sep_data = PyUnicode_DATA(thousands_sep); assert(0 <= n_digits);
thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); assert(0 <= min_width);
if (unicode != NULL && thousands_sep_kind != kind) { assert(grouping != NULL);
if (thousands_sep_kind < kind) {
thousands_sep_data = _PyUnicode_AsKind(thousands_sep, kind); if (digits != NULL) {
if (!thousands_sep_data) if (PyUnicode_READY(digits) == -1) {
return -1; return -1;
}
else {
data = _PyUnicode_AsKind(unicode, thousands_sep_kind);
if (!data)
return -1;
} }
} }
if (PyUnicode_READY(thousands_sep) == -1) {
return -1;
}
switch (kind) { Py_ssize_t count = 0;
case PyUnicode_1BYTE_KIND: Py_ssize_t n_zeros;
if (unicode != NULL && PyUnicode_IS_ASCII(unicode)) int loop_broken = 0;
len = asciilib_InsertThousandsGrouping( int use_separator = 0; /* First time through, don't append the
(Py_UCS1 *) data, n_buffer, (Py_UCS1 *) digits, n_digits, separator. They only go between
min_width, grouping, groups. */
(Py_UCS1 *) thousands_sep_data, thousands_sep_len); Py_ssize_t buffer_pos;
else Py_ssize_t digits_pos;
len = ucs1lib_InsertThousandsGrouping( Py_ssize_t len;
(Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, Py_ssize_t n_chars;
min_width, grouping, Py_ssize_t remaining = n_digits; /* Number of chars remaining to
(Py_UCS1 *) thousands_sep_data, thousands_sep_len); be looked at */
break; /* A generator that returns all of the grouping widths, until it
case PyUnicode_2BYTE_KIND: returns 0. */
len = ucs2lib_InsertThousandsGrouping( GroupGenerator groupgen;
(Py_UCS2 *) data, n_buffer, (Py_UCS2 *) digits, n_digits, GroupGenerator_init(&groupgen, grouping);
min_width, grouping, const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
(Py_UCS2 *) thousands_sep_data, thousands_sep_len);
break; /* if digits are not grouped, thousands separator
case PyUnicode_4BYTE_KIND: should be an empty string */
len = ucs4lib_InsertThousandsGrouping( assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
(Py_UCS4 *) data, n_buffer, (Py_UCS4 *) digits, n_digits,
min_width, grouping, digits_pos = d_pos + n_digits;
(Py_UCS4 *) thousands_sep_data, thousands_sep_len); if (writer) {
break; buffer_pos = writer->pos + n_buffer;
default: assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
Py_UNREACHABLE(); assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
} }
if (unicode != NULL && thousands_sep_kind != kind) { else {
if (thousands_sep_kind < kind) buffer_pos = n_buffer;
PyMem_Free(thousands_sep_data);
else
PyMem_Free(data);
} }
if (unicode == NULL) {
if (!writer) {
*maxchar = 127; *maxchar = 127;
if (len != n_digits) {
*maxchar = Py_MAX(*maxchar,
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
}
} }
return len;
while ((len = GroupGenerator_next(&groupgen)) > 0) {
len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
n_zeros = Py_MAX(0, len - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, len));
/* Use n_zero zero's and n_chars chars */
/* Count only, don't do anything. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
/* Copy into the writer. */
InsertThousandsGrouping_fill(writer, &buffer_pos,
digits, &digits_pos,
n_chars, n_zeros,
use_separator ? thousands_sep : NULL,
thousands_sep_len, maxchar);
/* Use a separator next time. */
use_separator = 1;
remaining -= n_chars;
min_width -= len;
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
min_width -= thousands_sep_len;
}
if (!loop_broken) {
/* We left the loop without using a break statement. */
len = Py_MAX(Py_MAX(remaining, min_width), 1);
n_zeros = Py_MAX(0, len - remaining);
n_chars = Py_MAX(0, Py_MIN(remaining, len));
/* Use n_zero zero's and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
/* Copy into the writer. */
InsertThousandsGrouping_fill(writer, &buffer_pos,
digits, &digits_pos,
n_chars, n_zeros,
use_separator ? thousands_sep : NULL,
thousands_sep_len, maxchar);
}
return count;
} }
@ -10021,30 +10112,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
return NULL; return NULL;
} }
/* FILL(kind, data, value, start, length): store `value` into `length`
   consecutive code units of `data`, starting at index `start`.  `kind`
   selects the code unit width (1, 2 or 4 bytes) and must not be
   PyUnicode_WCHAR_KIND.
   Being a macro, the `value` and `length` arguments are evaluated once per
   iteration in the 2-byte and 4-byte loops, and `value` is not
   parenthesized in the 1-byte cast, so pass only simple, side-effect free
   expressions. */
#define FILL(kind, data, value, start, length) \
do { \
Py_ssize_t i_ = 0; \
assert(kind != PyUnicode_WCHAR_KIND); \
switch ((kind)) { \
case PyUnicode_1BYTE_KIND: { \
unsigned char * to_ = (unsigned char *)((data)) + (start); \
memset(to_, (unsigned char)value, (length)); \
break; \
} \
case PyUnicode_2BYTE_KIND: { \
Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \
for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
break; \
} \
case PyUnicode_4BYTE_KIND: { \
Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \
for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
break; \
} \
default: Py_UNREACHABLE(); \
} \
} while (0)
void void
_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char) Py_UCS4 fill_char)
@ -10056,7 +10123,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode)); assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
assert(start >= 0); assert(start >= 0);
assert(start + length <= PyUnicode_GET_LENGTH(unicode)); assert(start + length <= PyUnicode_GET_LENGTH(unicode));
FILL(kind, data, fill_char, start, length); unicode_fill(kind, data, fill_char, start, length);
} }
Py_ssize_t Py_ssize_t
@ -10127,9 +10194,9 @@ pad(PyObject *self,
kind = PyUnicode_KIND(u); kind = PyUnicode_KIND(u);
data = PyUnicode_DATA(u); data = PyUnicode_DATA(u);
if (left) if (left)
FILL(kind, data, fill, 0, left); unicode_fill(kind, data, fill, 0, left);
if (right) if (right)
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
_PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self)); _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
assert(_PyUnicode_CheckConsistency(u, 1)); assert(_PyUnicode_CheckConsistency(u, 1));
return u; return u;
@ -11516,7 +11583,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
if (tabsize > 0) { if (tabsize > 0) {
incr = tabsize - (line_pos % tabsize); incr = tabsize - (line_pos % tabsize);
line_pos += incr; line_pos += incr;
FILL(kind, dest_data, ' ', j, incr); unicode_fill(kind, dest_data, ' ', j, incr);
j += incr; j += incr;
} }
} }
@ -14792,7 +14859,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
/* Pad left with the fill character if needed */ /* Pad left with the fill character if needed */
if (arg->width > len && !(arg->flags & F_LJUST)) { if (arg->width > len && !(arg->flags & F_LJUST)) {
sublen = arg->width - len; sublen = arg->width - len;
FILL(writer->kind, writer->data, fill, writer->pos, sublen); unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
writer->pos += sublen; writer->pos += sublen;
arg->width = len; arg->width = len;
} }
@ -14824,7 +14891,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
/* Pad right with the fill character if needed */ /* Pad right with the fill character if needed */
if (arg->width > len) { if (arg->width > len) {
sublen = arg->width - len; sublen = arg->width - len;
FILL(writer->kind, writer->data, ' ', writer->pos, sublen); unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
writer->pos += sublen; writer->pos += sublen;
} }
return 0; return 0;

View File

@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
/* not all fields of format are used. for example, precision is /* not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier about what it does? or is passing a single format parameter easier
and more efficient enough to justify a little obfuscation? */ and more efficient enough to justify a little obfuscation?
Return -1 on error. */
static Py_ssize_t static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 grouping_maxchar; Py_UCS4 grouping_maxchar;
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
NULL, 0, NULL, 0,
0, NULL, NULL, 0, spec->n_digits,
spec->n_digits, spec->n_min_width, spec->n_min_width,
locale->grouping, locale->thousands_sep, &grouping_maxchar); locale->grouping, locale->thousands_sep, &grouping_maxchar);
if (spec->n_grouped_digits == -1) {
return -1;
}
*maxchar = Py_MAX(*maxchar, grouping_maxchar); *maxchar = Py_MAX(*maxchar, grouping_maxchar);
} }
@ -635,26 +639,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
/* Only for type 'c' special case, it has no digits. */ /* Only for type 'c' special case, it has no digits. */
if (spec->n_digits != 0) { if (spec->n_digits != 0) {
/* Fill the digits with InsertThousandsGrouping. */ /* Fill the digits with InsertThousandsGrouping. */
char *pdigits;
if (PyUnicode_READY(digits))
return -1;
pdigits = PyUnicode_DATA(digits);
if (PyUnicode_KIND(digits) < kind) {
pdigits = _PyUnicode_AsKind(digits, kind);
if (pdigits == NULL)
return -1;
}
r = _PyUnicode_InsertThousandsGrouping( r = _PyUnicode_InsertThousandsGrouping(
writer->buffer, writer->pos, writer, spec->n_grouped_digits,
spec->n_grouped_digits, digits, d_pos, spec->n_digits,
pdigits + kind * d_pos, spec->n_min_width,
spec->n_digits, spec->n_min_width,
locale->grouping, locale->thousands_sep, NULL); locale->grouping, locale->thousands_sep, NULL);
if (r == -1) if (r == -1)
return -1; return -1;
assert(r == spec->n_grouped_digits); assert(r == spec->n_grouped_digits);
if (PyUnicode_KIND(digits) < kind)
PyMem_Free(pdigits);
d_pos += spec->n_digits; d_pos += spec->n_digits;
} }
if (toupper) { if (toupper) {
@ -994,6 +986,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
inumeric_chars + n_digits, n_remainder, 0, inumeric_chars + n_digits, n_remainder, 0,
&locale, format, &maxchar); &locale, format, &maxchar);
if (n_total == -1) {
goto done;
}
/* Allocate the memory. */ /* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
@ -1139,6 +1134,9 @@ format_float_internal(PyObject *value,
n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
index + n_digits, n_remainder, has_decimal, index + n_digits, n_remainder, has_decimal,
&locale, format, &maxchar); &locale, format, &maxchar);
if (n_total == -1) {
goto done;
}
/* Allocate the memory. */ /* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
@ -1322,6 +1320,9 @@ format_complex_internal(PyObject *value,
i_re, i_re + n_re_digits, n_re_remainder, i_re, i_re + n_re_digits, n_re_remainder,
re_has_decimal, &locale, &tmp_format, re_has_decimal, &locale, &tmp_format,
&maxchar); &maxchar);
if (n_re_total == -1) {
goto done;
}
/* Same formatting, but always include a sign, unless the real part is /* Same formatting, but always include a sign, unless the real part is
* going to be omitted, in which case we use whatever sign convention was * going to be omitted, in which case we use whatever sign convention was
@ -1332,6 +1333,9 @@ format_complex_internal(PyObject *value,
i_im, i_im + n_im_digits, n_im_remainder, i_im, i_im + n_im_digits, n_im_remainder,
im_has_decimal, &locale, &tmp_format, im_has_decimal, &locale, &tmp_format,
&maxchar); &maxchar);
if (n_im_total == -1) {
goto done;
}
if (skip_re) if (skip_re)
n_re_total = 0; n_re_total = 0;