bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623)
Fix str.format(), float.__format__() and complex.__format__() methods for non-ASCII decimal point when using the "n" formatter. Changes: * Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires a _PyUnicodeWriter object for the buffer and a Python str object for digits. * Rename FILL() macro to unicode_fill(), convert it to static inline function, add "assert(0 <= start);" and rework its code.
This commit is contained in:
parent
df108dc661
commit
59423e3ddd
|
@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
|
||||||
see Objects/stringlib/localeutil.h */
|
see Objects/stringlib/localeutil.h */
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
|
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
|
||||||
PyObject *unicode,
|
_PyUnicodeWriter *writer,
|
||||||
Py_ssize_t index,
|
|
||||||
Py_ssize_t n_buffer,
|
Py_ssize_t n_buffer,
|
||||||
void *digits,
|
PyObject *digits,
|
||||||
|
Py_ssize_t d_pos,
|
||||||
Py_ssize_t n_digits,
|
Py_ssize_t n_digits,
|
||||||
Py_ssize_t min_width,
|
Py_ssize_t min_width,
|
||||||
const char *grouping,
|
const char *grouping,
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix :meth:`str.format`, :meth:`float.__format__` and
|
||||||
|
:meth:`complex.__format__` methods for non-ASCII decimal point when using
|
||||||
|
the "n" formatter.
|
|
@ -1,28 +1,24 @@
|
||||||
/* stringlib: locale related helpers implementation */
|
/* _PyUnicode_InsertThousandsGrouping() helper functions */
|
||||||
|
|
||||||
#include <locale.h>
|
|
||||||
|
|
||||||
#if !STRINGLIB_IS_UNICODE
|
|
||||||
# error "localeutil.h is specific to Unicode"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *grouping;
|
const char *grouping;
|
||||||
char previous;
|
char previous;
|
||||||
Py_ssize_t i; /* Where we're currently pointing in grouping. */
|
Py_ssize_t i; /* Where we're currently pointing in grouping. */
|
||||||
} STRINGLIB(GroupGenerator);
|
} GroupGenerator;
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping)
|
GroupGenerator_init(GroupGenerator *self, const char *grouping)
|
||||||
{
|
{
|
||||||
self->grouping = grouping;
|
self->grouping = grouping;
|
||||||
self->i = 0;
|
self->i = 0;
|
||||||
self->previous = 0;
|
self->previous = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Returns the next grouping, or 0 to signify end. */
|
/* Returns the next grouping, or 0 to signify end. */
|
||||||
static Py_ssize_t
|
static Py_ssize_t
|
||||||
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
|
GroupGenerator_next(GroupGenerator *self)
|
||||||
{
|
{
|
||||||
/* Note that we don't really do much error checking here. If a
|
/* Note that we don't really do much error checking here. If a
|
||||||
grouping string contains just CHAR_MAX, for example, then just
|
grouping string contains just CHAR_MAX, for example, then just
|
||||||
|
@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Fill in some digits, leading zeros, and thousands separator. All
|
/* Fill in some digits, leading zeros, and thousands separator. All
|
||||||
are optional, depending on when we're called. */
|
are optional, depending on when we're called. */
|
||||||
static void
|
static void
|
||||||
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
|
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
|
||||||
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep,
|
PyObject *digits, Py_ssize_t *digits_pos,
|
||||||
Py_ssize_t thousands_sep_len)
|
Py_ssize_t n_chars, Py_ssize_t n_zeros,
|
||||||
|
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
|
||||||
|
Py_UCS4 *maxchar)
|
||||||
{
|
{
|
||||||
Py_ssize_t i;
|
if (!writer) {
|
||||||
|
/* if maxchar > 127, maxchar is already set */
|
||||||
|
if (*maxchar == 127 && thousands_sep) {
|
||||||
|
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
|
||||||
|
*maxchar = Py_MAX(*maxchar, maxchar2);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (thousands_sep) {
|
if (thousands_sep) {
|
||||||
*buffer_end -= thousands_sep_len;
|
*buffer_pos -= thousands_sep_len;
|
||||||
|
|
||||||
/* Copy the thousands_sep chars into the buffer. */
|
/* Copy the thousands_sep chars into the buffer. */
|
||||||
memcpy(*buffer_end, thousands_sep,
|
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||||
thousands_sep_len * STRINGLIB_SIZEOF_CHAR);
|
thousands_sep, 0,
|
||||||
|
thousands_sep_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
*buffer_end -= n_chars;
|
*buffer_pos -= n_chars;
|
||||||
*digits_end -= n_chars;
|
*digits_pos -= n_chars;
|
||||||
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
|
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
|
||||||
|
digits, *digits_pos,
|
||||||
|
n_chars);
|
||||||
|
|
||||||
*buffer_end -= n_zeros;
|
if (n_zeros) {
|
||||||
for (i = 0; i < n_zeros; i++)
|
*buffer_pos -= n_zeros;
|
||||||
(*buffer_end)[i] = '0';
|
enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
|
||||||
}
|
void *data = PyUnicode_DATA(writer->buffer);
|
||||||
|
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
|
||||||
/**
|
|
||||||
* InsertThousandsGrouping:
|
|
||||||
* @buffer: A pointer to the start of a string.
|
|
||||||
* @n_buffer: Number of characters in @buffer.
|
|
||||||
* @digits: A pointer to the digits we're reading from. If count
|
|
||||||
* is non-NULL, this is unused.
|
|
||||||
* @n_digits: The number of digits in the string, in which we want
|
|
||||||
* to put the grouping chars.
|
|
||||||
* @min_width: The minimum width of the digits in the output string.
|
|
||||||
* Output will be zero-padded on the left to fill.
|
|
||||||
* @grouping: see definition in localeconv().
|
|
||||||
* @thousands_sep: see definition in localeconv().
|
|
||||||
*
|
|
||||||
* There are 2 modes: counting and filling. If @buffer is NULL,
|
|
||||||
* we are in counting mode, else filling mode.
|
|
||||||
* If counting, the required buffer size is returned.
|
|
||||||
* If filling, we know the buffer will be large enough, so we don't
|
|
||||||
* need to pass in the buffer size.
|
|
||||||
* Inserts thousand grouping characters (as defined by grouping and
|
|
||||||
* thousands_sep) into the string between buffer and buffer+n_digits.
|
|
||||||
*
|
|
||||||
* Return value: 0 on error, else 1. Note that no error can occur if
|
|
||||||
* count is non-NULL.
|
|
||||||
*
|
|
||||||
* This name won't be used, the includer of this file should define
|
|
||||||
* it to be the actual function name, based on unicode or string.
|
|
||||||
*
|
|
||||||
* As closely as possible, this code mimics the logic in decimal.py's
|
|
||||||
_insert_thousands_sep().
|
|
||||||
**/
|
|
||||||
static Py_ssize_t
|
|
||||||
STRINGLIB(InsertThousandsGrouping)(
|
|
||||||
STRINGLIB_CHAR *buffer,
|
|
||||||
Py_ssize_t n_buffer,
|
|
||||||
STRINGLIB_CHAR *digits,
|
|
||||||
Py_ssize_t n_digits,
|
|
||||||
Py_ssize_t min_width,
|
|
||||||
const char *grouping,
|
|
||||||
STRINGLIB_CHAR *thousands_sep,
|
|
||||||
Py_ssize_t thousands_sep_len)
|
|
||||||
{
|
|
||||||
Py_ssize_t count = 0;
|
|
||||||
Py_ssize_t n_zeros;
|
|
||||||
int loop_broken = 0;
|
|
||||||
int use_separator = 0; /* First time through, don't append the
|
|
||||||
separator. They only go between
|
|
||||||
groups. */
|
|
||||||
STRINGLIB_CHAR *buffer_end = NULL;
|
|
||||||
STRINGLIB_CHAR *digits_end = NULL;
|
|
||||||
Py_ssize_t l;
|
|
||||||
Py_ssize_t n_chars;
|
|
||||||
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
|
|
||||||
be looked at */
|
|
||||||
/* A generator that returns all of the grouping widths, until it
|
|
||||||
returns 0. */
|
|
||||||
STRINGLIB(GroupGenerator) groupgen;
|
|
||||||
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
|
|
||||||
|
|
||||||
if (buffer) {
|
|
||||||
buffer_end = buffer + n_buffer;
|
|
||||||
digits_end = digits + n_digits;
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
|
|
||||||
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
|
|
||||||
n_zeros = Py_MAX(0, l - remaining);
|
|
||||||
n_chars = Py_MAX(0, Py_MIN(remaining, l));
|
|
||||||
|
|
||||||
/* Use n_zero zero's and n_chars chars */
|
|
||||||
|
|
||||||
/* Count only, don't do anything. */
|
|
||||||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
|
||||||
|
|
||||||
if (buffer) {
|
|
||||||
/* Copy into the output buffer. */
|
|
||||||
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
|
|
||||||
use_separator ? thousands_sep : NULL, thousands_sep_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Use a separator next time. */
|
|
||||||
use_separator = 1;
|
|
||||||
|
|
||||||
remaining -= n_chars;
|
|
||||||
min_width -= l;
|
|
||||||
|
|
||||||
if (remaining <= 0 && min_width <= 0) {
|
|
||||||
loop_broken = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
min_width -= thousands_sep_len;
|
|
||||||
}
|
|
||||||
if (!loop_broken) {
|
|
||||||
/* We left the loop without using a break statement. */
|
|
||||||
|
|
||||||
l = Py_MAX(Py_MAX(remaining, min_width), 1);
|
|
||||||
n_zeros = Py_MAX(0, l - remaining);
|
|
||||||
n_chars = Py_MAX(0, Py_MIN(remaining, l));
|
|
||||||
|
|
||||||
/* Use n_zero zero's and n_chars chars */
|
|
||||||
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
|
||||||
if (buffer) {
|
|
||||||
/* Copy into the output buffer. */
|
|
||||||
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
|
|
||||||
use_separator ? thousands_sep : NULL, thousands_sep_len);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -220,6 +220,38 @@ static PyObject *unicode_empty = NULL;
|
||||||
return unicode_empty; \
|
return unicode_empty; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
|
||||||
|
Py_ssize_t start, Py_ssize_t length)
|
||||||
|
{
|
||||||
|
assert(0 <= start);
|
||||||
|
assert(kind != PyUnicode_WCHAR_KIND);
|
||||||
|
switch (kind) {
|
||||||
|
case PyUnicode_1BYTE_KIND: {
|
||||||
|
Py_UCS1 ch = (unsigned char)value;
|
||||||
|
Py_UCS1 *to = (Py_UCS1 *)data + start;
|
||||||
|
memset(to, ch, length);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_2BYTE_KIND: {
|
||||||
|
Py_UCS2 ch = (Py_UCS2)value;
|
||||||
|
Py_UCS2 *to = (Py_UCS2 *)data + start;
|
||||||
|
const Py_UCS2 *end = to + length;
|
||||||
|
for (; to < end; ++to) *to = ch;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_4BYTE_KIND: {
|
||||||
|
Py_UCS4 ch = value;
|
||||||
|
Py_UCS4 * to = (Py_UCS4 *)data + start;
|
||||||
|
const Py_UCS4 *end = to + length;
|
||||||
|
for (; to < end; ++to) *to = ch;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: Py_UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
static inline int
|
static inline int
|
||||||
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
|
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
|
||||||
|
@ -790,7 +822,6 @@ ensure_unicode(PyObject *obj)
|
||||||
#include "stringlib/count.h"
|
#include "stringlib/count.h"
|
||||||
#include "stringlib/find.h"
|
#include "stringlib/find.h"
|
||||||
#include "stringlib/find_max_char.h"
|
#include "stringlib/find_max_char.h"
|
||||||
#include "stringlib/localeutil.h"
|
|
||||||
#include "stringlib/undef.h"
|
#include "stringlib/undef.h"
|
||||||
|
|
||||||
#include "stringlib/ucs1lib.h"
|
#include "stringlib/ucs1lib.h"
|
||||||
|
@ -801,7 +832,6 @@ ensure_unicode(PyObject *obj)
|
||||||
#include "stringlib/find.h"
|
#include "stringlib/find.h"
|
||||||
#include "stringlib/replace.h"
|
#include "stringlib/replace.h"
|
||||||
#include "stringlib/find_max_char.h"
|
#include "stringlib/find_max_char.h"
|
||||||
#include "stringlib/localeutil.h"
|
|
||||||
#include "stringlib/undef.h"
|
#include "stringlib/undef.h"
|
||||||
|
|
||||||
#include "stringlib/ucs2lib.h"
|
#include "stringlib/ucs2lib.h"
|
||||||
|
@ -812,7 +842,6 @@ ensure_unicode(PyObject *obj)
|
||||||
#include "stringlib/find.h"
|
#include "stringlib/find.h"
|
||||||
#include "stringlib/replace.h"
|
#include "stringlib/replace.h"
|
||||||
#include "stringlib/find_max_char.h"
|
#include "stringlib/find_max_char.h"
|
||||||
#include "stringlib/localeutil.h"
|
|
||||||
#include "stringlib/undef.h"
|
#include "stringlib/undef.h"
|
||||||
|
|
||||||
#include "stringlib/ucs4lib.h"
|
#include "stringlib/ucs4lib.h"
|
||||||
|
@ -823,7 +852,6 @@ ensure_unicode(PyObject *obj)
|
||||||
#include "stringlib/find.h"
|
#include "stringlib/find.h"
|
||||||
#include "stringlib/replace.h"
|
#include "stringlib/replace.h"
|
||||||
#include "stringlib/find_max_char.h"
|
#include "stringlib/find_max_char.h"
|
||||||
#include "stringlib/localeutil.h"
|
|
||||||
#include "stringlib/undef.h"
|
#include "stringlib/undef.h"
|
||||||
|
|
||||||
#include "stringlib/unicodedefs.h"
|
#include "stringlib/unicodedefs.h"
|
||||||
|
@ -9323,86 +9351,149 @@ any_find_slice(PyObject* s1, PyObject* s2,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* _PyUnicode_InsertThousandsGrouping() helper functions */
|
||||||
|
#include "stringlib/localeutil.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* InsertThousandsGrouping:
|
||||||
|
* @writer: Unicode writer.
|
||||||
|
* @n_buffer: Number of characters to fill in @writer's buffer, starting at writer->pos.
|
||||||
|
* @digits: Digits we're reading from. Must be NULL when @writer is NULL.
|
||||||
|
* @d_pos: Index of the first digit to read within @digits.
|
||||||
|
* @n_digits: The number of digits in the string, in which we want
|
||||||
|
* to put the grouping chars.
|
||||||
|
* @min_width: The minimum width of the digits in the output string.
|
||||||
|
* Output will be zero-padded on the left to fill.
|
||||||
|
* @grouping: see definition in localeconv().
|
||||||
|
* @thousands_sep: see definition in localeconv().
|
||||||
|
*
|
||||||
|
* There are 2 modes: counting and filling. If @writer is NULL,
|
||||||
|
* we are in counting mode, else filling mode.
|
||||||
|
* If counting, the required buffer size is returned.
|
||||||
|
* If filling, we know the buffer will be large enough, so we don't
|
||||||
|
* need to pass in the buffer size.
|
||||||
|
* Inserts thousand grouping characters (as defined by grouping and
|
||||||
|
* thousands_sep) into @writer.
|
||||||
|
*
|
||||||
|
* Return value: -1 on error, number of characters otherwise.
|
||||||
|
**/
|
||||||
Py_ssize_t
|
Py_ssize_t
|
||||||
_PyUnicode_InsertThousandsGrouping(
|
_PyUnicode_InsertThousandsGrouping(
|
||||||
PyObject *unicode, Py_ssize_t index,
|
_PyUnicodeWriter *writer,
|
||||||
Py_ssize_t n_buffer,
|
Py_ssize_t n_buffer,
|
||||||
void *digits, Py_ssize_t n_digits,
|
PyObject *digits,
|
||||||
|
Py_ssize_t d_pos,
|
||||||
|
Py_ssize_t n_digits,
|
||||||
Py_ssize_t min_width,
|
Py_ssize_t min_width,
|
||||||
const char *grouping, PyObject *thousands_sep,
|
const char *grouping,
|
||||||
|
PyObject *thousands_sep,
|
||||||
Py_UCS4 *maxchar)
|
Py_UCS4 *maxchar)
|
||||||
{
|
{
|
||||||
unsigned int kind, thousands_sep_kind;
|
if (writer) {
|
||||||
char *data, *thousands_sep_data;
|
assert(digits != NULL);
|
||||||
Py_ssize_t thousands_sep_len;
|
assert(maxchar == NULL);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(digits == NULL);
|
||||||
|
assert(maxchar != NULL);
|
||||||
|
}
|
||||||
|
assert(0 <= d_pos);
|
||||||
|
assert(0 <= n_digits);
|
||||||
|
assert(0 <= min_width);
|
||||||
|
assert(grouping != NULL);
|
||||||
|
|
||||||
|
if (digits != NULL) {
|
||||||
|
if (PyUnicode_READY(digits) == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (PyUnicode_READY(thousands_sep) == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t count = 0;
|
||||||
|
Py_ssize_t n_zeros;
|
||||||
|
int loop_broken = 0;
|
||||||
|
int use_separator = 0; /* First time through, don't append the
|
||||||
|
separator. They only go between
|
||||||
|
groups. */
|
||||||
|
Py_ssize_t buffer_pos;
|
||||||
|
Py_ssize_t digits_pos;
|
||||||
Py_ssize_t len;
|
Py_ssize_t len;
|
||||||
|
Py_ssize_t n_chars;
|
||||||
|
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
|
||||||
|
be looked at */
|
||||||
|
/* A generator that returns all of the grouping widths, until it
|
||||||
|
returns 0. */
|
||||||
|
GroupGenerator groupgen;
|
||||||
|
GroupGenerator_init(&groupgen, grouping);
|
||||||
|
const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
|
||||||
|
|
||||||
if (unicode != NULL) {
|
/* if digits are not grouped, thousands separator
|
||||||
kind = PyUnicode_KIND(unicode);
|
should be an empty string */
|
||||||
data = (char *) PyUnicode_DATA(unicode) + index * kind;
|
assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
|
||||||
|
|
||||||
|
digits_pos = d_pos + n_digits;
|
||||||
|
if (writer) {
|
||||||
|
buffer_pos = writer->pos + n_buffer;
|
||||||
|
assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
|
||||||
|
assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
kind = PyUnicode_1BYTE_KIND;
|
buffer_pos = n_buffer;
|
||||||
data = NULL;
|
|
||||||
}
|
|
||||||
thousands_sep_kind = PyUnicode_KIND(thousands_sep);
|
|
||||||
thousands_sep_data = PyUnicode_DATA(thousands_sep);
|
|
||||||
thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
|
|
||||||
if (unicode != NULL && thousands_sep_kind != kind) {
|
|
||||||
if (thousands_sep_kind < kind) {
|
|
||||||
thousands_sep_data = _PyUnicode_AsKind(thousands_sep, kind);
|
|
||||||
if (!thousands_sep_data)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
data = _PyUnicode_AsKind(unicode, thousands_sep_kind);
|
|
||||||
if (!data)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (kind) {
|
if (!writer) {
|
||||||
case PyUnicode_1BYTE_KIND:
|
|
||||||
if (unicode != NULL && PyUnicode_IS_ASCII(unicode))
|
|
||||||
len = asciilib_InsertThousandsGrouping(
|
|
||||||
(Py_UCS1 *) data, n_buffer, (Py_UCS1 *) digits, n_digits,
|
|
||||||
min_width, grouping,
|
|
||||||
(Py_UCS1 *) thousands_sep_data, thousands_sep_len);
|
|
||||||
else
|
|
||||||
len = ucs1lib_InsertThousandsGrouping(
|
|
||||||
(Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
|
|
||||||
min_width, grouping,
|
|
||||||
(Py_UCS1 *) thousands_sep_data, thousands_sep_len);
|
|
||||||
break;
|
|
||||||
case PyUnicode_2BYTE_KIND:
|
|
||||||
len = ucs2lib_InsertThousandsGrouping(
|
|
||||||
(Py_UCS2 *) data, n_buffer, (Py_UCS2 *) digits, n_digits,
|
|
||||||
min_width, grouping,
|
|
||||||
(Py_UCS2 *) thousands_sep_data, thousands_sep_len);
|
|
||||||
break;
|
|
||||||
case PyUnicode_4BYTE_KIND:
|
|
||||||
len = ucs4lib_InsertThousandsGrouping(
|
|
||||||
(Py_UCS4 *) data, n_buffer, (Py_UCS4 *) digits, n_digits,
|
|
||||||
min_width, grouping,
|
|
||||||
(Py_UCS4 *) thousands_sep_data, thousands_sep_len);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Py_UNREACHABLE();
|
|
||||||
}
|
|
||||||
if (unicode != NULL && thousands_sep_kind != kind) {
|
|
||||||
if (thousands_sep_kind < kind)
|
|
||||||
PyMem_Free(thousands_sep_data);
|
|
||||||
else
|
|
||||||
PyMem_Free(data);
|
|
||||||
}
|
|
||||||
if (unicode == NULL) {
|
|
||||||
*maxchar = 127;
|
*maxchar = 127;
|
||||||
if (len != n_digits) {
|
|
||||||
*maxchar = Py_MAX(*maxchar,
|
|
||||||
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while ((len = GroupGenerator_next(&groupgen)) > 0) {
|
||||||
|
len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
|
||||||
|
n_zeros = Py_MAX(0, len - remaining);
|
||||||
|
n_chars = Py_MAX(0, Py_MIN(remaining, len));
|
||||||
|
|
||||||
|
/* Use n_zeros zeros and n_chars chars */
|
||||||
|
|
||||||
|
/* Count only, don't do anything. */
|
||||||
|
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||||
|
|
||||||
|
/* Copy into the writer. */
|
||||||
|
InsertThousandsGrouping_fill(writer, &buffer_pos,
|
||||||
|
digits, &digits_pos,
|
||||||
|
n_chars, n_zeros,
|
||||||
|
use_separator ? thousands_sep : NULL,
|
||||||
|
thousands_sep_len, maxchar);
|
||||||
|
|
||||||
|
/* Use a separator next time. */
|
||||||
|
use_separator = 1;
|
||||||
|
|
||||||
|
remaining -= n_chars;
|
||||||
|
min_width -= len;
|
||||||
|
|
||||||
|
if (remaining <= 0 && min_width <= 0) {
|
||||||
|
loop_broken = 1;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return len;
|
min_width -= thousands_sep_len;
|
||||||
|
}
|
||||||
|
if (!loop_broken) {
|
||||||
|
/* We left the loop without using a break statement. */
|
||||||
|
|
||||||
|
len = Py_MAX(Py_MAX(remaining, min_width), 1);
|
||||||
|
n_zeros = Py_MAX(0, len - remaining);
|
||||||
|
n_chars = Py_MAX(0, Py_MIN(remaining, len));
|
||||||
|
|
||||||
|
/* Use n_zeros zeros and n_chars chars */
|
||||||
|
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
|
||||||
|
|
||||||
|
/* Copy into the writer. */
|
||||||
|
InsertThousandsGrouping_fill(writer, &buffer_pos,
|
||||||
|
digits, &digits_pos,
|
||||||
|
n_chars, n_zeros,
|
||||||
|
use_separator ? thousands_sep : NULL,
|
||||||
|
thousands_sep_len, maxchar);
|
||||||
|
}
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -10021,30 +10112,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FILL(kind, data, value, start, length) \
|
|
||||||
do { \
|
|
||||||
Py_ssize_t i_ = 0; \
|
|
||||||
assert(kind != PyUnicode_WCHAR_KIND); \
|
|
||||||
switch ((kind)) { \
|
|
||||||
case PyUnicode_1BYTE_KIND: { \
|
|
||||||
unsigned char * to_ = (unsigned char *)((data)) + (start); \
|
|
||||||
memset(to_, (unsigned char)value, (length)); \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
case PyUnicode_2BYTE_KIND: { \
|
|
||||||
Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \
|
|
||||||
for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
case PyUnicode_4BYTE_KIND: { \
|
|
||||||
Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \
|
|
||||||
for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
default: Py_UNREACHABLE(); \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
|
_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
|
||||||
Py_UCS4 fill_char)
|
Py_UCS4 fill_char)
|
||||||
|
@ -10056,7 +10123,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
|
||||||
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
|
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
|
||||||
assert(start >= 0);
|
assert(start >= 0);
|
||||||
assert(start + length <= PyUnicode_GET_LENGTH(unicode));
|
assert(start + length <= PyUnicode_GET_LENGTH(unicode));
|
||||||
FILL(kind, data, fill_char, start, length);
|
unicode_fill(kind, data, fill_char, start, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_ssize_t
|
Py_ssize_t
|
||||||
|
@ -10127,9 +10194,9 @@ pad(PyObject *self,
|
||||||
kind = PyUnicode_KIND(u);
|
kind = PyUnicode_KIND(u);
|
||||||
data = PyUnicode_DATA(u);
|
data = PyUnicode_DATA(u);
|
||||||
if (left)
|
if (left)
|
||||||
FILL(kind, data, fill, 0, left);
|
unicode_fill(kind, data, fill, 0, left);
|
||||||
if (right)
|
if (right)
|
||||||
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
|
unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
|
||||||
_PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
|
_PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
|
||||||
assert(_PyUnicode_CheckConsistency(u, 1));
|
assert(_PyUnicode_CheckConsistency(u, 1));
|
||||||
return u;
|
return u;
|
||||||
|
@ -11516,7 +11583,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
|
||||||
if (tabsize > 0) {
|
if (tabsize > 0) {
|
||||||
incr = tabsize - (line_pos % tabsize);
|
incr = tabsize - (line_pos % tabsize);
|
||||||
line_pos += incr;
|
line_pos += incr;
|
||||||
FILL(kind, dest_data, ' ', j, incr);
|
unicode_fill(kind, dest_data, ' ', j, incr);
|
||||||
j += incr;
|
j += incr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14792,7 +14859,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
|
||||||
/* Pad left with the fill character if needed */
|
/* Pad left with the fill character if needed */
|
||||||
if (arg->width > len && !(arg->flags & F_LJUST)) {
|
if (arg->width > len && !(arg->flags & F_LJUST)) {
|
||||||
sublen = arg->width - len;
|
sublen = arg->width - len;
|
||||||
FILL(writer->kind, writer->data, fill, writer->pos, sublen);
|
unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
|
||||||
writer->pos += sublen;
|
writer->pos += sublen;
|
||||||
arg->width = len;
|
arg->width = len;
|
||||||
}
|
}
|
||||||
|
@ -14824,7 +14891,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
|
||||||
/* Pad right with the fill character if needed */
|
/* Pad right with the fill character if needed */
|
||||||
if (arg->width > len) {
|
if (arg->width > len) {
|
||||||
sublen = arg->width - len;
|
sublen = arg->width - len;
|
||||||
FILL(writer->kind, writer->data, ' ', writer->pos, sublen);
|
unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
|
||||||
writer->pos += sublen;
|
writer->pos += sublen;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
|
||||||
/* not all fields of format are used. for example, precision is
|
/* not all fields of format are used. for example, precision is
|
||||||
unused. should this take discrete params in order to be more clear
|
unused. should this take discrete params in order to be more clear
|
||||||
about what it does? or is passing a single format parameter easier
|
about what it does? or is passing a single format parameter easier
|
||||||
and more efficient enough to justify a little obfuscation? */
|
and more efficient enough to justify a little obfuscation?
|
||||||
|
Return -1 on error. */
|
||||||
static Py_ssize_t
|
static Py_ssize_t
|
||||||
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
|
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
|
||||||
Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
|
Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
|
||||||
|
@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
|
||||||
Py_UCS4 grouping_maxchar;
|
Py_UCS4 grouping_maxchar;
|
||||||
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
|
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
|
||||||
NULL, 0,
|
NULL, 0,
|
||||||
0, NULL,
|
NULL, 0, spec->n_digits,
|
||||||
spec->n_digits, spec->n_min_width,
|
spec->n_min_width,
|
||||||
locale->grouping, locale->thousands_sep, &grouping_maxchar);
|
locale->grouping, locale->thousands_sep, &grouping_maxchar);
|
||||||
|
if (spec->n_grouped_digits == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
|
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,26 +639,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
|
||||||
/* Only for type 'c' special case, it has no digits. */
|
/* Only for type 'c' special case, it has no digits. */
|
||||||
if (spec->n_digits != 0) {
|
if (spec->n_digits != 0) {
|
||||||
/* Fill the digits with InsertThousandsGrouping. */
|
/* Fill the digits with InsertThousandsGrouping. */
|
||||||
char *pdigits;
|
|
||||||
if (PyUnicode_READY(digits))
|
|
||||||
return -1;
|
|
||||||
pdigits = PyUnicode_DATA(digits);
|
|
||||||
if (PyUnicode_KIND(digits) < kind) {
|
|
||||||
pdigits = _PyUnicode_AsKind(digits, kind);
|
|
||||||
if (pdigits == NULL)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
r = _PyUnicode_InsertThousandsGrouping(
|
r = _PyUnicode_InsertThousandsGrouping(
|
||||||
writer->buffer, writer->pos,
|
writer, spec->n_grouped_digits,
|
||||||
spec->n_grouped_digits,
|
digits, d_pos, spec->n_digits,
|
||||||
pdigits + kind * d_pos,
|
spec->n_min_width,
|
||||||
spec->n_digits, spec->n_min_width,
|
|
||||||
locale->grouping, locale->thousands_sep, NULL);
|
locale->grouping, locale->thousands_sep, NULL);
|
||||||
if (r == -1)
|
if (r == -1)
|
||||||
return -1;
|
return -1;
|
||||||
assert(r == spec->n_grouped_digits);
|
assert(r == spec->n_grouped_digits);
|
||||||
if (PyUnicode_KIND(digits) < kind)
|
|
||||||
PyMem_Free(pdigits);
|
|
||||||
d_pos += spec->n_digits;
|
d_pos += spec->n_digits;
|
||||||
}
|
}
|
||||||
if (toupper) {
|
if (toupper) {
|
||||||
|
@ -994,6 +986,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
|
||||||
n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
|
n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
|
||||||
inumeric_chars + n_digits, n_remainder, 0,
|
inumeric_chars + n_digits, n_remainder, 0,
|
||||||
&locale, format, &maxchar);
|
&locale, format, &maxchar);
|
||||||
|
if (n_total == -1) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
/* Allocate the memory. */
|
/* Allocate the memory. */
|
||||||
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
|
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
|
||||||
|
@ -1139,6 +1134,9 @@ format_float_internal(PyObject *value,
|
||||||
n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
|
n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
|
||||||
index + n_digits, n_remainder, has_decimal,
|
index + n_digits, n_remainder, has_decimal,
|
||||||
&locale, format, &maxchar);
|
&locale, format, &maxchar);
|
||||||
|
if (n_total == -1) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
/* Allocate the memory. */
|
/* Allocate the memory. */
|
||||||
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
|
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
|
||||||
|
@ -1322,6 +1320,9 @@ format_complex_internal(PyObject *value,
|
||||||
i_re, i_re + n_re_digits, n_re_remainder,
|
i_re, i_re + n_re_digits, n_re_remainder,
|
||||||
re_has_decimal, &locale, &tmp_format,
|
re_has_decimal, &locale, &tmp_format,
|
||||||
&maxchar);
|
&maxchar);
|
||||||
|
if (n_re_total == -1) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
/* Same formatting, but always include a sign, unless the real part is
|
/* Same formatting, but always include a sign, unless the real part is
|
||||||
* going to be omitted, in which case we use whatever sign convention was
|
* going to be omitted, in which case we use whatever sign convention was
|
||||||
|
@ -1332,6 +1333,9 @@ format_complex_internal(PyObject *value,
|
||||||
i_im, i_im + n_im_digits, n_im_remainder,
|
i_im, i_im + n_im_digits, n_im_remainder,
|
||||||
im_has_decimal, &locale, &tmp_format,
|
im_has_decimal, &locale, &tmp_format,
|
||||||
&maxchar);
|
&maxchar);
|
||||||
|
if (n_im_total == -1) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (skip_re)
|
if (skip_re)
|
||||||
n_re_total = 0;
|
n_re_total = 0;
|
||||||
|
|
Loading…
Reference in New Issue