Issue 2526, float.__format__ 'n' specifier does not support thousands grouping.
Implemented grouping, with tests. Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions.
This commit is contained in:
parent
48f6276ddc
commit
0a95063d73
|
@ -1,8 +1,9 @@
|
|||
# Python test set -- part 6, built-in types
|
||||
|
||||
from test.test_support import run_unittest, have_unicode
|
||||
from test.test_support import run_unittest, have_unicode, run_with_locale
|
||||
import unittest
|
||||
import sys
|
||||
import locale
|
||||
|
||||
class TypesTests(unittest.TestCase):
|
||||
|
||||
|
@ -476,6 +477,15 @@ class TypesTests(unittest.TestCase):
|
|||
self.assertEqual(value.__format__(format_spec),
|
||||
float(value).__format__(format_spec))
|
||||
|
||||
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
def test_float__format__locale(self):
    # The 'n' format code must agree with locale.format() when
    # grouping is requested, with and without an explicit precision.
    spec_pairs = (('%g', 'n'), ('%.10g', '.10n'))

    for exponent in range(-10, 10):
        value = 1234567890.0 * (10.0 ** exponent)
        for percent_spec, n_spec in spec_pairs:
            expected = locale.format(percent_spec, value, grouping=True)
            self.assertEqual(expected, format(value, n_spec))
|
||||
|
||||
def test_float__format__(self):
|
||||
# these should be rewritten to use both format(x, spec) and
|
||||
# x.__format__(spec)
|
||||
|
|
|
@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
|||
return val;
|
||||
}
|
||||
|
||||
/* Given a string that may have a decimal point in the current
|
||||
locale, change it back to a dot. Since the string cannot get
|
||||
longer, no need for a maximum buffer size parameter. */
|
||||
Py_LOCAL_INLINE(void)
|
||||
change_decimal_from_locale_to_dot(char* buffer)
|
||||
{
|
||||
struct lconv *locale_data = localeconv();
|
||||
const char *decimal_point = locale_data->decimal_point;
|
||||
|
||||
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
|
||||
size_t decimal_point_len = strlen(decimal_point);
|
||||
|
||||
if (*buffer == '+' || *buffer == '-')
|
||||
buffer++;
|
||||
while (isdigit(Py_CHARMASK(*buffer)))
|
||||
buffer++;
|
||||
if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
|
||||
*buffer = '.';
|
||||
buffer++;
|
||||
if (decimal_point_len > 1) {
|
||||
/* buffer needs to get smaller */
|
||||
size_t rest_len = strlen(buffer +
|
||||
(decimal_point_len - 1));
|
||||
memmove(buffer,
|
||||
buffer + (decimal_point_len - 1),
|
||||
rest_len);
|
||||
buffer[rest_len] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* From the C99 standard, section 7.19.6:
|
||||
The exponent always contains at least two digits, and only as many more digits
|
||||
|
@ -194,6 +226,189 @@ as necessary to represent the exponent.
|
|||
*/
|
||||
#define MIN_EXPONENT_DIGITS 2
|
||||
|
||||
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
|
||||
in length. */
|
||||
Py_LOCAL_INLINE(void)
|
||||
ensure_minumim_exponent_length(char* buffer, size_t buf_size)
|
||||
{
|
||||
char *p = strpbrk(buffer, "eE");
|
||||
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
|
||||
char *start = p + 2;
|
||||
int exponent_digit_cnt = 0;
|
||||
int leading_zero_cnt = 0;
|
||||
int in_leading_zeros = 1;
|
||||
int significant_digit_cnt;
|
||||
|
||||
/* Skip over the exponent and the sign. */
|
||||
p += 2;
|
||||
|
||||
/* Find the end of the exponent, keeping track of leading
|
||||
zeros. */
|
||||
while (*p && isdigit(Py_CHARMASK(*p))) {
|
||||
if (in_leading_zeros && *p == '0')
|
||||
++leading_zero_cnt;
|
||||
if (*p != '0')
|
||||
in_leading_zeros = 0;
|
||||
++p;
|
||||
++exponent_digit_cnt;
|
||||
}
|
||||
|
||||
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
|
||||
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
|
||||
/* If there are 2 exactly digits, we're done,
|
||||
regardless of what they contain */
|
||||
}
|
||||
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
|
||||
int extra_zeros_cnt;
|
||||
|
||||
/* There are more than 2 digits in the exponent. See
|
||||
if we can delete some of the leading zeros */
|
||||
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
|
||||
significant_digit_cnt = MIN_EXPONENT_DIGITS;
|
||||
extra_zeros_cnt = exponent_digit_cnt -
|
||||
significant_digit_cnt;
|
||||
|
||||
/* Delete extra_zeros_cnt worth of characters from the
|
||||
front of the exponent */
|
||||
assert(extra_zeros_cnt >= 0);
|
||||
|
||||
/* Add one to significant_digit_cnt to copy the
|
||||
trailing 0 byte, thus setting the length */
|
||||
memmove(start,
|
||||
start + extra_zeros_cnt,
|
||||
significant_digit_cnt + 1);
|
||||
}
|
||||
else {
|
||||
/* If there are fewer than 2 digits, add zeros
|
||||
until there are 2, if there's enough room */
|
||||
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
|
||||
if (start + zeros + exponent_digit_cnt + 1
|
||||
< buffer + buf_size) {
|
||||
memmove(start + zeros, start,
|
||||
exponent_digit_cnt + 1);
|
||||
memset(start, '0', zeros);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Ensure that buffer has a decimal point in it. The decimal point
|
||||
will not be in the current locale, it will always be '.' */
|
||||
Py_LOCAL_INLINE(void)
|
||||
ensure_decimal_point(char* buffer, size_t buf_size)
|
||||
{
|
||||
int insert_count = 0;
|
||||
char* chars_to_insert;
|
||||
|
||||
/* search for the first non-digit character */
|
||||
char *p = buffer;
|
||||
while (*p && isdigit(Py_CHARMASK(*p)))
|
||||
++p;
|
||||
|
||||
if (*p == '.') {
|
||||
if (isdigit(Py_CHARMASK(*(p+1)))) {
|
||||
/* Nothing to do, we already have a decimal
|
||||
point and a digit after it */
|
||||
}
|
||||
else {
|
||||
/* We have a decimal point, but no following
|
||||
digit. Insert a zero after the decimal. */
|
||||
++p;
|
||||
chars_to_insert = "0";
|
||||
insert_count = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
chars_to_insert = ".0";
|
||||
insert_count = 2;
|
||||
}
|
||||
if (insert_count) {
|
||||
size_t buf_len = strlen(buffer);
|
||||
if (buf_len + insert_count + 1 >= buf_size) {
|
||||
/* If there is not enough room in the buffer
|
||||
for the additional text, just skip it. It's
|
||||
not worth generating an error over. */
|
||||
}
|
||||
else {
|
||||
memmove(p + insert_count, p,
|
||||
buffer + strlen(buffer) - p + 1);
|
||||
memcpy(p, chars_to_insert, insert_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Add the locale specific grouping characters to buffer. Note
|
||||
that any decimal point (if it's present) in buffer is already
|
||||
locale-specific. Return 0 on error, else 1. */
|
||||
Py_LOCAL_INLINE(int)
|
||||
add_thousands_grouping(char* buffer, size_t buf_size)
|
||||
{
|
||||
struct lconv *locale_data = localeconv();
|
||||
const char *grouping = locale_data->grouping;
|
||||
const char *thousands_sep = locale_data->thousands_sep;
|
||||
size_t thousands_sep_len = strlen(thousands_sep);
|
||||
const char *decimal_point = locale_data->decimal_point;
|
||||
char *pend = buffer + strlen(buffer); /* current end of buffer */
|
||||
char *pmax = buffer + buf_size; /* max of buffer */
|
||||
char current_grouping;
|
||||
|
||||
/* Find the decimal point, if any. We're only concerned
|
||||
about the characters to the left of the decimal when
|
||||
adding grouping. */
|
||||
char *p = strstr(buffer, decimal_point);
|
||||
if (!p) {
|
||||
/* No decimal, use the entire string. */
|
||||
|
||||
/* If any exponent, adjust p. */
|
||||
p = strpbrk(buffer, "eE");
|
||||
if (!p)
|
||||
/* No exponent and no decimal. Use the entire
|
||||
string. */
|
||||
p = pend;
|
||||
}
|
||||
/* At this point, p points just past the right-most character we
|
||||
want to format. We need to add the grouping string for the
|
||||
characters between buffer and p. */
|
||||
|
||||
/* Starting at p and working right-to-left, keep track of
|
||||
what grouping needs to be added and insert that. */
|
||||
current_grouping = *grouping++;
|
||||
|
||||
/* If the first character is 0, perform no grouping at all. */
|
||||
if (current_grouping == 0)
|
||||
return 1;
|
||||
|
||||
while (p - buffer > current_grouping) {
|
||||
/* Always leave buffer and pend valid at the end of this
|
||||
loop, since we might leave with a return statement. */
|
||||
|
||||
/* Is there room to insert thousands_sep_len chars?. */
|
||||
if (pmax - pend <= thousands_sep_len)
|
||||
/* No room. */
|
||||
return 0;
|
||||
|
||||
/* Move the rest of the string down. */
|
||||
p -= current_grouping;
|
||||
memmove(p + thousands_sep_len,
|
||||
p,
|
||||
pend - p + 1);
|
||||
/* Adjust end pointer. */
|
||||
pend += thousands_sep_len;
|
||||
/* Copy the thousands_sep chars into the buffer. */
|
||||
memcpy(p, thousands_sep, thousands_sep_len);
|
||||
|
||||
/* Move to the next grouping character, unless we're
|
||||
repeating (which is designated by a grouping of 0). */
|
||||
if (*grouping != 0) {
|
||||
current_grouping = *grouping++;
|
||||
if (current_grouping == CHAR_MAX)
|
||||
/* We're done. */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see FORMATBUFLEN in unicodeobject.c */
|
||||
#define FLOAT_FORMATBUFLEN 120
|
||||
|
||||
|
@ -222,7 +437,6 @@ PyOS_ascii_formatd(char *buffer,
|
|||
const char *format,
|
||||
double d)
|
||||
{
|
||||
char *p;
|
||||
char format_char;
|
||||
size_t format_len = strlen(format);
|
||||
|
||||
|
@ -277,144 +491,31 @@ PyOS_ascii_formatd(char *buffer,
|
|||
/* Have PyOS_snprintf do the hard work */
|
||||
PyOS_snprintf(buffer, buf_size, format, d);
|
||||
|
||||
/* Get the current local, and find the decimal point character (or
|
||||
string?). Convert that string back to a dot. Do not do this if
|
||||
using the 'n' (number) format code. */
|
||||
if (format_char != 'n') {
|
||||
struct lconv *locale_data = localeconv();
|
||||
const char *decimal_point = locale_data->decimal_point;
|
||||
size_t decimal_point_len = strlen(decimal_point);
|
||||
size_t rest_len;
|
||||
/* Do various fixups on the return string */
|
||||
|
||||
assert(decimal_point_len != 0);
|
||||
|
||||
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
|
||||
p = buffer;
|
||||
|
||||
if (*p == '+' || *p == '-')
|
||||
p++;
|
||||
|
||||
while (isdigit(Py_CHARMASK(*p)))
|
||||
p++;
|
||||
|
||||
if (strncmp(p, decimal_point,
|
||||
decimal_point_len) == 0) {
|
||||
*p = '.';
|
||||
p++;
|
||||
if (decimal_point_len > 1) {
|
||||
rest_len = strlen(p +
|
||||
(decimal_point_len - 1));
|
||||
memmove(p, p + (decimal_point_len - 1),
|
||||
rest_len);
|
||||
p[rest_len] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Get the current locale, and find the decimal point string.
|
||||
Convert that string back to a dot. Do not do this if using the
|
||||
'n' (number) format code, since we want to keep the localized
|
||||
decimal point in that case. */
|
||||
if (format_char != 'n')
|
||||
change_decimal_from_locale_to_dot(buffer);
|
||||
|
||||
/* If an exponent exists, ensure that the exponent is at least
|
||||
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
|
||||
for the extra zeros. Also, if there are more than
|
||||
MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
|
||||
back to MIN_EXPONENT_DIGITS */
|
||||
p = strpbrk(buffer, "eE");
|
||||
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
|
||||
char *start = p + 2;
|
||||
int exponent_digit_cnt = 0;
|
||||
int leading_zero_cnt = 0;
|
||||
int in_leading_zeros = 1;
|
||||
int significant_digit_cnt;
|
||||
|
||||
p += 2;
|
||||
while (*p && isdigit(Py_CHARMASK(*p))) {
|
||||
if (in_leading_zeros && *p == '0')
|
||||
++leading_zero_cnt;
|
||||
if (*p != '0')
|
||||
in_leading_zeros = 0;
|
||||
++p;
|
||||
++exponent_digit_cnt;
|
||||
}
|
||||
|
||||
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
|
||||
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
|
||||
/* If there are 2 exactly digits, we're done,
|
||||
regardless of what they contain */
|
||||
}
|
||||
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
|
||||
int extra_zeros_cnt;
|
||||
|
||||
/* There are more than 2 digits in the exponent. See
|
||||
if we can delete some of the leading zeros */
|
||||
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
|
||||
significant_digit_cnt = MIN_EXPONENT_DIGITS;
|
||||
extra_zeros_cnt = exponent_digit_cnt -
|
||||
significant_digit_cnt;
|
||||
|
||||
/* Delete extra_zeros_cnt worth of characters from the
|
||||
front of the exponent */
|
||||
assert(extra_zeros_cnt >= 0);
|
||||
|
||||
/* Add one to significant_digit_cnt to copy the
|
||||
trailing 0 byte, thus setting the length */
|
||||
memmove(start,
|
||||
start + extra_zeros_cnt,
|
||||
significant_digit_cnt + 1);
|
||||
}
|
||||
else {
|
||||
/* If there are fewer than 2 digits, add zeros
|
||||
until there are 2, if there's enough room */
|
||||
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
|
||||
if (start + zeros + exponent_digit_cnt + 1
|
||||
< buffer + buf_size) {
|
||||
memmove(start + zeros, start,
|
||||
exponent_digit_cnt + 1);
|
||||
memset(start, '0', zeros);
|
||||
}
|
||||
}
|
||||
}
|
||||
ensure_minumim_exponent_length(buffer, buf_size);
|
||||
|
||||
/* If format_char is 'Z', make sure we have at least one character
|
||||
after the decimal point (and make sure we have a decimal point). */
|
||||
if (format_char == 'Z') {
|
||||
int insert_count = 0;
|
||||
char* chars_to_insert;
|
||||
if (format_char == 'Z')
|
||||
ensure_decimal_point(buffer, buf_size);
|
||||
|
||||
/* search for the first non-digit character */
|
||||
p = buffer;
|
||||
while (*p && isdigit(Py_CHARMASK(*p)))
|
||||
++p;
|
||||
|
||||
if (*p == '.') {
|
||||
if (isdigit(Py_CHARMASK(*(p+1)))) {
|
||||
/* Nothing to do, we already have a decimal
|
||||
point and a digit after it */
|
||||
}
|
||||
else {
|
||||
/* We have a decimal point, but no following
|
||||
digit. Insert a zero after the decimal. */
|
||||
++p;
|
||||
chars_to_insert = "0";
|
||||
insert_count = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
chars_to_insert = ".0";
|
||||
insert_count = 2;
|
||||
}
|
||||
if (insert_count) {
|
||||
size_t buf_len = strlen(buffer);
|
||||
if (buf_len + insert_count + 1 >= buf_size) {
|
||||
/* If there is not enough room in the buffer
|
||||
for the additional text, just skip it. It's
|
||||
not worth generating an error over. */
|
||||
}
|
||||
else {
|
||||
memmove(p + insert_count, p,
|
||||
buffer + strlen(buffer) - p + 1);
|
||||
memcpy(p, chars_to_insert, insert_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If format_char is 'n', add the thousands grouping. */
|
||||
if (format_char == 'n')
|
||||
if (!add_thousands_grouping(buffer, buf_size))
|
||||
return NULL;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue