Issue 2526, float.__format__ 'n' specifier does not support thousands grouping.

Implemented grouping, with tests.
Cleaned up PyOS_ascii_formatd by breaking reformatting into smaller functions.
This commit is contained in:
Eric Smith 2008-04-30 01:09:30 +00:00
parent 48f6276ddc
commit 0a95063d73
2 changed files with 240 additions and 129 deletions

View File

@ -1,8 +1,9 @@
# Python test set -- part 6, built-in types
from test.test_support import run_unittest, have_unicode
from test.test_support import run_unittest, have_unicode, run_with_locale
import unittest
import sys
import locale
class TypesTests(unittest.TestCase):
@ -476,6 +477,15 @@ class TypesTests(unittest.TestCase):
self.assertEqual(value.__format__(format_spec),
float(value).__format__(format_spec))
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
def test_float__format__locale(self):
# test locale support for __format__ code 'n'
for i in range(-10, 10):
x = 1234567890.0 * (10.0 ** i)
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
def test_float__format__(self):
# these should be rewritten to use both format(x, spec) and
# x.__format__(spec)

View File

@ -187,6 +187,38 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val;
}
/* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */
Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char* buffer)
{
struct lconv *locale_data = localeconv();
const char *decimal_point = locale_data->decimal_point;
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
size_t decimal_point_len = strlen(decimal_point);
if (*buffer == '+' || *buffer == '-')
buffer++;
while (isdigit(Py_CHARMASK(*buffer)))
buffer++;
if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
*buffer = '.';
buffer++;
if (decimal_point_len > 1) {
/* buffer needs to get smaller */
size_t rest_len = strlen(buffer +
(decimal_point_len - 1));
memmove(buffer,
buffer + (decimal_point_len - 1),
rest_len);
buffer[rest_len] = 0;
}
}
}
}
/* From the C99 standard, section 7.19.6:
The exponent always contains at least two digits, and only as many more digits
@ -194,6 +226,189 @@ as necessary to represent the exponent.
*/
#define MIN_EXPONENT_DIGITS 2
/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
in length. */
Py_LOCAL_INLINE(void)
ensure_minumim_exponent_length(char* buffer, size_t buf_size)
{
char *p = strpbrk(buffer, "eE");
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
char *start = p + 2;
int exponent_digit_cnt = 0;
int leading_zero_cnt = 0;
int in_leading_zeros = 1;
int significant_digit_cnt;
/* Skip over the exponent and the sign. */
p += 2;
/* Find the end of the exponent, keeping track of leading
zeros. */
while (*p && isdigit(Py_CHARMASK(*p))) {
if (in_leading_zeros && *p == '0')
++leading_zero_cnt;
if (*p != '0')
in_leading_zeros = 0;
++p;
++exponent_digit_cnt;
}
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
/* If there are 2 exactly digits, we're done,
regardless of what they contain */
}
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
int extra_zeros_cnt;
/* There are more than 2 digits in the exponent. See
if we can delete some of the leading zeros */
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
significant_digit_cnt = MIN_EXPONENT_DIGITS;
extra_zeros_cnt = exponent_digit_cnt -
significant_digit_cnt;
/* Delete extra_zeros_cnt worth of characters from the
front of the exponent */
assert(extra_zeros_cnt >= 0);
/* Add one to significant_digit_cnt to copy the
trailing 0 byte, thus setting the length */
memmove(start,
start + extra_zeros_cnt,
significant_digit_cnt + 1);
}
else {
/* If there are fewer than 2 digits, add zeros
until there are 2, if there's enough room */
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
if (start + zeros + exponent_digit_cnt + 1
< buffer + buf_size) {
memmove(start + zeros, start,
exponent_digit_cnt + 1);
memset(start, '0', zeros);
}
}
}
}
/* Ensure that buffer has a decimal point in it. The decimal point
will not be in the current locale, it will always be '.' */
Py_LOCAL_INLINE(void)
ensure_decimal_point(char* buffer, size_t buf_size)
{
int insert_count = 0;
char* chars_to_insert;
/* search for the first non-digit character */
char *p = buffer;
while (*p && isdigit(Py_CHARMASK(*p)))
++p;
if (*p == '.') {
if (isdigit(Py_CHARMASK(*(p+1)))) {
/* Nothing to do, we already have a decimal
point and a digit after it */
}
else {
/* We have a decimal point, but no following
digit. Insert a zero after the decimal. */
++p;
chars_to_insert = "0";
insert_count = 1;
}
}
else {
chars_to_insert = ".0";
insert_count = 2;
}
if (insert_count) {
size_t buf_len = strlen(buffer);
if (buf_len + insert_count + 1 >= buf_size) {
/* If there is not enough room in the buffer
for the additional text, just skip it. It's
not worth generating an error over. */
}
else {
memmove(p + insert_count, p,
buffer + strlen(buffer) - p + 1);
memcpy(p, chars_to_insert, insert_count);
}
}
}
/* Add the locale specific grouping characters to buffer. Note
that any decimal point (if it's present) in buffer is already
locale-specific. Return 0 on error, else 1. */
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
struct lconv *locale_data = localeconv();
const char *grouping = locale_data->grouping;
const char *thousands_sep = locale_data->thousands_sep;
size_t thousands_sep_len = strlen(thousands_sep);
const char *decimal_point = locale_data->decimal_point;
char *pend = buffer + strlen(buffer); /* current end of buffer */
char *pmax = buffer + buf_size; /* max of buffer */
char current_grouping;
/* Find the decimal point, if any. We're only concerned
about the characters to the left of the decimal when
adding grouping. */
char *p = strstr(buffer, decimal_point);
if (!p) {
/* No decimal, use the entire string. */
/* If any exponent, adjust p. */
p = strpbrk(buffer, "eE");
if (!p)
/* No exponent and no decimal. Use the entire
string. */
p = pend;
}
/* At this point, p points just past the right-most character we
want to format. We need to add the grouping string for the
characters between buffer and p. */
/* Starting at p and working right-to-left, keep track of
what grouping needs to be added and insert that. */
current_grouping = *grouping++;
/* If the first character is 0, perform no grouping at all. */
if (current_grouping == 0)
return 1;
while (p - buffer > current_grouping) {
/* Always leave buffer and pend valid at the end of this
loop, since we might leave with a return statement. */
/* Is there room to insert thousands_sep_len chars?. */
if (pmax - pend <= thousands_sep_len)
/* No room. */
return 0;
/* Move the rest of the string down. */
p -= current_grouping;
memmove(p + thousands_sep_len,
p,
pend - p + 1);
/* Adjust end pointer. */
pend += thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */
memcpy(p, thousands_sep, thousands_sep_len);
/* Move to the next grouping character, unless we're
repeating (which is designated by a grouping of 0). */
if (*grouping != 0) {
current_grouping = *grouping++;
if (current_grouping == CHAR_MAX)
/* We're done. */
return 1;
}
}
return 1;
}
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
@ -222,7 +437,6 @@ PyOS_ascii_formatd(char *buffer,
const char *format,
double d)
{
char *p;
char format_char;
size_t format_len = strlen(format);
@ -277,144 +491,31 @@ PyOS_ascii_formatd(char *buffer,
/* Have PyOS_snprintf do the hard work */
PyOS_snprintf(buffer, buf_size, format, d);
/* Get the current local, and find the decimal point character (or
string?). Convert that string back to a dot. Do not do this if
using the 'n' (number) format code. */
if (format_char != 'n') {
struct lconv *locale_data = localeconv();
const char *decimal_point = locale_data->decimal_point;
size_t decimal_point_len = strlen(decimal_point);
size_t rest_len;
/* Do various fixups on the return string */
assert(decimal_point_len != 0);
if (decimal_point[0] != '.' || decimal_point[1] != 0) {
p = buffer;
if (*p == '+' || *p == '-')
p++;
while (isdigit(Py_CHARMASK(*p)))
p++;
if (strncmp(p, decimal_point,
decimal_point_len) == 0) {
*p = '.';
p++;
if (decimal_point_len > 1) {
rest_len = strlen(p +
(decimal_point_len - 1));
memmove(p, p + (decimal_point_len - 1),
rest_len);
p[rest_len] = 0;
}
}
}
}
/* Get the current locale, and find the decimal point string.
Convert that string back to a dot. Do not do this if using the
'n' (number) format code, since we want to keep the localized
decimal point in that case. */
if (format_char != 'n')
change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
for the extra zeros. Also, if there are more than
MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
back to MIN_EXPONENT_DIGITS */
p = strpbrk(buffer, "eE");
if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
char *start = p + 2;
int exponent_digit_cnt = 0;
int leading_zero_cnt = 0;
int in_leading_zeros = 1;
int significant_digit_cnt;
p += 2;
while (*p && isdigit(Py_CHARMASK(*p))) {
if (in_leading_zeros && *p == '0')
++leading_zero_cnt;
if (*p != '0')
in_leading_zeros = 0;
++p;
++exponent_digit_cnt;
}
significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
/* If there are 2 exactly digits, we're done,
regardless of what they contain */
}
else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
int extra_zeros_cnt;
/* There are more than 2 digits in the exponent. See
if we can delete some of the leading zeros */
if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
significant_digit_cnt = MIN_EXPONENT_DIGITS;
extra_zeros_cnt = exponent_digit_cnt -
significant_digit_cnt;
/* Delete extra_zeros_cnt worth of characters from the
front of the exponent */
assert(extra_zeros_cnt >= 0);
/* Add one to significant_digit_cnt to copy the
trailing 0 byte, thus setting the length */
memmove(start,
start + extra_zeros_cnt,
significant_digit_cnt + 1);
}
else {
/* If there are fewer than 2 digits, add zeros
until there are 2, if there's enough room */
int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
if (start + zeros + exponent_digit_cnt + 1
< buffer + buf_size) {
memmove(start + zeros, start,
exponent_digit_cnt + 1);
memset(start, '0', zeros);
}
}
}
ensure_minumim_exponent_length(buffer, buf_size);
/* If format_char is 'Z', make sure we have at least one character
after the decimal point (and make sure we have a decimal point). */
if (format_char == 'Z') {
int insert_count = 0;
char* chars_to_insert;
if (format_char == 'Z')
ensure_decimal_point(buffer, buf_size);
/* search for the first non-digit character */
p = buffer;
while (*p && isdigit(Py_CHARMASK(*p)))
++p;
if (*p == '.') {
if (isdigit(Py_CHARMASK(*(p+1)))) {
/* Nothing to do, we already have a decimal
point and a digit after it */
}
else {
/* We have a decimal point, but no following
digit. Insert a zero after the decimal. */
++p;
chars_to_insert = "0";
insert_count = 1;
}
}
else {
chars_to_insert = ".0";
insert_count = 2;
}
if (insert_count) {
size_t buf_len = strlen(buffer);
if (buf_len + insert_count + 1 >= buf_size) {
/* If there is not enough room in the buffer
for the additional text, just skip it. It's
not worth generating an error over. */
}
else {
memmove(p + insert_count, p,
buffer + strlen(buffer) - p + 1);
memcpy(p, chars_to_insert, insert_count);
}
}
}
/* If format_char is 'n', add the thousands grouping. */
if (format_char == 'n')
if (!add_thousands_grouping(buffer, buf_size))
return NULL;
return buffer;
}