Addresses issue 2802: 'n' formatting for integers.
Adds 'n' as a format specifier for integers, to mirror the same specifier which is already available for floats. 'n' is the same as 'd', but inserts the current locale-specific thousands grouping. I added this as a stringlib function, but it's only used by str type, not unicode. This is because of an implementation detail in unicode.format(), which does its own str->unicode conversion. But the unicode version will be needed in 3.0, and it may be needed by other code eventually in 2.6 (maybe decimal?), so I left it as a stringlib implementation. As long as the unicode version isn't instantiated, there's no overhead for this.
This commit is contained in:
parent
30ece44f2e
commit
cf537ff39e
|
@ -177,6 +177,16 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
|
|||
strings) */
|
||||
);
|
||||
|
||||
/* Using the current locale, insert the thousands grouping
|
||||
into the string pointed to by buffer. For the argument descriptions,
|
||||
see Objects/stringlib/localeutil.h */
|
||||
|
||||
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
|
||||
Py_ssize_t len,
|
||||
char *plast,
|
||||
Py_ssize_t buf_size,
|
||||
Py_ssize_t *count,
|
||||
int append_zero_char);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -377,7 +377,7 @@ class TypesTests(unittest.TestCase):
|
|||
|
||||
# ensure that float type specifiers work; format converts
|
||||
# the int to a float
|
||||
for format_spec in 'eEfFgGn%':
|
||||
for format_spec in 'eEfFgG%':
|
||||
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
|
||||
self.assertEqual(value.__format__(format_spec),
|
||||
float(value).__format__(format_spec))
|
||||
|
@ -472,7 +472,7 @@ class TypesTests(unittest.TestCase):
|
|||
|
||||
# ensure that float type specifiers work; format converts
|
||||
# the long to a float
|
||||
for format_spec in 'eEfFgGn%':
|
||||
for format_spec in 'eEfFgG%':
|
||||
for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]:
|
||||
self.assertEqual(value.__format__(format_spec),
|
||||
float(value).__format__(format_spec))
|
||||
|
@ -486,6 +486,17 @@ class TypesTests(unittest.TestCase):
|
|||
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
|
||||
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
|
||||
|
||||
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
def test_int__format__locale(self):
    # The 'n' format code for integers must agree with
    # locale.format('%d', ..., grouping=True) under the active locale.

    value = 123456789012345678901234567890
    for _ in range(30):
        expected = locale.format('%d', value, grouping=True)
        self.assertEqual(expected, format(value, 'n'))

        # Drop the least significant digit so that every digit count
        # from 30 down to 1 gets exercised.
        value //= 10
|
||||
|
||||
def test_float__format__(self):
|
||||
# these should be rewritten to use both format(x, spec) and
|
||||
# x.__format__(spec)
|
||||
|
|
|
@ -549,7 +549,8 @@ STRINGLIB_HEADERS= \
|
|||
$(srcdir)/Objects/stringlib/stringdefs.h \
|
||||
$(srcdir)/Objects/stringlib/string_format.h \
|
||||
$(srcdir)/Objects/stringlib/transmogrify.h \
|
||||
$(srcdir)/Objects/stringlib/unicodedefs.h
|
||||
$(srcdir)/Objects/stringlib/unicodedefs.h \
|
||||
$(srcdir)/Objects/stringlib/localeutil.h
|
||||
|
||||
Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
|
||||
$(STRINGLIB_HEADERS)
|
||||
|
|
|
@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
|||
Py_ssize_t n_digits; /* count of digits need from the computed
|
||||
string */
|
||||
Py_ssize_t n_leading_chars;
|
||||
Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
|
||||
allocate, used for 'n'
|
||||
formatting. */
|
||||
NumberFieldWidths spec;
|
||||
long x;
|
||||
|
||||
|
@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
|||
break;
|
||||
default: /* shouldn't be needed, but stops a compiler warning */
|
||||
case 'd':
|
||||
case 'n':
|
||||
base = 10;
|
||||
leading_chars_to_skip = 0;
|
||||
break;
|
||||
|
@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
|||
/* Calculate the widths of the various leading and trailing parts */
|
||||
calc_number_widths(&spec, sign, n_digits, format);
|
||||
|
||||
if (format->type == 'n')
|
||||
/* Compute how many additional chars we need to allocate
|
||||
to hold the thousands grouping. */
|
||||
STRINGLIB_GROUPING(pnumeric_chars, n_digits,
|
||||
pnumeric_chars+n_digits,
|
||||
0, &n_grouping_chars, 0);
|
||||
|
||||
/* Allocate a new string to hold the result */
|
||||
result = STRINGLIB_NEW(NULL, spec.n_total);
|
||||
result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
|
||||
if (!result)
|
||||
goto done;
|
||||
p = STRINGLIB_STR(result);
|
||||
|
@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
|
|||
pnumeric_chars,
|
||||
n_digits * sizeof(STRINGLIB_CHAR));
|
||||
|
||||
/* if X, convert to uppercase */
|
||||
/* If type is 'X', convert to uppercase */
|
||||
if (format->type == 'X') {
|
||||
Py_ssize_t t;
|
||||
for (t = 0; t < n_digits; ++t)
|
||||
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
|
||||
}
|
||||
|
||||
/* Insert the grouping, if any, after the uppercasing of 'X', so we can
|
||||
ensure that grouping chars won't be affected. */
|
||||
if (n_grouping_chars && format->type == 'n') {
|
||||
/* We know this can't fail, since we've already
|
||||
reserved enough space. */
|
||||
STRINGLIB_CHAR *pstart = p + n_leading_chars;
|
||||
int r = STRINGLIB_GROUPING(pstart, n_digits,
|
||||
pstart + n_digits,
|
||||
spec.n_total+n_grouping_chars-n_leading_chars,
|
||||
NULL, 0);
|
||||
assert(r);
|
||||
}
|
||||
|
||||
/* Fill in the non-digit parts */
|
||||
fill_number(p, &spec, n_digits,
|
||||
format->fill_char == '\0' ? ' ' : format->fill_char);
|
||||
|
@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
|
|||
case 'o':
|
||||
case 'x':
|
||||
case 'X':
|
||||
case 'n':
|
||||
/* no type conversion needed, already an int (or long). do
|
||||
the formatting */
|
||||
result = format_int_or_long_internal(value, &format, tostring);
|
||||
|
@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
|
|||
case 'F':
|
||||
case 'g':
|
||||
case 'G':
|
||||
case 'n':
|
||||
case '%':
|
||||
/* convert to float */
|
||||
tmp = PyNumber_Float(value);
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
/* stringlib: locale related helpers implementation */
|
||||
|
||||
#ifndef STRINGLIB_LOCALEUTIL_H
|
||||
#define STRINGLIB_LOCALEUTIL_H
|
||||
|
||||
#include <locale.h>
|
||||
|
||||
/**
|
||||
* _Py_InsertThousandsGrouping:
|
||||
* @buffer: A pointer to the start of a string.
|
||||
* @len: The length of the string.
|
||||
* @plast: A pointer to the end of of the digits in the string. This
|
||||
* may be before the end of the string (if the string contains
|
||||
* decimals, for example).
|
||||
* @buf_size: The maximum size of the buffer pointed to by buffer.
|
||||
* @count: If non-NULL, points to a variable that will receive the
|
||||
* number of characters we need to insert (and no formatting
|
||||
* will actually occur).
|
||||
* @append_zero_char: If non-zero, put a trailing zero at the end of
|
||||
* of the resulting string, if and only if we modified the
|
||||
* string.
|
||||
*
|
||||
* Inserts thousand grouping characters (as defined in the current
|
||||
* locale) into the string between buffer and plast. If count is
|
||||
* non-NULL, don't do any formatting, just count the number of
|
||||
* characters to insert. This is used by the caller to appropriately
|
||||
* resize the buffer, if needed.
|
||||
*
|
||||
* Return value: 0 on error, else 1. Note that no error can occur if
|
||||
* count is non-NULL.
|
||||
*
|
||||
* This name won't be used, the includer of this file should define
|
||||
* it to be the actual function name, based on unicode or string.
|
||||
**/
|
||||
int
|
||||
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
|
||||
Py_ssize_t len,
|
||||
STRINGLIB_CHAR *plast,
|
||||
Py_ssize_t buf_size,
|
||||
Py_ssize_t *count,
|
||||
int append_zero_char)
|
||||
{
|
||||
struct lconv *locale_data = localeconv();
|
||||
const char *grouping = locale_data->grouping;
|
||||
const char *thousands_sep = locale_data->thousands_sep;
|
||||
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
|
||||
STRINGLIB_CHAR *pend = buffer + len; /* current end of buffer */
|
||||
STRINGLIB_CHAR *pmax = buffer + buf_size; /* max of buffer */
|
||||
char current_grouping;
|
||||
|
||||
/* Initialize the character count, if we're just counting. */
|
||||
if (count)
|
||||
*count = 0;
|
||||
|
||||
/* Starting at plast and working right-to-left, keep track of
|
||||
what grouping needs to be added and insert that. */
|
||||
current_grouping = *grouping++;
|
||||
|
||||
/* If the first character is 0, perform no grouping at all. */
|
||||
if (current_grouping == 0)
|
||||
return 1;
|
||||
|
||||
while (plast - buffer > current_grouping) {
|
||||
/* Always leave buffer and pend valid at the end of this
|
||||
loop, since we might leave with a return statement. */
|
||||
|
||||
plast -= current_grouping;
|
||||
if (count) {
|
||||
/* We're only counting, not touching the memory. */
|
||||
*count += thousands_sep_len;
|
||||
}
|
||||
else {
|
||||
/* Do the formatting. */
|
||||
|
||||
/* Is there room to insert thousands_sep_len chars? */
|
||||
if (pmax - pend < thousands_sep_len)
|
||||
/* No room. */
|
||||
return 0;
|
||||
|
||||
/* Move the rest of the string down. */
|
||||
memmove(plast + thousands_sep_len,
|
||||
plast,
|
||||
(pend - plast) * sizeof(STRINGLIB_CHAR));
|
||||
/* Copy the thousands_sep chars into the buffer. */
|
||||
#if STRINGLIB_IS_UNICODE
|
||||
/* Convert from the char's of the thousands_sep from
|
||||
the locale into unicode. */
|
||||
{
|
||||
Py_ssize_t i;
|
||||
for (i = 0; i < thousands_sep_len; ++i)
|
||||
plast[i] = thousands_sep[i];
|
||||
}
|
||||
#else
|
||||
/* No conversion, just memcpy the thousands_sep. */
|
||||
memcpy(plast, thousands_sep, thousands_sep_len);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Adjust end pointer. */
|
||||
pend += thousands_sep_len;
|
||||
|
||||
/* Move to the next grouping character, unless we're
|
||||
repeating (which is designated by a grouping of 0). */
|
||||
if (*grouping != 0) {
|
||||
current_grouping = *grouping++;
|
||||
if (current_grouping == CHAR_MAX)
|
||||
/* We're done. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (append_zero_char) {
|
||||
/* Append a zero character to mark the end of the string,
|
||||
if there's room. */
|
||||
if (pend - plast < 1)
|
||||
/* No room, error. */
|
||||
return 0;
|
||||
*pend = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif /* STRINGLIB_LOCALEUTIL_H */
|
|
@ -23,5 +23,6 @@
|
|||
#define STRINGLIB_CHECK PyString_Check
|
||||
#define STRINGLIB_CMP memcmp
|
||||
#define STRINGLIB_TOSTR PyObject_Str
|
||||
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
|
||||
|
||||
#endif /* !STRINGLIB_STRINGDEFS_H */
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#define STRINGLIB_NEW PyUnicode_FromUnicode
|
||||
#define STRINGLIB_RESIZE PyUnicode_Resize
|
||||
#define STRINGLIB_CHECK PyUnicode_Check
|
||||
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
|
||||
|
||||
#if PY_VERSION_HEX < 0x03000000
|
||||
#define STRINGLIB_TOSTR PyObject_Unicode
|
||||
|
|
|
@ -784,6 +784,10 @@ PyString_AsStringAndSize(register PyObject *obj,
|
|||
#include "stringlib/find.h"
|
||||
#include "stringlib/partition.h"
|
||||
|
||||
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
|
||||
#include "stringlib/localeutil.h"
|
||||
|
||||
|
||||
|
||||
static int
|
||||
string_print(PyStringObject *op, FILE *fp, int flags)
|
||||
|
|
|
@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size)
|
|||
Py_LOCAL_INLINE(int)
|
||||
add_thousands_grouping(char* buffer, size_t buf_size)
|
||||
{
|
||||
Py_ssize_t len = strlen(buffer);
|
||||
struct lconv *locale_data = localeconv();
|
||||
const char *grouping = locale_data->grouping;
|
||||
const char *thousands_sep = locale_data->thousands_sep;
|
||||
size_t thousands_sep_len = strlen(thousands_sep);
|
||||
const char *decimal_point = locale_data->decimal_point;
|
||||
char *pend = buffer + strlen(buffer); /* current end of buffer */
|
||||
char *pmax = buffer + buf_size; /* max of buffer */
|
||||
char current_grouping;
|
||||
|
||||
/* Find the decimal point, if any. We're only concerned
|
||||
about the characters to the left of the decimal when
|
||||
|
@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size)
|
|||
if (!p)
|
||||
/* No exponent and no decimal. Use the entire
|
||||
string. */
|
||||
p = pend;
|
||||
p = buffer + len;
|
||||
}
|
||||
/* At this point, p points just past the right-most character we
|
||||
want to format. We need to add the grouping string for the
|
||||
characters between buffer and p. */
|
||||
|
||||
/* Starting at p and working right-to-left, keep track of
|
||||
what grouping needs to be added and insert that. */
|
||||
current_grouping = *grouping++;
|
||||
|
||||
/* If the first character is 0, perform no grouping at all. */
|
||||
if (current_grouping == 0)
|
||||
return 1;
|
||||
|
||||
while (p - buffer > current_grouping) {
|
||||
/* Always leave buffer and pend valid at the end of this
|
||||
loop, since we might leave with a return statement. */
|
||||
|
||||
/* Is there room to insert thousands_sep_len chars?. */
|
||||
if (pmax - pend <= thousands_sep_len)
|
||||
/* No room. */
|
||||
return 0;
|
||||
|
||||
/* Move the rest of the string down. */
|
||||
p -= current_grouping;
|
||||
memmove(p + thousands_sep_len,
|
||||
p,
|
||||
pend - p + 1);
|
||||
/* Adjust end pointer. */
|
||||
pend += thousands_sep_len;
|
||||
/* Copy the thousands_sep chars into the buffer. */
|
||||
memcpy(p, thousands_sep, thousands_sep_len);
|
||||
|
||||
/* Move to the next grouping character, unless we're
|
||||
repeating (which is designated by a grouping of 0). */
|
||||
if (*grouping != 0) {
|
||||
current_grouping = *grouping++;
|
||||
if (current_grouping == CHAR_MAX)
|
||||
/* We're done. */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
return _PyString_InsertThousandsGrouping(buffer, len, p,
|
||||
buf_size, NULL, 1);
|
||||
}
|
||||
|
||||
/* see FORMATBUFLEN in unicodeobject.c */
|
||||
|
|
Loading…
Reference in New Issue