From cf537ff39ea1a518e937ee607bce816e8f3f41b6 Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Sun, 11 May 2008 19:52:48 +0000 Subject: [PATCH] Addresses issue 2802: 'n' formatting for integers. Adds 'n' as a format specifier for integers, to mirror the same specifier which is already available for floats. 'n' is the same as 'd', but inserts the current locale-specific thousands grouping. I added this as a stringlib function, but it's only used by str type, not unicode. This is because of an implementation detail in unicode.format(), which does its own str->unicode conversion. But the unicode version will be needed in 3.0, and it may be needed by other code eventually in 2.6 (maybe decimal?), so I left it as a stringlib implementation. As long as the unicode version isn't instantiated, there's no overhead for this. --- Include/stringobject.h | 12 +++- Lib/test/test_types.py | 15 +++- Makefile.pre.in | 3 +- Objects/stringlib/formatter.h | 30 +++++++- Objects/stringlib/localeutil.h | 121 ++++++++++++++++++++++++++++++++ Objects/stringlib/stringdefs.h | 1 + Objects/stringlib/unicodedefs.h | 1 + Objects/stringobject.c | 4 ++ Python/pystrtod.c | 49 ++----------- 9 files changed, 184 insertions(+), 52 deletions(-) create mode 100644 Objects/stringlib/localeutil.h diff --git a/Include/stringobject.h b/Include/stringobject.h index e3f880a066d..405736285c4 100644 --- a/Include/stringobject.h +++ b/Include/stringobject.h @@ -176,7 +176,17 @@ PyAPI_FUNC(int) PyString_AsStringAndSize( (only possible for 0-terminated strings) */ ); - + +/* Using the current locale, insert the thousands grouping + into the string pointed to by buffer. For the argument descriptions, + see Objects/stringlib/localeutil.h */ + +PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer, + Py_ssize_t len, + char *plast, + Py_ssize_t buf_size, + Py_ssize_t *count, + int append_zero_char); #ifdef __cplusplus } diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 4b620c509f4..aca5ff28c86 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -377,7 +377,7 @@ class TypesTests(unittest.TestCase): # ensure that float type specifiers work; format converts # the int to a float - for format_spec in 'eEfFgGn%': + for format_spec in 'eEfFgG%': for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]: self.assertEqual(value.__format__(format_spec), float(value).__format__(format_spec)) @@ -472,7 +472,7 @@ class TypesTests(unittest.TestCase): # ensure that float type specifiers work; format converts # the long to a float - for format_spec in 'eEfFgGn%': + for format_spec in 'eEfFgG%': for value in [0L, 1L, -1L, 100L, -100L, 1234567890L, -1234567890L]: self.assertEqual(value.__format__(format_spec), float(value).__format__(format_spec)) @@ -486,6 +486,17 @@ class TypesTests(unittest.TestCase): self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n')) self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n')) + @run_with_locale('LC_NUMERIC', 'en_US.UTF8') + def test_int__format__locale(self): + # test locale support for __format__ code 'n' for integers + + x = 123456789012345678901234567890 + for i in range(0, 30): + self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n')) + + # move to the next integer to test + x = x // 10 + def test_float__format__(self): # these should be rewritten to use both format(x, spec) and # x.__format__(spec) diff --git a/Makefile.pre.in b/Makefile.pre.in index b3f1cf06c22..daae2baf683 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -549,7 +549,8 @@ STRINGLIB_HEADERS= \ $(srcdir)/Objects/stringlib/stringdefs.h \ $(srcdir)/Objects/stringlib/string_format.h \ $(srcdir)/Objects/stringlib/transmogrify.h \ - $(srcdir)/Objects/stringlib/unicodedefs.h + $(srcdir)/Objects/stringlib/unicodedefs.h \ + $(srcdir)/Objects/stringlib/localeutil.h Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \ $(STRINGLIB_HEADERS) diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h index 531bc223ff3..22dd292ce9e 100644 --- a/Objects/stringlib/formatter.h +++ b/Objects/stringlib/formatter.h @@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, Py_ssize_t n_digits; /* count of digits need from the computed string */ Py_ssize_t n_leading_chars; + Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to + allocate, used for 'n' + formatting. */ NumberFieldWidths spec; long x; @@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, break; default: /* shouldn't be needed, but stops a compiler warning */ case 'd': + case 'n': base = 10; leading_chars_to_skip = 0; break; @@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, /* Calculate the widths of the various leading and trailing parts */ calc_number_widths(&spec, sign, n_digits, format); + if (format->type == 'n') + /* Compute how many additional chars we need to allocate + to hold the thousands grouping. */ + STRINGLIB_GROUPING(pnumeric_chars, n_digits, + pnumeric_chars+n_digits, + 0, &n_grouping_chars, 0); + /* Allocate a new string to hold the result */ - result = STRINGLIB_NEW(NULL, spec.n_total); + result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars); if (!result) goto done; p = STRINGLIB_STR(result); @@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, pnumeric_chars, n_digits * sizeof(STRINGLIB_CHAR)); - /* if X, convert to uppercase */ + /* If type is 'X', convert to uppercase */ if (format->type == 'X') { Py_ssize_t t; for (t = 0; t < n_digits; ++t) p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]); } + /* Insert the grouping, if any, after the uppercasing of 'X', so we can + ensure that grouping chars won't be affeted. */ + if (n_grouping_chars && format->type == 'n') { + /* We know this can't fail, since we've already + reserved enough space. */ + STRINGLIB_CHAR *pstart = p + n_leading_chars; + int r = STRINGLIB_GROUPING(pstart, n_digits, + pstart + n_digits, + spec.n_total+n_grouping_chars-n_leading_chars, + NULL, 0); + assert(r); + } + /* Fill in the non-digit parts */ fill_number(p, &spec, n_digits, format->fill_char == '\0' ? ' ' : format->fill_char); @@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring) case 'o': case 'x': case 'X': + case 'n': /* no type conversion needed, already an int (or long). do the formatting */ result = format_int_or_long_internal(value, &format, tostring); @@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring) case 'F': case 'g': case 'G': - case 'n': case '%': /* convert to float */ tmp = PyNumber_Float(value); diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h new file mode 100644 index 00000000000..5cab0bb1811 --- /dev/null +++ b/Objects/stringlib/localeutil.h @@ -0,0 +1,121 @@ +/* stringlib: locale related helpers implementation */ + +#ifndef STRINGLIB_LOCALEUTIL_H +#define STRINGLIB_LOCALEUTIL_H + +#include + +/** + * _Py_InsertThousandsGrouping: + * @buffer: A pointer to the start of a string. + * @len: The length of the string. + * @plast: A pointer to the end of of the digits in the string. This + * may be before the end of the string (if the string contains + * decimals, for example). + * @buf_size: The maximum size of the buffer pointed to by buffer. + * @count: If non-NULL, points to a variable that will receive the + * number of characters we need to insert (and no formatting + * will actually occur). + * @append_zero_char: If non-zero, put a trailing zero at the end of + * of the resulting string, if and only if we modified the + * string. + * + * Inserts thousand grouping characters (as defined in the current + * locale) into the string between buffer and plast. If count is + * non-NULL, don't do any formatting, just count the number of + * characters to insert. This is used by the caller to appropriately + * resize the buffer, if needed. + * + * Return value: 0 on error, else 1. Note that no error can occur if + * count is non-NULL. + * + * This name won't be used, the includer of this file should define + * it to be the actual function name, based on unicode or string. + **/ +int +_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer, + Py_ssize_t len, + STRINGLIB_CHAR *plast, + Py_ssize_t buf_size, + Py_ssize_t *count, + int append_zero_char) +{ + struct lconv *locale_data = localeconv(); + const char *grouping = locale_data->grouping; + const char *thousands_sep = locale_data->thousands_sep; + Py_ssize_t thousands_sep_len = strlen(thousands_sep); + STRINGLIB_CHAR *pend = buffer + len; /* current end of buffer */ + STRINGLIB_CHAR *pmax = buffer + buf_size; /* max of buffer */ + char current_grouping; + + /* Initialize the character count, if we're just counting. */ + if (count) + *count = 0; + + /* Starting at plast and working right-to-left, keep track of + what grouping needs to be added and insert that. */ + current_grouping = *grouping++; + + /* If the first character is 0, perform no grouping at all. */ + if (current_grouping == 0) + return 1; + + while (plast - buffer > current_grouping) { + /* Always leave buffer and pend valid at the end of this + loop, since we might leave with a return statement. */ + + plast -= current_grouping; + if (count) { + /* We're only counting, not touching the memory. */ + *count += thousands_sep_len; + } + else { + /* Do the formatting. */ + + /* Is there room to insert thousands_sep_len chars? */ + if (pmax - pend < thousands_sep_len) + /* No room. */ + return 0; + + /* Move the rest of the string down. */ + memmove(plast + thousands_sep_len, + plast, + (pend - plast) * sizeof(STRINGLIB_CHAR)); + /* Copy the thousands_sep chars into the buffer. */ +#if STRINGLIB_IS_UNICODE + /* Convert from the char's of the thousands_sep from + the locale into unicode. */ + { + Py_ssize_t i; + for (i = 0; i < thousands_sep_len; ++i) + plast[i] = thousands_sep[i]; + } +#else + /* No conversion, just memcpy the thousands_sep. */ + memcpy(plast, thousands_sep, thousands_sep_len); +#endif + } + + /* Adjust end pointer. */ + pend += thousands_sep_len; + + /* Move to the next grouping character, unless we're + repeating (which is designated by a grouping of 0). */ + if (*grouping != 0) { + current_grouping = *grouping++; + if (current_grouping == CHAR_MAX) + /* We're done. */ + break; + } + } + if (append_zero_char) { + /* Append a zero character to mark the end of the string, + if there's room. */ + if (pend - plast < 1) + /* No room, error. */ + return 0; + *pend = 0; + } + return 1; +} +#endif /* STRINGLIB_LOCALEUTIL_H */ diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h index 1e0df0f9cdc..daaa2e2b0f7 100644 --- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -23,5 +23,6 @@ #define STRINGLIB_CHECK PyString_Check #define STRINGLIB_CMP memcmp #define STRINGLIB_TOSTR PyObject_Str +#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping #endif /* !STRINGLIB_STRINGDEFS_H */ diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index f402a987883..8f87fe0f7d1 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -21,6 +21,7 @@ #define STRINGLIB_NEW PyUnicode_FromUnicode #define STRINGLIB_RESIZE PyUnicode_Resize #define STRINGLIB_CHECK PyUnicode_Check +#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping #if PY_VERSION_HEX < 0x03000000 #define STRINGLIB_TOSTR PyObject_Unicode diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 9d518541ec7..b96aaf85b62 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -784,6 +784,10 @@ PyString_AsStringAndSize(register PyObject *obj, #include "stringlib/find.h" #include "stringlib/partition.h" +#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping +#include "stringlib/localeutil.h" + + static int string_print(PyStringObject *op, FILE *fp, int flags) diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 0912cec57ed..3f0328e06b3 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size) Py_LOCAL_INLINE(int) add_thousands_grouping(char* buffer, size_t buf_size) { + Py_ssize_t len = strlen(buffer); struct lconv *locale_data = localeconv(); - const char *grouping = locale_data->grouping; - const char *thousands_sep = locale_data->thousands_sep; - size_t thousands_sep_len = strlen(thousands_sep); const char *decimal_point = locale_data->decimal_point; - char *pend = buffer + strlen(buffer); /* current end of buffer */ - char *pmax = buffer + buf_size; /* max of buffer */ - char current_grouping; /* Find the decimal point, if any. We're only concerned about the characters to the left of the decimal when @@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size) if (!p) /* No exponent and no decimal. Use the entire string. */ - p = pend; + p = buffer + len; } /* At this point, p points just past the right-most character we want to format. We need to add the grouping string for the characters between buffer and p. */ - - /* Starting at p and working right-to-left, keep track of - what grouping needs to be added and insert that. */ - current_grouping = *grouping++; - - /* If the first character is 0, perform no grouping at all. */ - if (current_grouping == 0) - return 1; - - while (p - buffer > current_grouping) { - /* Always leave buffer and pend valid at the end of this - loop, since we might leave with a return statement. */ - - /* Is there room to insert thousands_sep_len chars?. */ - if (pmax - pend <= thousands_sep_len) - /* No room. */ - return 0; - - /* Move the rest of the string down. */ - p -= current_grouping; - memmove(p + thousands_sep_len, - p, - pend - p + 1); - /* Adjust end pointer. */ - pend += thousands_sep_len; - /* Copy the thousands_sep chars into the buffer. */ - memcpy(p, thousands_sep, thousands_sep_len); - - /* Move to the next grouping character, unless we're - repeating (which is designated by a grouping of 0). */ - if (*grouping != 0) { - current_grouping = *grouping++; - if (current_grouping == CHAR_MAX) - /* We're done. */ - return 1; - } - } - return 1; + return _PyString_InsertThousandsGrouping(buffer, len, p, + buf_size, NULL, 1); } /* see FORMATBUFLEN in unicodeobject.c */