Merged revisions 63078 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

When forward porting this, I added _PyUnicode_InsertThousandsGrouping.

........
  r63078 | eric.smith | 2008-05-11 15:52:48 -0400 (Sun, 11 May 2008) | 14 lines

  Addresses issue 2802: 'n' formatting for integers.

  Adds 'n' as a format specifier for integers, to mirror the same
  specifier which is already available for floats.  'n' is the same as
  'd', but inserts the current locale-specific thousands grouping.

  I added this as a stringlib function, but it's only used by str type,
  not unicode.  This is because of an implementation detail in
  unicode.format(), which does its own str->unicode conversion.  But the
  unicode version will be needed in 3.0, and it may be needed by other
  code eventually in 2.6 (maybe decimal?), so I left it as a stringlib
  implementation.  As long as the unicode version isn't instantiated,
  there's no overhead for this.
........
This commit is contained in:
Eric Smith 2008-05-11 21:00:57 +00:00
parent aa5b411b41
commit 5807c415c5
12 changed files with 196 additions and 51 deletions

View File

@ -87,6 +87,17 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
strings) */
);
/* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(int) _PyString_InsertThousandsGrouping(char *buffer,
Py_ssize_t len,
char *plast,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char);
/* Flags used by string formatting */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)

View File

@ -1409,6 +1409,17 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
PyObject *sepobj
);
/* Using the current locale, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(int) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
Py_ssize_t len,
Py_UNICODE *plast,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char);
/* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and

View File

@ -761,7 +761,7 @@ class LongTest(unittest.TestCase):
# ensure that float type specifiers work; format converts
# the int to a float
for format_spec in 'eEfFgGn%':
for format_spec in 'eEfFgG%':
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
self.assertEqual(format(value, format_spec),
format(float(value), format_spec))

View File

@ -313,7 +313,7 @@ class TypesTests(unittest.TestCase):
# ensure that float type specifiers work; format converts
# the int to a float
for format_spec in 'eEfFgGn%':
for format_spec in 'eEfFgG%':
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
self.assertEqual(value.__format__(format_spec),
float(value).__format__(format_spec))
@ -403,7 +403,7 @@ class TypesTests(unittest.TestCase):
# ensure that float type specifiers work; format converts
# the long to a float
for format_spec in 'eEfFgGn%':
for format_spec in 'eEfFgG%':
for value in [0, 1, -1, 100, -100, 1234567890, -1234567890]:
self.assertEqual(value.__format__(format_spec),
float(value).__format__(format_spec))
@ -417,6 +417,17 @@ class TypesTests(unittest.TestCase):
self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
@run_with_locale('LC_NUMERIC', 'en_US.UTF8')
def test_int__format__locale(self):
# test locale support for __format__ code 'n' for integers
x = 123456789012345678901234567890
for i in range(0, 30):
self.assertEqual(locale.format('%d', x, grouping=True), format(x, 'n'))
# move to the next integer to test
x = x // 10
def test_float__format__(self):
# these should be rewritten to use both format(x, spec) and
# x.__format__(spec)

View File

@ -548,6 +548,7 @@ BYTESTR_DEPS = \
$(srcdir)/Objects/stringlib/string_format.h \
$(srcdir)/Objects/stringlib/transmogrify.h \
$(srcdir)/Objects/stringlib/unicodedefs.h \
$(srcdir)/Objects/stringlib/localeutil.h
Objects/stringobject.o: $(srcdir)/Objects/stringobject.c $(BYTESTR_DEPS)

View File

@ -453,6 +453,9 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
Py_ssize_t n_digits; /* count of digits need from the computed
string */
Py_ssize_t n_leading_chars;
Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
allocate, used for 'n'
formatting. */
NumberFieldWidths spec;
long x;
@ -523,6 +526,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
break;
default: /* shouldn't be needed, but stops a compiler warning */
case 'd':
case 'n':
base = 10;
leading_chars_to_skip = 0;
break;
@ -555,8 +559,15 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
/* Calculate the widths of the various leading and trailing parts */
calc_number_widths(&spec, sign, n_digits, format);
if (format->type == 'n')
/* Compute how many additional chars we need to allocate
to hold the thousands grouping. */
STRINGLIB_GROUPING(pnumeric_chars, n_digits,
pnumeric_chars+n_digits,
0, &n_grouping_chars, 0);
/* Allocate a new string to hold the result */
result = STRINGLIB_NEW(NULL, spec.n_total);
result = STRINGLIB_NEW(NULL, spec.n_total + n_grouping_chars);
if (!result)
goto done;
p = STRINGLIB_STR(result);
@ -567,13 +578,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
pnumeric_chars,
n_digits * sizeof(STRINGLIB_CHAR));
/* if X, convert to uppercase */
/* If type is 'X', convert to uppercase */
if (format->type == 'X') {
Py_ssize_t t;
for (t = 0; t < n_digits; ++t)
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
}
/* Insert the grouping, if any, after the uppercasing of 'X', so we can
ensure that grouping chars won't be affeted. */
if (n_grouping_chars && format->type == 'n') {
/* We know this can't fail, since we've already
reserved enough space. */
STRINGLIB_CHAR *pstart = p + n_leading_chars;
int r = STRINGLIB_GROUPING(pstart, n_digits,
pstart + n_digits,
spec.n_total+n_grouping_chars-n_leading_chars,
NULL, 0);
assert(r);
}
/* Fill in the non-digit parts */
fill_number(p, &spec, n_digits,
format->fill_char == '\0' ? ' ' : format->fill_char);
@ -841,6 +865,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
case 'o':
case 'x':
case 'X':
case 'n':
/* no type conversion needed, already an int (or long). do
the formatting */
result = format_int_or_long_internal(value, &format, tostring);
@ -852,7 +877,6 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring)
case 'F':
case 'g':
case 'G':
case 'n':
case '%':
/* convert to float */
tmp = PyNumber_Float(value);

View File

@ -0,0 +1,121 @@
/* stringlib: locale related helpers implementation */
#ifndef STRINGLIB_LOCALEUTIL_H
#define STRINGLIB_LOCALEUTIL_H
#include <locale.h>
/**
* _Py_InsertThousandsGrouping:
* @buffer: A pointer to the start of a string.
* @len: The length of the string.
* @plast: A pointer to the end of of the digits in the string. This
* may be before the end of the string (if the string contains
* decimals, for example).
* @buf_size: The maximum size of the buffer pointed to by buffer.
* @count: If non-NULL, points to a variable that will receive the
* number of characters we need to insert (and no formatting
* will actually occur).
* @append_zero_char: If non-zero, put a trailing zero at the end of
* of the resulting string, if and only if we modified the
* string.
*
* Inserts thousand grouping characters (as defined in the current
* locale) into the string between buffer and plast. If count is
* non-NULL, don't do any formatting, just count the number of
* characters to insert. This is used by the caller to appropriately
* resize the buffer, if needed.
*
* Return value: 0 on error, else 1. Note that no error can occur if
* count is non-NULL.
*
* This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string.
**/
int
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
Py_ssize_t len,
STRINGLIB_CHAR *plast,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char)
{
struct lconv *locale_data = localeconv();
const char *grouping = locale_data->grouping;
const char *thousands_sep = locale_data->thousands_sep;
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
STRINGLIB_CHAR *pend = buffer + len; /* current end of buffer */
STRINGLIB_CHAR *pmax = buffer + buf_size; /* max of buffer */
char current_grouping;
/* Initialize the character count, if we're just counting. */
if (count)
*count = 0;
/* Starting at plast and working right-to-left, keep track of
what grouping needs to be added and insert that. */
current_grouping = *grouping++;
/* If the first character is 0, perform no grouping at all. */
if (current_grouping == 0)
return 1;
while (plast - buffer > current_grouping) {
/* Always leave buffer and pend valid at the end of this
loop, since we might leave with a return statement. */
plast -= current_grouping;
if (count) {
/* We're only counting, not touching the memory. */
*count += thousands_sep_len;
}
else {
/* Do the formatting. */
/* Is there room to insert thousands_sep_len chars? */
if (pmax - pend < thousands_sep_len)
/* No room. */
return 0;
/* Move the rest of the string down. */
memmove(plast + thousands_sep_len,
plast,
(pend - plast) * sizeof(STRINGLIB_CHAR));
/* Copy the thousands_sep chars into the buffer. */
#if STRINGLIB_IS_UNICODE
/* Convert from the char's of the thousands_sep from
the locale into unicode. */
{
Py_ssize_t i;
for (i = 0; i < thousands_sep_len; ++i)
plast[i] = thousands_sep[i];
}
#else
/* No conversion, just memcpy the thousands_sep. */
memcpy(plast, thousands_sep, thousands_sep_len);
#endif
}
/* Adjust end pointer. */
pend += thousands_sep_len;
/* Move to the next grouping character, unless we're
repeating (which is designated by a grouping of 0). */
if (*grouping != 0) {
current_grouping = *grouping++;
if (current_grouping == CHAR_MAX)
/* We're done. */
break;
}
}
if (append_zero_char) {
/* Append a zero character to mark the end of the string,
if there's room. */
if (pend - plast < 1)
/* No room, error. */
return 0;
*pend = 0;
}
return 1;
}
#endif /* STRINGLIB_LOCALEUTIL_H */

View File

@ -23,5 +23,6 @@
#define STRINGLIB_CHECK PyString_Check
#define STRINGLIB_CMP memcmp
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping
#endif /* !STRINGLIB_STRINGDEFS_H */

View File

@ -21,6 +21,7 @@
#define STRINGLIB_NEW PyUnicode_FromUnicode
#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping
#if PY_VERSION_HEX < 0x03000000
#define STRINGLIB_TOSTR PyObject_Unicode

View File

@ -570,6 +570,8 @@ PyString_AsStringAndSize(register PyObject *obj,
#include "stringlib/ctype.h"
#include "stringlib/transmogrify.h"
#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
#include "stringlib/localeutil.h"
PyObject *
PyString_Repr(PyObject *obj, int smartquotes)

View File

@ -5234,6 +5234,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
#include "stringlib/find.h"
#include "stringlib/partition.h"
#define _Py_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
#include "stringlib/localeutil.h"
/* helper macro to fixup start/end slice values */
#define FIX_START_END(obj) \
if (start < 0) \

View File

@ -343,14 +343,9 @@ ensure_decimal_point(char* buffer, size_t buf_size)
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
Py_ssize_t len = strlen(buffer);
struct lconv *locale_data = localeconv();
const char *grouping = locale_data->grouping;
const char *thousands_sep = locale_data->thousands_sep;
size_t thousands_sep_len = strlen(thousands_sep);
const char *decimal_point = locale_data->decimal_point;
char *pend = buffer + strlen(buffer); /* current end of buffer */
char *pmax = buffer + buf_size; /* max of buffer */
char current_grouping;
/* Find the decimal point, if any. We're only concerned
about the characters to the left of the decimal when
@ -364,49 +359,13 @@ add_thousands_grouping(char* buffer, size_t buf_size)
if (!p)
/* No exponent and no decimal. Use the entire
string. */
p = pend;
p = buffer + len;
}
/* At this point, p points just past the right-most character we
want to format. We need to add the grouping string for the
characters between buffer and p. */
/* Starting at p and working right-to-left, keep track of
what grouping needs to be added and insert that. */
current_grouping = *grouping++;
/* If the first character is 0, perform no grouping at all. */
if (current_grouping == 0)
return 1;
while (p - buffer > current_grouping) {
/* Always leave buffer and pend valid at the end of this
loop, since we might leave with a return statement. */
/* Is there room to insert thousands_sep_len chars?. */
if (pmax - pend <= thousands_sep_len)
/* No room. */
return 0;
/* Move the rest of the string down. */
p -= current_grouping;
memmove(p + thousands_sep_len,
p,
pend - p + 1);
/* Adjust end pointer. */
pend += thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */
memcpy(p, thousands_sep, thousands_sep_len);
/* Move to the next grouping character, unless we're
repeating (which is designated by a grouping of 0). */
if (*grouping != 0) {
current_grouping = *grouping++;
if (current_grouping == CHAR_MAX)
/* We're done. */
return 1;
}
}
return 1;
return _PyString_InsertThousandsGrouping(buffer, len, p,
buf_size, NULL, 1);
}
/* see FORMATBUFLEN in unicodeobject.c */