Issue 27080: PEP 515: add '_' formatting option.

This commit is contained in:
Eric V. Smith 2016-09-09 23:06:47 -04:00
parent 37d398ebd1
commit 89e1b1aae0
4 changed files with 93 additions and 22 deletions

View File

@ -300,7 +300,7 @@ non-empty format string typically modifies the result.
The general form of a *standard format specifier* is: The general form of a *standard format specifier* is:
.. productionlist:: sf .. productionlist:: sf
format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][.`precision`][`type`] format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][_][.`precision`][`type`]
fill: <any character> fill: <any character>
align: "<" | ">" | "=" | "^" align: "<" | ">" | "=" | "^"
sign: "+" | "-" | " " sign: "+" | "-" | " "
@ -378,6 +378,16 @@ instead.
.. versionchanged:: 3.1 .. versionchanged:: 3.1
Added the ``','`` option (see also :pep:`378`). Added the ``','`` option (see also :pep:`378`).
The ``'_'`` option signals the use of an underscore as a thousands
separator for floating point presentation types and for integer
presentation type ``'d'``. For integer presentation types ``'b'``,
``'o'``, ``'x'``, and ``'X'``, underscores will be inserted every 4
digits. For other presentation types, specifying this option is an
error. The ``','`` and ``'_'`` options cannot be combined; specifying
both is an error.
.. versionchanged:: 3.6
Added the ``'_'`` option (see also :pep:`515`).
*width* is a decimal integer defining the minimum field width. If not *width* is a decimal integer defining the minimum field width. If not
specified, then the field width will be determined by the content. specified, then the field width will be determined by the content.

View File

@ -621,6 +621,8 @@ class LongTest(unittest.TestCase):
def test__format__(self): def test__format__(self):
self.assertEqual(format(123456789, 'd'), '123456789') self.assertEqual(format(123456789, 'd'), '123456789')
self.assertEqual(format(123456789, 'd'), '123456789') self.assertEqual(format(123456789, 'd'), '123456789')
self.assertEqual(format(123456789, ','), '123,456,789')
self.assertEqual(format(123456789, '_'), '123_456_789')
# sign and aligning are interdependent # sign and aligning are interdependent
self.assertEqual(format(1, "-"), '1') self.assertEqual(format(1, "-"), '1')
@ -649,8 +651,25 @@ class LongTest(unittest.TestCase):
self.assertEqual(format(int('be', 16), "X"), "BE") self.assertEqual(format(int('be', 16), "X"), "BE")
self.assertEqual(format(-int('be', 16), "x"), "-be") self.assertEqual(format(-int('be', 16), "x"), "-be")
self.assertEqual(format(-int('be', 16), "X"), "-BE") self.assertEqual(format(-int('be', 16), "X"), "-BE")
self.assertRaises(ValueError, format, 1234567890, ',x')
self.assertEqual(format(1234567890, '_x'), '4996_02d2')
self.assertEqual(format(1234567890, '_X'), '4996_02D2')
# octal # octal
self.assertEqual(format(3, "o"), "3")
self.assertEqual(format(-3, "o"), "-3")
self.assertEqual(format(1234, "o"), "2322")
self.assertEqual(format(-1234, "o"), "-2322")
self.assertEqual(format(1234, "-o"), "2322")
self.assertEqual(format(-1234, "-o"), "-2322")
self.assertEqual(format(1234, " o"), " 2322")
self.assertEqual(format(-1234, " o"), "-2322")
self.assertEqual(format(1234, "+o"), "+2322")
self.assertEqual(format(-1234, "+o"), "-2322")
self.assertRaises(ValueError, format, 1234567890, ',o')
self.assertEqual(format(1234567890, '_o'), '111_4540_1322')
# binary
self.assertEqual(format(3, "b"), "11") self.assertEqual(format(3, "b"), "11")
self.assertEqual(format(-3, "b"), "-11") self.assertEqual(format(-3, "b"), "-11")
self.assertEqual(format(1234, "b"), "10011010010") self.assertEqual(format(1234, "b"), "10011010010")
@ -661,12 +680,21 @@ class LongTest(unittest.TestCase):
self.assertEqual(format(-1234, " b"), "-10011010010") self.assertEqual(format(-1234, " b"), "-10011010010")
self.assertEqual(format(1234, "+b"), "+10011010010") self.assertEqual(format(1234, "+b"), "+10011010010")
self.assertEqual(format(-1234, "+b"), "-10011010010") self.assertEqual(format(-1234, "+b"), "-10011010010")
self.assertRaises(ValueError, format, 1234567890, ',b')
self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')
# make sure these are errors # make sure these are errors
self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed
self.assertRaises(ValueError, format, 3, "_c") # underscore,
self.assertRaises(ValueError, format, 3, ",c") # comma, and
self.assertRaises(ValueError, format, 3, "+c") # sign not allowed self.assertRaises(ValueError, format, 3, "+c") # sign not allowed
# with 'c' # with 'c'
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,')
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_')
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,d')
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_d')
# ensure that only int and float type specifiers work # ensure that only int and float type specifiers work
for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
[chr(x) for x in range(ord('A'), ord('Z')+1)]): [chr(x) for x in range(ord('A'), ord('Z')+1)]):

View File

@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #27080: Implement formatting support for PEP 515. Initial patch
by Chris Angelico.
- Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput. - Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput.
Patch by Jason Fried. Patch by Jason Fried.

View File

@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_type)
{ {
if (presentation_type > 32 && presentation_type < 128) if (presentation_type > 32 && presentation_type < 128)
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '%c'.", "Cannot specify ',' or '_' with '%c'.",
(char)presentation_type); (char)presentation_type);
else else
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '\\x%x'.", "Cannot specify ',' or '_' with '\\x%x'.",
(unsigned int)presentation_type); (unsigned int)presentation_type);
} }
/* Set a ValueError reporting that both the ',' and the '_' grouping
   options were given in a single format specifier.  The two options are
   mutually exclusive.  This only sets the exception; the caller is
   responsible for returning its own failure value afterwards.

   Declared with an explicit (void) parameter list: in C an empty "()"
   is a non-prototype declaration that disables argument checking. */
static void
invalid_comma_and_underscore(void)
{
    PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
}
/* /*
get_integer consumes 0 or more decimal digit characters from an get_integer consumes 0 or more decimal digit characters from an
input string, updates *result with the corresponding positive input string, updates *result with the corresponding positive
@ -108,6 +114,12 @@ is_sign_element(Py_UCS4 c)
} }
} }
/* Locale type codes. LT_NO_LOCALE must be zero. */
/* These codes are passed as the `type` argument of get_locale_info().
   The format spec's thousands_separators field stores one of them
   directly and is compared against 0 to mean "no grouping option was
   given", which is why LT_NO_LOCALE has to be 0. */
/* No grouping option given. */
#define LT_NO_LOCALE 0
/* ',' option: '.' decimal point, ',' separator, groups of 3 digits. */
#define LT_DEFAULT_LOCALE 1
/* '_' option: '.' decimal point, '_' separator, groups of 3 digits. */
#define LT_UNDERSCORE_LOCALE 2
/* '_' option with type 'b', 'o', 'x' or 'X': '_' separator, groups of 4
   digits. */
#define LT_UNDER_FOUR_LOCALE 3
/* Presentation type 'n': take the settings from the current locale. */
#define LT_CURRENT_LOCALE 4
typedef struct { typedef struct {
Py_UCS4 fill_char; Py_UCS4 fill_char;
@ -223,9 +235,22 @@ parse_internal_render_format_spec(PyObject *format_spec,
/* Comma signifies add thousands separators */ /* Comma signifies add thousands separators */
if (end-pos && READ_spec(pos) == ',') { if (end-pos && READ_spec(pos) == ',') {
format->thousands_separators = 1; format->thousands_separators = LT_DEFAULT_LOCALE;
++pos; ++pos;
} }
/* Underscore signifies add thousands separators */
if (end-pos && READ_spec(pos) == '_') {
if (format->thousands_separators != 0) {
invalid_comma_and_underscore();
return 0;
}
format->thousands_separators = LT_UNDERSCORE_LOCALE;
++pos;
}
if (end-pos && READ_spec(pos) == ',') {
invalid_comma_and_underscore();
return 0;
}
/* Parse field precision */ /* Parse field precision */
if (end-pos && READ_spec(pos) == '.') { if (end-pos && READ_spec(pos) == '.') {
@ -275,6 +300,16 @@ parse_internal_render_format_spec(PyObject *format_spec,
case '\0': case '\0':
/* These are allowed. See PEP 378.*/ /* These are allowed. See PEP 378.*/
break; break;
case 'b':
case 'o':
case 'x':
case 'X':
/* Underscores are allowed in bin/oct/hex. See PEP 515. */
if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
/* Every four digits, not every three, in bin/oct/hex. */
format->thousands_separators = LT_UNDER_FOUR_LOCALE;
break;
}
default: default:
invalid_comma_type(format->type); invalid_comma_type(format->type);
return 0; return 0;
@ -351,11 +386,6 @@ fill_padding(_PyUnicodeWriter *writer,
/*********** common routines for numeric formatting *********************/ /*********** common routines for numeric formatting *********************/
/************************************************************************/ /************************************************************************/
/* Locale type codes. */
#define LT_CURRENT_LOCALE 0
#define LT_DEFAULT_LOCALE 1
#define LT_NO_LOCALE 2
/* Locale info needed for formatting integers and the part of floats /* Locale info needed for formatting integers and the part of floats
before and including the decimal. Note that locales only support before and including the decimal. Note that locales only support
8-bit chars, not unicode. */ 8-bit chars, not unicode. */
@ -667,8 +697,8 @@ static const char no_grouping[1] = {CHAR_MAX};
/* Find the decimal point character(s?), thousands_separator(s?), and /* Find the decimal point character(s?), thousands_separator(s?), and
grouping description, either for the current locale if type is grouping description, either for the current locale if type is
LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
none if LT_NO_LOCALE. */ LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
static int static int
get_locale_info(int type, LocaleInfo *locale_info) get_locale_info(int type, LocaleInfo *locale_info)
{ {
@ -691,16 +721,22 @@ get_locale_info(int type, LocaleInfo *locale_info)
break; break;
} }
case LT_DEFAULT_LOCALE: case LT_DEFAULT_LOCALE:
case LT_UNDERSCORE_LOCALE:
case LT_UNDER_FOUR_LOCALE:
locale_info->decimal_point = PyUnicode_FromOrdinal('.'); locale_info->decimal_point = PyUnicode_FromOrdinal('.');
locale_info->thousands_sep = PyUnicode_FromOrdinal(','); locale_info->thousands_sep = PyUnicode_FromOrdinal(
type == LT_DEFAULT_LOCALE ? ',' : '_');
if (!locale_info->decimal_point || !locale_info->thousands_sep) { if (!locale_info->decimal_point || !locale_info->thousands_sep) {
Py_XDECREF(locale_info->decimal_point); Py_XDECREF(locale_info->decimal_point);
Py_XDECREF(locale_info->thousands_sep); Py_XDECREF(locale_info->thousands_sep);
return -1; return -1;
} }
locale_info->grouping = "\3"; /* Group every 3 characters. The if (type != LT_UNDER_FOUR_LOCALE)
locale_info->grouping = "\3"; /* Group every 3 characters. The
(implicit) trailing 0 means repeat (implicit) trailing 0 means repeat
infinitely. */ infinitely. */
else
locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
break; break;
case LT_NO_LOCALE: case LT_NO_LOCALE:
locale_info->decimal_point = PyUnicode_FromOrdinal('.'); locale_info->decimal_point = PyUnicode_FromOrdinal('.');
@ -952,9 +988,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
/* Determine the grouping, separator, and decimal point, if any. */ /* Determine the grouping, separator, and decimal point, if any. */
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ? format->thousands_separators,
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale) == -1) &locale) == -1)
goto done; goto done;
@ -1099,9 +1133,7 @@ format_float_internal(PyObject *value,
/* Determine the grouping, separator, and decimal point, if any. */ /* Determine the grouping, separator, and decimal point, if any. */
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ? format->thousands_separators,
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale) == -1) &locale) == -1)
goto done; goto done;
@ -1277,9 +1309,7 @@ format_complex_internal(PyObject *value,
/* Determine the grouping, separator, and decimal point, if any. */ /* Determine the grouping, separator, and decimal point, if any. */
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ? format->thousands_separators,
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale) == -1) &locale) == -1)
goto done; goto done;