diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 668a5305d70..662541ec494 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -294,7 +294,7 @@ result as if you had called :func:`str` on the value. The general form of a *standard format specifier* is: .. productionlist:: sf - format_spec: [[`fill`]`align`][`sign`][0][`width`][.`precision`][`type`] + format_spec: [[`fill`]`align`][`sign`][#][0][`width`][.`precision`][`type`] fill: align: "<" | ">" | "=" | "^" sign: "+" | "-" | " " @@ -348,6 +348,10 @@ following: | | positive numbers, and a minus sign on negative numbers. | +---------+----------------------------------------------------------+ +The ``'#'`` option is only valid for integers, and only for binary, +octal, or hexadecimal output. If present, it specifies that the output +will be prefixed by ``'0b'``, ``'0o'``, or ``'0x'``, respectively. + *width* is a decimal integer defining the minimum field width. If not specified, then the field width will be determined by the content. @@ -368,7 +372,7 @@ The available integer presentation types are: +---------+----------------------------------------------------------+ | Type | Meaning | +=========+==========================================================+ - | ``'b'`` | Binary. Outputs the number in base 2. | + | ``'b'`` | Binary format. Outputs the number in base 2. | +---------+----------------------------------------------------------+ | ``'c'`` | Character. Converts the integer to the corresponding | | | unicode character before printing. 
| diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 360cfe4b15b..1b8e6054d7f 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -301,7 +301,8 @@ class TypesTests(unittest.TestCase): test(-1, "-#5b", ' -0b1') test(1, "+#5b", ' +0b1') test(100, "+#b", '+0b1100100') -# test(100, "#012b", '0b001100100') + test(100, "#012b", '0b0001100100') + test(-100, "#012b", '-0b001100100') test(0, "#o", '0o0') test(0, "-#o", '0o0') @@ -310,6 +311,8 @@ class TypesTests(unittest.TestCase): test(-1, "-#5o", ' -0o1') test(1, "+#5o", ' +0o1') test(100, "+#o", '+0o144') + test(100, "#012o", '0o0000000144') + test(-100, "#012o", '-0o000000144') test(0, "#x", '0x0') test(0, "-#x", '0x0') @@ -318,6 +321,10 @@ class TypesTests(unittest.TestCase): test(-1, "-#5x", ' -0x1') test(1, "+#5x", ' +0x1') test(100, "+#x", '+0x64') + test(100, "#012x", '0x0000000064') + test(-100, "#012x", '-0x000000064') + test(123456, "#012x", '0x000001e240') + test(-123456, "#012x", '-0x00001e240') test(0, "#X", '0X0') test(0, "-#X", '0X0') @@ -326,6 +333,10 @@ class TypesTests(unittest.TestCase): test(-1, "-#5X", ' -0X1') test(1, "+#5X", ' +0X1') test(100, "+#X", '+0X64') + test(100, "#012X", '0X0000000064') + test(-100, "#012X", '-0X000000064') + test(123456, "#012X", '0X000001E240') + test(-123456, "#012X", '-0X00001E240') # make sure these are errors diff --git a/Misc/NEWS b/Misc/NEWS index 3272f00b555..235e7f66f97 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's new in Python 3.0b2? Core and Builtins ----------------- +- Issue #3083: Add alternate (#) formatting for bin, oct, hex output + for str.format(). This adds the prefix 0b, 0o, or 0x, respectively. + - Issue #3280: like chr(), the "%c" format now accepts unicode code points beyond the Basic Multilingual Plane (above 0xffff) on all configurations. 
On "narrow Unicode" builds, the result is a string of 2 code units, forming a diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h index 9b7d607a5a8..ba43200ec9e 100644 --- a/Objects/stringlib/formatter.h +++ b/Objects/stringlib/formatter.h @@ -147,6 +147,13 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, #endif } + /* If the next character is #, we're in alternate mode. This only + applies to integers. */ + if (end-ptr >= 1 && ptr[0] == '#') { + format->alternate = 1; + ++ptr; + } + /* The special case for 0-padding (backwards compat) */ if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') { format->fill_char = '0'; @@ -156,13 +163,6 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, ++ptr; } - /* If the next character is #, we're in alternate mode. This only - applies to integers. */ - if (end-ptr >= 1 && ptr[0] == '#') { - format->alternate = 1; - ++ptr; - } - /* XXX add error checking */ specified_width = get_integer(&ptr, end, &format->width); @@ -211,9 +211,10 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, /************************************************************************/ /* describes the layout for an integer, see the comment in - _calc_integer_widths() for details */ + calc_number_widths() for details */ typedef struct { Py_ssize_t n_lpadding; + Py_ssize_t n_prefix; Py_ssize_t n_spadding; Py_ssize_t n_rpadding; char lsign; @@ -234,6 +235,7 @@ calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign, const InternalFormatSpec *format) { r->n_lpadding = 0; + r->n_prefix = 0; r->n_spadding = 0; r->n_rpadding = 0; r->lsign = '\0'; @@ -288,13 +290,16 @@ calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign, } } + r->n_prefix = n_prefix; + /* now the number of padding characters */ if (format->width == -1) { /* no padding at all, nothing to do */ } else { /* see if any padding is needed */ - if (r->n_lsign + n_digits + r->n_rsign >= format->width) 
{ + if (r->n_lsign + n_digits + r->n_rsign + + r->n_prefix >= format->width) { /* no padding needed, we're already bigger than the requested width */ } @@ -302,7 +307,8 @@ calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign, /* determine which of left, space, or right padding is needed */ Py_ssize_t padding = format->width - - (r->n_lsign + n_digits + r->n_rsign); + (r->n_lsign + r->n_prefix + + n_digits + r->n_rsign); if (format->align == '<') r->n_rpadding = padding; else if (format->align == '>') @@ -317,18 +323,19 @@ calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign, r->n_lpadding = padding; } } - r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding + - n_digits + r->n_rsign + r->n_rpadding; + r->n_total = r->n_lpadding + r->n_lsign + r->n_prefix + + r->n_spadding + n_digits + r->n_rsign + r->n_rpadding; } /* fill in the non-digit parts of a numbers's string representation, - as determined in _calc_integer_widths(). returns the pointer to + as determined in calc_number_widths(). returns the pointer to where the digits go. 
*/ static STRINGLIB_CHAR * fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, - Py_ssize_t n_digits, STRINGLIB_CHAR fill_char) + STRINGLIB_CHAR *prefix, Py_ssize_t n_digits, + STRINGLIB_CHAR fill_char) { - STRINGLIB_CHAR* p_digits; + STRINGLIB_CHAR *p_digits; if (spec->n_lpadding) { STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding); @@ -337,6 +344,12 @@ fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, if (spec->n_lsign == 1) { *p_buf++ = spec->lsign; } + if (spec->n_prefix) { + memmove(p_buf, + prefix, + spec->n_prefix * sizeof(STRINGLIB_CHAR)); + p_buf += spec->n_prefix; + } if (spec->n_spadding) { STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding); p_buf += spec->n_spadding; @@ -477,6 +490,8 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to allocate, used for 'n' formatting. */ + Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */ + STRINGLIB_CHAR *prefix = NULL; NumberFieldWidths spec; long x; @@ -534,19 +549,16 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, switch (format->type) { case 'b': base = 2; - if (!format->alternate) - leading_chars_to_skip = 2; /* 0b */ + leading_chars_to_skip = 2; /* 0b */ break; case 'o': base = 8; - if (!format->alternate) - leading_chars_to_skip = 2; /* 0o */ + leading_chars_to_skip = 2; /* 0o */ break; case 'x': case 'X': base = 16; - if (!format->alternate) - leading_chars_to_skip = 2; /* 0x */ + leading_chars_to_skip = 2; /* 0x */ break; default: /* shouldn't be needed, but stops a compiler warning */ case 'd': @@ -555,6 +567,11 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, break; } + /* The number of prefix chars is the same as the leading + chars to skip */ + if (format->alternate) + n_prefix = leading_chars_to_skip; + /* Do the hard part, converting to a string in a given base */ tmp = tostring(value, 
base); if (tmp == NULL) @@ -563,6 +580,8 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, pnumeric_chars = STRINGLIB_STR(tmp); n_digits = STRINGLIB_LEN(tmp); + prefix = pnumeric_chars; + /* Remember not to modify what pnumeric_chars points to. it might be interned. Only modify it after we copy it into a newly allocated output buffer. */ @@ -571,6 +590,7 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, and skip it */ sign = pnumeric_chars[0]; if (sign == '-') { + ++prefix; ++leading_chars_to_skip; } @@ -586,7 +606,8 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, 0, &n_grouping_chars, 0); /* Calculate the widths of the various leading and trailing parts */ - calc_number_widths(&spec, sign, 0, n_digits + n_grouping_chars, format); + calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars, + format); /* Allocate a new string to hold the result */ result = STRINGLIB_NEW(NULL, spec.n_total); @@ -594,35 +615,52 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; p = STRINGLIB_STR(result); + /* XXX There is too much magic here regarding the internals of + spec and the location of the prefix and digits. It would be + better if calc_number_widths returned a number of logical + offsets into the buffer, and those were used. Maybe in a + future code cleanup. 
*/ + /* Fill in the digit parts */ - n_leading_chars = spec.n_lpadding + spec.n_lsign + spec.n_spadding; + n_leading_chars = spec.n_lpadding + spec.n_lsign + + spec.n_prefix + spec.n_spadding; memmove(p + n_leading_chars, pnumeric_chars, n_digits * sizeof(STRINGLIB_CHAR)); - /* If type is 'X', convert to uppercase */ + /* If type is 'X', convert the filled in digits to uppercase */ if (format->type == 'X') { Py_ssize_t t; for (t = 0; t < n_digits; ++t) p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]); } - /* Insert the grouping, if any, after the uppercasing of 'X', so we can - ensure that grouping chars won't be affected. */ + /* Insert the grouping, if any, after the uppercasing of the digits, so + we can ensure that grouping chars won't be affected. */ if (n_grouping_chars) { /* We know this can't fail, since we've already reserved enough space. */ STRINGLIB_CHAR *pstart = p + n_leading_chars; int r = STRINGLIB_GROUPING(pstart, n_digits, n_digits, - spec.n_total+n_grouping_chars-n_leading_chars, - NULL, 0); + spec.n_total+n_grouping_chars-n_leading_chars, + NULL, 0); assert(r); } /* Fill in the non-digit parts (padding, sign, etc.) */ - fill_non_digits(p, &spec, n_digits + n_grouping_chars, + fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars, format->fill_char == '\0' ? ' ' : format->fill_char); + /* If type is 'X', uppercase the prefix. This has to be done after the + prefix is filled in by fill_non_digits */ + if (format->type == 'X') { + Py_ssize_t t; + for (t = 0; t < n_prefix; ++t) + p[t + spec.n_lpadding + spec.n_lsign] = + STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]); + } + + done: Py_XDECREF(tmp); return result; @@ -768,7 +806,7 @@ format_float_internal(PyObject *value, goto done; /* Fill in the non-digit parts (padding, sign, etc.) */ - fill_non_digits(STRINGLIB_STR(result), &spec, n_digits, + fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits, format->fill_char == '\0' ? 
' ' : format->fill_char); /* fill in the digit parts */