The other half of Issue #1580: use short float repr where possible.

Addresses the float -> string conversion, using David Gay's code which
was added in Mark Dickinson's checkin r71663.

Also addresses these, which are intertwined with the short repr
changes:

- Issue #5772: format(1e100, '<') produces '1e+100', not '1.0e+100'
- Issue #5515: 'n' formatting with commas no longer works poorly
    with leading zeros.
- PEP 378 Format Specifier for Thousands Separator: implemented
    for floats.
This commit is contained in:
Eric Smith 2009-04-16 20:16:10 +00:00
parent b08a53a99d
commit 0923d1d8d7
16 changed files with 1491 additions and 830 deletions

View File

@ -91,24 +91,22 @@ PyAPI_FUNC(int) PyBytes_AsStringAndSize(
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(int) _PyBytes_InsertThousandsGroupingLocale(char *buffer,
Py_ssize_t n_buffer,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char);
PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGroupingLocale(char *buffer,
Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width);
/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(int) _PyBytes_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char,
const char *grouping,
const char *thousands_sep);
PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer,
Py_ssize_t n_buffer,
char *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep);
/* Flags used by string formatting */
#define F_LJUST (1<<0)

View File

@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d);
/* The caller is responsible for calling PyMem_Free to free the buffer
that is returned. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
char format_code,
int precision,
int flags,
int *type);
/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */
#define Py_DTSF_SIGN 0x01 /* always add the sign */
#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */
#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code
specific */
/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */
#define Py_DTST_FINITE 0
#define Py_DTST_INFINITE 1
#define Py_DTST_NAN 2
#ifdef __cplusplus
}

View File

@ -1482,24 +1482,22 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(int) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char);
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_UNICODE *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width);
/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
PyAPI_FUNC(int) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char,
const char *grouping,
const char *thousands_sep);
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
Py_ssize_t n_buffer,
Py_UNICODE *digits,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep);
/* === Characters Type APIs =============================================== */
/* Helper array used by Py_UNICODE_ISSPACE(). */

View File

@ -1,6 +1,7 @@
import unittest, struct
import os
import sys
from test import support
import math
from math import isinf, isnan, copysign, ldexp
@ -10,6 +11,10 @@ import random, fractions
INF = float("inf")
NAN = float("nan")
#locate file with float format test values
test_dir = os.path.dirname(__file__) or os.curdir
format_testfile = os.path.join(test_dir, 'formatfloat_testcases.txt')
class GeneralFloatCases(unittest.TestCase):
def test_float(self):
@ -24,6 +29,10 @@ class GeneralFloatCases(unittest.TestCase):
self.assertRaises(ValueError, float, "+-3.14")
self.assertRaises(ValueError, float, "-+3.14")
self.assertRaises(ValueError, float, "--3.14")
self.assertRaises(ValueError, float, ".nan")
self.assertRaises(ValueError, float, "+.inf")
self.assertRaises(ValueError, float, ".")
self.assertRaises(ValueError, float, "-.")
self.assertEqual(float(b" \u0663.\u0661\u0664 ".decode('raw-unicode-escape')), 3.14)
@support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
@ -316,6 +325,73 @@ class ReprTestCase(unittest.TestCase):
self.assertEqual(v, eval(repr(v)))
floats_file.close()
class FormatTestCase(unittest.TestCase):
    """Tests for %-formatting of floats and for the short float repr."""

    @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"),
                         "test requires IEEE 754 doubles")
    def test_format_testfile(self):
        """Check every case listed in the float formatting test-case file.

        Lines starting with '--' and blank lines are ignored.  Every
        other line has the form ``<format> <value> -> <expected>``; the
        expectation is checked for the value and, with a leading '-',
        for its negation.
        """
        # Use a context manager so the file is closed deterministically,
        # even when an assertion fails part-way through the file.
        with open(format_testfile) as testfile:
            for line in testfile:
                if line.startswith('--'):
                    continue
                line = line.strip()
                if not line:
                    continue

                lhs, rhs = map(str.strip, line.split('->'))
                fmt, arg = lhs.split()
                self.assertEqual(fmt % float(arg), rhs)
                self.assertEqual(fmt % -float(arg), '-' + rhs)

    @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short',
                         "applies only when using short float repr style")
    def test_short_repr(self):
        """Check that repr(float(s)) == s for a set of short strings."""
        # test short float repr introduced in Python 3.1.  One aspect
        # of this repr is that we get some degree of str -> float ->
        # str roundtripping.  In particular, for any numeric string
        # containing 15 or fewer significant digits, those exact same
        # digits (modulo trailing zeros) should appear in the output.
        # No more repr(0.03) -> "0.029999999999999999"!

        test_strings = [
            # output always includes *either* a decimal point and at
            # least one digit after that point, or an exponent.
            '0.0',
            '1.0',
            '0.01',
            '0.02',
            '0.03',
            '0.04',
            '0.05',
            '1.23456789',
            '10.0',
            '100.0',
            # values >= 1e16 get an exponent...
            '1000000000000000.0',
            '9999999999999990.0',
            '1e+16',
            '1e+17',
            # ... and so do values < 1e-4
            '0.001',
            '0.001001',
            '0.00010000000000001',
            '0.0001',
            '9.999999999999e-05',
            '1e-05',
            # values designed to provoke failure if the FPU rounding
            # precision isn't set correctly
            '8.72293771110361e+25',
            '7.47005307342313e+26',
            '2.86438000439698e+28',
            '8.89142905246179e+28',
            '3.08578087079232e+35',
            ]

        for s in test_strings:
            negs = '-'+s
            self.assertEqual(s, repr(float(s)))
            self.assertEqual(negs, repr(float(negs)))
# Beginning with Python 2.6 float has cross platform compatible
# ways to create and represent inf and nan
class InfNanTest(unittest.TestCase):

View File

@ -220,6 +220,11 @@ class FormatTest(unittest.TestCase):
testformat("%a", "\u0378", "'\\u0378'") # non printable
testformat("%r", "\u0374", "'\u0374'") # printable
testformat("%a", "\u0374", "'\\u0374'") # printable
# alternate float formatting
testformat('%g', 1.1, '1.1')
testformat('%#g', 1.1, '1.10000')
# Test exception for unknown format characters
if verbose:
print('Testing exceptions')

View File

@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase):
self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101')
self.assertEqual('%e' % 1.5e-101, '1.500000e-101')
self.assertEqual('%g' % 1.0, '1')
self.assertEqual('%#g' % 1.0, '1.00000')
def test_normal_integers(self):
# Ensure the first 256 integers are shared
a = 256
@ -358,6 +361,8 @@ class TypesTests(unittest.TestCase):
self.assertRaises(TypeError, 3 .__format__, 0)
# can't have ',' with 'n'
self.assertRaises(ValueError, 3 .__format__, ",n")
# can't have ',' with 'c'
self.assertRaises(ValueError, 3 .__format__, ",c")
# ensure that only int and float type specifiers work
for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
@ -547,10 +552,34 @@ class TypesTests(unittest.TestCase):
# a totally empty format specifier means something else.
# So, just use a sign flag
test(1e200, '+g', '+1e+200')
test(1e200, '+', '+1.0e+200')
test(1e200, '+', '+1e+200')
test(1.1e200, '+g', '+1.1e+200')
test(1.1e200, '+', '+1.1e+200')
# 0 padding
test(1234., '010f', '1234.000000')
test(1234., '011f', '1234.000000')
test(1234., '012f', '01234.000000')
test(-1234., '011f', '-1234.000000')
test(-1234., '012f', '-1234.000000')
test(-1234., '013f', '-01234.000000')
test(-1234.12341234, '013f', '-01234.123412')
test(-123456.12341234, '011.2f', '-0123456.12')
# 0 padding with commas
test(1234., '011,f', '1,234.000000')
test(1234., '012,f', '1,234.000000')
test(1234., '013,f', '01,234.000000')
test(-1234., '012,f', '-1,234.000000')
test(-1234., '013,f', '-1,234.000000')
test(-1234., '014,f', '-01,234.000000')
test(-12345., '015,f', '-012,345.000000')
test(-123456., '016,f', '-0,123,456.000000')
test(-123456., '017,f', '-0,123,456.000000')
test(-123456.12341234, '017,f', '-0,123,456.123412')
test(-123456.12341234, '013,.2f', '-0,123,456.12')
# % formatting
test(-1.0, '%', '-100.000000%')
@ -575,6 +604,24 @@ class TypesTests(unittest.TestCase):
self.assertRaises(ValueError, format, 0.0, '#')
self.assertRaises(ValueError, format, 0.0, '#20f')
def test_format_spec_errors(self):
# int, float, and string all share the same format spec
# mini-language parser.
# Check that we can't ask for too many digits. This is
# probably a CPython specific test. It tries to put the width
# into a C long.
self.assertRaises(ValueError, format, 0, '1'*10000 + 'd')
# Similar with the precision.
self.assertRaises(ValueError, format, 0, '.' + '1'*10000 + 'd')
# And may as well test both.
self.assertRaises(ValueError, format, 0, '1'*1000 + '.' + '1'*10000 + 'd')
# Make sure commas aren't allowed with various type codes
for code in 'xXobns':
self.assertRaises(ValueError, format, 0, ',' + code)
def test_main():
run_unittest(TypesTests)

View File

@ -12,6 +12,15 @@ What's New in Python 3.1 beta 1?
Core and Builtins
-----------------
- Issue #5772: format(1e100, '<') produces '1e+100', not '1.0e+100'.
- Issue #5515: str.format() presentation type 'n' with commas no
longer works poorly with leading zeros when formatting ints and
floats.
- Implement PEP 378, Format Specifier for Thousands Separator, for
floats.
- The repr function switches to exponential notation at 1e16, not 1e17
as it did before. This change applies to both 'short' and legacy
float repr styles. For the new repr style, it avoids misleading

View File

@ -1016,16 +1016,31 @@ save_float(PicklerObject *self, PyObject *obj)
return -1;
if (pickler_write(self, pdata, 9) < 0)
return -1;
}
}
else {
char pdata[250];
pdata[0] = FLOAT;
PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
/* Extend the formatted string with a newline character */
strcat(pdata, "\n");
int result = -1;
char *buf = NULL;
char op = FLOAT;
if (pickler_write(self, pdata, strlen(pdata)) < 0)
return -1;
if (pickler_write(self, &op, 1) < 0)
goto done;
buf = PyOS_double_to_string(x, 'r', 0, 0, NULL);
if (!buf) {
PyErr_NoMemory();
goto done;
}
if (pickler_write(self, buf, strlen(buf)) < 0)
goto done;
if (pickler_write(self, "\n", 1) < 0)
goto done;
result = 0;
done:
PyMem_Free(buf);
return result;
}
return 0;

View File

@ -562,6 +562,7 @@ PyBytes_AsStringAndSize(register PyObject *obj,
/* -------------------------------------------------------------------- */
/* Methods */
#include "stringlib/stringdefs.h"
#define STRINGLIB_CHAR char
#define STRINGLIB_CMP memcmp

View File

@ -14,22 +14,6 @@
#ifndef WITHOUT_COMPLEX
/* Precisions used by repr() and str(), respectively.
The repr() precision (17 significant decimal digits) is the minimal number
that is guaranteed to have enough precision so that if the number is read
back in the exact same binary value is recreated. This is true for IEEE
floating point by design, and also happens to work for all other modern
hardware.
The str() precision is chosen so that in most cases, the rounding noise
created by various operations is suppressed, while giving plenty of
precision for practical use.
*/
#define PREC_REPR 17
#define PREC_STR 12
/* elementary operations on complex numbers */
static Py_complex c_1 = {1., 0.};
@ -345,71 +329,114 @@ complex_dealloc(PyObject *op)
}
static void
complex_to_buf(char *buf, int bufsz, PyComplexObject *v, int precision)
static PyObject *
complex_format(PyComplexObject *v, char format_code)
{
char format[32];
if (v->cval.real == 0.) {
if (!Py_IS_FINITE(v->cval.imag)) {
if (Py_IS_NAN(v->cval.imag))
strncpy(buf, "nan*j", 6);
else if (copysign(1, v->cval.imag) == 1)
strncpy(buf, "inf*j", 6);
else
strncpy(buf, "-inf*j", 7);
}
else {
PyOS_snprintf(format, sizeof(format), "%%.%ig", precision);
PyOS_ascii_formatd(buf, bufsz - 1, format, v->cval.imag);
strncat(buf, "j", 1);
}
} else {
char re[64], im[64];
/* Format imaginary part with sign, real part without */
if (!Py_IS_FINITE(v->cval.real)) {
if (Py_IS_NAN(v->cval.real))
strncpy(re, "nan", 4);
/* else if (copysign(1, v->cval.real) == 1) */
else if (v->cval.real > 0)
strncpy(re, "inf", 4);
else
strncpy(re, "-inf", 5);
}
else {
PyOS_snprintf(format, sizeof(format), "%%.%ig", precision);
PyOS_ascii_formatd(re, sizeof(re), format, v->cval.real);
}
if (!Py_IS_FINITE(v->cval.imag)) {
if (Py_IS_NAN(v->cval.imag))
strncpy(im, "+nan*", 6);
/* else if (copysign(1, v->cval.imag) == 1) */
else if (v->cval.imag > 0)
strncpy(im, "+inf*", 6);
else
strncpy(im, "-inf*", 6);
}
else {
PyOS_snprintf(format, sizeof(format), "%%+.%ig", precision);
PyOS_ascii_formatd(im, sizeof(im), format, v->cval.imag);
}
PyOS_snprintf(buf, bufsz, "(%s%sj)", re, im);
}
PyObject *result = NULL;
Py_ssize_t len;
/* If these are non-NULL, they'll need to be freed. */
char *pre = NULL;
char *pim = NULL;
char *buf = NULL;
/* These do not need to be freed. They're either aliases for pim
and pre, or pointers to constants. */
char *re = NULL;
char *im = NULL;
char *lead = "";
char *tail = "";
if (v->cval.real == 0.) {
re = "";
if (!Py_IS_FINITE(v->cval.imag)) {
if (Py_IS_NAN(v->cval.imag))
im = "nan*";
else if (copysign(1, v->cval.imag) == 1)
im = "inf*";
else
im = "-inf*";
}
else {
pim = PyOS_double_to_string(v->cval.imag, format_code,
0, 0, NULL);
if (!pim) {
PyErr_NoMemory();
goto done;
}
im = pim;
}
} else {
/* Format imaginary part with sign, real part without */
if (!Py_IS_FINITE(v->cval.real)) {
if (Py_IS_NAN(v->cval.real))
re = "nan";
/* else if (copysign(1, v->cval.real) == 1) */
else if (v->cval.real > 0)
re = "inf";
else
re = "-inf";
}
else {
pre = PyOS_double_to_string(v->cval.real, format_code,
0, 0, NULL);
if (!pre) {
PyErr_NoMemory();
goto done;
}
re = pre;
}
if (!Py_IS_FINITE(v->cval.imag)) {
if (Py_IS_NAN(v->cval.imag))
im = "+nan*";
/* else if (copysign(1, v->cval.imag) == 1) */
else if (v->cval.imag > 0)
im = "+inf*";
else
im = "-inf*";
}
else {
pim = PyOS_double_to_string(v->cval.imag, format_code,
0, Py_DTSF_SIGN, NULL);
if (!pim) {
PyErr_NoMemory();
goto done;
}
im = pim;
}
lead = "(";
tail = ")";
}
/* Alloc the final buffer. Add one for the "j" in the format string, and
one for the trailing zero. */
len = strlen(lead) + strlen(re) + strlen(im) + strlen(tail) + 2;
buf = PyMem_Malloc(len);
if (!buf) {
PyErr_NoMemory();
goto done;
}
PyOS_snprintf(buf, len, "%s%s%sj%s", lead, re, im, tail);
result = PyUnicode_FromString(buf);
done:
PyMem_Free(pim);
PyMem_Free(pre);
PyMem_Free(buf);
return result;
}
static PyObject *
complex_repr(PyComplexObject *v)
{
char buf[100];
complex_to_buf(buf, sizeof(buf), v, PREC_REPR);
return PyUnicode_FromString(buf);
return complex_format(v, 'r');
}
static PyObject *
complex_str(PyComplexObject *v)
{
char buf[100];
complex_to_buf(buf, sizeof(buf), v, PREC_STR);
return PyUnicode_FromString(buf);
return complex_format(v, 's');
}
static long

View File

@ -197,8 +197,7 @@ PyFloat_FromString(PyObject *v)
sp = s;
/* We don't care about overflow or underflow. If the platform supports
* them, infinities and signed zeroes (on underflow) are fine.
* However, strtod can return 0 for denormalized numbers, where atof
* does not. So (alas!) we special-case a zero result. Note that
* However, strtod can return 0 for denormalized numbers. Note that
* whether strtod sets errno on underflow is not defined, so we can't
* key off errno.
*/
@ -259,14 +258,6 @@ PyFloat_FromString(PyObject *v)
"null byte in argument for float()");
goto error;
}
if (x == 0.0) {
/* See above -- may have been strtod being anal
about denorms. */
PyFPE_START_PROTECT("atof", goto error)
x = PyOS_ascii_atof(s);
PyFPE_END_PROTECT(x)
errno = 0; /* whether atof ever set errno is undefined */
}
result = PyFloat_FromDouble(x);
error:
if (s_buffer)
@ -320,72 +311,6 @@ PyFloat_AsDouble(PyObject *op)
return val;
}
/* Methods */
static void
format_double(char *buf, size_t buflen, double ob_fval, int precision)
{
register char *cp;
char format[32];
int i;
/* Subroutine for float_repr, float_str and float_print.
We want float numbers to be recognizable as such,
i.e., they should contain a decimal point or an exponent.
However, %g may print the number as an integer;
in such cases, we append ".0" to the string. */
PyOS_snprintf(format, 32, "%%.%ig", precision);
PyOS_ascii_formatd(buf, buflen, format, ob_fval);
cp = buf;
if (*cp == '-')
cp++;
for (; *cp != '\0'; cp++) {
/* Any non-digit means it's not an integer;
this takes care of NAN and INF as well. */
if (!isdigit(Py_CHARMASK(*cp)))
break;
}
if (*cp == '\0') {
*cp++ = '.';
*cp++ = '0';
*cp++ = '\0';
return;
}
/* Checking the next three chars should be more than enough to
* detect inf or nan, even on Windows. We check for inf or nan
* at last because they are rare cases.
*/
for (i=0; *cp != '\0' && i<3; cp++, i++) {
if (isdigit(Py_CHARMASK(*cp)) || *cp == '.')
continue;
/* found something that is neither a digit nor point
* it might be a NaN or INF
*/
#ifdef Py_NAN
if (Py_IS_NAN(ob_fval)) {
strcpy(buf, "nan");
}
else
#endif
if (Py_IS_INFINITY(ob_fval)) {
cp = buf;
if (*cp == '-')
cp++;
strcpy(cp, "inf");
}
break;
}
}
static void
format_float(char *buf, size_t buflen, PyFloatObject *v, int precision)
{
assert(PyFloat_Check(v));
format_double(buf, buflen, PyFloat_AS_DOUBLE(v), precision);
}
/* Macro and helper that convert PyObject obj to a C double and store
the value in dbl. If conversion to double raises an exception, obj is
set to NULL, and the function invoking this macro returns NULL. If
@ -398,6 +323,8 @@ format_float(char *buf, size_t buflen, PyFloatObject *v, int precision)
else if (convert_to_double(&(obj), &(dbl)) < 0) \
return obj;
/* Methods */
static int
convert_to_double(PyObject **v, double *dbl)
{
@ -418,38 +345,30 @@ convert_to_double(PyObject **v, double *dbl)
return 0;
}
/* Precisions used by repr() and str(), respectively.
The repr() precision (17 significant decimal digits) is the minimal number
that is guaranteed to have enough precision so that if the number is read
back in the exact same binary value is recreated. This is true for IEEE
floating point by design, and also happens to work for all other modern
hardware.
The str() precision is chosen so that in most cases, the rounding noise
created by various operations is suppressed, while giving plenty of
precision for practical use.
*/
#define PREC_REPR 17
#define PREC_STR 12
/* Common helper behind float_repr() and float_str().

   Converts the double held by v to text with PyOS_double_to_string(),
   using format_code ('r' from float_repr, 's' from float_str) and the
   Py_DTSF_ADD_DOT_0 flag so that an integral result gets ".0" appended.
   Returns a new unicode object, or NULL with MemoryError set if the
   conversion buffer could not be obtained. */
static PyObject *
float_str_or_repr(PyFloatObject *v, char format_code)
{
    PyObject *text;
    char *digits;

    digits = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), format_code,
                                   0, Py_DTSF_ADD_DOT_0, NULL);
    if (digits == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    text = PyUnicode_FromString(digits);
    /* The buffer is owned by us and must go back through PyMem_Free,
       whether or not building the unicode object succeeded. */
    PyMem_Free(digits);
    return text;
}
static PyObject *
float_repr(PyFloatObject *v)
{
char buf[100];
format_float(buf, sizeof(buf), v, PREC_REPR);
return PyUnicode_FromString(buf);
return float_str_or_repr(v, 'r');
}
static PyObject *
float_str(PyFloatObject *v)
{
char buf[100];
format_float(buf, sizeof(buf), v, PREC_STR);
return PyUnicode_FromString(buf);
return float_str_or_repr(v, 's');
}
/* Comparison is pretty much a nightmare. When comparing float to float,
@ -1980,15 +1899,21 @@ PyFloat_Fini(void)
i++, p++) {
if (PyFloat_CheckExact(p) &&
Py_REFCNT(p) != 0) {
char buf[100];
format_float(buf, sizeof(buf), p, PREC_STR);
/* XXX(twouters) cast refcount to
long until %zd is universally
available
*/
fprintf(stderr,
char *buf = PyOS_double_to_string(
PyFloat_AS_DOUBLE(p), 'r',
0, 0, NULL);
if (buf) {
/* XXX(twouters) cast
refcount to long
until %zd is
universally
available
*/
fprintf(stderr,
"# <float at %p, refcnt=%ld, val=%s>\n",
p, (long)Py_REFCNT(p), buf);
PyMem_Free(buf);
}
}
}
list = list->next;
@ -2233,14 +2158,6 @@ _PyFloat_Pack8(double x, unsigned char *p, int le)
}
}
/* Should only be used by marshal. */
int
_PyFloat_Repr(double x, char *p, size_t len)
{
format_double(p, len, x, PREC_REPR);
return (int)strlen(p);
}
double
_PyFloat_Unpack4(const unsigned char *p, int le)
{

View File

@ -1,6 +1,8 @@
/* implements the string, long, and float formatters. that is,
string.__format__, etc. */
#include <locale.h>
/* Before including this, you must include either:
stringlib/unicodedefs.h
stringlib/stringdefs.h
@ -13,8 +15,6 @@
be. These are the only non-static functions defined here.
*/
#define ALLOW_PARENS_FOR_SIGN 0
/* Raises an exception about an unknown presentation type for this
* type. */
@ -104,9 +104,6 @@ is_sign_element(STRINGLIB_CHAR c)
{
switch (c) {
case ' ': case '+': case '-':
#if ALLOW_PARENS_FOR_SIGN
case '(':
#endif
return 1;
default:
return 0;
@ -143,7 +140,7 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
/* end-ptr is used throughout this code to specify the length of
the input string */
Py_ssize_t specified_width;
Py_ssize_t consumed;
format->fill_char = '\0';
format->align = '\0';
@ -170,11 +167,6 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
if (end-ptr >= 1 && is_sign_element(ptr[0])) {
format->sign = ptr[0];
++ptr;
#if ALLOW_PARENS_FOR_SIGN
if (end-ptr >= 1 && ptr[0] == ')') {
++ptr;
}
#endif
}
/* If the next character is #, we're in alternate mode. This only
@ -193,15 +185,17 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
++ptr;
}
/* XXX add error checking */
specified_width = get_integer(&ptr, end, &format->width);
consumed = get_integer(&ptr, end, &format->width);
if (consumed == -1)
/* Overflow error. Exception already set. */
return 0;
/* if specified_width is 0, we didn't consume any characters for
the width. in that case, reset the width to -1, because
get_integer() will have set it to zero */
if (specified_width == 0) {
/* If consumed is 0, we didn't consume any characters for the
width. In that case, reset the width to -1, because
get_integer() will have set it to zero. -1 is how we record
that the width wasn't specified. */
if (consumed == 0)
format->width = -1;
}
/* Comma signifies add thousands separators */
if (end-ptr && ptr[0] == ',') {
@ -213,11 +207,13 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
if (end-ptr && ptr[0] == '.') {
++ptr;
/* XXX add error checking */
specified_width = get_integer(&ptr, end, &format->precision);
consumed = get_integer(&ptr, end, &format->precision);
if (consumed == -1)
/* Overflow error. Exception already set. */
return 0;
/* not having a precision after a dot is an error */
if (specified_width == 0) {
/* Not having a precision after a dot is an error. */
if (consumed == 0) {
PyErr_Format(PyExc_ValueError,
"Format specifier missing precision");
return 0;
@ -225,10 +221,10 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
}
/* Finally, parse the type field */
/* Finally, parse the type field. */
if (end-ptr > 1) {
/* invalid conversion spec */
/* More than one char remain, invalid conversion spec. */
PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
return 0;
}
@ -238,9 +234,27 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
++ptr;
}
if (format->type == 'n' && format->thousands_separators) {
PyErr_Format(PyExc_ValueError, "Cannot specify ',' with 'n'.");
return 0;
/* Do as much validating as we can, just by looking at the format
specifier. Do not take into account what type of formatting
we're doing (int, float, string). */
if (format->thousands_separators) {
switch (format->type) {
case 'd':
case 'e':
case 'f':
case 'g':
case 'E':
case 'G':
case '%':
case 'F':
/* These are allowed. See PEP 378.*/
break;
default:
PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '%c'.", format->type);
return 0;
}
}
return 1;
@ -251,6 +265,20 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
/*********** common routines for numeric formatting *********************/
/************************************************************************/
/* Locale type codes. */
#define LT_CURRENT_LOCALE 0
#define LT_DEFAULT_LOCALE 1
#define LT_NO_LOCALE 2
/* Locale info needed for formatting integers and the part of floats
before and including the decimal. Note that locales only support
8-bit chars, not unicode.
NOTE(review): the field names mirror those of struct lconv from
<locale.h>; presumably they carry the same meaning -- confirm against
the code that fills this struct in. */
typedef struct {
/* Text written for the decimal point; calc_number_widths() uses its
strlen() as the width of the decimal field. */
char *decimal_point;
/* Passed to STRINGLIB_GROUPING as the thousands_sep argument. */
char *thousands_sep;
/* Passed to STRINGLIB_GROUPING as the grouping argument. */
char *grouping;
} LocaleInfo;
/* describes the layout for an integer, see the comment in
calc_number_widths() for details */
typedef struct {
@ -258,38 +286,84 @@ typedef struct {
Py_ssize_t n_prefix;
Py_ssize_t n_spadding;
Py_ssize_t n_rpadding;
char lsign;
Py_ssize_t n_lsign;
char rsign;
Py_ssize_t n_rsign;
Py_ssize_t n_total; /* just a convenience, it's derivable from the
other fields */
char sign;
Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
any grouping chars. */
Py_ssize_t n_decimal; /* 0 if only an integer */
Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
excluding the decimal itself, if
present. */
/* These 2 are not the widths of fields, but are needed by
STRINGLIB_GROUPING. */
Py_ssize_t n_digits; /* The number of digits before a decimal
or exponent. */
Py_ssize_t n_min_width; /* The min_width we used when we computed
the n_grouped_digits width. */
} NumberFieldWidths;
/* Split a number of the form

     digits[remainder]

   into its leading run of digits and whatever follows.  ptr points at
   the first character and len is the number of characters.  The part
   after the digits may be a decimal point, an exponent, both, or
   nothing at all.

   On return *has_decimal is 1 if the character right after the digits
   is '.', else 0, and *n_remainder is the number of characters that
   follow the digits, not counting that decimal point.

   Results are undefined (but shouldn't crash) for improperly
   formatted strings.
*/
static void
parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
             Py_ssize_t *n_remainder, int *has_decimal)
{
    STRINGLIB_CHAR *const end = ptr + len;
    STRINGLIB_CHAR *cursor;

    /* Step over the leading digits. */
    for (cursor = ptr; cursor < end && isdigit(*cursor); ++cursor)
        ;

    /* A decimal point directly after the digits is reported through
       *has_decimal and is not counted as part of the remainder. */
    if (cursor < end && *cursor == '.') {
        *has_decimal = 1;
        ++cursor;
    }
    else {
        *has_decimal = 0;
    }

    *n_remainder = end - cursor;
}
/* not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier
and more efficient enough to justify a little obfuscation? */
static void
calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign,
Py_ssize_t n_prefix, Py_ssize_t n_digits,
static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
Py_ssize_t n_number, Py_ssize_t n_remainder,
int has_decimal, const LocaleInfo *locale,
const InternalFormatSpec *format)
{
Py_ssize_t n_non_digit_non_padding;
Py_ssize_t n_padding;
spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
spec->n_lpadding = 0;
spec->n_prefix = 0;
spec->n_prefix = n_prefix;
spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
spec->n_remainder = n_remainder;
spec->n_spadding = 0;
spec->n_rpadding = 0;
spec->lsign = '\0';
spec->n_lsign = 0;
spec->rsign = '\0';
spec->n_rsign = 0;
spec->sign = '\0';
spec->n_sign = 0;
/* the output will look like:
| |
| <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> |
| |
| |
| <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
| |
lsign and rsign are computed from format->sign and the actual
sign is computed from format->sign and the actual
sign of the number
prefix is given (it's for the '0x' prefix)
@ -304,108 +378,191 @@ calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign,
*/
/* compute the various parts we're going to write */
if (format->sign == '+') {
switch (format->sign) {
case '+':
/* always put a + or - */
spec->n_lsign = 1;
spec->lsign = (actual_sign == '-' ? '-' : '+');
}
#if ALLOW_PARENS_FOR_SIGN
else if (format->sign == '(') {
if (actual_sign == '-') {
spec->n_lsign = 1;
spec->lsign = '(';
spec->n_rsign = 1;
spec->rsign = ')';
}
}
#endif
else if (format->sign == ' ') {
spec->n_lsign = 1;
spec->lsign = (actual_sign == '-' ? '-' : ' ');
}
else {
/* non specified, or the default (-) */
if (actual_sign == '-') {
spec->n_lsign = 1;
spec->lsign = '-';
spec->n_sign = 1;
spec->sign = (sign_char == '-' ? '-' : '+');
break;
case ' ':
spec->n_sign = 1;
spec->sign = (sign_char == '-' ? '-' : ' ');
break;
default:
/* Not specified, or the default (-) */
if (sign_char == '-') {
spec->n_sign = 1;
spec->sign = '-';
}
}
spec->n_prefix = n_prefix;
/* The number of chars used for non-digits and non-padding. */
n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
spec->n_remainder;
/* now the number of padding characters */
if (format->width == -1) {
/* no padding at all, nothing to do */
}
else {
/* see if any padding is needed */
if (spec->n_lsign + n_digits + spec->n_rsign +
spec->n_prefix >= format->width) {
/* no padding needed, we're already bigger than the
requested width */
}
else {
/* determine which of left, space, or right padding is
needed */
Py_ssize_t padding = format->width -
(spec->n_lsign + spec->n_prefix +
n_digits + spec->n_rsign);
if (format->align == '<')
spec->n_rpadding = padding;
else if (format->align == '>')
spec->n_lpadding = padding;
else if (format->align == '^') {
spec->n_lpadding = padding / 2;
spec->n_rpadding = padding - spec->n_lpadding;
}
else if (format->align == '=')
spec->n_spadding = padding;
else
spec->n_lpadding = padding;
/* min_width can go negative, that's okay. format->width == -1 means
we don't care. */
if (format->fill_char == '0')
spec->n_min_width = format->width - n_non_digit_non_padding;
else
spec->n_min_width = 0;
if (spec->n_digits == 0)
/* This case only occurs when using 'c' formatting, we need
to special case it because the grouping code always wants
to have at least one character. */
spec->n_grouped_digits = 0;
else
spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
spec->n_digits,
spec->n_min_width,
locale->grouping,
locale->thousands_sep);
/* Given the desired width and the total of digit and non-digit
space we consume, see if we need any padding. format->width can
be negative (meaning no padding), but this code still works in
that case. */
n_padding = format->width -
(n_non_digit_non_padding + spec->n_grouped_digits);
if (n_padding > 0) {
/* Some padding is needed. Determine if it's left, space, or right. */
switch (format->align) {
case '<':
spec->n_rpadding = n_padding;
break;
case '^':
spec->n_lpadding = n_padding / 2;
spec->n_rpadding = n_padding - spec->n_lpadding;
break;
case '=':
spec->n_spadding = n_padding;
break;
default:
/* Handles '>', plus catch-all just in case. */
spec->n_lpadding = n_padding;
break;
}
}
spec->n_total = spec->n_lpadding + spec->n_lsign + spec->n_prefix +
spec->n_spadding + n_digits + spec->n_rsign + spec->n_rpadding;
return spec->n_lpadding + spec->n_sign + spec->n_prefix +
spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
spec->n_remainder + spec->n_rpadding;
}
/* fill in the non-digit parts of a numbers's string representation,
as determined in calc_number_widths(). returns the pointer to
where the digits go. */
static STRINGLIB_CHAR *
fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
STRINGLIB_CHAR *prefix, Py_ssize_t n_digits,
STRINGLIB_CHAR fill_char)
/* Fill in the digit parts of a number's string representation,
as determined in calc_number_widths().
No error checking, since we know the buffer is the correct size. */
static void
fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
LocaleInfo *locale, int toupper)
{
STRINGLIB_CHAR *p_digits;
/* Used to keep track of digits, decimal, and remainder. */
STRINGLIB_CHAR *p = digits;
#ifndef NDEBUG
Py_ssize_t r;
#endif
if (spec->n_lpadding) {
STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
p_buf += spec->n_lpadding;
STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
buf += spec->n_lpadding;
}
if (spec->n_lsign == 1) {
*p_buf++ = spec->lsign;
if (spec->n_sign == 1) {
*buf++ = spec->sign;
}
if (spec->n_prefix) {
memmove(p_buf,
memmove(buf,
prefix,
spec->n_prefix * sizeof(STRINGLIB_CHAR));
p_buf += spec->n_prefix;
if (toupper) {
Py_ssize_t t;
for (t = 0; t < spec->n_prefix; ++t)
buf[t] = STRINGLIB_TOUPPER(buf[t]);
}
buf += spec->n_prefix;
}
if (spec->n_spadding) {
STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
p_buf += spec->n_spadding;
STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
buf += spec->n_spadding;
}
p_digits = p_buf;
p_buf += n_digits;
if (spec->n_rsign == 1) {
*p_buf++ = spec->rsign;
/* Only for type 'c' special case, it has no digits. */
if (spec->n_digits != 0) {
/* Fill the digits with InsertThousandsGrouping. */
#ifndef NDEBUG
r =
#endif
STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
spec->n_digits, spec->n_min_width,
locale->grouping, locale->thousands_sep);
#ifndef NDEBUG
assert(r == spec->n_grouped_digits);
#endif
p += spec->n_digits;
}
if (toupper) {
Py_ssize_t t;
for (t = 0; t < spec->n_grouped_digits; ++t)
buf[t] = STRINGLIB_TOUPPER(buf[t]);
}
buf += spec->n_grouped_digits;
if (spec->n_decimal) {
Py_ssize_t t;
for (t = 0; t < spec->n_decimal; ++t)
buf[t] = locale->decimal_point[t];
buf += spec->n_decimal;
p += 1;
}
if (spec->n_remainder) {
memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
buf += spec->n_remainder;
p += spec->n_remainder;
}
if (spec->n_rpadding) {
STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
p_buf += spec->n_rpadding;
STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
buf += spec->n_rpadding;
}
return p_digits;
}
static char no_grouping[1] = {CHAR_MAX};
/* Fill *locale_info with the decimal point, thousands separator and
   grouping description selected by type:

     LT_CURRENT_LOCALE  the values reported by localeconv() for the
                        current locale.
     LT_DEFAULT_LOCALE  hard-coded: "." decimal point, "," separator,
                        groups of 3 digits.
     LT_NO_LOCALE       "." decimal point, empty separator, no grouping.

   The strings are stored by pointer, not copied.  For LT_CURRENT_LOCALE
   they point into the struct returned by localeconv(), so they should
   be consumed before the locale is changed or localeconv() is called
   again.

   Any other value of type is a programming error: it trips the assert
   in debug builds, and is treated like LT_NO_LOCALE when asserts are
   compiled out so that *locale_info is never left uninitialized. */
static void
get_locale_info(int type, LocaleInfo *locale_info)
{
    switch (type) {
    case LT_CURRENT_LOCALE: {
        struct lconv *locale_data = localeconv();
        locale_info->decimal_point = locale_data->decimal_point;
        locale_info->thousands_sep = locale_data->thousands_sep;
        locale_info->grouping = locale_data->grouping;
        break;
    }
    case LT_DEFAULT_LOCALE:
        locale_info->decimal_point = ".";
        locale_info->thousands_sep = ",";
        locale_info->grouping = "\3"; /* Group every 3 characters,
                                         trailing 0 means repeat
                                         infinitely. */
        break;
    default:
        /* Unknown type.  Complain loudly in debug builds, but still
           hand back usable (no-op) locale data in release builds
           rather than leaving the caller with garbage pointers. */
        assert(0);
        /* Fall through */
    case LT_NO_LOCALE:
        locale_info->decimal_point = ".";
        locale_info->thousands_sep = "";
        locale_info->grouping = no_grouping;
        break;
    }
}
#endif /* FORMAT_FLOAT || FORMAT_LONG */
/************************************************************************/
@ -523,19 +680,21 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
PyObject *tmp = NULL;
STRINGLIB_CHAR *pnumeric_chars;
STRINGLIB_CHAR numeric_char;
STRINGLIB_CHAR sign = '\0';
STRINGLIB_CHAR *p;
STRINGLIB_CHAR sign_char = '\0';
Py_ssize_t n_digits; /* count of digits need from the computed
string */
Py_ssize_t n_leading_chars;
Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
allocate, used for 'n'
formatting. */
Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
produces non-digits */
Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
Py_ssize_t n_total;
STRINGLIB_CHAR *prefix = NULL;
NumberFieldWidths spec;
long x;
/* Locale settings, either from the actual locale or
from a hard-coded pseudo-locale */
LocaleInfo locale;
/* no precision allowed on integers */
if (format->precision != -1) {
PyErr_SetString(PyExc_ValueError,
@ -543,7 +702,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
goto done;
}
/* special case for character formatting */
if (format->type == 'c') {
/* error to specify a sign */
@ -554,6 +712,14 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
goto done;
}
/* Error to specify a comma. */
if (format->thousands_separators) {
PyErr_SetString(PyExc_ValueError,
"Thousands separators not allowed with integer"
" format specifier 'c'");
goto done;
}
/* taken from unicodeobject.c formatchar() */
/* Integer input truncated to a character */
/* XXX: won't work for int */
@ -578,6 +744,13 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
numeric_char = (STRINGLIB_CHAR)x;
pnumeric_chars = &numeric_char;
n_digits = 1;
/* As a sort-of hack, we tell calc_number_widths that we only
have "remainder" characters. calc_number_widths thinks
these are characters that don't get formatted, only copied
into the output string. We do this for 'c' formatting,
because the characters are likely to be non-digits. */
n_remainder = 1;
}
else {
int base;
@ -629,8 +802,8 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
/* Is a sign character present in the output? If so, remember it
and skip it */
sign = pnumeric_chars[0];
if (sign == '-') {
if (pnumeric_chars[0] == '-') {
sign_char = pnumeric_chars[0];
++prefix;
++leading_chars_to_skip;
}
@ -640,86 +813,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
pnumeric_chars += leading_chars_to_skip;
}
if (format->type == 'n')
/* Compute how many additional chars we need to allocate
to hold the thousands grouping. */
STRINGLIB_GROUPING_LOCALE(NULL, n_digits, n_digits,
0, &n_grouping_chars, 0);
if (format->thousands_separators)
/* Compute how many additional chars we need to allocate
to hold the thousands grouping. */
STRINGLIB_GROUPING(NULL, n_digits, n_digits,
0, &n_grouping_chars, 0, "\3", ",");
/* Determine the grouping, separator, and decimal point, if any. */
get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale);
/* Calculate the widths of the various leading and trailing parts */
calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars,
format);
/* Calculate how much memory we'll need. */
n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
n_digits, n_remainder, 0, &locale, format);
/* Allocate a new string to hold the result */
result = STRINGLIB_NEW(NULL, spec.n_total);
/* Allocate the memory. */
result = STRINGLIB_NEW(NULL, n_total);
if (!result)
goto done;
p = STRINGLIB_STR(result);
/* XXX There is too much magic here regarding the internals of
spec and the location of the prefix and digits. It would be
better if calc_number_widths returned a number of logical
offsets into the buffer, and those were used. Maybe in a
future code cleanup. */
/* Fill in the digit parts */
n_leading_chars = spec.n_lpadding + spec.n_lsign +
spec.n_prefix + spec.n_spadding;
memmove(p + n_leading_chars,
pnumeric_chars,
n_digits * sizeof(STRINGLIB_CHAR));
/* If type is 'X', convert the filled in digits to uppercase */
if (format->type == 'X') {
Py_ssize_t t;
for (t = 0; t < n_digits; ++t)
p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
}
/* Insert the grouping, if any, after the uppercasing of the digits, so
we can ensure that grouping chars won't be affected. */
if (n_grouping_chars) {
/* We know this can't fail, since we've already
reserved enough space. */
STRINGLIB_CHAR *pstart = p + n_leading_chars;
#ifndef NDEBUG
int r;
#endif
if (format->type == 'n')
#ifndef NDEBUG
r =
#endif
STRINGLIB_GROUPING_LOCALE(pstart, n_digits, n_digits,
spec.n_total+n_grouping_chars-n_leading_chars,
NULL, 0);
else
#ifndef NDEBUG
r =
#endif
STRINGLIB_GROUPING(pstart, n_digits, n_digits,
spec.n_total+n_grouping_chars-n_leading_chars,
NULL, 0, "\3", ",");
assert(r);
}
/* Fill in the non-digit parts (padding, sign, etc.) */
fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
format->fill_char == '\0' ? ' ' : format->fill_char);
/* If type is 'X', uppercase the prefix. This has to be done after the
prefix is filled in by fill_non_digits */
if (format->type == 'X') {
Py_ssize_t t;
for (t = 0; t < n_prefix; ++t)
p[t + spec.n_lpadding + spec.n_lsign] =
STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]);
}
/* Populate the memory. */
fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
prefix, format->fill_char == '\0' ? ' ' : format->fill_char,
&locale, format->type == 'X');
done:
Py_XDECREF(tmp);
@ -733,64 +846,45 @@ done:
#ifdef FORMAT_FLOAT
#if STRINGLIB_IS_UNICODE
/* taken from unicodeobject.c */
static Py_ssize_t
strtounicode(Py_UNICODE *buffer, const char *charbuffer)
static void
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
{
register Py_ssize_t i;
Py_ssize_t len = strlen(charbuffer);
for (i = len - 1; i >= 0; --i)
buffer[i] = (Py_UNICODE) charbuffer[i];
return len;
Py_ssize_t i;
for (i = 0; i < len; ++i)
buffer[i] = (Py_UNICODE)charbuffer[i];
}
#endif
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
/* much of this is taken from unicodeobject.c */
static PyObject *
format_float_internal(PyObject *value,
const InternalFormatSpec *format)
{
/* fmt = '%.' + `prec` + `type` + '%%'
worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
char fmt[20];
/* taken from unicodeobject.c */
/* Worst case length calc to ensure no buffer overrun:
'g' formats:
fmt = %#.<prec>g
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
for any double rep.)
len = 1 + prec + 1 + 2 + 5 = 9 + prec
'f' formats:
buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
len = 1 + 50 + 1 + prec = 52 + prec
If prec=0 the effective precision is 1 (the leading digit is
always given), therefore increase the length by one.
*/
char charbuf[FLOAT_FORMATBUFLEN];
char *buf = NULL; /* buffer returned from PyOS_double_to_string */
Py_ssize_t n_digits;
double x;
Py_ssize_t n_remainder;
Py_ssize_t n_total;
int has_decimal;
double val;
Py_ssize_t precision = format->precision;
PyObject *result = NULL;
STRINGLIB_CHAR sign;
char* trailing = "";
STRINGLIB_CHAR type = format->type;
int add_pct = 0;
STRINGLIB_CHAR *p;
NumberFieldWidths spec;
STRINGLIB_CHAR type = format->type;
int flags = 0;
PyObject *result = NULL;
STRINGLIB_CHAR sign_char = '\0';
int float_type; /* Used to see if we have a nan, inf, or regular float. */
#if STRINGLIB_IS_UNICODE
Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
Py_UNICODE *unicode_tmp = NULL;
#endif
/* alternate is not allowed on floats. */
/* Locale settings, either from the actual locale or
from a hard-coded pseudo-locale */
LocaleInfo locale;
/* Alternate is not allowed on floats. */
if (format->alternate) {
PyErr_SetString(PyExc_ValueError,
"Alternate form (#) not allowed in float format "
@ -798,84 +892,106 @@ format_float_internal(PyObject *value,
goto done;
}
/* first, do the conversion as 8-bit chars, using the platform's
snprintf. then, if needed, convert to unicode. */
if (type == '\0') {
/* Omitted type specifier. This is like 'g' but with at least
one digit after the decimal point. */
type = 'g';
flags |= Py_DTSF_ADD_DOT_0;
}
if (type == 'n')
/* 'n' is the same as 'g', except for the locale used to
format the result. We take care of that later. */
type = 'g';
/* 'F' is the same as 'f', per the PEP */
if (type == 'F')
type = 'f';
x = PyFloat_AsDouble(value);
if (x == -1.0 && PyErr_Occurred())
val = PyFloat_AsDouble(value);
if (val == -1.0 && PyErr_Occurred())
goto done;
if (type == '%') {
type = 'f';
x *= 100;
trailing = "%";
val *= 100;
add_pct = 1;
}
if (precision < 0)
precision = 6;
if (type == 'f' && fabs(x) >= 1e50)
if ((type == 'f' || type == 'F') && fabs(val) >= 1e50)
type = 'g';
/* cast "type", because if we're in unicode we need to pass a
8-bit char. this is safe, because we've restricted what "type"
can be */
PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
(char)type);
/* Cast "type", because if we're in unicode we need to pass a
8-bit char. This is safe, because we've restricted what "type"
can be. */
buf = PyOS_double_to_string(val, (char)type, precision, flags,
&float_type);
if (buf == NULL)
goto done;
n_digits = strlen(buf);
/* do the actual formatting */
PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
if (add_pct) {
/* We know that buf has a trailing zero (since we just called
strlen() on it), and we don't use that fact any more. So we
can just write over the trailing zero. */
buf[n_digits] = '%';
n_digits += 1;
}
/* adding trailing to fmt with PyOS_snprintf doesn't work, not
sure why. we'll just concatentate it here, no harm done. we
know we can't have a buffer overflow from the fmt size
analysis */
strcat(charbuf, trailing);
/* rather than duplicate the code for snprintf for both unicode
and 8 bit strings, we just use the 8 bit version and then
convert to unicode in a separate code path. that's probably
the lesser of 2 evils. */
/* Since there is no unicode version of PyOS_double_to_string,
just use the 8 bit version and then convert to unicode. */
#if STRINGLIB_IS_UNICODE
n_digits = strtounicode(unicodebuf, charbuf);
p = unicodebuf;
unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
if (unicode_tmp == NULL) {
PyErr_NoMemory();
goto done;
}
strtounicode(unicode_tmp, buf, n_digits);
p = unicode_tmp;
#else
/* compute the length. I believe this is done because the return
value from snprintf above is unreliable */
n_digits = strlen(charbuf);
p = charbuf;
p = buf;
#endif
/* is a sign character present in the output? if so, remember it
/* Is a sign character present in the output? If so, remember it
and skip it */
sign = p[0];
if (sign == '-') {
if (*p == '-') {
sign_char = *p;
++p;
--n_digits;
}
calc_number_widths(&spec, sign, 0, n_digits, format);
/* Determine if we have any "remainder" (after the digits, might include
decimal or exponent or both (or neither)) */
parse_number(p, n_digits, &n_remainder, &has_decimal);
/* allocate a string with enough space */
result = STRINGLIB_NEW(NULL, spec.n_total);
/* Determine the grouping, separator, and decimal point, if any. */
get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?
LT_DEFAULT_LOCALE :
LT_NO_LOCALE),
&locale);
/* Calculate how much memory we'll need. */
n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
n_remainder, has_decimal, &locale, format);
/* Allocate the memory. */
result = STRINGLIB_NEW(NULL, n_total);
if (result == NULL)
goto done;
/* Fill in the non-digit parts (padding, sign, etc.) */
fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits,
format->fill_char == '\0' ? ' ' : format->fill_char);
/* fill in the digit parts */
memmove(STRINGLIB_STR(result) +
(spec.n_lpadding + spec.n_lsign + spec.n_spadding),
p,
n_digits * sizeof(STRINGLIB_CHAR));
/* Populate the memory. */
fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
0);
done:
PyMem_Free(buf);
#if STRINGLIB_IS_UNICODE
PyMem_Free(unicode_tmp);
#endif
return result;
}
#endif /* FORMAT_FLOAT */
@ -1056,11 +1172,7 @@ FORMAT_FLOAT(PyObject *obj,
/* type conversion? */
switch (format.type) {
case '\0':
/* 'Z' means like 'g', but with at least one decimal. See
PyOS_ascii_formatd */
format.type = 'Z';
/* Deliberate fall through to the next case statement */
case '\0': /* No format code: like 'g', but with at least one decimal. */
case 'e':
case 'E':
case 'f':

View File

@ -5,161 +5,208 @@
#include <locale.h>
#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* Iteration state for walking a grouping string one group width at a
   time.  Set up with _GroupGenerator_init() and advanced with
   _GroupGenerator_next(). */
typedef struct {
/* The grouping string being walked.  Stored by pointer, not copied. */
const char *grouping;
/* The group width most recently handed out.  It is returned again,
   without advancing, once a 0 entry is reached in grouping.  Starts
   at 0. */
char previous;
Py_ssize_t i; /* Where we're currently pointing in grouping. */
} GroupGenerator;
/* Reset the generator so that it starts at the first entry of grouping
   and has not handed out any group width yet. */
static void
_GroupGenerator_init(GroupGenerator *self, const char *grouping)
{
    self->previous = 0;
    self->i = 0;
    self->grouping = grouping;
}
/* Return the width of the next group, or 0 when there are no more
   groups.

   Very little validation is done on the grouping string.  A string
   consisting of nothing but CHAR_MAX, for instance, simply ends the
   sequence straight away; that is not expected to happen, but it is
   handled gracefully. */
static Py_ssize_t
_GroupGenerator_next(GroupGenerator *self)
{
    const char width = self->grouping[self->i];

    if (width == 0) {
        /* End of the string: keep repeating the last width we handed
           out, without advancing. */
        return self->previous;
    }
    if (width == CHAR_MAX) {
        /* Explicit "no further grouping" marker: stop the generator. */
        return 0;
    }

    /* An ordinary entry: remember it and step past it. */
    self->previous = width;
    self->i++;
    return (Py_ssize_t)width;
}
/* Write one group into the output, working right to left.

   *buffer_end is the current write position in the output and
   *digits_end the current read position in the source digits; both
   are moved backwards past what was written/consumed.  From right to
   left, the output receives:

     - the thousands separator (thousands_sep_len chars), but only if
       thousands_sep is non-NULL;
     - n_chars characters taken from just before *digits_end;
     - n_zeros '0' characters.

   Every part is optional, depending on how we're called. */
static void
fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
     Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
     Py_ssize_t thousands_sep_len)
{
    STRINGLIB_CHAR *out = *buffer_end;
    STRINGLIB_CHAR *in = *digits_end;

    if (thousands_sep != NULL) {
        out -= thousands_sep_len;

        /* Copy the thousands_sep chars into the buffer. */
#if STRINGLIB_IS_UNICODE
        {
            /* The locale gives us plain chars; widen each one to
               unicode as it is stored. */
            Py_ssize_t k;
            for (k = 0; k < thousands_sep_len; ++k)
                out[k] = thousands_sep[k];
        }
#else
        /* Same character type on both sides, a straight memcpy will
           do. */
        memcpy(out, thousands_sep, thousands_sep_len);
#endif
    }

    /* The digits themselves. */
    out -= n_chars;
    in -= n_chars;
    memcpy(out, in, n_chars * sizeof(STRINGLIB_CHAR));

    /* Leading zeros, if any. */
    out -= n_zeros;
    STRINGLIB_FILL(out, '0', n_zeros);

    /* Publish the new positions back to the caller. */
    *buffer_end = out;
    *digits_end = in;
}
/**
* _Py_InsertThousandsGrouping:
* @buffer: A pointer to the start of a string.
* @n_buffer: The length of the string.
* @n_buffer: Number of characters in @buffer.
* @digits: A pointer to the digits we're reading from. If count
* is non-NULL, this is unused.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @buf_size: The maximum size of the buffer pointed to by buffer.
* @count: If non-NULL, points to a variable that will receive the
* number of characters we need to insert (and no formatting
* will actually occur).
* @append_zero_char: If non-zero, put a trailing zero at the end of
* of the resulting string, if and only if we modified the
* string.
* @min_width: The minimum width of the digits in the output string.
* Output will be zero-padded on the left to fill.
* @grouping: see definition in localeconv().
* @thousands_sep: see definition in localeconv().
*
* There are 2 modes: counting and filling. If @buffer is NULL,
* we are in counting mode, else filling mode.
* If counting, the required buffer size is returned.
* If filling, we know the buffer will be large enough, so we don't
* need to pass in the buffer size.
* Inserts thousand grouping characters (as defined by grouping and
* thousands_sep) into the string between buffer and buffer+n_digits.
* If count is non-NULL, don't do any formatting, just count the
* number of characters to insert. This is used by the caller to
* appropriately resize the buffer, if needed. If count is non-NULL,
* buffer can be NULL (it is not dereferenced at all in that case).
*
* Return value: 0 on error, else 1. Note that no error can occur if
* count is non-NULL.
*
* This name won't be used, the includer of this file should define
* it to be the actual function name, based on unicode or string.
*
* As closely as possible, this code mimics the logic in decimal.py's
_insert_thousands_sep().
**/
int
Py_ssize_t
_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char,
Py_ssize_t min_width,
const char *grouping,
const char *thousands_sep)
{
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
STRINGLIB_CHAR *pend = NULL; /* current end of buffer */
STRINGLIB_CHAR *pmax = NULL; /* max of buffer */
char current_grouping;
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
Py_ssize_t count = 0;
Py_ssize_t n_zeros;
int loop_broken = 0;
int use_separator = 0; /* First time through, don't append the
separator. They only go between
groups. */
STRINGLIB_CHAR *buffer_end = NULL;
STRINGLIB_CHAR *digits_end = NULL;
Py_ssize_t l;
Py_ssize_t n_chars;
Py_ssize_t thousands_sep_len = strlen(thousands_sep);
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
be looked at */
/* A generator that returns all of the grouping widths, until it
returns 0. */
GroupGenerator groupgen;
_GroupGenerator_init(&groupgen, grouping);
/* Initialize the character count, if we're just counting. */
if (count)
*count = 0;
else {
/* We're not just counting, we're modifying buffer */
pend = buffer + n_buffer;
pmax = buffer + buf_size;
if (buffer) {
buffer_end = buffer + n_buffer;
digits_end = digits + n_digits;
}
while ((l = _GroupGenerator_next(&groupgen)) > 0) {
l = MIN(l, MAX(MAX(remaining, min_width), 1));
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
/* Count only, don't do anything. */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
fill(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
/* Starting at the end and working right-to-left, keep track of
what grouping needs to be added and insert that. */
current_grouping = *grouping++;
/* Use a separator next time. */
use_separator = 1;
/* If the first character is 0, perform no grouping at all. */
if (current_grouping == 0)
return 1;
remaining -= n_chars;
min_width -= l;
while (remaining > current_grouping) {
/* Always leave buffer and pend valid at the end of this
loop, since we might leave with a return statement. */
remaining -= current_grouping;
if (count) {
/* We're only counting, not touching the memory. */
*count += thousands_sep_len;
}
else {
/* Do the formatting. */
STRINGLIB_CHAR *plast = buffer + remaining;
/* Is there room to insert thousands_sep_len chars? */
if (pmax - pend < thousands_sep_len)
/* No room. */
return 0;
/* Move the rest of the string down. */
memmove(plast + thousands_sep_len,
plast,
(pend - plast) * sizeof(STRINGLIB_CHAR));
/* Copy the thousands_sep chars into the buffer. */
#if STRINGLIB_IS_UNICODE
/* Convert from the char's of the thousands_sep from
the locale into unicode. */
{
Py_ssize_t i;
for (i = 0; i < thousands_sep_len; ++i)
plast[i] = thousands_sep[i];
}
#else
/* No conversion, just memcpy the thousands_sep. */
memcpy(plast, thousands_sep, thousands_sep_len);
#endif
}
/* Adjust end pointer. */
pend += thousands_sep_len;
/* Move to the next grouping character, unless we're
repeating (which is designated by a grouping of 0). */
if (*grouping != 0) {
current_grouping = *grouping++;
if (current_grouping == CHAR_MAX)
/* We're done. */
break;
}
if (remaining <= 0 && min_width <= 0) {
loop_broken = 1;
break;
}
if (append_zero_char) {
/* Append a zero character to mark the end of the string,
if there's room. */
if (pend - (buffer + remaining) < 1)
/* No room, error. */
return 0;
*pend = 0;
min_width -= thousands_sep_len;
}
if (!loop_broken) {
/* We left the loop without using a break statement. */
l = MAX(MAX(remaining, min_width), 1);
n_zeros = MAX(0, l - remaining);
n_chars = MAX(0, MIN(remaining, l));
/* Use n_zero zero's and n_chars chars */
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
if (buffer) {
/* Copy into the output buffer. */
fill(&digits_end, &buffer_end, n_chars, n_zeros,
use_separator ? thousands_sep : NULL, thousands_sep_len);
}
return 1;
}
return count;
}
/**
* _Py_InsertThousandsGroupingLocale:
* @buffer: A pointer to the start of a string.
* @n_buffer: The length of the string.
* @n_digits: The number of digits in the string, in which we want
* to put the grouping chars.
* @buf_size: The maximum size of the buffer pointed to by buffer.
* @count: If non-NULL, points to a variable that will receive the
* number of characters we need to insert (and no formatting
* will actually occur).
* @append_zero_char: If non-zero, put a trailing zero at the end of
* of the resulting string, if and only if we modified the
* string.
*
* Reads the current locale and calls _Py_InsertThousandsGrouping().
**/
int
Py_ssize_t
_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
Py_ssize_t n_buffer,
STRINGLIB_CHAR *digits,
Py_ssize_t n_digits,
Py_ssize_t buf_size,
Py_ssize_t *count,
int append_zero_char)
Py_ssize_t min_width)
{
struct lconv *locale_data = localeconv();
const char *grouping = locale_data->grouping;
const char *thousands_sep = locale_data->thousands_sep;
return _Py_InsertThousandsGrouping(buffer, n_buffer, n_digits,
buf_size, count,
append_zero_char, grouping,
thousands_sep);
return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
min_width, grouping, thousands_sep);
}
#endif /* STRINGLIB_LOCALEUTIL_H */

View File

@ -8792,43 +8792,14 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
return NULL;
}
static Py_ssize_t
strtounicode(Py_UNICODE *buffer, const char *charbuffer)
static void
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
{
register Py_ssize_t i;
Py_ssize_t len = strlen(charbuffer);
for (i = len - 1; i >= 0; i--)
buffer[i] = (Py_UNICODE) charbuffer[i];
return len;
}
static int
doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
{
Py_ssize_t result;
PyOS_ascii_formatd((char *)buffer, len, format, x);
result = strtounicode(buffer, (char *)buffer);
return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
}
#if 0
static int
longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
{
Py_ssize_t result;
PyOS_snprintf((char *)buffer, len, format, x);
result = strtounicode(buffer, (char *)buffer);
return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
}
#endif
/* XXX To save some code duplication, formatfloat/long/int could have been
shared with stringobject.c, converting from 8-bit to Unicode after the
formatting is done. */
static int
formatfloat(Py_UNICODE *buf,
size_t buflen,
@ -8837,54 +8808,59 @@ formatfloat(Py_UNICODE *buf,
int type,
PyObject *v)
{
/* fmt = '%#.' + `prec` + `type`
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
char fmt[20];
/* eric.smith: To minimize disturbances in PyUnicode_Format (the
only caller of this routine), I'm going to keep the existing
API to this function. That means that we'll allocate memory and
then copy back into the supplied buffer. But that's better than
all of the changes that would be required in PyUnicode_Format
because it does lots of memory management tricks. */
char* p = NULL;
int result = -1;
double x;
Py_ssize_t len;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
return -1;
goto done;
if (prec < 0)
prec = 6;
/* make sure that the decimal representation of precision really does
need at most 10 digits: platforms with sizeof(int) == 8 exist! */
if (prec > 0x7fffffffL) {
PyErr_SetString(PyExc_OverflowError,
"outrageously large precision "
"for formatted float");
return -1;
goto done;
}
if (type == 'f' && fabs(x) >= 1e50)
type = 'g';
/* Worst case length calc to ensure no buffer overrun:
'g' formats:
fmt = %#.<prec>g
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
for any double rep.)
len = 1 + prec + 1 + 2 + 5 = 9 + prec
'f' formats:
buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
len = 1 + 50 + 1 + prec = 52 + prec
If prec=0 the effective precision is 1 (the leading digit is
always given), therefore increase the length by one.
*/
if (((type == 'g' || type == 'G') &&
buflen <= (size_t)10 + (size_t)prec) ||
(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
((type == 'f' || type == 'F') &&
buflen <= (size_t)53 + (size_t)prec)) {
PyErr_SetString(PyExc_OverflowError,
"formatted float is too long (precision too large?)");
return -1;
goto done;
}
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
(flags&F_ALT) ? "#" : "",
prec, type);
return doubletounicode(buf, buflen, fmt, x);
p = PyOS_double_to_string(x, type, prec,
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
len = strlen(p);
if (len+1 >= buflen) {
/* Caller supplied buffer is not large enough. */
PyErr_NoMemory();
goto done;
}
strtounicode(buf, p, len);
result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
done:
PyMem_Free(p);
return result;
}
static PyObject*
@ -8903,84 +8879,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
return result;
}
#if 0
static int
formatint(Py_UNICODE *buf,
size_t buflen,
int flags,
int prec,
int type,
PyObject *v)
{
/* fmt = '%#.' + `prec` + 'l' + `type`
* worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
* + 1 + 1
* = 24
*/
char fmt[64]; /* plenty big enough! */
char *sign;
long x;
x = PyLong_AsLong(v);
if (x == -1 && PyErr_Occurred())
return -1;
if (x < 0 && type == 'u') {
type = 'd';
}
if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
sign = "-";
else
sign = "";
if (prec < 0)
prec = 1;
/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
*/
if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
PyErr_SetString(PyExc_OverflowError,
"formatted integer is too long (precision too large?)");
return -1;
}
if ((flags & F_ALT) &&
(type == 'x' || type == 'X' || type == 'o')) {
/* When converting under %#o, %#x or %#X, there are a number
* of issues that cause pain:
* - for %#o, we want a different base marker than C
* - when 0 is being converted, the C standard leaves off
* the '0x' or '0X', which is inconsistent with other
* %#x/%#X conversions and inconsistent with Python's
* hex() function
* - there are platforms that violate the standard and
* convert 0 with the '0x' or '0X'
* (Metrowerks, Compaq Tru64)
* - there are platforms that give '0x' when converting
* under %#X, but convert 0 in accordance with the
* standard (OS/2 EMX)
*
* We can achieve the desired consistency by inserting our
* own '0x' or '0X' prefix, and substituting %x/%X in place
* of %#x/%#X.
*
* Note that this is the same approach as used in
* formatint() in stringobject.c
*/
PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
sign, type, prec, type);
}
else {
PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
sign, (flags&F_ALT) ? "#" : "",
prec, type);
}
if (sign[0])
return longtounicode(buf, buflen, fmt, -x);
else
return longtounicode(buf, buflen, fmt, x);
}
#endif
static int
formatchar(Py_UNICODE *buf,
size_t buflen,
@ -9359,8 +9257,6 @@ PyObject *PyUnicode_Format(PyObject *format,
case 'F':
case 'g':
case 'G':
if (c == 'F')
c = 'f';
pbuf = formatbuf;
len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
flags, prec, c, v);

View File

@ -236,12 +236,15 @@ w_object(PyObject *v, WFILE *p)
w_string((char*)buf, 8, p);
}
else {
char buf[256]; /* Plenty to format any double */
n = _PyFloat_Repr(PyFloat_AS_DOUBLE(v),
buf, sizeof(buf));
char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
'r', 0, 0, NULL);
if (!buf)
return;
n = strlen(buf);
w_byte(TYPE_FLOAT, p);
w_byte((int)n, p);
w_string(buf, (int)n, p);
PyMem_Free(buf);
}
}
#ifndef WITHOUT_COMPLEX
@ -263,17 +266,24 @@ w_object(PyObject *v, WFILE *p)
w_string((char*)buf, 8, p);
}
else {
char buf[256]; /* Plenty to format any double */
char *buf;
w_byte(TYPE_COMPLEX, p);
n = _PyFloat_Repr(PyComplex_RealAsDouble(v),
buf, sizeof(buf));
buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
'r', 0, 0, NULL);
if (!buf)
return;
n = strlen(buf);
w_byte((int)n, p);
w_string(buf, (int)n, p);
n = _PyFloat_Repr(PyComplex_ImagAsDouble(v),
buf, sizeof(buf));
PyMem_Free(buf);
buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
'r', 0, 0, NULL);
if (!buf)
return;
n = strlen(buf);
w_byte((int)n, p);
w_string(buf, (int)n, p);
PyMem_Free(buf);
}
}
#endif

View File

@ -37,6 +37,38 @@
*
* Return value: the #gdouble value.
**/
#ifndef PY_NO_SHORT_FLOAT_REPR
/* Convert the string nptr to a double using _Py_dg_strtod.

   This is the variant compiled when PY_NO_SHORT_FLOAT_REPR is not defined.

   nptr must not be NULL (checked with an assert only).
   endptr is handed to _Py_dg_strtod unchanged.

   errno is reset to 0 before the conversion, so any non-zero errno seen by
   the caller afterwards was set during the conversion itself.

   Returns whatever _Py_dg_strtod returns. */
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
double result;
/* Support declarations for the START/END macro pair used below.
   NOTE(review): presumably these save and restore the FPU precision
   setting around the conversion -- confirm against the macro
   definitions, which are not visible here. */
_Py_SET_53BIT_PRECISION_HEADER;
assert(nptr != NULL);
/* Set errno to zero, so that we can distinguish zero results
and underflows */
errno = 0;
/* Only the call to _Py_dg_strtod runs between START and END. */
_Py_SET_53BIT_PRECISION_START;
result = _Py_dg_strtod(nptr, endptr);
_Py_SET_53BIT_PRECISION_END;
return result;
}
#else
/*
Use system strtod; since strtod is locale aware, we may
have to first fix the decimal separator.
Note that unlike _Py_dg_strtod, the system strtod may not always give
correctly rounded results.
*/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
@ -187,6 +219,15 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
return val;
}
#endif
/* Convert the string nptr to a double.

   Thin convenience wrapper: exactly PyOS_ascii_strtod(nptr, NULL).  Because
   no end pointer is requested, the caller cannot find out how much of the
   string was consumed by the conversion. */
double
PyOS_ascii_atof(const char *nptr)
{
return PyOS_ascii_strtod(nptr, NULL);
}
/* Given a string that may have a decimal point in the current
locale, change it back to a dot. Since the string cannot get
longer, no need for a maximum buffer size parameter. */
@ -292,8 +333,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size)
}
}
/* Ensure that buffer has a decimal point in it. The decimal point
will not be in the current locale, it will always be '.' */
/* Ensure that buffer has a decimal point in it. The decimal point will not
be in the current locale, it will always be '.'. Don't add a decimal if an
exponent is present. */
Py_LOCAL_INLINE(void)
ensure_decimal_point(char* buffer, size_t buf_size)
{
@ -322,7 +364,8 @@ ensure_decimal_point(char* buffer, size_t buf_size)
insert_count = 1;
}
}
else {
else if (!(*p == 'e' || *p == 'E')) {
/* Don't add ".0" if we have an exponent. */
chars_to_insert = ".0";
insert_count = 2;
}
@ -341,37 +384,6 @@ ensure_decimal_point(char* buffer, size_t buf_size)
}
}
/* Insert the current locale's grouping characters into the integer part of
   the number held in buffer.

   The integer part is everything before the first of:
     - the locale's decimal point string (buffer is expected to already use
       the locale-specific decimal point), or, failing that,
     - an exponent marker 'e' / 'E', or, failing that,
     - the end of the string.

   buf_size is the total capacity of buffer.  Returns 0 on error, else 1
   (the result of _PyBytes_InsertThousandsGroupingLocale is returned
   unchanged). */
Py_LOCAL_INLINE(int)
add_thousands_grouping(char* buffer, size_t buf_size)
{
    Py_ssize_t total_len = strlen(buffer);
    const char *locale_point = localeconv()->decimal_point;
    char *int_end;

    /* int_end ends up pointing just past the right-most character that
       takes part in grouping. */
    int_end = strstr(buffer, locale_point);
    if (int_end == NULL) {
        /* No decimal point: stop at the exponent, if there is one. */
        int_end = strpbrk(buffer, "eE");
    }
    if (int_end == NULL) {
        /* Neither decimal point nor exponent: group the whole string. */
        int_end = buffer + total_len;
    }

    return _PyBytes_InsertThousandsGroupingLocale(buffer, total_len,
                                                  int_end - buffer,
                                                  buf_size, NULL, 1);
}
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120
@ -386,9 +398,8 @@ add_thousands_grouping(char* buffer, size_t buf_size)
* Converts a #gdouble to a string, using the '.' as
* decimal point. To format the number you pass in
* a printf()-style format string. Allowed conversion
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'.
* specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
*
* 'n' is the same as 'g', except it uses the current locale.
* 'Z' is the same as 'g', except it always has a decimal and
* at least one digit after the decimal.
*
@ -403,11 +414,6 @@ PyOS_ascii_formatd(char *buffer,
char format_char;
size_t format_len = strlen(format);
/* For type 'n', we need to make a copy of the format string, because
we're going to modify 'n' -> 'g', and format is const char*, so we
can't modify it directly. FLOAT_FORMATBUFLEN should be longer than
we ever need this to be. There's an upcoming check to ensure it's
big enough. */
/* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
also with at least one character past the decimal. */
char tmp_format[FLOAT_FORMATBUFLEN];
@ -433,12 +439,12 @@ PyOS_ascii_formatd(char *buffer,
if (!(format_char == 'e' || format_char == 'E' ||
format_char == 'f' || format_char == 'F' ||
format_char == 'g' || format_char == 'G' ||
format_char == 'n' || format_char == 'Z'))
format_char == 'Z'))
return NULL;
/* Map 'n' or 'Z' format_char to 'g', by copying the format string and
/* Map 'Z' format_char to 'g', by copying the format string and
replacing the final char with a 'g' */
if (format_char == 'n' || format_char == 'Z') {
if (format_char == 'Z') {
if (format_len + 1 >= sizeof(tmp_format)) {
/* The format won't fit in our copy. Error out. In
practice, this will never happen and will be
@ -457,11 +463,8 @@ PyOS_ascii_formatd(char *buffer,
/* Do various fixups on the return string */
/* Get the current locale, and find the decimal point string.
Convert that string back to a dot. Do not do this if using the
'n' (number) format code, since we want to keep the localized
decimal point in that case. */
if (format_char != 'n')
change_decimal_from_locale_to_dot(buffer);
Convert that string back to a dot. */
change_decimal_from_locale_to_dot(buffer);
/* If an exponent exists, ensure that the exponent is at least
MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
@ -475,16 +478,497 @@ PyOS_ascii_formatd(char *buffer,
if (format_char == 'Z')
ensure_decimal_point(buffer, buf_size);
/* If format_char is 'n', add the thousands grouping. */
if (format_char == 'n')
if (!add_thousands_grouping(buffer, buf_size))
return NULL;
return buffer;
}
double
PyOS_ascii_atof(const char *nptr)
#ifdef PY_NO_SHORT_FLOAT_REPR
/* The fallback code to use if _Py_dg_dtoa is not available. */
/* Convert the double val to a string via PyOS_ascii_formatd and return it in
   a PyMem_Malloc'd block; the caller releases it with PyMem_Free.

   Arguments:
     val is the double to be converted.
     format_code is one of 'e', 'E', 'f', 'F', 'g', 'G', 'r' or 's'.
       'e', 'f' and 'g' correspond to '%e', '%f' and '%g'; the upper-case
       codes format like their lower-case counterparts and then upper-case
       the result.  'r' is formatted as '%.17g' and 's' as '%.12g'.
     precision is the desired precision.  It must be 0 for 'r' and 's'.
     flags is a combination of:
       Py_DTSF_SIGN       prefix finite non-negative results with '+'
                          (never applied to inf or nan)
       Py_DTSF_ADD_DOT_0  format with PyOS_ascii_formatd's 'Z' code, which
                          guarantees a decimal point followed by at least
                          one digit
       Py_DTSF_ALT        use the printf '#' alternate form
     type, if non-NULL, receives Py_DTST_FINITE, Py_DTST_INFINITE or
       Py_DTST_NAN describing val.  It is only written on success.

   Returns the new string, or NULL with a Python exception set: SystemError
   (PyErr_BadInternalCall) for an invalid format_code, a non-zero precision
   with 'r'/'s', or a formatting failure; MemoryError if allocation fails. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    char format[32];
    Py_ssize_t bufsize;
    char *buf;
    int t;
    int binexp;
    int upper = 0;

    /* Validate format_code, and map upper and lower case */
    switch (format_code) {
    case 'e':          /* exponent */
    case 'f':          /* fixed */
    case 'g':          /* general */
        break;
    case 'E':
        upper = 1;
        format_code = 'e';
        break;
    case 'F':
        upper = 1;
        format_code = 'f';
        break;
    case 'G':
        upper = 1;
        format_code = 'g';
        break;
    case 'r':          /* repr format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 17;
        format_code = 'g';
        break;
    case 's':          /* str format */
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        precision = 12;
        format_code = 'g';
        break;
    default:
        PyErr_BadInternalCall();
        return NULL;
    }

    /* Work out how big a buffer is needed instead of relying on a fixed
       size, which silently truncated '%f' output for large values or
       precisions.  A finite float needs at most:
         1 byte for a sign,
         1 for a decimal point,
         2 for [eE][+-],
         up to 19 for the exponent digits (far more than a double needs),
         1 for the trailing nul,
         1 for the extra '0' that the 'Z' code may append,
       i.e. 25 bytes plus the digits of the significand.  For 'e' and 'g'
       that is about `precision` digits.  For 'f' it is `precision` digits
       after the point plus the integer part: if val = m * 2**binexp with
       0.5 <= m < 1 then the integer part has at most 1 + binexp/3 digits.
       NOTE(review): a negative precision is not rejected here (the
       pre-existing behaviour is kept); it merely contributes no digits to
       the size estimate. */
    if (Py_IS_NAN(val) || Py_IS_INFINITY(val)) {
        /* "-inf" plus the trailing nul is the longest case. */
        bufsize = 5;
    }
    else {
        bufsize = 25 + (precision > 0 ? precision : 0);
        if (format_code == 'f' && fabs(val) >= 1.0) {
            frexp(val, &binexp);
            bufsize += binexp / 3;
        }
    }

    buf = (char *)PyMem_Malloc(bufsize);
    if (buf == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* Handle nan and inf. */
    if (Py_IS_NAN(val)) {
        strcpy(buf, "nan");
        t = Py_DTST_NAN;
    }
    else if (Py_IS_INFINITY(val)) {
        if (copysign(1., val) == 1.)
            strcpy(buf, "inf");
        else
            strcpy(buf, "-inf");
        t = Py_DTST_INFINITE;
    }
    else {
        t = Py_DTST_FINITE;
        if (flags & Py_DTSF_ADD_DOT_0)
            format_code = 'Z';

        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
                      format_code);
        /* PyOS_ascii_formatd reports failure by returning NULL, in which
           case buf must not be read. */
        if (PyOS_ascii_formatd(buf, bufsize, format, val) == NULL) {
            PyMem_Free(buf);
            PyErr_BadInternalCall();
            return NULL;
        }
    }

    /* Never add sign for nan/inf, even if asked. */
    if (flags & Py_DTSF_SIGN && buf[0] != '-' && t == Py_DTST_FINITE) {
        size_t len = strlen(buf);
        /* The size estimate above already reserved a byte for the sign. */
        assert((size_t)bufsize >= len + 2);
        memmove(buf + 1, buf, len + 1);
        buf[0] = '+';
    }

    if (upper) {
        /* Convert to upper case without consulting the C locale: the
           result only ever contains ASCII, and toupper() could map 'i'
           to a non-ASCII character in some locales. */
        char *p1;
        for (p1 = buf; *p1; p1++) {
            if (*p1 >= 'a' && *p1 <= 'z')
                *p1 = (char)(*p1 - 'a' + 'A');
        }
    }

    if (type)
        *type = t;
    return buf;
}
#else
/* _Py_dg_dtoa is available. */
/* I'm using a lookup table here so that I don't have to invent a non-locale
specific way to convert to uppercase */
/* Indices into lc_float_strings / uc_float_strings.  Both tables must list
   their entries in this order. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2
/* The lengths of these are known to the code below, so don't change them */
/* Lower-case spellings: infinity, not-a-number, exponent marker. */
static char *lc_float_strings[] = {
"inf",
"nan",
"e",
};
/* Upper-case spellings, same order as lc_float_strings. */
static char *uc_float_strings[] = {
"INF",
"NAN",
"E",
};
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
   memory containing the resulting string.

   Arguments:
     d is the double to be converted
     format_code is one of 'e', 'f', 'g', 'r' or 's'.  'e', 'f' and 'g'
       correspond to '%e', '%f' and '%g';  'r' and 's' correspond
       to repr and str.
     mode is one of the integers 0, 2 or 3 and is passed straight to
       _Py_dg_dtoa.  It is completely determined by format_code: 'e', 'g'
       and 's' use mode 2; 'f' mode 3, 'r' mode 0.
     precision is the desired precision
     always_add_sign is nonzero if a '+' sign should be included for positive
       numbers (and for positive infinity; never for nan)
     add_dot_0_if_integer is nonzero if integers in non-exponential form
       should have ".0" added.  Intended for format codes 'r', 's', and 'g'.
     use_alt_formatting is nonzero if alternative formatting should be
       used.  Intended for format codes 'e', 'f' and 'g'.
     float_strings is a table indexed by OFS_INF, OFS_NAN and OFS_E giving
       the spellings to use for infinity, nan and the exponent marker.
       The OFS_INF and OFS_NAN entries must be exactly 3 characters long.
     type, if non-NULL, will be set to one of these constants to identify
       the type of the 'd' argument:
         Py_DTST_FINITE
         Py_DTST_INFINITE
         Py_DTST_NAN

   Returns a PyMem_Malloc'd block of memory containing the resulting string,
   or NULL on error. If NULL is returned, the Python error has been set.
   */
static char *
format_float_short(double d, char format_code,
                   int mode, Py_ssize_t precision,
                   int always_add_sign, int add_dot_0_if_integer,
                   int use_alt_formatting, char **float_strings, int *type)
{
    char *buf = NULL;
    char *p = NULL;
    Py_ssize_t bufsize = 0;
    char *digits, *digits_end;
    int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
    Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
    _Py_SET_53BIT_PRECISION_HEADER;

    /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
       Must be matched by a call to _Py_dg_freedtoa. */
    _Py_SET_53BIT_PRECISION_START;
    digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
                         &digits_end);
    _Py_SET_53BIT_PRECISION_END;

    if (digits == NULL) {
        /* The only failure mode is no memory. */
        PyErr_NoMemory();
        goto exit;
    }
    /* Only read the out-parameter once we know the call succeeded. */
    decpt = (Py_ssize_t)decpt_as_int;
    assert(digits_end != NULL && digits_end >= digits);
    digits_len = digits_end - digits;

    /* The cast keeps the isdigit() argument in the range the C standard
       requires (representable as unsigned char, or EOF). */
    if (digits_len && !isdigit((unsigned char)digits[0])) {
        /* Infinities and nans here; adapt Gay's output,
           so convert Infinity to inf and NaN to nan, and
           ignore sign of nan. Then return. */

        /* We only need 5 bytes to hold the result "+inf\0" . */
        bufsize = 5; /* Used later in an assert. */
        buf = (char *)PyMem_Malloc(bufsize);
        if (buf == NULL) {
            PyErr_NoMemory();
            goto exit;
        }
        p = buf;

        if (digits[0] == 'i' || digits[0] == 'I') {
            if (sign == 1) {
                *p++ = '-';
            }
            else if (always_add_sign) {
                *p++ = '+';
            }
            memcpy(p, float_strings[OFS_INF], 3);
            p += 3;

            if (type)
                *type = Py_DTST_INFINITE;
        }
        else if (digits[0] == 'n' || digits[0] == 'N') {
            /* note that we *never* add a sign for a nan,
               even if one has explicitly been requested */
            memcpy(p, float_strings[OFS_NAN], 3);
            p += 3;

            if (type)
                *type = Py_DTST_NAN;
        }
        else {
            /* shouldn't get here: Gay's code should always return
               something starting with a digit, an 'I', or 'N' */
            memcpy(p, "ERR", 3);
            p += 3;
            assert(0);
        }
        goto exit;
    }

    /* The result must be finite (not inf or nan). */
    if (type)
        *type = Py_DTST_FINITE;

    /* We got digits back, format them.  We may need to pad 'digits'
       either on the left or right (or both) with extra zeros, so in
       general the resulting string has the form

         [<sign>]<zeros><digits><zeros>[<exponent>]

       where either of the <zeros> pieces could be empty, and there's a
       decimal point that could appear either in <digits> or in the
       leading or trailing <zeros>.

       Imagine an infinite 'virtual' string vdigits, consisting of the
       string 'digits' (starting at index 0) padded on both the left and
       right with infinite strings of zeros.  We want to output a slice

         vdigits[vdigits_start : vdigits_end]

       of this virtual string.  Thus if vdigits_start < 0 then we'll end
       up producing some leading zeros; if vdigits_end > digits_len there
       will be trailing zeros in the output.  The next section of code
       determines whether to use an exponent or not, figures out the
       position 'decpt' of the decimal point, and computes 'vdigits_start'
       and 'vdigits_end'. */
    vdigits_end = digits_len;
    switch (format_code) {
    case 'e':
        use_exp = 1;
        vdigits_end = precision;
        break;
    case 'f':
        vdigits_end = decpt + precision;
        break;
    case 'g':
        if (decpt <= -4 || decpt > precision)
            use_exp = 1;
        if (use_alt_formatting)
            vdigits_end = precision;
        break;
    case 'r':
        /* convert to exponential format at 1e16.  We used to convert
           at 1e17, but that gives odd-looking results for some values
           when a 16-digit 'shortest' repr is padded with bogus zeros.
           For example, repr(2e16+8) would give 20000000000000010.0;
           the true value is 20000000000000008.0. */
        if (decpt <= -4 || decpt > 16)
            use_exp = 1;
        break;
    case 's':
        /* if we're forcing a digit after the point, convert to
           exponential format at 1e11.  If not, convert at 1e12. */
        if (decpt <= -4 || decpt >
            (add_dot_0_if_integer ? precision-1 : precision))
            use_exp = 1;
        break;
    default:
        PyErr_BadInternalCall();
        goto exit;
    }

    /* if using an exponent, reset decimal point position to 1 and adjust
       exponent accordingly.*/
    if (use_exp) {
        exp = decpt - 1;
        decpt = 1;
    }
    /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
       decpt < vdigits_end if add_dot_0_if_integer and no exponent */
    vdigits_start = decpt <= 0 ? decpt-1 : 0;
    if (!use_exp && add_dot_0_if_integer)
        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
    else
        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;

    /* double check inequalities */
    assert(vdigits_start <= 0 &&
           0 <= digits_len &&
           digits_len <= vdigits_end);
    /* decimal point should be in (vdigits_start, vdigits_end] */
    assert(vdigits_start < decpt && decpt <= vdigits_end);

    /* Compute an upper bound how much memory we need. This might be a few
       chars too long, but no big deal. */
    bufsize =
        /* sign, decimal point and trailing 0 byte */
        3 +

        /* total digit count (including zero padding on both sides) */
        (vdigits_end - vdigits_start) +

        /* exponent "e+100", max 3 numerical digits */
        (use_exp ? 5 : 0);

    /* Now allocate the memory and initialize p to point to the start of
       it. */
    buf = (char *)PyMem_Malloc(bufsize);
    if (buf == NULL) {
        PyErr_NoMemory();
        goto exit;
    }
    p = buf;

    /* Add a negative sign if negative, and a plus sign if non-negative
       and always_add_sign is true. */
    if (sign == 1)
        *p++ = '-';
    else if (always_add_sign)
        *p++ = '+';

    /* note that exactly one of the three 'if' conditions is true,
       so we include exactly one decimal point */
    /* Zero padding on left of digit string */
    if (decpt <= 0) {
        memset(p, '0', decpt-vdigits_start);
        p += decpt - vdigits_start;
        *p++ = '.';
        memset(p, '0', 0-decpt);
        p += 0-decpt;
    }
    else {
        memset(p, '0', 0-vdigits_start);
        p += 0 - vdigits_start;
    }

    /* Digits, with included decimal point.  The lengths are exact, so a
       plain memcpy is used; the terminating nul is written at 'exit'. */
    if (0 < decpt && decpt <= digits_len) {
        memcpy(p, digits, decpt-0);
        p += decpt-0;
        *p++ = '.';
        memcpy(p, digits+decpt, digits_len-decpt);
        p += digits_len-decpt;
    }
    else {
        memcpy(p, digits, digits_len);
        p += digits_len;
    }

    /* And zeros on the right */
    if (digits_len < decpt) {
        memset(p, '0', decpt-digits_len);
        p += decpt-digits_len;
        *p++ = '.';
        memset(p, '0', vdigits_end-decpt);
        p += vdigits_end-decpt;
    }
    else {
        memset(p, '0', vdigits_end-digits_len);
        p += vdigits_end-digits_len;
    }

    /* Delete a trailing decimal pt unless using alternative formatting. */
    if (p[-1] == '.' && !use_alt_formatting)
        p--;

    /* Now that we've done zero padding, add an exponent if needed. */
    if (use_exp) {
        *p++ = float_strings[OFS_E][0];
        exp_len = sprintf(p, "%+.02d", exp);
        p += exp_len;
    }
  exit:
    if (buf) {
        *p = '\0';
        /* It's too late if this fails, as we've already stepped on
           memory that isn't ours. But it's an okay debugging test. */
        assert(p-buf < bufsize);
    }
    if (digits)
        _Py_dg_freedtoa(digits);

    return buf;
}
/* Convert the double val to a string with format_float_short and return it
   in a PyMem_Malloc'd block.

   format_code is one of 'e', 'E', 'f', 'F', 'g', 'G', 'r' or 's'.  The
   upper-case codes behave like their lower-case counterparts but select
   the upper-case spellings of "inf", "nan" and the exponent marker.
   precision must be 0 for 'r' and 's'.  flags may combine Py_DTSF_SIGN,
   Py_DTSF_ADD_DOT_0 and Py_DTSF_ALT; type is forwarded unchanged to
   format_float_short.

   Returns NULL after PyErr_BadInternalCall() for an unknown format_code or
   a non-zero precision with 'r'/'s'; otherwise returns whatever
   format_float_short returns. */
PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         char format_code,
                                         int precision,
                                         int flags,
                                         int *type)
{
    char **strings = lc_float_strings;
    char code = format_code;
    int dtoa_mode = 0;

    /* Fold the upper-case codes onto lower case, remembering to use the
       upper-case string table for them. */
    if (format_code == 'E' || format_code == 'F' || format_code == 'G') {
        strings = uc_float_strings;
        if (format_code == 'E')
            code = 'e';
        else if (format_code == 'F')
            code = 'f';
        else
            code = 'g';
    }

    /* Pick the dtoa mode and adjust the precision for each format. */
    if (code == 'e') {
        dtoa_mode = 2;
        precision += 1;
    }
    else if (code == 'f') {
        dtoa_mode = 3;
    }
    else if (code == 'g') {
        dtoa_mode = 2;
        /* precision 0 makes no sense for 'g' format; interpret as 1 */
        if (precision == 0)
            precision = 1;
    }
    else if (code == 'r' || code == 's') {
        /* Supplied precision is unused, must be 0. */
        if (precision != 0) {
            PyErr_BadInternalCall();
            return NULL;
        }
        if (code == 'r') {
            /* "repr" pseudo-mode */
            dtoa_mode = 0;
        }
        else {
            dtoa_mode = 2;
            precision = 12;
        }
    }
    else {
        /* Not a recognised format code. */
        PyErr_BadInternalCall();
        return NULL;
    }

    return format_float_short(val, code, dtoa_mode, precision,
                              flags & Py_DTSF_SIGN,
                              flags & Py_DTSF_ADD_DOT_0,
                              flags & Py_DTSF_ALT,
                              strings, type);
}
#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */