Issue #5859: Remove use of fixed-length buffers for float formatting
in unicodeobject.c and the fallback version of PyOS_double_to_string.
As a result, operations like '%.120e' % 12.34 no longer raise an
exception.
This commit is contained in:
Mark Dickinson 2009-05-01 11:42:00 +00:00
parent fb526ac34a
commit f489caf5da
4 changed files with 90 additions and 98 deletions

View File

@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest:
value = 0.01
for x in range(60):
value = value * 3.141592655 / 3.0 * 10.0
# The formatfloat() code in stringobject.c and
# unicodeobject.c uses a 120 byte buffer and switches from
# 'f' formatting to 'g' at precision 50, so we expect
# OverflowErrors for the ranges x < 50 and prec >= 67.
if x < 50 and prec >= 67:
self.checkraises(OverflowError, format, "__mod__", value)
else:
self.checkcall(format, "__mod__", value)
self.checkcall(format, "__mod__", value)
def test_inplace_rewrites(self):
# Check that strings don't copy and modify cached single-character strings

View File

@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
Core and Builtins
-----------------
- Issue #5859: Remove length restrictions for float formatting:
'%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
- Issue #1588: Add complex.__format__. For example,
format(complex(1, 2./3), '.5') now produces a sensible result.

View File

@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
return NULL;
}
static void
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
{
register Py_ssize_t i;
for (i = len - 1; i >= 0; i--)
buffer[i] = (Py_UNICODE) charbuffer[i];
}
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
static int
formatfloat(Py_UNICODE *buf,
size_t buflen,
int flags,
int prec,
int type,
PyObject *v)
static PyObject *
formatfloat(PyObject *v, int flags, int prec, int type)
{
/* eric.smith: To minimize disturbances in PyUnicode_Format (the
only caller of this routine), I'm going to keep the existing
API to this function. That means that we'll allocate memory and
then copy back into the supplied buffer. But that's better than
all of the changes that would be required in PyUnicode_Format
because it does lots of memory management tricks. */
char* p = NULL;
int result = -1;
char *p;
PyObject *result;
double x;
Py_ssize_t len;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
goto done;
return NULL;
if (prec < 0)
prec = 6;
/* make sure that the decimal representation of precision really does
need at most 10 digits: platforms with sizeof(int) == 8 exist! */
if (prec > 0x7fffffffL) {
PyErr_SetString(PyExc_OverflowError,
"outrageously large precision "
"for formatted float");
goto done;
}
if (type == 'f' && fabs(x) >= 1e50)
type = 'g';
if (((type == 'g' || type == 'G') &&
buflen <= (size_t)10 + (size_t)prec) ||
((type == 'f' || type == 'F') &&
buflen <= (size_t)53 + (size_t)prec)) {
PyErr_SetString(PyExc_OverflowError,
"formatted float is too long (precision too large?)");
goto done;
}
p = PyOS_double_to_string(x, type, prec,
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
len = strlen(p);
if (len+1 >= buflen) {
/* Caller supplied buffer is not large enough. */
PyErr_NoMemory();
goto done;
}
strtounicode(buf, p, len);
result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
done:
if (p == NULL)
return NULL;
result = PyUnicode_FromStringAndSize(p, strlen(p));
PyMem_Free(p);
return result;
}
@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
}
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
chars are formatted. XXX This is a magic number. Each formatting
routine does bounds checking to ensure no overflow, but a better
solution may be to malloc a buffer of appropriate size for each
format. For now, the current solution is sufficient.
FORMATBUFLEN is the length of the buffer in which chars are formatted.
*/
#define FORMATBUFLEN (size_t)120
#define FORMATBUFLEN (size_t)10
PyObject *PyUnicode_Format(PyObject *format,
PyObject *args)
@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
Py_UNICODE *pbuf;
Py_UNICODE sign;
Py_ssize_t len;
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
fmt++;
if (*fmt == '(') {
@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
case 'F':
case 'g':
case 'G':
pbuf = formatbuf;
len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
flags, prec, c, v);
if (len < 0)
temp = formatfloat(v, flags, prec, c);
if (!temp)
goto onError;
pbuf = PyUnicode_AS_UNICODE(temp);
len = PyUnicode_GET_SIZE(temp);
sign = 1;
if (flags & F_ZERO)
fill = '0';

View File

@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
int flags,
int *type)
{
char buf[128];
char format[32];
Py_ssize_t len;
char *result;
char *p;
int t;
Py_ssize_t bufsize;
char *buf;
int t, exp;
int upper = 0;
/* Validate format_code, and map upper and lower case */
@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
return NULL;
}
/* Here's a quick-and-dirty calculation to figure out how big a buffer
we need. In general, for a finite float we need:
- 1 byte for each digit of the decimal significand;
- 1 for a possible sign;
- 1 for a possible decimal point;
- 2 for a possible [eE][+-];
- 1 for each digit of the exponent (if we allow 19 digits
in total then we're safe up to exponents of 2**63);
- 1 for the trailing nul byte.
This gives a total of 24 + the number of digits in the significand,
and the number of digits in the significand is:
- for 'g' format: at most precision, except possibly
when precision == 0, when it's 1;
- for 'e' format: precision+1;
- for 'f' format: precision digits after the point, and at least 1
before it. To figure out how many digits appear before the point
we have to examine the size of the number. If fabs(val) < 1.0
then there will be only one digit before the point. If
fabs(val) >= 1.0, then there are at most
1+floor(log10(ceiling(fabs(val))))
digits before the point (where the 'ceiling' allows for the
possibility that the rounding rounds the integer part of val
up). A safe upper bound for the above quantity is
1+floor(exp/3), where exp is the unique integer such that
0.5 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
frexp.
So we allow room for precision+1 digits for all formats, plus an
extra floor(exp/3) digits for 'f' format.
*/
if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
bufsize = 5;
else {
bufsize = 25 + precision;
if (format_code == 'f' && fabs(val) >= 1.0) {
frexp(val, &exp);
bufsize += exp/3;
}
}
buf = PyMem_Malloc(bufsize);
if (buf == NULL) {
PyErr_NoMemory();
return NULL;
}
/* Handle nan and inf. */
if (Py_IS_NAN(val)) {
strcpy(buf, "nan");
@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
(flags & Py_DTSF_ALT ? "#" : ""), precision,
format_code);
_PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
}
len = strlen(buf);
/* Add 1 for the trailing 0 byte.
Add 1 because we might need to make room for the sign.
*/
result = PyMem_Malloc(len + 2);
if (result == NULL) {
PyErr_NoMemory();
return NULL;
}
p = result;
/* Add sign when requested. It's convenient (esp. when formatting
complex numbers) to include a sign even for inf and nan. */
if (flags & Py_DTSF_SIGN && buf[0] != '-')
*p++ = '+';
strcpy(p, buf);
if (flags & Py_DTSF_SIGN && buf[0] != '-') {
size_t len = strlen(buf);
/* the bufsize calculations above should ensure that we've got
space to add a sign */
assert((size_t)bufsize >= len+2);
memmove(buf+1, buf, len+1);
buf[0] = '+';
}
if (upper) {
/* Convert to upper case. */
char *p1;
for (p1 = p; *p1; p1++)
for (p1 = buf; *p1; p1++)
*p1 = Py_TOUPPER(*p1);
}
if (type)
*type = t;
return result;
return buf;
}
#else