Issue #5859: Remove use of fixed-length buffers for float formatting
in unicodeobject.c and the fallback version of PyOS_double_to_string. As a result, operations like '%.120e' % 12.34 no longer raise an exception.
This commit is contained in:
parent
fb526ac34a
commit
f489caf5da
|
@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest:
|
|||
value = 0.01
|
||||
for x in range(60):
|
||||
value = value * 3.141592655 / 3.0 * 10.0
|
||||
# The formatfloat() code in stringobject.c and
|
||||
# unicodeobject.c uses a 120 byte buffer and switches from
|
||||
# 'f' formatting to 'g' at precision 50, so we expect
|
||||
# OverflowErrors for the ranges x < 50 and prec >= 67.
|
||||
if x < 50 and prec >= 67:
|
||||
self.checkraises(OverflowError, format, "__mod__", value)
|
||||
else:
|
||||
self.checkcall(format, "__mod__", value)
|
||||
self.checkcall(format, "__mod__", value)
|
||||
|
||||
def test_inplace_rewrites(self):
|
||||
# Check that strings don't copy and modify cached single-character strings
|
||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #5859: Remove length restrictions for float formatting:
|
||||
'%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
|
||||
|
||||
- Issue #1588: Add complex.__format__. For example,
|
||||
format(complex(1, 2./3), '.5') now produces a sensible result.
|
||||
|
||||
|
|
|
@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
|
||||
{
|
||||
register Py_ssize_t i;
|
||||
for (i = len - 1; i >= 0; i--)
|
||||
buffer[i] = (Py_UNICODE) charbuffer[i];
|
||||
}
|
||||
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
|
||||
|
||||
static int
|
||||
formatfloat(Py_UNICODE *buf,
|
||||
size_t buflen,
|
||||
int flags,
|
||||
int prec,
|
||||
int type,
|
||||
PyObject *v)
|
||||
static PyObject *
|
||||
formatfloat(PyObject *v, int flags, int prec, int type)
|
||||
{
|
||||
/* eric.smith: To minimize disturbances in PyUnicode_Format (the
|
||||
only caller of this routine), I'm going to keep the existing
|
||||
API to this function. That means that we'll allocate memory and
|
||||
then copy back into the supplied buffer. But that's better than
|
||||
all of the changes that would be required in PyUnicode_Format
|
||||
because it does lots of memory management tricks. */
|
||||
|
||||
char* p = NULL;
|
||||
int result = -1;
|
||||
char *p;
|
||||
PyObject *result;
|
||||
double x;
|
||||
Py_ssize_t len;
|
||||
|
||||
x = PyFloat_AsDouble(v);
|
||||
if (x == -1.0 && PyErr_Occurred())
|
||||
goto done;
|
||||
return NULL;
|
||||
|
||||
if (prec < 0)
|
||||
prec = 6;
|
||||
|
||||
/* make sure that the decimal representation of precision really does
|
||||
need at most 10 digits: platforms with sizeof(int) == 8 exist! */
|
||||
if (prec > 0x7fffffffL) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"outrageously large precision "
|
||||
"for formatted float");
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (type == 'f' && fabs(x) >= 1e50)
|
||||
type = 'g';
|
||||
|
||||
if (((type == 'g' || type == 'G') &&
|
||||
buflen <= (size_t)10 + (size_t)prec) ||
|
||||
((type == 'f' || type == 'F') &&
|
||||
buflen <= (size_t)53 + (size_t)prec)) {
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"formatted float is too long (precision too large?)");
|
||||
goto done;
|
||||
}
|
||||
|
||||
p = PyOS_double_to_string(x, type, prec,
|
||||
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
|
||||
len = strlen(p);
|
||||
if (len+1 >= buflen) {
|
||||
/* Caller supplied buffer is not large enough. */
|
||||
PyErr_NoMemory();
|
||||
goto done;
|
||||
}
|
||||
strtounicode(buf, p, len);
|
||||
result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
|
||||
|
||||
done:
|
||||
if (p == NULL)
|
||||
return NULL;
|
||||
result = PyUnicode_FromStringAndSize(p, strlen(p));
|
||||
PyMem_Free(p);
|
||||
return result;
|
||||
}
|
||||
|
@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
|
|||
}
|
||||
|
||||
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
|
||||
|
||||
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
|
||||
chars are formatted. XXX This is a magic number. Each formatting
|
||||
routine does bounds checking to ensure no overflow, but a better
|
||||
solution may be to malloc a buffer of appropriate size for each
|
||||
format. For now, the current solution is sufficient.
|
||||
FORMATBUFLEN is the length of the buffer in which chars are formatted.
|
||||
*/
|
||||
#define FORMATBUFLEN (size_t)120
|
||||
#define FORMATBUFLEN (size_t)10
|
||||
|
||||
PyObject *PyUnicode_Format(PyObject *format,
|
||||
PyObject *args)
|
||||
|
@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
|
|||
Py_UNICODE *pbuf;
|
||||
Py_UNICODE sign;
|
||||
Py_ssize_t len;
|
||||
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
|
||||
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
|
||||
|
||||
fmt++;
|
||||
if (*fmt == '(') {
|
||||
|
@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
|
|||
case 'F':
|
||||
case 'g':
|
||||
case 'G':
|
||||
pbuf = formatbuf;
|
||||
len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
|
||||
flags, prec, c, v);
|
||||
if (len < 0)
|
||||
temp = formatfloat(v, flags, prec, c);
|
||||
if (!temp)
|
||||
goto onError;
|
||||
pbuf = PyUnicode_AS_UNICODE(temp);
|
||||
len = PyUnicode_GET_SIZE(temp);
|
||||
sign = 1;
|
||||
if (flags & F_ZERO)
|
||||
fill = '0';
|
||||
|
|
|
@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
|||
int flags,
|
||||
int *type)
|
||||
{
|
||||
char buf[128];
|
||||
char format[32];
|
||||
Py_ssize_t len;
|
||||
char *result;
|
||||
char *p;
|
||||
int t;
|
||||
Py_ssize_t bufsize;
|
||||
char *buf;
|
||||
int t, exp;
|
||||
int upper = 0;
|
||||
|
||||
/* Validate format_code, and map upper and lower case */
|
||||
|
@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Here's a quick-and-dirty calculation to figure out how big a buffer
|
||||
we need. In general, for a finite float we need:
|
||||
|
||||
1 byte for each digit of the decimal significand, and
|
||||
|
||||
1 for a possible sign
|
||||
1 for a possible decimal point
|
||||
2 for a possible [eE][+-]
|
||||
1 for each digit of the exponent; if we allow 19 digits
|
||||
total then we're safe up to exponents of 2**63.
|
||||
1 for the trailing nul byte
|
||||
|
||||
This gives a total of 24 + the number of digits in the significand,
|
||||
and the number of digits in the significand is:
|
||||
|
||||
for 'g' format: at most precision, except possibly
|
||||
when precision == 0, when it's 1.
|
||||
for 'e' format: precision+1
|
||||
for 'f' format: precision digits after the point, at least 1
|
||||
before. To figure out how many digits appear before the point
|
||||
we have to examine the size of the number. If fabs(val) < 1.0
|
||||
then there will be only one digit before the point. If
|
||||
fabs(val) >= 1.0, then there are at most
|
||||
|
||||
1+floor(log10(ceiling(fabs(val))))
|
||||
|
||||
digits before the point (where the 'ceiling' allows for the
|
||||
possibility that the rounding rounds the integer part of val
|
||||
up). A safe upper bound for the above quantity is
|
||||
1+floor(exp/3), where exp is the unique integer such that 0.5
|
||||
<= fabs(val)/2**exp < 1.0. This exp can be obtained from
|
||||
frexp.
|
||||
|
||||
So we allow room for precision+1 digits for all formats, plus an
|
||||
extra floor(exp/3) digits for 'f' format.
|
||||
|
||||
*/
|
||||
|
||||
if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
|
||||
/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
|
||||
bufsize = 5;
|
||||
else {
|
||||
bufsize = 25 + precision;
|
||||
if (format_code == 'f' && fabs(val) >= 1.0) {
|
||||
frexp(val, &exp);
|
||||
bufsize += exp/3;
|
||||
}
|
||||
}
|
||||
|
||||
buf = PyMem_Malloc(bufsize);
|
||||
if (buf == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Handle nan and inf. */
|
||||
if (Py_IS_NAN(val)) {
|
||||
strcpy(buf, "nan");
|
||||
|
@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
|
|||
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
|
||||
(flags & Py_DTSF_ALT ? "#" : ""), precision,
|
||||
format_code);
|
||||
_PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
|
||||
_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
|
||||
}
|
||||
|
||||
len = strlen(buf);
|
||||
|
||||
/* Add 1 for the trailing 0 byte.
|
||||
Add 1 because we might need to make room for the sign.
|
||||
*/
|
||||
result = PyMem_Malloc(len + 2);
|
||||
if (result == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return NULL;
|
||||
}
|
||||
p = result;
|
||||
|
||||
/* Add sign when requested. It's convenient (esp. when formatting
|
||||
complex numbers) to include a sign even for inf and nan. */
|
||||
if (flags & Py_DTSF_SIGN && buf[0] != '-')
|
||||
*p++ = '+';
|
||||
|
||||
strcpy(p, buf);
|
||||
|
||||
if (flags & Py_DTSF_SIGN && buf[0] != '-') {
|
||||
size_t len = strlen(buf);
|
||||
/* the bufsize calculations above should ensure that we've got
|
||||
space to add a sign */
|
||||
assert((size_t)bufsize >= len+2);
|
||||
memmove(buf+1, buf, len+1);
|
||||
buf[0] = '+';
|
||||
}
|
||||
if (upper) {
|
||||
/* Convert to upper case. */
|
||||
char *p1;
|
||||
for (p1 = p; *p1; p1++)
|
||||
for (p1 = buf; *p1; p1++)
|
||||
*p1 = Py_TOUPPER(*p1);
|
||||
}
|
||||
|
||||
if (type)
|
||||
*type = t;
|
||||
return result;
|
||||
return buf;
|
||||
}
|
||||
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue