From f489caf5daa2b0f3a1bd951b585c834aab1a54c6 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Fri, 1 May 2009 11:42:00 +0000 Subject: [PATCH] Issue #5859: Remove use of fixed-length buffers for float formatting in unicodeobject.c and the fallback version of PyOS_double_to_string. As a result, operations like '%.120e' % 12.34 no longer raise an exception. --- Lib/test/string_tests.py | 9 +--- Misc/NEWS | 3 ++ Objects/unicodeobject.c | 82 ++++++++--------------------------- Python/pystrtod.c | 94 +++++++++++++++++++++++++++++----------- 4 files changed, 90 insertions(+), 98 deletions(-) diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index 2a58e582d4a..1637efb51b2 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest: value = 0.01 for x in range(60): value = value * 3.141592655 / 3.0 * 10.0 - # The formatfloat() code in stringobject.c and - # unicodeobject.c uses a 120 byte buffer and switches from - # 'f' formatting to 'g' at precision 50, so we expect - # OverflowErrors for the ranges x < 50 and prec >= 67. - if x < 50 and prec >= 67: - self.checkraises(OverflowError, format, "__mod__", value) - else: - self.checkcall(format, "__mod__", value) + self.checkcall(format, "__mod__", value) def test_inplace_rewrites(self): # Check that strings don't copy and modify cached single-character strings diff --git a/Misc/NEWS b/Misc/NEWS index 257762b965e..3e8f5e6478e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1? Core and Builtins ----------------- +- Issue #5859: Remove length restrictions for float formatting: + '%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception. + - Issue #1588: Add complex.__format__. For example, format(complex(1, 2./3), '.5') now produces a sensible result. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3cea89921d1..31b9a73683c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) return NULL; } -static void -strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len) -{ - register Py_ssize_t i; - for (i = len - 1; i >= 0; i--) - buffer[i] = (Py_UNICODE) charbuffer[i]; -} +/* Returns a new reference to a PyUnicode object, or NULL on failure. */ -static int -formatfloat(Py_UNICODE *buf, - size_t buflen, - int flags, - int prec, - int type, - PyObject *v) +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) { - /* eric.smith: To minimize disturbances in PyUnicode_Format (the - only caller of this routine), I'm going to keep the existing - API to this function. That means that we'll allocate memory and - then copy back into the supplied buffer. But that's better than - all of the changes that would be required in PyUnicode_Format - because it does lots of memory management tricks. */ - - char* p = NULL; - int result = -1; + char *p; + PyObject *result; double x; - Py_ssize_t len; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) - goto done; + return NULL; + if (prec < 0) prec = 6; - /* make sure that the decimal representation of precision really does - need at most 10 digits: platforms with sizeof(int) == 8 exist! */ - if (prec > 0x7fffffffL) { - PyErr_SetString(PyExc_OverflowError, - "outrageously large precision " - "for formatted float"); - goto done; - } - if (type == 'f' && fabs(x) >= 1e50) type = 'g'; - if (((type == 'g' || type == 'G') && - buflen <= (size_t)10 + (size_t)prec) || - ((type == 'f' || type == 'F') && - buflen <= (size_t)53 + (size_t)prec)) { - PyErr_SetString(PyExc_OverflowError, - "formatted float is too long (precision too large?)"); - goto done; - } - p = PyOS_double_to_string(x, type, prec, (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); - len = strlen(p); - if (len+1 >= buflen) { - /* Caller supplied buffer is not large enough. */ - PyErr_NoMemory(); - goto done; - } - strtounicode(buf, p, len); - result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int); - -done: + if (p == NULL) + return NULL; + result = PyUnicode_FromStringAndSize(p, strlen(p)); PyMem_Free(p); return result; } @@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf, } /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) - - FORMATBUFLEN is the length of the buffer in which the floats, ints, & - chars are formatted. XXX This is a magic number. Each formatting - routine does bounds checking to ensure no overflow, but a better - solution may be to malloc a buffer of appropriate size for each - format. For now, the current solution is sufficient. + FORMATBUFLEN is the length of the buffer in which chars are formatted. */ -#define FORMATBUFLEN (size_t)120 +#define FORMATBUFLEN (size_t)10 PyObject *PyUnicode_Format(PyObject *format, PyObject *args) @@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format, Py_UNICODE *pbuf; Py_UNICODE sign; Py_ssize_t len; - Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */ + Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */ fmt++; if (*fmt == '(') { @@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format, case 'F': case 'g': case 'G': - pbuf = formatbuf; - len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), - flags, prec, c, v); - if (len < 0) + temp = formatfloat(v, flags, prec, c); + if (!temp) goto onError; + pbuf = PyUnicode_AS_UNICODE(temp); + len = PyUnicode_GET_SIZE(temp); sign = 1; if (flags & F_ZERO) fill = '0'; diff --git a/Python/pystrtod.c b/Python/pystrtod.c index e68f5d79e01..104061056e7 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val, int flags, int *type) { - char buf[128]; char format[32]; - Py_ssize_t len; - char *result; - char *p; - int t; + Py_ssize_t bufsize; + char *buf; + int t, exp; int upper = 0; /* Validate format_code, and map upper and lower case */ @@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val, return NULL; } + /* Here's a quick-and-dirty calculation to figure out how big a buffer + we need. In general, for a finite float we need: + + 1 byte for each digit of the decimal significand, and + + 1 for a possible sign + 1 for a possible decimal point + 2 for a possible [eE][+-] + 1 for each digit of the exponent; if we allow 19 digits + total then we're safe up to exponents of 2**63. + 1 for the trailing nul byte + + This gives a total of 24 + the number of digits in the significand, + and the number of digits in the significand is: + + for 'g' format: at most precision, except possibly + when precision == 0, when it's 1. + for 'e' format: precision+1 + for 'f' format: precision digits after the point, at least 1 + before. To figure out how many digits appear before the point + we have to examine the size of the number. If fabs(val) < 1.0 + then there will be only one digit before the point. If + fabs(val) >= 1.0, then there are at most + + 1+floor(log10(ceiling(fabs(val)))) + + digits before the point (where the 'ceiling' allows for the + possibility that the rounding rounds the integer part of val + up). A safe upper bound for the above quantity is + 1+floor(exp/3), where exp is the unique integer such that 0.5 + <= fabs(val)/2**exp < 1.0. This exp can be obtained from + frexp. + + So we allow room for precision+1 digits for all formats, plus an + extra floor(exp/3) digits for 'f' format. + + */ + + if (Py_IS_NAN(val) || Py_IS_INFINITY(val)) + /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */ + bufsize = 5; + else { + bufsize = 25 + precision; + if (format_code == 'f' && fabs(val) >= 1.0) { + frexp(val, &exp); + bufsize += exp/3; + } + } + + buf = PyMem_Malloc(bufsize); + if (buf == NULL) { + PyErr_NoMemory(); + return NULL; + } + /* Handle nan and inf. */ if (Py_IS_NAN(val)) { strcpy(buf, "nan"); @@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val, PyOS_snprintf(format, sizeof(format), "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code); - _PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision); + _PyOS_ascii_formatd(buf, bufsize, format, val, precision); } - len = strlen(buf); - - /* Add 1 for the trailing 0 byte. - Add 1 because we might need to make room for the sign. - */ - result = PyMem_Malloc(len + 2); - if (result == NULL) { - PyErr_NoMemory(); - return NULL; - } - p = result; - /* Add sign when requested. It's convenient (esp. when formatting complex numbers) to include a sign even for inf and nan. */ - if (flags & Py_DTSF_SIGN && buf[0] != '-') - *p++ = '+'; - - strcpy(p, buf); - + if (flags & Py_DTSF_SIGN && buf[0] != '-') { + size_t len = strlen(buf); + /* the bufsize calculations above should ensure that we've got + space to add a sign */ + assert((size_t)bufsize >= len+2); + memmove(buf+1, buf, len+1); + buf[0] = '+'; + } if (upper) { /* Convert to upper case. */ char *p1; - for (p1 = p; *p1; p1++) + for (p1 = buf; *p1; p1++) *p1 = Py_TOUPPER(*p1); } if (type) *type = t; - return result; + return buf; } #else