From d0880d57b053179a8dd91f2b6fbcb5b5ddf56a1d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 27 Apr 2012 23:40:13 +0200 Subject: [PATCH] Simplify and optimize formatlong() * Remove _PyBytes_FormatLong(): inline it into formatlong() * the input type is always a long, so remove the code for bool * don't duplicate the string if the length does not change * Use PyUnicode_DATA() instead of _PyUnicode_AsString() --- Include/bytesobject.h | 2 - Objects/bytesobject.c | 143 ---------------------------------------- Objects/unicodeobject.c | 135 +++++++++++++++++++++++++++++++++++-- 3 files changed, 128 insertions(+), 152 deletions(-) diff --git a/Include/bytesobject.h b/Include/bytesobject.h index e1af89f9c95..d7c7ffd37ba 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -62,8 +62,6 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(PyObject *) _PyBytes_FormatLong(PyObject*, int, int, - int, char**, int*); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b07be26896f..efe6ef87b35 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2860,149 +2860,6 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) return 0; } -/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and - * the F_ALT flag, for Python's long (unbounded) ints. It's not used for - * Python's regular ints. - * Return value: a new PyBytes*, or NULL if error. - * . *pbuf is set to point into it, - * *plen set to the # of chars following that. - * Caller must decref it when done using pbuf. - * The string starting at *pbuf is of the form - * "-"? ("0x" | "0X")? digit+ - * "0x"/"0X" are present only for x and X conversions, with F_ALT - * set in flags. The case of hex digits will be correct, - * There will be at least prec digits, zero-filled on the left if - * necessary to get that many. - * val object to be converted - * flags bitmask of format flags; only F_ALT is looked at - * prec minimum number of digits; 0-fill on left if needed - * type a character in [duoxX]; u acts the same as d - * - * CAUTION: o, x and X conversions on regular ints can never - * produce a '-' sign, but can for Python's unbounded ints. - */ -PyObject* -_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type, - char **pbuf, int *plen) -{ - PyObject *result = NULL; - char *buf; - Py_ssize_t i; - int sign; /* 1 if '-', else 0 */ - int len; /* number of characters */ - Py_ssize_t llen; - int numdigits; /* len == numnondigits + numdigits */ - int numnondigits = 0; - - /* Avoid exceeding SSIZE_T_MAX */ - if (prec > INT_MAX-3) { - PyErr_SetString(PyExc_OverflowError, - "precision too large"); - return NULL; - } - - switch (type) { - case 'd': - case 'u': - /* Special-case boolean: we want 0/1 */ - if (PyBool_Check(val)) - result = PyNumber_ToBase(val, 10); - else - result = Py_TYPE(val)->tp_str(val); - break; - case 'o': - numnondigits = 2; - result = PyNumber_ToBase(val, 8); - break; - case 'x': - case 'X': - numnondigits = 2; - result = PyNumber_ToBase(val, 16); - break; - default: - assert(!"'type' not in [duoxX]"); - } - if (!result) - return NULL; - - buf = _PyUnicode_AsString(result); - if (!buf) { - Py_DECREF(result); - return NULL; - } - - /* To modify the string in-place, there can only be one reference. */ - if (Py_REFCNT(result) != 1) { - PyErr_BadInternalCall(); - return NULL; - } - llen = PyUnicode_GetLength(result); - if (llen > INT_MAX) { - PyErr_SetString(PyExc_ValueError, - "string too large in _PyBytes_FormatLong"); - return NULL; - } - len = (int)llen; - if (buf[len-1] == 'L') { - --len; - buf[len] = '\0'; - } - sign = buf[0] == '-'; - numnondigits += sign; - numdigits = len - numnondigits; - assert(numdigits > 0); - - /* Get rid of base marker unless F_ALT */ - if (((flags & F_ALT) == 0 && - (type == 'o' || type == 'x' || type == 'X'))) { - assert(buf[sign] == '0'); - assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || - buf[sign+1] == 'o'); - numnondigits -= 2; - buf += 2; - len -= 2; - if (sign) - buf[0] = '-'; - assert(len == numnondigits + numdigits); - assert(numdigits > 0); - } - - /* Fill with leading zeroes to meet minimum width. */ - if (prec > numdigits) { - PyObject *r1 = PyBytes_FromStringAndSize(NULL, - numnondigits + prec); - char *b1; - if (!r1) { - Py_DECREF(result); - return NULL; - } - b1 = PyBytes_AS_STRING(r1); - for (i = 0; i < numnondigits; ++i) - *b1++ = *buf++; - for (i = 0; i < prec - numdigits; i++) - *b1++ = '0'; - for (i = 0; i < numdigits; i++) - *b1++ = *buf++; - *b1 = '\0'; - Py_DECREF(result); - result = r1; - buf = PyBytes_AS_STRING(result); - len = numnondigits + prec; - } - - /* Fix up case for hex conversions. */ - if (type == 'X') { - /* Need to convert all lower case letters to upper case. - and need to convert 0x to 0X (and -0x to -0X). */ - for (i = 0; i < len; i++) - if (buf[i] >= 'a' && buf[i] <= 'x') - buf[i] -= 'a'-'A'; - } - *pbuf = buf; - *plen = len; - return result; -} - void PyBytes_Fini(void) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3afb799fc3b..ac77114da97 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13438,19 +13438,140 @@ formatfloat(PyObject *v, int flags, int prec, int type) return result; } +/* formatlong() emulates the format codes d, u, o, x and X, and + * the F_ALT flag, for Python's long (unbounded) ints. It's not used for + * Python's regular ints. + * Return value: a new PyUnicodeObject*, or NULL if error. + * The output string is of the form + * "-"? ("0x" | "0X")? digit+ + * "0x"/"0X" are present only for x and X conversions, with F_ALT + * set in flags. The case of hex digits will be correct, + * There will be at least prec digits, zero-filled on the left if + * necessary to get that many. + * val object to be converted + * flags bitmask of format flags; only F_ALT is looked at + * prec minimum number of digits; 0-fill on left if needed + * type a character in [duoxX]; u acts the same as d + * + * CAUTION: o, x and X conversions on regular ints can never + * produce a '-' sign, but can for Python's unbounded ints. + */ static PyObject* formatlong(PyObject *val, int flags, int prec, int type) { + PyObject *result = NULL; char *buf; - int len; - PyObject *str; /* temporary string object. */ - PyObject *result; + Py_ssize_t i; + int sign; /* 1 if '-', else 0 */ + int len; /* number of characters */ + Py_ssize_t llen; + int numdigits; /* len == numnondigits + numdigits */ + int numnondigits = 0; - str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len); - if (!str) + /* Avoid exceeding SSIZE_T_MAX */ + if (prec > INT_MAX-3) { + PyErr_SetString(PyExc_OverflowError, + "precision too large"); return NULL; - result = PyUnicode_DecodeASCII(buf, len, NULL); - Py_DECREF(str); + } + + assert(PyLong_Check(val)); + + switch (type) { + case 'd': + case 'u': + /* Special-case boolean: we want 0/1 */ + result = Py_TYPE(val)->tp_str(val); + break; + case 'o': + numnondigits = 2; + result = PyNumber_ToBase(val, 8); + break; + case 'x': + case 'X': + numnondigits = 2; + result = PyNumber_ToBase(val, 16); + break; + default: + assert(!"'type' not in [duoxX]"); + } + if (!result) + return NULL; + + assert(unicode_modifiable(result)); + assert(PyUnicode_IS_READY(result)); + assert(PyUnicode_IS_ASCII(result)); + + /* To modify the string in-place, there can only be one reference. */ + if (Py_REFCNT(result) != 1) { + PyErr_BadInternalCall(); + return NULL; + } + buf = PyUnicode_DATA(result); + llen = PyUnicode_GET_LENGTH(result); + if (llen > INT_MAX) { + PyErr_SetString(PyExc_ValueError, + "string too large in _PyBytes_FormatLong"); + return NULL; + } + len = (int)llen; + sign = buf[0] == '-'; + numnondigits += sign; + numdigits = len - numnondigits; + assert(numdigits > 0); + + /* Get rid of base marker unless F_ALT */ + if (((flags & F_ALT) == 0 && + (type == 'o' || type == 'x' || type == 'X'))) { + assert(buf[sign] == '0'); + assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || + buf[sign+1] == 'o'); + numnondigits -= 2; + buf += 2; + len -= 2; + if (sign) + buf[0] = '-'; + assert(len == numnondigits + numdigits); + assert(numdigits > 0); + } + + /* Fill with leading zeroes to meet minimum width. */ + if (prec > numdigits) { + PyObject *r1 = PyBytes_FromStringAndSize(NULL, + numnondigits + prec); + char *b1; + if (!r1) { + Py_DECREF(result); + return NULL; + } + b1 = PyBytes_AS_STRING(r1); + for (i = 0; i < numnondigits; ++i) + *b1++ = *buf++; + for (i = 0; i < prec - numdigits; i++) + *b1++ = '0'; + for (i = 0; i < numdigits; i++) + *b1++ = *buf++; + *b1 = '\0'; + Py_DECREF(result); + result = r1; + buf = PyBytes_AS_STRING(result); + len = numnondigits + prec; + } + + /* Fix up case for hex conversions. */ + if (type == 'X') { + /* Need to convert all lower case letters to upper case. + and need to convert 0x to 0X (and -0x to -0X). */ + for (i = 0; i < len; i++) + if (buf[i] >= 'a' && buf[i] <= 'x') + buf[i] -= 'a'-'A'; + } + if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { + PyObject *unicode; + unicode = unicode_fromascii((unsigned char *)buf, len); + Py_DECREF(result); + result = unicode; + } return result; }