Issue #16741: Fix error reporting in int().

This commit is contained in:
Serhiy Storchaka 2013-08-03 21:14:05 +03:00
commit 579ddc2fd4
5 changed files with 98 additions and 68 deletions

View File

@ -97,6 +97,7 @@ PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int);
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int);
PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base); PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base);
PyAPI_FUNC(PyObject *) _PyLong_FromBytes(const char *, Py_ssize_t, int);
#endif #endif
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API

View File

@ -73,14 +73,6 @@ class IntTestCases(unittest.TestCase):
x = -1-sys.maxsize x = -1-sys.maxsize
self.assertEqual(x >> 1, x//2) self.assertEqual(x >> 1, x//2)
self.assertRaises(ValueError, int, '123\0')
self.assertRaises(ValueError, int, '53', 40)
# SF bug 1545497: embedded NULs were not detected with
# explicit base
self.assertRaises(ValueError, int, '123\0', 10)
self.assertRaises(ValueError, int, '123\x00 245', 20)
x = int('1' * 600) x = int('1' * 600)
self.assertIsInstance(x, int) self.assertIsInstance(x, int)
@ -401,14 +393,37 @@ class IntTestCases(unittest.TestCase):
int(TruncReturnsBadInt()) int(TruncReturnsBadInt())
def test_error_message(self): def test_error_message(self):
testlist = ('\xbd', '123\xbd', ' 123 456 ') def check(s, base=None):
for s in testlist: with self.assertRaises(ValueError,
try: msg="int(%r, %r)" % (s, base)) as cm:
if base is None:
int(s) int(s)
except ValueError as e:
self.assertIn(s.strip(), e.args[0])
else: else:
self.fail("Expected int(%r) to raise a ValueError", s) int(s, base)
self.assertEqual(cm.exception.args[0],
"invalid literal for int() with base %d: %r" %
(10 if base is None else base, s))
check('\xbd')
check('123\xbd')
check(' 123 456 ')
check('123\x00')
# SF bug 1545497: embedded NULs were not detected with explicit base
check('123\x00', 10)
check('123\x00 245', 20)
check('123\x00 245', 16)
check('123\x00245', 20)
check('123\x00245', 16)
# byte string with embedded NUL
check(b'123\x00')
check(b'123\x00', 10)
# non-UTF-8 byte string
check(b'123\xbd')
check(b'123\xbd', 10)
# lone surrogate in Unicode string
check('123\ud800')
check('123\ud800', 10)
def test_main(): def test_main():
support.run_unittest(IntTestCases) support.run_unittest(IntTestCases)

View File

@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #16741: Fix error reporting in int().
- Issue #17899: Fix rare file descriptor leak in os.listdir(). - Issue #17899: Fix rare file descriptor leak in os.listdir().
- Issue #9035: ismount now recognises volumes mounted below a drive root - Issue #9035: ismount now recognises volumes mounted below a drive root

View File

@ -1261,25 +1261,6 @@ convert_integral_to_int(PyObject *integral, const char *error_format)
} }
/* Base-10 conversion of a buffer whose length is known.
 *
 * PyLong_FromString() reports where it stopped parsing; if that position is
 * not exactly s + len, the conversion is rejected with a ValueError.
 * Returns a new reference on success, NULL with an exception set otherwise.
 */
static PyObject *
long_from_string(const char *s, Py_ssize_t len)
{
    char *stop;
    PyObject *value;

    value = PyLong_FromString((char *)s, &stop, 10);
    if (value == NULL)
        return NULL;

    /* Parsing consumed the whole buffer: nothing more to verify. */
    if (stop == s + len)
        return value;

    /* Parsing stopped before the end of the buffer. */
    PyErr_SetString(PyExc_ValueError,
                    "null byte in argument for int()");
    Py_DECREF(value);
    return NULL;
}
PyObject * PyObject *
PyNumber_Long(PyObject *o) PyNumber_Long(PyObject *o)
{ {
@ -1327,16 +1308,16 @@ PyNumber_Long(PyObject *o)
if (PyBytes_Check(o)) if (PyBytes_Check(o))
/* need to do extra error checking that PyLong_FromString() /* need to do extra error checking that PyLong_FromString()
* doesn't do. In particular int('9.5') must raise an * doesn't do. In particular int('9\x005') must raise an
* exception, not truncate the float. * exception, not truncate at the null.
*/ */
return long_from_string(PyBytes_AS_STRING(o), return _PyLong_FromBytes(PyBytes_AS_STRING(o),
PyBytes_GET_SIZE(o)); PyBytes_GET_SIZE(o), 10);
if (PyUnicode_Check(o)) if (PyUnicode_Check(o))
/* The above check is done in PyLong_FromUnicode(). */ /* The above check is done in PyLong_FromUnicode(). */
return PyLong_FromUnicodeObject(o, 10); return PyLong_FromUnicodeObject(o, 10);
if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
return long_from_string(buffer, buffer_len); return _PyLong_FromBytes(buffer, buffer_len, 10);
return type_error("int() argument must be a string or a " return type_error("int() argument must be a string or a "
"number, not '%.200s'", o); "number, not '%.200s'", o);

View File

@ -2000,6 +2000,14 @@ long_from_binary_base(char **str, int base)
return long_normalize(z); return long_normalize(z);
} }
/* Parses a long from a bytestring. Leading and trailing whitespace is
* ignored.
*
* If successful, a PyLong object is returned and, unless 'pend' is NULL,
* '*pend' is set to point to the first unused byte.
*
* If unsuccessful, NULL is returned.
*/
PyObject * PyObject *
PyLong_FromString(char *str, char **pend, int base) PyLong_FromString(char *str, char **pend, int base)
{ {
@ -2262,24 +2270,54 @@ digit beyond the first.
str++; str++;
if (*str != '\0') if (*str != '\0')
goto onError; goto onError;
if (pend)
*pend = str;
long_normalize(z); long_normalize(z);
return (PyObject *) maybe_small_long(z); z = maybe_small_long(z);
if (z == NULL)
return NULL;
if (pend != NULL)
*pend = str;
return (PyObject *) z;
onError: onError:
if (pend != NULL)
*pend = str;
Py_XDECREF(z); Py_XDECREF(z);
slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200; slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
strobj = PyUnicode_FromStringAndSize(orig_str, slen); strobj = PyUnicode_FromStringAndSize(orig_str, slen);
if (strobj == NULL) if (strobj == NULL)
return NULL; return NULL;
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %R", "invalid literal for int() with base %d: %.200R",
base, strobj); base, strobj);
Py_DECREF(strobj); Py_DECREF(strobj);
return NULL; return NULL;
} }
/* Length-aware front end to PyLong_FromString(), which takes no length
 * argument; the extra check here catches NUL bytes inside the input.
 *
 * The parse is accepted only when it ended exactly at s + len.  Otherwise a
 * ValueError is raised whose message shows the literal as the repr of a bytes
 * object built from at most the first 200 bytes of 's'.
 */
PyObject *
_PyLong_FromBytes(const char *s, Py_ssize_t len, int base)
{
    PyObject *value, *literal;
    char *stop = NULL;

    value = PyLong_FromString((char *)s, &stop, base);

    /* No end position was stored by PyLong_FromString(): hand its result
     * back unchanged. */
    if (stop == NULL)
        return value;

    /* Successful parse that consumed all 'len' bytes. */
    if (value != NULL && stop == s + len)
        return value;

    /* Either parsing failed, or it ended before s + len.  Drop any partial
     * result and report the literal as bytes. */
    Py_XDECREF(value);
    literal = PyBytes_FromStringAndSize(s, Py_MIN(len, 200));
    if (literal == NULL)
        return NULL;

    PyErr_Format(PyExc_ValueError,
                 "invalid literal for int() with base %d: %.200R",
                 base, literal);
    Py_DECREF(literal);
    return NULL;
}
PyObject * PyObject *
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
{ {
@ -2294,9 +2332,8 @@ PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
PyObject * PyObject *
PyLong_FromUnicodeObject(PyObject *u, int base) PyLong_FromUnicodeObject(PyObject *u, int base)
{ {
PyObject *result; PyObject *result, *asciidig;
PyObject *asciidig; char *buffer, *end = NULL;
char *buffer, *end;
Py_ssize_t buflen; Py_ssize_t buflen;
asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u); asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u);
@ -2305,18 +2342,23 @@ PyLong_FromUnicodeObject(PyObject *u, int base)
buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen); buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen);
if (buffer == NULL) { if (buffer == NULL) {
Py_DECREF(asciidig); Py_DECREF(asciidig);
if (!PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
return NULL; return NULL;
} }
else {
result = PyLong_FromString(buffer, &end, base); result = PyLong_FromString(buffer, &end, base);
if (result != NULL && end != buffer + buflen) { if (end == NULL || (result != NULL && end == buffer + buflen)) {
PyErr_SetString(PyExc_ValueError,
"null byte in argument for int()");
Py_DECREF(result);
result = NULL;
}
Py_DECREF(asciidig); Py_DECREF(asciidig);
return result; return result;
} }
Py_DECREF(asciidig);
Py_XDECREF(result);
}
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %.200R",
base, u);
return NULL;
}
/* forward */ /* forward */
static PyLongObject *x_divrem static PyLongObject *x_divrem
@ -4319,23 +4361,12 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (PyUnicode_Check(x)) if (PyUnicode_Check(x))
return PyLong_FromUnicodeObject(x, (int)base); return PyLong_FromUnicodeObject(x, (int)base);
else if (PyByteArray_Check(x) || PyBytes_Check(x)) { else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
/* Since PyLong_FromString doesn't have a length parameter,
* check here for possible NULs in the string. */
char *string; char *string;
Py_ssize_t size = Py_SIZE(x);
if (PyByteArray_Check(x)) if (PyByteArray_Check(x))
string = PyByteArray_AS_STRING(x); string = PyByteArray_AS_STRING(x);
else else
string = PyBytes_AS_STRING(x); string = PyBytes_AS_STRING(x);
if (strlen(string) != (size_t)size || !size) { return _PyLong_FromBytes(string, Py_SIZE(x), (int)base);
/* We only see this if there's a null byte in x or x is empty,
x is a bytes or buffer, *and* a base is given. */
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %R",
(int)base, x);
return NULL;
}
return PyLong_FromString(string, NULL, (int)base);
} }
else { else {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,