Issue #16741: Fix an error reporting in int().

This commit is contained in:
Serhiy Storchaka 2013-08-03 20:55:06 +03:00
parent 1f35ae0a3c
commit f6d0aeeadc
5 changed files with 101 additions and 67 deletions

View File

@ -84,6 +84,7 @@ PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int);
PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base);
PyAPI_FUNC(PyObject *) _PyLong_FromBytes(const char *, Py_ssize_t, int);
#endif
#ifndef Py_LIMITED_API

View File

@ -73,14 +73,6 @@ class IntTestCases(unittest.TestCase):
x = -1-sys.maxsize
self.assertEqual(x >> 1, x//2)
self.assertRaises(ValueError, int, '123\0')
self.assertRaises(ValueError, int, '53', 40)
# SF bug 1545497: embedded NULs were not detected with
# explicit base
self.assertRaises(ValueError, int, '123\0', 10)
self.assertRaises(ValueError, int, '123\x00 245', 20)
x = int('1' * 600)
self.assertIsInstance(x, int)
@ -360,14 +352,37 @@ class IntTestCases(unittest.TestCase):
int(TruncReturnsBadInt())
def test_error_message(self):
testlist = ('\xbd', '123\xbd', ' 123 456 ')
for s in testlist:
try:
int(s)
except ValueError as e:
self.assertIn(s.strip(), e.args[0])
else:
self.fail("Expected int(%r) to raise a ValueError", s)
def check(s, base=None):
with self.assertRaises(ValueError,
msg="int(%r, %r)" % (s, base)) as cm:
if base is None:
int(s)
else:
int(s, base)
self.assertEqual(cm.exception.args[0],
"invalid literal for int() with base %d: %r" %
(10 if base is None else base, s))
check('\xbd')
check('123\xbd')
check(' 123 456 ')
check('123\x00')
# SF bug 1545497: embedded NULs were not detected with explicit base
check('123\x00', 10)
check('123\x00 245', 20)
check('123\x00 245', 16)
check('123\x00245', 20)
check('123\x00245', 16)
# byte string with embedded NUL
check(b'123\x00')
check(b'123\x00', 10)
# non-UTF-8 byte string
check(b'123\xbd')
check(b'123\xbd', 10)
# lone surrogate in Unicode string
check('123\ud800')
check('123\ud800', 10)
def test_main():
support.run_unittest(IntTestCases)

View File

@ -12,6 +12,8 @@ What's New in Python 3.3.3 release candidate 1?
Core and Builtins
-----------------
- Issue #16741: Fix an error reporting in int().
- Issue #17899: Fix rare file descriptor leak in os.listdir().
- Issue #18552: Check return value of PyArena_AddPyObject() in

View File

@ -1240,25 +1240,6 @@ convert_integral_to_int(PyObject *integral, const char *error_format)
}
/* Add a check for embedded NULL-bytes in the argument. */
static PyObject *
long_from_string(const char *s, Py_ssize_t len)
{
char *end;
PyObject *x;
x = PyLong_FromString((char*)s, &end, 10);
if (x == NULL)
return NULL;
if (end != s + len) {
PyErr_SetString(PyExc_ValueError,
"null byte in argument for int()");
Py_DECREF(x);
return NULL;
}
return x;
}
PyObject *
PyNumber_Long(PyObject *o)
{
@ -1306,16 +1287,16 @@ PyNumber_Long(PyObject *o)
if (PyBytes_Check(o))
/* need to do extra error checking that PyLong_FromString()
* doesn't do. In particular int('9.5') must raise an
* exception, not truncate the float.
* doesn't do. In particular int('9\x005') must raise an
* exception, not truncate at the null.
*/
return long_from_string(PyBytes_AS_STRING(o),
PyBytes_GET_SIZE(o));
return _PyLong_FromBytes(PyBytes_AS_STRING(o),
PyBytes_GET_SIZE(o), 10);
if (PyUnicode_Check(o))
/* The above check is done in PyLong_FromUnicode(). */
return PyLong_FromUnicodeObject(o, 10);
if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
return long_from_string(buffer, buffer_len);
return _PyLong_FromBytes(buffer, buffer_len, 10);
return type_error("int() argument must be a string or a "
"number, not '%.200s'", o);

View File

@ -2005,6 +2005,14 @@ long_from_binary_base(char **str, int base)
return long_normalize(z);
}
/* Parses a long from a bytestring. Leading and trailing whitespace will be
* ignored.
*
* If successful, a PyLong object will be returned and 'pend' will be pointing
* to the first unused byte unless it's NULL.
*
* If unsuccessful, NULL will be returned.
*/
PyObject *
PyLong_FromString(char *str, char **pend, int base)
{
@ -2267,12 +2275,17 @@ digit beyond the first.
str++;
if (*str != '\0')
goto onError;
if (pend)
*pend = str;
long_normalize(z);
return (PyObject *) maybe_small_long(z);
z = maybe_small_long(z);
if (z == NULL)
return NULL;
if (pend != NULL)
*pend = str;
return (PyObject *) z;
onError:
if (pend != NULL)
*pend = str;
Py_XDECREF(z);
slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
strobj = PyUnicode_FromStringAndSize(orig_str, slen);
@ -2285,6 +2298,31 @@ digit beyond the first.
return NULL;
}
/* Since PyLong_FromString doesn't have a length parameter,
* check here for possible NULs in the string.
*
* Reports an invalid literal as a bytes object.
*/
PyObject *
_PyLong_FromBytes(const char *s, Py_ssize_t len, int base)
{
PyObject *result, *strobj;
char *end = NULL;
result = PyLong_FromString((char*)s, &end, base);
if (end == NULL || (result != NULL && end == s + len))
return result;
Py_XDECREF(result);
strobj = PyBytes_FromStringAndSize(s, Py_MIN(len, 200));
if (strobj != NULL) {
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %R",
base, strobj);
Py_DECREF(strobj);
}
return NULL;
}
PyObject *
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
{
@ -2299,9 +2337,8 @@ PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
PyObject *
PyLong_FromUnicodeObject(PyObject *u, int base)
{
PyObject *result;
PyObject *asciidig;
char *buffer, *end;
PyObject *result, *asciidig, *strobj;
char *buffer, *end = NULL;
Py_ssize_t buflen;
asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u);
@ -2310,17 +2347,26 @@ PyLong_FromUnicodeObject(PyObject *u, int base)
buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen);
if (buffer == NULL) {
Py_DECREF(asciidig);
return NULL;
if (!PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
return NULL;
}
result = PyLong_FromString(buffer, &end, base);
if (result != NULL && end != buffer + buflen) {
PyErr_SetString(PyExc_ValueError,
"null byte in argument for int()");
Py_DECREF(result);
result = NULL;
else {
result = PyLong_FromString(buffer, &end, base);
if (end == NULL || (result != NULL && end == buffer + buflen)) {
Py_DECREF(asciidig);
return result;
}
Py_DECREF(asciidig);
Py_XDECREF(result);
}
Py_DECREF(asciidig);
return result;
strobj = PySequence_GetSlice(u, 0, 200);
if (strobj != NULL) {
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %R",
base, strobj);
Py_DECREF(strobj);
}
return NULL;
}
/* forward */
@ -4308,23 +4354,12 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (PyUnicode_Check(x))
return PyLong_FromUnicodeObject(x, (int)base);
else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
/* Since PyLong_FromString doesn't have a length parameter,
* check here for possible NULs in the string. */
char *string;
Py_ssize_t size = Py_SIZE(x);
if (PyByteArray_Check(x))
string = PyByteArray_AS_STRING(x);
else
string = PyBytes_AS_STRING(x);
if (strlen(string) != (size_t)size || !size) {
/* We only see this if there's a null byte in x or x is empty,
x is a bytes or buffer, *and* a base is given. */
PyErr_Format(PyExc_ValueError,
"invalid literal for int() with base %d: %R",
(int)base, x);
return NULL;
}
return PyLong_FromString(string, NULL, (int)base);
return _PyLong_FromBytes(string, Py_SIZE(x), (int)base);
}
else {
PyErr_SetString(PyExc_TypeError,