From 4b63c21d6faebc406260733c16b826e3e01b0d89 Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Wed, 4 Oct 2006 11:44:06 +0000 Subject: [PATCH] Forward-port of r52136: a review of overflow-detecting code. * unified the way intobject, longobject and mystrtoul handle values around -sys.maxint-1. * in general, trying to entierely avoid overflows in any computation involving signed ints or longs is extremely involved. Fixed a few simple cases where a compiler might be too clever (but that's all guesswork). * more overflow checks against bad data in marshal.c. * 2.5 specific: fixed a number of places that were still confusing int and Py_ssize_t. Some of them could potentially have caused "real-world" breakage. * list.pop(x): fixing overflow issues on x was messy. I just reverted to PyArg_ParseTuple("n"), which does the right thing. (An obscure test was trying to give a Decimal to list.pop()... doesn't make sense any more IMHO) * trying to write a few tests... --- Lib/test/list_tests.py | 3 -- Lib/test/test_builtin.py | 11 +++++-- Lib/test/test_long.py | 23 +++++++++++-- Misc/NEWS | 10 ++++-- Modules/cPickle.c | 12 ++++--- Objects/abstract.c | 14 ++++---- Objects/fileobject.c | 8 +++-- Objects/intobject.c | 24 +++++++++----- Objects/listobject.c | 11 ++----- Objects/longobject.c | 71 ++++++++++++++++++++++++++-------------- Objects/stringobject.c | 25 ++++++++++---- Objects/typeobject.c | 17 +++------- Objects/unicodeobject.c | 7 ++-- Python/errors.c | 3 +- Python/getargs.c | 7 ++-- Python/marshal.c | 38 +++++++++++++-------- Python/modsupport.c | 2 +- Python/mystrtoul.c | 12 +++---- Python/sysmodule.c | 2 +- 19 files changed, 187 insertions(+), 113 deletions(-) diff --git a/Lib/test/list_tests.py b/Lib/test/list_tests.py index 14b54c77352..7c6623a22f8 100644 --- a/Lib/test/list_tests.py +++ b/Lib/test/list_tests.py @@ -269,7 +269,6 @@ class CommonTest(seq_tests.CommonTest): self.assertRaises(TypeError, a.insert) def test_pop(self): - from decimal import Decimal a = self.type2test([-1, 0, 1]) a.pop() self.assertEqual(a, [-1, 0]) @@ -281,8 +280,6 @@ class CommonTest(seq_tests.CommonTest): self.assertRaises(IndexError, a.pop) self.assertRaises(TypeError, a.pop, 42, 42) a = self.type2test([0, 10, 20, 30, 40]) - self.assertEqual(a.pop(Decimal(2)), 20) - self.assertRaises(IndexError, a.pop, Decimal(25)) def test_remove(self): a = self.type2test([0, 0, 1]) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index f3bdbe29c4f..f7cf8118e2f 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -156,6 +156,11 @@ class BuiltinTest(unittest.TestCase): S = [10, 20, 30] self.assertEqual(any(x > 42 for x in S), False) + def test_neg(self): + x = -sys.maxint-1 + self.assert_(isinstance(x, int)) + self.assertEqual(-x, sys.maxint+1) + def test_apply(self): def f0(*args): self.assertEqual(args, ()) @@ -702,9 +707,11 @@ class BuiltinTest(unittest.TestCase): pass s = repr(-1-sys.maxint) - self.assertEqual(int(s)+1, -sys.maxint) + x = int(s) + self.assertEqual(x+1, -sys.maxint) + self.assert_(isinstance(x, int)) # should return long - int(s[1:]) + self.assertEqual(int(s[1:]), sys.maxint+1) # should return long x = int(1e100) diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 7b0c7b0798a..ae132adad93 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -247,17 +247,23 @@ class LongTest(unittest.TestCase): "long(-sys.maxint-1) != -sys.maxint-1") # long -> int should not fail for hugepos_aslong or hugeneg_aslong + x = int(hugepos_aslong) try: - self.assertEqual(int(hugepos_aslong), hugepos, + self.assertEqual(x, hugepos, "converting sys.maxint to long and back to int fails") except OverflowError: self.fail("int(long(sys.maxint)) overflowed!") + if not isinstance(x, int): + raise TestFailed("int(long(sys.maxint)) should have returned int") + x = int(hugeneg_aslong) try: - self.assertEqual(int(hugeneg_aslong), hugeneg, + self.assertEqual(x, hugeneg, "converting -sys.maxint-1 to long and back to int fails") except OverflowError: self.fail("int(long(-sys.maxint-1)) overflowed!") - + if not isinstance(x, int): + raise TestFailed("int(long(-sys.maxint-1)) should have " + "returned int") # but long -> int should overflow for hugepos+1 and hugeneg-1 x = hugepos_aslong + 1 try: @@ -282,6 +288,17 @@ class LongTest(unittest.TestCase): self.assert_(type(y) is long, "overflowing int conversion must return long not long subtype") + # long -> Py_ssize_t conversion + class X(object): + def __getslice__(self, i, j): + return i, j + + self.assertEqual(X()[-5L:7L], (-5, 7)) + # use the clamping effect to test the smallest and largest longs + # that fit a Py_ssize_t + slicemin, slicemax = X()[-2L**100:2L**100] + self.assertEqual(X()[slicemin:slicemax], (slicemin, slicemax)) + # ----------------------------------- tests of auto int->long conversion def test_auto_overflow(self): diff --git a/Misc/NEWS b/Misc/NEWS index e71be104e9f..85587ebf3a6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,8 +12,14 @@ What's New in Python 2.5.1c1? Core and builtins ----------------- -- Integer negation and absolute value were fixed to not rely - on undefined behaviour of the C compiler anymore. +- list.pop(x) accepts any object x following the __index__ protocol. + +- Fix some leftovers from the conversion from int to Py_ssize_t + (relevant to strings and sequences of more than 2**31 items). + +- A number of places, including integer negation and absolute value, + were fixed to not rely on undefined behaviour of the C compiler + anymore. - Bug #1566800: make sure that EnvironmentError can be called with any number of arguments, as was the case in Python 2.4. diff --git a/Modules/cPickle.c b/Modules/cPickle.c index 24c98ccb225..4f7d1f198af 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -1024,7 +1024,7 @@ save_int(Picklerobject *self, PyObject *args) static int save_long(Picklerobject *self, PyObject *args) { - int size; + Py_ssize_t size; int res = -1; PyObject *repr = NULL; @@ -1066,7 +1066,7 @@ save_long(Picklerobject *self, PyObject *args) * byte at the start, and cut it back later if possible. */ nbytes = (nbits >> 3) + 1; - if ((int)nbytes < 0 || (size_t)(int)nbytes != nbytes) { + if (nbytes > INT_MAX) { PyErr_SetString(PyExc_OverflowError, "long too large " "to pickle"); goto finally; @@ -1208,12 +1208,14 @@ save_string(Picklerobject *self, PyObject *args, int doput) c_str[1] = size; len = 2; } - else { + else if (size <= INT_MAX) { c_str[0] = BINSTRING; for (i = 1; i < 5; i++) c_str[i] = (int)(size >> ((i - 1) * 8)); len = 5; } + else + return -1; /* string too large */ if (self->write_func(self, c_str, len) < 0) return -1; @@ -1286,7 +1288,7 @@ modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, int size) static int save_unicode(Picklerobject *self, PyObject *args, int doput) { - int size, len; + Py_ssize_t size, len; PyObject *repr=0; if (!PyUnicode_Check(args)) @@ -1325,6 +1327,8 @@ save_unicode(Picklerobject *self, PyObject *args, int doput) if ((size = PyString_Size(repr)) < 0) goto err; + if (size > INT_MAX) + return -1; /* string too large */ c_str[0] = BINUNICODE; for (i = 1; i < 5; i++) diff --git a/Objects/abstract.c b/Objects/abstract.c index a18bb787eab..7115c523c2d 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1652,20 +1652,18 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) if (cmp > 0) { switch (operation) { case PY_ITERSEARCH_COUNT: - ++n; - if (n <= 0) { - /* XXX(nnorwitz): int means ssize_t */ + if (n == PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, - "count exceeds C int size"); + "count exceeds C integer size"); goto Fail; } + ++n; break; case PY_ITERSEARCH_INDEX: if (wrapped) { - /* XXX(nnorwitz): int means ssize_t */ PyErr_SetString(PyExc_OverflowError, - "index exceeds C int size"); + "index exceeds C integer size"); goto Fail; } goto Done; @@ -1680,9 +1678,9 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation) } if (operation == PY_ITERSEARCH_INDEX) { - ++n; - if (n <= 0) + if (n == PY_SSIZE_T_MAX) wrapped = 1; + ++n; } } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index b43bf85b872..ced07684fd3 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -1001,6 +1001,7 @@ getline_via_fgets(FILE *fp) size_t nfree; /* # of free buffer slots; pvend-pvfree */ size_t total_v_size; /* total # of slots in buffer */ size_t increment; /* amount to increment the buffer */ + size_t prev_v_size; /* Optimize for normal case: avoid _PyString_Resize if at all * possible via first reading into stack buffer "buf". @@ -1115,8 +1116,11 @@ getline_via_fgets(FILE *fp) /* expand buffer and try again */ assert(*(pvend-1) == '\0'); increment = total_v_size >> 2; /* mild exponential growth */ + prev_v_size = total_v_size; total_v_size += increment; - if (total_v_size > PY_SSIZE_T_MAX) { + /* check for overflow */ + if (total_v_size <= prev_v_size || + total_v_size > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_OverflowError, "line is longer than a Python string can hold"); Py_DECREF(v); @@ -1125,7 +1129,7 @@ getline_via_fgets(FILE *fp) if (_PyString_Resize(&v, (int)total_v_size) < 0) return NULL; /* overwrite the trailing null byte */ - pvfree = BUF(v) + (total_v_size - increment - 1); + pvfree = BUF(v) + (prev_v_size - 1); } if (BUF(v) + total_v_size != p) _PyString_Resize(&v, p - BUF(v)); diff --git a/Objects/intobject.c b/Objects/intobject.c index 28f76067107..a4d50be1294 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -546,6 +546,17 @@ int_mul(PyObject *v, PyObject *w) } } +/* Integer overflow checking for unary negation: on a 2's-complement + * box, -x overflows iff x is the most negative long. In this case we + * get -x == x. However, -x is undefined (by C) if x /is/ the most + * negative long (it's a signed overflow case), and some compilers care. + * So we cast x to unsigned long first. However, then other compilers + * warn about applying unary minus to an unsigned operand. Hence the + * weird "0-". + */ +#define UNARY_NEG_WOULD_OVERFLOW(x) \ + ((x) < 0 && (unsigned long)(x) == 0-(unsigned long)(x)) + /* Return type of i_divmod */ enum divmod_result { DIVMOD_OK, /* Correct result */ @@ -564,14 +575,8 @@ i_divmod(register long x, register long y, "integer division or modulo by zero"); return DIVMOD_ERROR; } - /* (-sys.maxint-1)/-1 is the only overflow case. x is the most - * negative long iff x < 0 and, on a 2's-complement box, x == -x. - * However, -x is undefined (by C) if x /is/ the most negative long - * (it's a signed overflow case), and some compilers care. So we cast - * x to unsigned long first. However, then other compilers warn about - * applying unary minus to an unsigned operand. Hence the weird "0-". - */ - if (y == -1 && x < 0 && (unsigned long)x == 0-(unsigned long)x) + /* (-sys.maxint-1)/-1 is the only overflow case. */ + if (y == -1 && UNARY_NEG_WOULD_OVERFLOW(x)) return DIVMOD_OVERFLOW; xdivy = x / y; xmody = x - xdivy * y; @@ -762,7 +767,8 @@ int_neg(PyIntObject *v) { register long a; a = v->ob_ival; - if (a < 0 && (unsigned long)a == 0-(unsigned long)a) { + /* check for overflow */ + if (UNARY_NEG_WOULD_OVERFLOW(a)) { PyObject *o = PyLong_FromLong(a); if (o != NULL) { PyObject *result = PyNumber_Negative(o); diff --git a/Objects/listobject.c b/Objects/listobject.c index ad276447d3f..7afea121cdd 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -863,17 +863,12 @@ static PyObject * listpop(PyListObject *self, PyObject *args) { Py_ssize_t i = -1; - PyObject *v, *arg = NULL; + PyObject *v; int status; - if (!PyArg_UnpackTuple(args, "pop", 0, 1, &arg)) + if (!PyArg_ParseTuple(args, "|n:pop", &i)) return NULL; - if (arg != NULL) { - if (PyInt_Check(arg)) - i = PyInt_AS_LONG((PyIntObject*) arg); - else if (!PyArg_ParseTuple(args, "|n:pop", &i)) - return NULL; - } + if (self->ob_size == 0) { /* Special-case most common failure cause */ PyErr_SetString(PyExc_IndexError, "pop from empty list"); diff --git a/Objects/longobject.c b/Objects/longobject.c index e32c42566eb..4d886cd7242 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -193,6 +193,18 @@ PyLong_FromDouble(double dval) return (PyObject *)v; } +/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define + * anything about what happens when a signed integer operation overflows, + * and some compilers think they're doing you a favor by being "clever" + * then. The bit pattern for the largest postive signed long is + * (unsigned long)LONG_MAX, and for the smallest negative signed long + * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN. + * However, some other compilers warn about applying unary minus to an + * unsigned operand. Hence the weird "0-". + */ +#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) +#define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN) + /* Get a C long int from a long int object. Returns -1 and sets an error condition if overflow occurs. */ @@ -225,14 +237,16 @@ PyLong_AsLong(PyObject *vv) if ((x >> SHIFT) != prev) goto overflow; } - /* Haven't lost any bits, but if the sign bit is set we're in - * trouble *unless* this is the min negative number. So, - * trouble iff sign bit set && (positive || some bit set other - * than the sign bit). - */ - if ((long)x < 0 && (sign > 0 || (x << 1) != 0)) - goto overflow; - return (long)x * sign; + /* Haven't lost any bits, but casting to long requires extra care + * (see comment above). + */ + if (x <= (unsigned long)LONG_MAX) { + return (long)x * sign; + } + else if (sign < 0 && x == PY_ABS_LONG_MIN) { + return LONG_MIN; + } + /* else overflow */ overflow: PyErr_SetString(PyExc_OverflowError, @@ -268,14 +282,16 @@ _PyLong_AsSsize_t(PyObject *vv) { if ((x >> SHIFT) != prev) goto overflow; } - /* Haven't lost any bits, but if the sign bit is set we're in - * trouble *unless* this is the min negative number. So, - * trouble iff sign bit set && (positive || some bit set other - * than the sign bit). + /* Haven't lost any bits, but casting to a signed type requires + * extra care (see comment above). */ - if ((Py_ssize_t)x < 0 && (sign > 0 || (x << 1) != 0)) - goto overflow; - return (Py_ssize_t)x * sign; + if (x <= (size_t)PY_SSIZE_T_MAX) { + return (Py_ssize_t)x * sign; + } + else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) { + return PY_SSIZE_T_MIN; + } + /* else overflow */ overflow: PyErr_SetString(PyExc_OverflowError, @@ -1167,7 +1183,7 @@ long_format(PyObject *aa, int base, int addL) { register PyLongObject *a = (PyLongObject *)aa; PyStringObject *str; - Py_ssize_t i; + Py_ssize_t i, j, sz; Py_ssize_t size_a; char *p; int bits; @@ -1187,11 +1203,18 @@ long_format(PyObject *aa, int base, int addL) ++bits; i >>= 1; } - i = 5 + (addL ? 1 : 0) + (size_a*SHIFT + bits-1) / bits; - str = (PyStringObject *) PyString_FromStringAndSize((char *)0, i); + i = 5 + (addL ? 1 : 0); + j = size_a*SHIFT + bits-1; + sz = i + j / bits; + if (j / SHIFT < size_a || sz < i) { + PyErr_SetString(PyExc_OverflowError, + "long is too large to format"); + return NULL; + } + str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz); if (str == NULL) return NULL; - p = PyString_AS_STRING(str) + i; + p = PyString_AS_STRING(str) + sz; *p = '\0'; if (addL) *--p = 'L'; @@ -1305,7 +1328,7 @@ long_format(PyObject *aa, int base, int addL) } while ((*q++ = *p++) != '\0'); q--; _PyString_Resize((PyObject **)&str, - (int) (q - PyString_AS_STRING(str))); + (Py_ssize_t) (q - PyString_AS_STRING(str))); } return (PyObject *)str; } @@ -1363,14 +1386,14 @@ long_from_binary_base(char **str, int base) while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base) ++p; *str = p; - n = (p - start) * bits_per_char; - if (n / bits_per_char != p - start) { + /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ + n = (p - start) * bits_per_char + SHIFT - 1; + if (n / bits_per_char < p - start) { PyErr_SetString(PyExc_ValueError, "long string too large to convert"); return NULL; } - /* n <- # of Python digits needed, = ceiling(n/SHIFT). */ - n = (n + SHIFT - 1) / SHIFT; + n = n / SHIFT; z = _PyLong_New(n); if (z == NULL) return NULL; diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 4c2faf45437..aa2fd872ddb 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -804,10 +804,22 @@ string_print(PyStringObject *op, FILE *fp, int flags) return ret; } if (flags & Py_PRINT_RAW) { + char *data = op->ob_sval; + Py_ssize_t size = op->ob_size; + while (size > INT_MAX) { + /* Very long strings cannot be written atomically. + * But don't write exactly INT_MAX bytes at a time + * to avoid memory aligment issues. + */ + const int chunk_size = INT_MAX & ~0x3FFF; + fwrite(data, 1, chunk_size, fp); + data += chunk_size; + size -= chunk_size; + } #ifdef __VMS - if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp); + if (size) fwrite(data, (int)size, 1, fp); #else - fwrite(op->ob_sval, 1, (int) op->ob_size, fp); + fwrite(data, 1, (int)size, fp); #endif return 0; } @@ -844,7 +856,7 @@ PyString_Repr(PyObject *obj, int smartquotes) register PyStringObject* op = (PyStringObject*) obj; size_t newsize = 2 + 4 * op->ob_size; PyObject *v; - if (newsize > PY_SSIZE_T_MAX) { + if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) { PyErr_SetString(PyExc_OverflowError, "string is too large to make repr"); } @@ -4237,7 +4249,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type, return NULL; } llen = PyString_Size(result); - if (llen > PY_SSIZE_T_MAX) { + if (llen > INT_MAX) { PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); return NULL; } @@ -4726,9 +4738,10 @@ PyString_Format(PyObject *format, PyObject *args) default: PyErr_Format(PyExc_ValueError, "unsupported format character '%c' (0x%x) " - "at index %i", + "at index %zd", c, c, - (int)(fmt - 1 - PyString_AsString(format))); + (Py_ssize_t)(fmt - 1 - + PyString_AsString(format))); goto error; } if (sign) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 4d99f7d67f9..d4a46c37f02 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -98,7 +98,7 @@ type_module(PyTypeObject *type, void *context) s = strrchr(type->tp_name, '.'); if (s != NULL) return PyString_FromStringAndSize( - type->tp_name, (int)(s - type->tp_name)); + type->tp_name, (Py_ssize_t)(s - type->tp_name)); return PyString_FromString("__builtin__"); } } @@ -4116,19 +4116,10 @@ slot_sq_length(PyObject *self) return -1; len = PyInt_AsSsize_t(res); Py_DECREF(res); - if (len == -1 && PyErr_Occurred()) - return -1; -#if SIZEOF_SIZE_T < SIZEOF_INT - /* Overflow check -- range of PyInt is more than C ssize_t */ - if (len != (int)len) { - PyErr_SetString(PyExc_OverflowError, - "__len__() should return 0 <= outcome < 2**31"); - return -1; - } -#endif if (len < 0) { - PyErr_SetString(PyExc_ValueError, - "__len__() should return >= 0"); + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, + "__len__() should return >= 0"); return -1; } return len; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2ae3f61d230..00f2018da0c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2380,6 +2380,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, Py_UNICODE unimax = PyUnicode_GetMax(); #endif + /* XXX overflow detection missing */ v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE); if (v == NULL) goto onError; @@ -3166,6 +3167,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, Py_ssize_t needed = (targetsize - extrachars) + \ (targetsize << 2); extrachars += needed; + /* XXX overflow detection missing */ if (_PyUnicode_Resize(&v, PyUnicode_GET_SIZE(v) + needed) < 0) { Py_DECREF(x); @@ -7758,10 +7760,11 @@ PyObject *PyUnicode_Format(PyObject *format, default: PyErr_Format(PyExc_ValueError, "unsupported format character '%c' (0x%x) " - "at index %i", + "at index %zd", (31<=c && c<=126) ? (char)c : '?', (int)c, - (int)(fmt -1 - PyUnicode_AS_UNICODE(uformat))); + (Py_ssize_t)(fmt - 1 - + PyUnicode_AS_UNICODE(uformat))); goto onError; } if (sign) { diff --git a/Python/errors.c b/Python/errors.c index 7b7105104ad..66a734eb15c 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -551,7 +551,8 @@ PyErr_NewException(char *name, PyObject *base, PyObject *dict) goto failure; } if (PyDict_GetItemString(dict, "__module__") == NULL) { - modulename = PyString_FromStringAndSize(name, (int)(dot-name)); + modulename = PyString_FromStringAndSize(name, + (Py_ssize_t)(dot-name)); if (modulename == NULL) goto failure; if (PyDict_SetItemString(dict, "__module__", modulename) != 0) diff --git a/Python/getargs.c b/Python/getargs.c index 3fca9cdfce3..d6255986327 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -815,7 +815,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, #endif else return converterr("string", arg, msgbuf, bufsize); - if ((int)strlen(*p) != PyString_Size(arg)) + if ((Py_ssize_t)strlen(*p) != PyString_Size(arg)) return converterr("string without null bytes", arg, msgbuf, bufsize); } @@ -882,7 +882,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; } else if (*p != NULL && - (int)strlen(*p) != PyString_Size(arg)) + (Py_ssize_t)strlen(*p) != PyString_Size(arg)) return converterr( "string without null bytes or None", arg, msgbuf, bufsize); @@ -1029,7 +1029,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, PyMem_Free()ing it after usage */ - if ((int)strlen(PyString_AS_STRING(s)) != size) { + if ((Py_ssize_t)strlen(PyString_AS_STRING(s)) + != size) { Py_DECREF(s); return converterr( "(encoded string without NULL bytes)", diff --git a/Python/marshal.c b/Python/marshal.c index c3bc87f21f0..776836e5575 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -546,6 +546,11 @@ r_object(RFILE *p) int size; PyLongObject *ob; n = r_long(p); + if (n < -INT_MAX || n > INT_MAX) { + PyErr_SetString(PyExc_ValueError, + "bad marshal data"); + return NULL; + } size = n<0 ? -n : n; ob = _PyLong_New(size); if (ob == NULL) @@ -654,7 +659,7 @@ r_object(RFILE *p) case TYPE_INTERNED: case TYPE_STRING: n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -689,7 +694,7 @@ r_object(RFILE *p) char *buffer; n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -710,7 +715,7 @@ r_object(RFILE *p) case TYPE_TUPLE: n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -733,7 +738,7 @@ r_object(RFILE *p) case TYPE_LIST: n = r_long(p); - if (n < 0) { + if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); return NULL; } @@ -831,10 +836,11 @@ r_object(RFILE *p) v = NULL; - argcount = r_long(p); - nlocals = r_long(p); - stacksize = r_long(p); - flags = r_long(p); + /* XXX ignore long->int overflows for now */ + argcount = (int)r_long(p); + nlocals = (int)r_long(p); + stacksize = (int)r_long(p); + flags = (int)r_long(p); code = r_object(p); if (code == NULL) goto code_error; @@ -859,7 +865,7 @@ r_object(RFILE *p) name = r_object(p); if (name == NULL) goto code_error; - firstlineno = r_long(p); + firstlineno = (int)r_long(p); lnotab = r_object(p); if (lnotab == NULL) goto code_error; @@ -1031,10 +1037,16 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) wf.strings = (version > 0) ? PyDict_New() : NULL; w_object(x, &wf); Py_XDECREF(wf.strings); - if (wf.str != NULL) - _PyString_Resize(&wf.str, - (int) (wf.ptr - - PyString_AS_STRING((PyStringObject *)wf.str))); + if (wf.str != NULL) { + char *base = PyString_AS_STRING((PyStringObject *)wf.str); + if (wf.ptr - base > PY_SSIZE_T_MAX) { + Py_DECREF(wf.str); + PyErr_SetString(PyExc_OverflowError, + "too much marshall data for a string"); + return NULL; + } + _PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)); + } if (wf.error) { Py_XDECREF(wf.str); PyErr_SetString(PyExc_ValueError, diff --git a/Python/modsupport.c b/Python/modsupport.c index e291014ca4a..1aa3df2852c 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -421,7 +421,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) "string too long for Python string"); return NULL; } - n = (int)m; + n = (Py_ssize_t)m; } v = PyString_FromStringAndSize(str, n); } diff --git a/Python/mystrtoul.c b/Python/mystrtoul.c index 0dda4be2ee8..f0070575dbf 100644 --- a/Python/mystrtoul.c +++ b/Python/mystrtoul.c @@ -195,13 +195,10 @@ overflowed: return (unsigned long)-1; } -/* Checking for overflow in PyOS_strtol is a PITA since C doesn't define - * anything about what happens when a signed integer operation overflows, - * and some compilers think they're doing you a favor by being "clever" - * then. Python assumes a 2's-complement representation, so that the bit - * pattern for the largest postive signed long is LONG_MAX, and for - * the smallest negative signed long is LONG_MAX + 1. +/* Checking for overflow in PyOS_strtol is a PITA; see comments + * about PY_ABS_LONG_MIN in longobject.c. */ +#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) long PyOS_strtol(char *str, char **ptr, int base) @@ -224,8 +221,7 @@ PyOS_strtol(char *str, char **ptr, int base) if (sign == '-') result = -result; } - else if (sign == '-' && uresult == (unsigned long)LONG_MAX + 1) { - assert(LONG_MIN == -LONG_MAX-1); + else if (sign == '-' && uresult == PY_ABS_LONG_MIN) { result = LONG_MIN; } else { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 2dbe2839a68..6fbaba50252 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1225,7 +1225,7 @@ makepathobject(char *path, int delim) p = strchr(path, delim); if (p == NULL) p = strchr(path, '\0'); /* End of string */ - w = PyString_FromStringAndSize(path, (int) (p - path)); + w = PyString_FromStringAndSize(path, (Py_ssize_t) (p - path)); if (w == NULL) { Py_DECREF(v); return NULL;