Issue #5308: Raise ValueError when marshalling an object that is too large
(a sequence with size >= 2**31), instead of producing illegal marshal data.
This commit is contained in:
Serhiy Storchaka 2013-02-13 12:12:11 +02:00
commit 0c15b5d897
3 changed files with 116 additions and 66 deletions

View File

@ -279,6 +279,51 @@ class BugsTestCase(unittest.TestCase):
unicode_string = 'T' unicode_string = 'T'
self.assertRaises(TypeError, marshal.loads, unicode_string) self.assertRaises(TypeError, marshal.loads, unicode_string)
# Smallest sequence length that marshal must refuse (2**31).
LARGE_SIZE = 1 << 31

# Per-item size figures, used below as ``memuse`` values for the bigmem
# test decorators.
if sys.maxunicode > 0xFFFF:
    character_size = 4
else:
    character_size = 2

if sys.maxsize > 0xFFFFFFFF:
    pointer_size = 8
else:
    pointer_size = 4
class NullWriter:
    """Minimal file-like sink: ``write`` accepts data and discards it.

    Passed to ``marshal.dump`` so that the large-value tests never have
    to store the serialized output anywhere.
    """

    def write(self, s):
        """Ignore *s*; always returns None."""
        pass
@unittest.skipIf(LARGE_SIZE > sys.maxsize, "test cannot run on 32-bit systems")
class LargeValuesTestCase(unittest.TestCase):
    """Check that marshal rejects objects with LARGE_SIZE (2**31) items.

    Every test builds one object of ``size`` items and expects
    ``marshal.dump`` to raise ``ValueError``.  The ``bigmemtest``
    decorator supplies ``size``; its ``memuse`` argument is the estimated
    number of bytes needed per item for the object being built.
    """

    def check_unmarshallable(self, data):
        """Assert that dumping *data* with marshal raises ValueError."""
        # The output is irrelevant here, so it goes to a writer that
        # throws everything away.
        self.assertRaises(ValueError, marshal.dump, data, NullWriter())

    @support.bigmemtest(size=LARGE_SIZE, memuse=1, dry_run=False)
    def test_bytes(self, size):
        """A bytes object of *size* bytes cannot be marshalled."""
        self.check_unmarshallable(b'x' * size)

    @support.bigmemtest(size=LARGE_SIZE, memuse=character_size, dry_run=False)
    def test_str(self, size):
        """A str of *size* characters cannot be marshalled."""
        self.check_unmarshallable('x' * size)

    @support.bigmemtest(size=LARGE_SIZE, memuse=pointer_size, dry_run=False)
    def test_tuple(self, size):
        """A tuple of *size* items cannot be marshalled."""
        self.check_unmarshallable((None,) * size)

    @support.bigmemtest(size=LARGE_SIZE, memuse=pointer_size, dry_run=False)
    def test_list(self, size):
        """A list of *size* items cannot be marshalled."""
        self.check_unmarshallable([None] * size)

    @support.bigmemtest(size=LARGE_SIZE,
                        memuse=pointer_size*12 + sys.getsizeof(LARGE_SIZE-1),
                        dry_run=False)
    def test_set(self, size):
        """A set of *size* distinct ints cannot be marshalled."""
        self.check_unmarshallable(set(range(size)))

    @support.bigmemtest(size=LARGE_SIZE,
                        memuse=pointer_size*12 + sys.getsizeof(LARGE_SIZE-1),
                        dry_run=False)
    def test_frozenset(self, size):
        """A frozenset of *size* distinct ints cannot be marshalled."""
        self.check_unmarshallable(frozenset(range(size)))

    @support.bigmemtest(size=LARGE_SIZE, memuse=1, dry_run=False)
    def test_bytearray(self, size):
        """A bytearray of *size* bytes cannot be marshalled."""
        self.check_unmarshallable(bytearray(size))
def test_main(): def test_main():
support.run_unittest(IntTestCase, support.run_unittest(IntTestCase,
@ -288,7 +333,9 @@ def test_main():
ContainerTestCase, ContainerTestCase,
ExceptionTestCase, ExceptionTestCase,
BufferTestCase, BufferTestCase,
BugsTestCase) BugsTestCase,
LargeValuesTestCase,
)
if __name__ == "__main__": if __name__ == "__main__":
test_main() test_main()

View File

@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #5308: Raise ValueError when marshalling too large object (a sequence
with size >= 2**31), instead of producing illegal marshal data.
- Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError - Issue #12983: Bytes literals with invalid \x escape now raise a SyntaxError
and a full traceback including line number. and a full traceback including line number.

View File

@ -95,7 +95,7 @@ w_more(int c, WFILE *p)
} }
static void static void
w_string(char *s, int n, WFILE *p) w_string(char *s, Py_ssize_t n, WFILE *p)
{ {
if (p->fp != NULL) { if (p->fp != NULL) {
fwrite(s, 1, n, p->fp); fwrite(s, 1, n, p->fp);
@ -124,6 +124,21 @@ w_long(long x, WFILE *p)
w_byte((char)((x>>24) & 0xff), p); w_byte((char)((x>>24) & 0xff), p);
} }
#define SIZE32_MAX 0x7FFFFFFF
#if SIZEOF_SIZE_T > 4
# define W_SIZE(n, p) do { \
if ((n) > SIZE32_MAX) { \
(p)->depth--; \
(p)->error = WFERR_UNMARSHALLABLE; \
return; \
} \
w_long((long)(n), p); \
} while(0)
#else
# define W_SIZE w_long
#endif
/* We assume that Python longs are stored internally in base some power of /* We assume that Python longs are stored internally in base some power of
2**15; for the sake of portability we'll always read and write them in base 2**15; for the sake of portability we'll always read and write them in base
exactly 2**15. */ exactly 2**15. */
@ -157,6 +172,11 @@ w_PyLong(const PyLongObject *ob, WFILE *p)
d >>= PyLong_MARSHAL_SHIFT; d >>= PyLong_MARSHAL_SHIFT;
l++; l++;
} while (d != 0); } while (d != 0);
if (l > SIZE32_MAX) {
p->depth--;
p->error = WFERR_UNMARSHALLABLE;
return;
}
w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p); w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
for (i=0; i < n-1; i++) { for (i=0; i < n-1; i++) {
@ -245,7 +265,7 @@ w_object(PyObject *v, WFILE *p)
n = strlen(buf); n = strlen(buf);
w_byte(TYPE_FLOAT, p); w_byte(TYPE_FLOAT, p);
w_byte((int)n, p); w_byte((int)n, p);
w_string(buf, (int)n, p); w_string(buf, n, p);
PyMem_Free(buf); PyMem_Free(buf);
} }
} }
@ -277,7 +297,7 @@ w_object(PyObject *v, WFILE *p)
} }
n = strlen(buf); n = strlen(buf);
w_byte((int)n, p); w_byte((int)n, p);
w_string(buf, (int)n, p); w_string(buf, n, p);
PyMem_Free(buf); PyMem_Free(buf);
buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v), buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
'g', 17, 0, NULL); 'g', 17, 0, NULL);
@ -287,21 +307,15 @@ w_object(PyObject *v, WFILE *p)
} }
n = strlen(buf); n = strlen(buf);
w_byte((int)n, p); w_byte((int)n, p);
w_string(buf, (int)n, p); w_string(buf, n, p);
PyMem_Free(buf); PyMem_Free(buf);
} }
} }
else if (PyBytes_CheckExact(v)) { else if (PyBytes_CheckExact(v)) {
w_byte(TYPE_STRING, p); w_byte(TYPE_STRING, p);
n = PyBytes_GET_SIZE(v); n = PyBytes_GET_SIZE(v);
if (n > INT_MAX) { W_SIZE(n, p);
/* huge strings are not supported */ w_string(PyBytes_AS_STRING(v), n, p);
p->depth--;
p->error = WFERR_UNMARSHALLABLE;
return;
}
w_long((long)n, p);
w_string(PyBytes_AS_STRING(v), (int)n, p);
} }
else if (PyUnicode_CheckExact(v)) { else if (PyUnicode_CheckExact(v)) {
PyObject *utf8; PyObject *utf8;
@ -313,19 +327,14 @@ w_object(PyObject *v, WFILE *p)
} }
w_byte(TYPE_UNICODE, p); w_byte(TYPE_UNICODE, p);
n = PyBytes_GET_SIZE(utf8); n = PyBytes_GET_SIZE(utf8);
if (n > INT_MAX) { W_SIZE(n, p);
p->depth--; w_string(PyBytes_AS_STRING(utf8), n, p);
p->error = WFERR_UNMARSHALLABLE;
return;
}
w_long((long)n, p);
w_string(PyBytes_AS_STRING(utf8), (int)n, p);
Py_DECREF(utf8); Py_DECREF(utf8);
} }
else if (PyTuple_CheckExact(v)) { else if (PyTuple_CheckExact(v)) {
w_byte(TYPE_TUPLE, p); w_byte(TYPE_TUPLE, p);
n = PyTuple_Size(v); n = PyTuple_Size(v);
w_long((long)n, p); W_SIZE(n, p);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
w_object(PyTuple_GET_ITEM(v, i), p); w_object(PyTuple_GET_ITEM(v, i), p);
} }
@ -333,7 +342,7 @@ w_object(PyObject *v, WFILE *p)
else if (PyList_CheckExact(v)) { else if (PyList_CheckExact(v)) {
w_byte(TYPE_LIST, p); w_byte(TYPE_LIST, p);
n = PyList_GET_SIZE(v); n = PyList_GET_SIZE(v);
w_long((long)n, p); W_SIZE(n, p);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
w_object(PyList_GET_ITEM(v, i), p); w_object(PyList_GET_ITEM(v, i), p);
} }
@ -363,7 +372,7 @@ w_object(PyObject *v, WFILE *p)
p->error = WFERR_UNMARSHALLABLE; p->error = WFERR_UNMARSHALLABLE;
return; return;
} }
w_long((long)n, p); W_SIZE(n, p);
it = PyObject_GetIter(v); it = PyObject_GetIter(v);
if (it == NULL) { if (it == NULL) {
p->depth--; p->depth--;
@ -413,13 +422,8 @@ w_object(PyObject *v, WFILE *p)
w_byte(TYPE_STRING, p); w_byte(TYPE_STRING, p);
n = view.len; n = view.len;
s = view.buf; s = view.buf;
if (n > INT_MAX) { W_SIZE(n, p);
p->depth--; w_string(s, n, p);
p->error = WFERR_UNMARSHALLABLE;
return;
}
w_long((long)n, p);
w_string(s, (int)n, p);
PyBuffer_Release(&view); PyBuffer_Release(&view);
} }
else { else {
@ -456,18 +460,18 @@ typedef WFILE RFILE; /* Same struct with different invariants */
#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
static int static Py_ssize_t
r_string(char *s, int n, RFILE *p) r_string(char *s, Py_ssize_t n, RFILE *p)
{ {
char *ptr; char *ptr;
int read, left; Py_ssize_t read, left;
if (!p->readable) { if (!p->readable) {
if (p->fp != NULL) if (p->fp != NULL)
/* The result fits into int because it must be <=n. */ /* The result fits into int because it must be <=n. */
read = (int) fread(s, 1, n, p->fp); read = fread(s, 1, n, p->fp);
else { else {
left = (int)(p->end - p->ptr); left = p->end - p->ptr;
read = (left < n) ? left : n; read = (left < n) ? left : n;
memcpy(s, p->ptr, read); memcpy(s, p->ptr, read);
p->ptr += read; p->ptr += read;
@ -476,7 +480,7 @@ r_string(char *s, int n, RFILE *p)
else { else {
_Py_IDENTIFIER(read); _Py_IDENTIFIER(read);
PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "i", n); PyObject *data = _PyObject_CallMethodId(p->readable, &PyId_read, "n", n);
read = 0; read = 0;
if (data != NULL) { if (data != NULL) {
if (!PyBytes_Check(data)) { if (!PyBytes_Check(data)) {
@ -506,7 +510,7 @@ r_byte(RFILE *p)
{ {
int c = EOF; int c = EOF;
unsigned char ch; unsigned char ch;
int n; Py_ssize_t n;
if (!p->readable) if (!p->readable)
c = p->fp ? getc(p->fp) : rs_byte(p); c = p->fp ? getc(p->fp) : rs_byte(p);
@ -590,8 +594,8 @@ static PyObject *
r_PyLong(RFILE *p) r_PyLong(RFILE *p)
{ {
PyLongObject *ob; PyLongObject *ob;
int size, i, j, md, shorts_in_top_digit; long n, size, i;
long n; int j, md, shorts_in_top_digit;
digit d; digit d;
n = r_long(p); n = r_long(p);
@ -599,7 +603,7 @@ r_PyLong(RFILE *p)
return NULL; return NULL;
if (n == 0) if (n == 0)
return (PyObject *)_PyLong_New(0); return (PyObject *)_PyLong_New(0);
if (n < -INT_MAX || n > INT_MAX) { if (n < -SIZE32_MAX || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"bad marshal data (long size out of range)"); "bad marshal data (long size out of range)");
return NULL; return NULL;
@ -730,7 +734,7 @@ r_object(RFILE *p)
double dx; double dx;
retval = NULL; retval = NULL;
n = r_byte(p); n = r_byte(p);
if (n == EOF || r_string(buf, (int)n, p) != n) { if (n == EOF || r_string(buf, n, p) != n) {
PyErr_SetString(PyExc_EOFError, PyErr_SetString(PyExc_EOFError,
"EOF read where object expected"); "EOF read where object expected");
break; break;
@ -768,7 +772,7 @@ r_object(RFILE *p)
Py_complex c; Py_complex c;
retval = NULL; retval = NULL;
n = r_byte(p); n = r_byte(p);
if (n == EOF || r_string(buf, (int)n, p) != n) { if (n == EOF || r_string(buf, n, p) != n) {
PyErr_SetString(PyExc_EOFError, PyErr_SetString(PyExc_EOFError,
"EOF read where object expected"); "EOF read where object expected");
break; break;
@ -778,7 +782,7 @@ r_object(RFILE *p)
if (c.real == -1.0 && PyErr_Occurred()) if (c.real == -1.0 && PyErr_Occurred())
break; break;
n = r_byte(p); n = r_byte(p);
if (n == EOF || r_string(buf, (int)n, p) != n) { if (n == EOF || r_string(buf, n, p) != n) {
PyErr_SetString(PyExc_EOFError, PyErr_SetString(PyExc_EOFError,
"EOF read where object expected"); "EOF read where object expected");
break; break;
@ -827,7 +831,7 @@ r_object(RFILE *p)
retval = NULL; retval = NULL;
break; break;
} }
if (n < 0 || n > INT_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
retval = NULL; retval = NULL;
break; break;
@ -837,7 +841,7 @@ r_object(RFILE *p)
retval = NULL; retval = NULL;
break; break;
} }
if (r_string(PyBytes_AS_STRING(v), (int)n, p) != n) { if (r_string(PyBytes_AS_STRING(v), n, p) != n) {
Py_DECREF(v); Py_DECREF(v);
PyErr_SetString(PyExc_EOFError, PyErr_SetString(PyExc_EOFError,
"EOF read where object expected"); "EOF read where object expected");
@ -856,7 +860,7 @@ r_object(RFILE *p)
retval = NULL; retval = NULL;
break; break;
} }
if (n < 0 || n > INT_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
retval = NULL; retval = NULL;
break; break;
@ -866,7 +870,7 @@ r_object(RFILE *p)
retval = PyErr_NoMemory(); retval = PyErr_NoMemory();
break; break;
} }
if (r_string(buffer, (int)n, p) != n) { if (r_string(buffer, n, p) != n) {
PyMem_DEL(buffer); PyMem_DEL(buffer);
PyErr_SetString(PyExc_EOFError, PyErr_SetString(PyExc_EOFError,
"EOF read where object expected"); "EOF read where object expected");
@ -885,12 +889,12 @@ r_object(RFILE *p)
retval = NULL; retval = NULL;
break; break;
} }
if (n < 0 || n > INT_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
retval = NULL; retval = NULL;
break; break;
} }
v = PyTuple_New((int)n); v = PyTuple_New(n);
if (v == NULL) { if (v == NULL) {
retval = NULL; retval = NULL;
break; break;
@ -905,7 +909,7 @@ r_object(RFILE *p)
v = NULL; v = NULL;
break; break;
} }
PyTuple_SET_ITEM(v, (int)i, v2); PyTuple_SET_ITEM(v, i, v2);
} }
retval = v; retval = v;
break; break;
@ -916,12 +920,12 @@ r_object(RFILE *p)
retval = NULL; retval = NULL;
break; break;
} }
if (n < 0 || n > INT_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
retval = NULL; retval = NULL;
break; break;
} }
v = PyList_New((int)n); v = PyList_New(n);
if (v == NULL) { if (v == NULL) {
retval = NULL; retval = NULL;
break; break;
@ -936,7 +940,7 @@ r_object(RFILE *p)
v = NULL; v = NULL;
break; break;
} }
PyList_SET_ITEM(v, (int)i, v2); PyList_SET_ITEM(v, i, v2);
} }
retval = v; retval = v;
break; break;
@ -972,7 +976,7 @@ r_object(RFILE *p)
retval = NULL; retval = NULL;
break; break;
} }
if (n < 0 || n > INT_MAX) { if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
retval = NULL; retval = NULL;
break; break;
@ -1180,12 +1184,8 @@ PyMarshal_ReadLastObjectFromFile(FILE *fp)
if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) { if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {
char* pBuf = (char *)PyMem_MALLOC(filesize); char* pBuf = (char *)PyMem_MALLOC(filesize);
if (pBuf != NULL) { if (pBuf != NULL) {
PyObject* v; size_t n = fread(pBuf, 1, (size_t)filesize, fp);
size_t n; PyObject* v = PyMarshal_ReadObjectFromString(pBuf, n);
/* filesize must fit into an int, because it
is smaller than REASONABLE_FILE_LIMIT */
n = fread(pBuf, 1, (int)filesize, fp);
v = PyMarshal_ReadObjectFromString(pBuf, n);
PyMem_FREE(pBuf); PyMem_FREE(pBuf);
return v; return v;
} }