Closes #12291 for 3.3 - merged fix from 3.2.

This commit is contained in:
Vinay Sajip 2011-07-02 17:16:02 +01:00
commit 65897a386e
4 changed files with 183 additions and 60 deletions

View File

@ -214,7 +214,7 @@ class BadBytecodeTest(unittest.TestCase):
lambda bc: bc[:8] + b'<test>', lambda bc: bc[:8] + b'<test>',
del_source=del_source) del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ValueError): with self.assertRaises(EOFError):
self.import_(file_path, '_temp') self.import_(file_path, '_temp')
def _test_bad_magic(self, test, *, del_source=False): def _test_bad_magic(self, test, *, del_source=False):

View File

@ -228,6 +228,30 @@ class BugsTestCase(unittest.TestCase):
invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00' invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00'
self.assertRaises(ValueError, marshal.loads, invalid_string) self.assertRaises(ValueError, marshal.loads, invalid_string)
def test_multiple_dumps_and_loads(self):
    """Round-trip several objects through one file, optionally with filler.

    Issue 12291: marshal.load() should be callable multiple times on the
    same stream, even when data written by non-marshal code sits between
    the marshalled objects.  Adapted from a patch by Engelbert Gruber.
    """
    samples = (1, 'abc', b'def', 1.0, (2, 'a', ['b', b'c']))
    for filler in (b'', b'0123'):
        # File offset recorded after each object (and its filler) is written.
        offsets = []
        try:
            with open(support.TESTFN, 'wb') as out:
                for obj in samples:
                    marshal.dump(obj, out)
                    if filler:
                        out.write(filler)
                    offsets.append(out.tell())
            with open(support.TESTFN, 'rb') as src:
                for obj, expected_offset in zip(samples, offsets):
                    self.assertEqual(obj, marshal.load(src))
                    if filler:
                        # Skip the foreign bytes before the next object.
                        src.read(len(filler))
                    # load() must not have consumed anything beyond the
                    # object it returned.
                    self.assertEqual(expected_offset, src.tell())
        finally:
            support.unlink(support.TESTFN)
def test_main(): def test_main():
support.run_unittest(IntTestCase, support.run_unittest(IntTestCase,

View File

@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #12291: You can now load multiple marshalled objects from a stream,
with other data interleaved between marshalled objects.
- Issue #12356: When required positional or keyword-only arguments are not - Issue #12356: When required positional or keyword-only arguments are not
given, produce an informative error message which includes the name(s) of the given, produce an informative error message which includes the name(s) of the
missing arguments. missing arguments.

View File

@ -57,6 +57,7 @@ typedef struct {
int error; /* see WFERR_* values */ int error; /* see WFERR_* values */
int depth; int depth;
/* If fp == NULL, the following are valid: */ /* If fp == NULL, the following are valid: */
PyObject * readable; /* Stream-like object being read from */
PyObject *str; PyObject *str;
PyObject *current_filename; PyObject *current_filename;
char *ptr; char *ptr;
@ -463,27 +464,75 @@ typedef WFILE RFILE; /* Same struct with different invariants */
#define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
#define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
/* Read exactly n bytes from p into the buffer s.
 *
 * The source is chosen from the RFILE fields:
 *   - p->readable set: call p->readable.read(n) and copy the bytes returned;
 *   - otherwise p->fp set: fread() from the C stream;
 *   - otherwise: copy from the in-memory range [p->ptr, p->end).
 *
 * Returns the number of bytes stored in s.  If fewer than n bytes were
 * obtained and no other exception is pending, EOFError is set.  A read()
 * method returning a non-bytes object sets TypeError; one returning more
 * than n bytes sets ValueError and nothing is copied.
 */
static int
r_string(char *s, int n, RFILE *p)
{
    int read = 0;

    if (!p->readable) {
        if (p->fp != NULL)
            /* The result fits into int because it must be <=n. */
            read = (int) fread(s, 1, n, p->fp);
        else {
            int left = (int)(p->end - p->ptr);
            read = (left < n) ? left : n;
            memcpy(s, p->ptr, read);
            p->ptr += read;
        }
    }
    else {
        PyObject *data = PyObject_CallMethod(p->readable, "read", "i", n);
        if (data != NULL) {
            if (!PyBytes_Check(data)) {
                PyErr_Format(PyExc_TypeError,
                             "f.read() returned not bytes but %.100s",
                             data->ob_type->tp_name);
            }
            else {
                /* Compare as Py_ssize_t before narrowing to int: the
                   caller's buffer only has room for n bytes, and read()
                   is arbitrary Python code that may return more. */
                Py_ssize_t got = PyBytes_GET_SIZE(data);
                if (got > n) {
                    PyErr_Format(PyExc_ValueError,
                                 "read() returned too much data: "
                                 "%i bytes requested, %zd returned",
                                 n, got);
                }
                else if (got > 0) {
                    memcpy(s, PyBytes_AS_STRING(data), got);
                    read = (int) got;
                }
            }
            Py_DECREF(data);
        }
    }
    /* A short read is an error for every caller; do not mask an
       exception that is already pending. */
    if (!PyErr_Occurred() && (read < n)) {
        PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
    }
    return read;
}
/* Read a single byte from p.
 *
 * Returns the byte as a non-negative int, or EOF when none could be read.
 * Stream-like sources (p->readable) go through r_string(); C streams use
 * getc() and in-memory buffers use the rs_byte() macro.
 */
static int
r_byte(RFILE *p)
{
    unsigned char byte;

    if (p->readable) {
        /* r_string() returns the number of bytes it stored. */
        if (r_string((char *) &byte, 1, p) > 0)
            return byte;
        return EOF;
    }
    if (p->fp)
        return getc(p->fp);
    return rs_byte(p);
}
static int static int
r_short(RFILE *p) r_short(RFILE *p)
{ {
register short x; register short x;
x = r_byte(p); unsigned char buffer[2];
x |= r_byte(p) << 8;
r_string((char *) buffer, 2, p);
x = buffer[0];
x |= buffer[1] << 8;
/* Sign-extension, in case short greater than 16 bits */ /* Sign-extension, in case short greater than 16 bits */
x |= -(x & 0x8000); x |= -(x & 0x8000);
return x; return x;
@ -493,19 +542,13 @@ static long
r_long(RFILE *p) r_long(RFILE *p)
{ {
register long x; register long x;
register FILE *fp = p->fp; unsigned char buffer[4];
if (fp) {
x = getc(fp); r_string((char *) buffer, 4, p);
x |= (long)getc(fp) << 8; x = buffer[0];
x |= (long)getc(fp) << 16; x |= (long)buffer[1] << 8;
x |= (long)getc(fp) << 24; x |= (long)buffer[2] << 16;
} x |= (long)buffer[3] << 24;
else {
x = rs_byte(p);
x |= (long)rs_byte(p) << 8;
x |= (long)rs_byte(p) << 16;
x |= (long)rs_byte(p) << 24;
}
#if SIZEOF_LONG > 4 #if SIZEOF_LONG > 4
/* Sign extension for 64-bit machines */ /* Sign extension for 64-bit machines */
x |= -(x & 0x80000000L); x |= -(x & 0x80000000L);
@ -523,25 +566,30 @@ r_long(RFILE *p)
static PyObject * static PyObject *
r_long64(RFILE *p) r_long64(RFILE *p)
{ {
PyObject * result = NULL;
long lo4 = r_long(p); long lo4 = r_long(p);
long hi4 = r_long(p); long hi4 = r_long(p);
if (!PyErr_Occurred()) {
#if SIZEOF_LONG > 4 #if SIZEOF_LONG > 4
long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
return PyLong_FromLong(x); result = PyLong_FromLong(x);
#else #else
unsigned char buf[8]; unsigned char buf[8];
int one = 1; int one = 1;
int is_little_endian = (int)*(char*)&one; int is_little_endian = (int)*(char*)&one;
if (is_little_endian) { if (is_little_endian) {
memcpy(buf, &lo4, 4); memcpy(buf, &lo4, 4);
memcpy(buf+4, &hi4, 4); memcpy(buf+4, &hi4, 4);
} }
else { else {
memcpy(buf, &hi4, 4); memcpy(buf, &hi4, 4);
memcpy(buf+4, &lo4, 4); memcpy(buf+4, &lo4, 4);
} }
return _PyLong_FromByteArray(buf, 8, is_little_endian, 1); result = _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
#endif #endif
}
return result;
} }
static PyObject * static PyObject *
@ -553,6 +601,8 @@ r_PyLong(RFILE *p)
digit d; digit d;
n = r_long(p); n = r_long(p);
if (PyErr_Occurred())
return NULL;
if (n == 0) if (n == 0)
return (PyObject *)_PyLong_New(0); return (PyObject *)_PyLong_New(0);
if (n < -INT_MAX || n > INT_MAX) { if (n < -INT_MAX || n > INT_MAX) {
@ -572,6 +622,8 @@ r_PyLong(RFILE *p)
d = 0; d = 0;
for (j=0; j < PyLong_MARSHAL_RATIO; j++) { for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
md = r_short(p); md = r_short(p);
if (PyErr_Occurred())
break;
if (md < 0 || md > PyLong_MARSHAL_BASE) if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit; goto bad_digit;
d += (digit)md << j*PyLong_MARSHAL_SHIFT; d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@ -581,6 +633,8 @@ r_PyLong(RFILE *p)
d = 0; d = 0;
for (j=0; j < shorts_in_top_digit; j++) { for (j=0; j < shorts_in_top_digit; j++) {
md = r_short(p); md = r_short(p);
if (PyErr_Occurred())
break;
if (md < 0 || md > PyLong_MARSHAL_BASE) if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit; goto bad_digit;
/* topmost marshal digit should be nonzero */ /* topmost marshal digit should be nonzero */
@ -592,6 +646,10 @@ r_PyLong(RFILE *p)
} }
d += (digit)md << j*PyLong_MARSHAL_SHIFT; d += (digit)md << j*PyLong_MARSHAL_SHIFT;
} }
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
/* top digit should be nonzero, else the resulting PyLong won't be /* top digit should be nonzero, else the resulting PyLong won't be
normalized */ normalized */
ob->ob_digit[size-1] = d; ob->ob_digit[size-1] = d;
@ -660,7 +718,8 @@ r_object(RFILE *p)
break; break;
case TYPE_INT: case TYPE_INT:
retval = PyLong_FromLong(r_long(p)); n = r_long(p);
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
break; break;
case TYPE_INT64: case TYPE_INT64:
@ -770,6 +829,10 @@ r_object(RFILE *p)
case TYPE_STRING: case TYPE_STRING:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred()) {
retval = NULL;
break;
}
if (n < 0 || n > INT_MAX) { if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
retval = NULL; retval = NULL;
@ -795,6 +858,10 @@ r_object(RFILE *p)
char *buffer; char *buffer;
n = r_long(p); n = r_long(p);
if (PyErr_Occurred()) {
retval = NULL;
break;
}
if (n < 0 || n > INT_MAX) { if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
retval = NULL; retval = NULL;
@ -820,6 +887,10 @@ r_object(RFILE *p)
case TYPE_TUPLE: case TYPE_TUPLE:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred()) {
retval = NULL;
break;
}
if (n < 0 || n > INT_MAX) { if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
retval = NULL; retval = NULL;
@ -847,6 +918,10 @@ r_object(RFILE *p)
case TYPE_LIST: case TYPE_LIST:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred()) {
retval = NULL;
break;
}
if (n < 0 || n > INT_MAX) { if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
retval = NULL; retval = NULL;
@ -899,6 +974,10 @@ r_object(RFILE *p)
case TYPE_SET: case TYPE_SET:
case TYPE_FROZENSET: case TYPE_FROZENSET:
n = r_long(p); n = r_long(p);
if (PyErr_Occurred()) {
retval = NULL;
break;
}
if (n < 0 || n > INT_MAX) { if (n < 0 || n > INT_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)"); PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
retval = NULL; retval = NULL;
@ -952,10 +1031,20 @@ r_object(RFILE *p)
/* XXX ignore long->int overflows for now */ /* XXX ignore long->int overflows for now */
argcount = (int)r_long(p); argcount = (int)r_long(p);
if (PyErr_Occurred())
goto code_error;
kwonlyargcount = (int)r_long(p); kwonlyargcount = (int)r_long(p);
if (PyErr_Occurred())
goto code_error;
nlocals = (int)r_long(p); nlocals = (int)r_long(p);
if (PyErr_Occurred())
goto code_error;
stacksize = (int)r_long(p); stacksize = (int)r_long(p);
if (PyErr_Occurred())
goto code_error;
flags = (int)r_long(p); flags = (int)r_long(p);
if (PyErr_Occurred())
goto code_error;
code = r_object(p); code = r_object(p);
if (code == NULL) if (code == NULL)
goto code_error; goto code_error;
@ -1049,6 +1138,7 @@ PyMarshal_ReadShortFromFile(FILE *fp)
{ {
RFILE rf; RFILE rf;
assert(fp); assert(fp);
rf.readable = NULL;
rf.fp = fp; rf.fp = fp;
rf.current_filename = NULL; rf.current_filename = NULL;
rf.end = rf.ptr = NULL; rf.end = rf.ptr = NULL;
@ -1060,6 +1150,7 @@ PyMarshal_ReadLongFromFile(FILE *fp)
{ {
RFILE rf; RFILE rf;
rf.fp = fp; rf.fp = fp;
rf.readable = NULL;
rf.current_filename = NULL; rf.current_filename = NULL;
rf.ptr = rf.end = NULL; rf.ptr = rf.end = NULL;
return r_long(&rf); return r_long(&rf);
@ -1121,6 +1212,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
RFILE rf; RFILE rf;
PyObject *result; PyObject *result;
rf.fp = fp; rf.fp = fp;
rf.readable = NULL;
rf.current_filename = NULL; rf.current_filename = NULL;
rf.depth = 0; rf.depth = 0;
rf.ptr = rf.end = NULL; rf.ptr = rf.end = NULL;
@ -1134,6 +1226,7 @@ PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
RFILE rf; RFILE rf;
PyObject *result; PyObject *result;
rf.fp = NULL; rf.fp = NULL;
rf.readable = NULL;
rf.current_filename = NULL; rf.current_filename = NULL;
rf.ptr = str; rf.ptr = str;
rf.end = str + len; rf.end = str + len;
@ -1149,6 +1242,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
PyObject *res = NULL; PyObject *res = NULL;
wf.fp = NULL; wf.fp = NULL;
wf.readable = NULL;
wf.str = PyBytes_FromStringAndSize((char *)NULL, 50); wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
if (wf.str == NULL) if (wf.str == NULL)
return NULL; return NULL;
@ -1224,32 +1318,33 @@ The version argument indicates the data format that dump should use.");
/* marshal.load(f): read one marshalled object from the stream-like object f.
 *
 * Data is pulled from f on demand through its read() method (see
 * rf.readable), so bytes following the object are left unread.
 *
 * Returns a new reference, or NULL with an exception set.  TypeError is
 * raised when f.read() does not return bytes.
 */
static PyObject *
marshal_load(PyObject *self, PyObject *f)
{
    PyObject *data, *result = NULL;
    RFILE rf;

    /*
     * Make a call to the read method, but read zero bytes.
     * This is to ensure that the object passed in at least
     * has a read method which returns bytes.
     */
    data = PyObject_CallMethod(f, "read", "i", 0);
    if (data == NULL)
        return NULL;
    if (!PyBytes_Check(data)) {
        PyErr_Format(PyExc_TypeError,
                     "f.read() returned not bytes but %.100s",
                     data->ob_type->tp_name);
    }
    else {
        rf.strings = PyList_New(0);
        /* On allocation failure the exception is already set; fall
           through and return NULL instead of using a NULL list. */
        if (rf.strings != NULL) {
            rf.depth = 0;
            rf.fp = NULL;
            rf.readable = f;
            /* Initialise the remaining reader fields the same way the
               other RFILE set-up sites in this file do. */
            rf.current_filename = NULL;
            rf.ptr = rf.end = NULL;
            result = read_object(&rf);
            Py_DECREF(rf.strings);
        }
    }
    Py_DECREF(data);
    return result;
}
@ -1300,6 +1395,7 @@ marshal_loads(PyObject *self, PyObject *args)
s = p.buf; s = p.buf;
n = p.len; n = p.len;
rf.fp = NULL; rf.fp = NULL;
rf.readable = NULL;
rf.current_filename = NULL; rf.current_filename = NULL;
rf.ptr = s; rf.ptr = s;
rf.end = s + n; rf.end = s + n;