Issue #15513: Added a __sizeof__ implementation for pickle classes.

This commit is contained in:
Serhiy Storchaka 2014-12-16 19:40:58 +02:00
commit 01bdd9a980
4 changed files with 184 additions and 4 deletions

View File

@ -1,7 +1,10 @@
import pickle
import io
import collections
import struct
import sys
import unittest
from test import support
from test.pickletester import AbstractPickleTests
@ -138,6 +141,71 @@ if has_c_implementation:
def get_dispatch_table(self):
    # Build a new ChainMap on every call: a fresh empty dict is the first
    # mapping and the module-level pickle.dispatch_table is the fallback.
    layered_table = collections.ChainMap({}, pickle.dispatch_table)
    return layered_table
@support.cpython_only
class SizeofTests(unittest.TestCase):
    # Compares the sizes reported for _pickle.Pickler and _pickle.Unpickler
    # objects with sizes computed from struct format strings.
    check_sizeof = support.check_sizeof

    def test_pickler(self):
        # Expected object.__sizeof__() of a Pickler, from its struct format.
        header_size = support.calcobjsize('5P2n3i2n3iP')
        pickler = _pickle.Pickler(io.BytesIO())
        self.assertEqual(object.__sizeof__(pickler), header_size)

        memo_table_size = struct.calcsize('3nP0n')
        memo_entry_size = struct.calcsize('Pn0P')
        verify = self.check_sizeof

        # Fresh pickler: minimal memo table plus minimal write buffer.
        minimal_memo = memo_table_size + 8 * memo_entry_size
        minimal_buffer = sys.getsizeof(b'x'*4096)
        verify(pickler, header_size + minimal_memo + minimal_buffer)

        for code in range(6):
            pickler.dump(chr(code))

        # Memo table of the size required to save references to 6 objects.
        # The write buffer adds nothing: it is cleared after every dump().
        grown_memo = memo_table_size + 32 * memo_entry_size
        verify(pickler, header_size + grown_memo + 0)

    def test_unpickler(self):
        # Expected object.__sizeof__() of an Unpickler, from its struct
        # format.
        header_size = support.calcobjsize('2Pn2P 2P2n2i5P 2P3n6P2n2i')
        make_unpickler = _pickle.Unpickler
        memo_entry = struct.calcsize('P')  # Size of memo table entry.
        mark_entry = struct.calcsize('n')  # Size of mark table entry.
        verify = self.check_sizeof

        # A freshly created unpickler: minimal memo table plus the encoding
        # and errors names, each counted with one extra byte.
        for encoding in 'ASCII', 'UTF-16', 'latin-1':
            for errors in 'strict', 'replace':
                fresh = make_unpickler(io.BytesIO(),
                                       encoding=encoding, errors=errors)
                self.assertEqual(object.__sizeof__(fresh), header_size)
                names_size = len(encoding) + 1 + len(errors) + 1
                verify(fresh, header_size +
                              32 * memo_entry +  # Minimal memo table size.
                              names_size)

        # Size of an ASCII/strict unpickler before memo and mark tables.
        std_size = header_size + len('ASCII') + 1 + len('strict') + 1

        def check_unpickler(data, memo_size, marks_size):
            # Pickle `data`, load it back, then check the unpickler's size
            # for the given memo and mark table lengths (in entries).
            payload = pickle.dumps(data)
            loaded = make_unpickler(io.BytesIO(payload),
                                    encoding='ASCII', errors='strict')
            loaded.load()
            verify(loaded, std_size +
                           memo_size * memo_entry +
                           marks_size * mark_entry)

        def recurse(deep):
            # Nest two-element lists `deep` levels around the integer 0.
            nested = 0
            for _ in range(deep):
                nested = [nested, nested]
            return nested

        check_unpickler(0, 32, 0)
        # 20 is minimal non-empty mark stack size.
        check_unpickler([0] * 100, 32, 20)
        # 128 is memo table size required to save references to 100 objects.
        check_unpickler([chr(i) for i in range(100)], 128, 20)

        check_unpickler(recurse(0), 32, 0)
        check_unpickler(recurse(1), 32, 20)
        check_unpickler(recurse(20), 32, 58)
        check_unpickler(recurse(50), 64, 58)
        check_unpickler(recurse(100), 128, 134)

        # Protocol 0 pickle of 'a': the expected size carries an extra
        # 2 + 1 bytes on top of the minimal memo table.
        text_mode = make_unpickler(io.BytesIO(pickle.dumps('a', 0)),
                                   encoding='ASCII', errors='strict')
        text_mode.load()
        verify(text_mode, std_size + 32 * memo_entry + 2 + 1)
def test_main():
tests = [PickleTests, PyPicklerTests, PyPersPicklerTests,
@ -148,7 +216,7 @@ def test_main():
PyPicklerUnpicklerObjectTests,
CPicklerUnpicklerObjectTests,
CDispatchTableTests, CChainDispatchTableTests,
InMemoryPickleTests])
InMemoryPickleTests, SizeofTests])
support.run_unittest(*tests)
support.run_doctest(pickle)

View File

@ -196,7 +196,9 @@ Core and Builtins
Library
-------
- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can
- Issue #15513: Added a __sizeof__ implementation for pickle classes.
- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can
produce more compact result and no longer produces invalid output if input
data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.

View File

@ -375,7 +375,7 @@ static PyTypeObject Pdata_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_pickle.Pdata", /*tp_name*/
sizeof(Pdata), /*tp_basicsize*/
0, /*tp_itemsize*/
sizeof(PyObject *), /*tp_itemsize*/
(destructor)Pdata_dealloc, /*tp_dealloc*/
};
@ -3977,9 +3977,37 @@ _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
Py_RETURN_NONE;
}
/*[clinic input]
_pickle.Pickler.__sizeof__ -> Py_ssize_t
Returns size in memory, in bytes.
[clinic start generated code]*/
static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject *self)
/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
{
    /* Start from the fixed-size PicklerObject struct, then add every
       separately held piece that is currently present. */
    Py_ssize_t total = sizeof(PicklerObject);

    if (self->memo != NULL) {
        /* Memo table header plus all of its allocated entry slots. */
        total += sizeof(PyMemoTable);
        total += self->memo->mt_allocated * sizeof(PyMemoEntry);
    }

    if (self->output_buffer != NULL) {
        /* Size of the output buffer object as reported by
           _PySys_GetSizeOf(). */
        Py_ssize_t buffer_size = _PySys_GetSizeOf(self->output_buffer);

        if (buffer_size == -1) {
            /* Hand the -1 failure value straight back to the caller. */
            return -1;
        }
        total += buffer_size;
    }

    return total;
}
/* Method table for Pickler objects: dump, clear_memo and __sizeof__.
   Each entry comes from a *_METHODDEF macro; the macro definitions carry
   their own trailing comma, which is why no commas appear between the
   entries here.
   NOTE(review): presumably installed as the Pickler type's tp_methods --
   the type object is not visible in this hunk, confirm. */
static struct PyMethodDef Pickler_methods[] = {
_PICKLE_PICKLER_DUMP_METHODDEF
_PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
_PICKLE_PICKLER___SIZEOF___METHODDEF
{NULL, NULL} /* sentinel */
};
@ -6336,9 +6364,37 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name,
return global;
}
/*[clinic input]
_pickle.Unpickler.__sizeof__ -> Py_ssize_t
Returns size in memory, in bytes.
[clinic start generated code]*/
static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
{
    /* Start from the fixed-size UnpicklerObject struct, then add every
       separately held piece that is currently present. */
    Py_ssize_t total = sizeof(UnpicklerObject);
    const char *c_strings[3];
    int i;

    if (self->memo != NULL) {
        /* Memo table: memo_size slots of one object pointer each. */
        total += self->memo_size * sizeof(PyObject *);
    }
    if (self->marks != NULL) {
        /* Mark stack: marks_size slots of one Py_ssize_t each. */
        total += self->marks_size * sizeof(Py_ssize_t);
    }

    /* Optional C strings, each counted with its terminating NUL byte. */
    c_strings[0] = self->input_line;
    c_strings[1] = self->encoding;
    c_strings[2] = self->errors;
    for (i = 0; i < 3; i++) {
        if (c_strings[i] != NULL) {
            total += strlen(c_strings[i]) + 1;
        }
    }

    return total;
}
/* Method table for Unpickler objects: load, find_class and __sizeof__.
   Each entry comes from a *_METHODDEF macro; the macro definitions carry
   their own trailing comma, which is why no commas appear between the
   entries here.
   NOTE(review): presumably installed as the Unpickler type's tp_methods --
   the type object is not visible in this hunk, confirm. */
static struct PyMethodDef Unpickler_methods[] = {
_PICKLE_UNPICKLER_LOAD_METHODDEF
_PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
_PICKLE_UNPICKLER___SIZEOF___METHODDEF
{NULL, NULL} /* sentinel */
};

View File

@ -34,6 +34,33 @@ PyDoc_STRVAR(_pickle_Pickler_dump__doc__,
#define _PICKLE_PICKLER_DUMP_METHODDEF \
{"dump", (PyCFunction)_pickle_Pickler_dump, METH_O, _pickle_Pickler_dump__doc__},
PyDoc_STRVAR(_pickle_Pickler___sizeof____doc__,
"__sizeof__($self, /)\n"
"--\n"
"\n"
"Returns size in memory, in bytes.");
#define _PICKLE_PICKLER___SIZEOF___METHODDEF \
{"__sizeof__", (PyCFunction)_pickle_Pickler___sizeof__, METH_NOARGS, _pickle_Pickler___sizeof____doc__},
static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject *self);
static PyObject *
_pickle_Pickler___sizeof__(PicklerObject *self, PyObject *Py_UNUSED(ignored))
{
PyObject *return_value = NULL;
Py_ssize_t _return_value;
_return_value = _pickle_Pickler___sizeof___impl(self);
if ((_return_value == -1) && PyErr_Occurred())
goto exit;
return_value = PyLong_FromSsize_t(_return_value);
exit:
return return_value;
}
PyDoc_STRVAR(_pickle_Pickler___init____doc__,
"Pickler(file, protocol=None, fix_imports=True)\n"
"--\n"
@ -191,6 +218,33 @@ exit:
return return_value;
}
PyDoc_STRVAR(_pickle_Unpickler___sizeof____doc__,
"__sizeof__($self, /)\n"
"--\n"
"\n"
"Returns size in memory, in bytes.");
#define _PICKLE_UNPICKLER___SIZEOF___METHODDEF \
{"__sizeof__", (PyCFunction)_pickle_Unpickler___sizeof__, METH_NOARGS, _pickle_Unpickler___sizeof____doc__},
static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject *self);
static PyObject *
_pickle_Unpickler___sizeof__(UnpicklerObject *self, PyObject *Py_UNUSED(ignored))
{
PyObject *return_value = NULL;
Py_ssize_t _return_value;
_return_value = _pickle_Unpickler___sizeof___impl(self);
if ((_return_value == -1) && PyErr_Occurred())
goto exit;
return_value = PyLong_FromSsize_t(_return_value);
exit:
return return_value;
}
PyDoc_STRVAR(_pickle_Unpickler___init____doc__,
"Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
"--\n"
@ -488,4 +542,4 @@ _pickle_loads(PyModuleDef *module, PyObject *args, PyObject *kwargs)
exit:
return return_value;
}
/*[clinic end generated code: output=f965b6c7018c898d input=a9049054013a1b77]*/
/*[clinic end generated code: output=3aba79576e240c62 input=a9049054013a1b77]*/