diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 0b2fe1ef2aa..e1a88b6b586 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -1,7 +1,10 @@ import pickle import io import collections +import struct +import sys +import unittest from test import support from test.pickletester import AbstractPickleTests @@ -138,6 +141,71 @@ if has_c_implementation: def get_dispatch_table(self): return collections.ChainMap({}, pickle.dispatch_table) + @support.cpython_only + class SizeofTests(unittest.TestCase): + check_sizeof = support.check_sizeof + + def test_pickler(self): + basesize = support.calcobjsize('5P2n3i2n3iP') + p = _pickle.Pickler(io.BytesIO()) + self.assertEqual(object.__sizeof__(p), basesize) + MT_size = struct.calcsize('3nP0n') + ME_size = struct.calcsize('Pn0P') + check = self.check_sizeof + check(p, basesize + + MT_size + 8 * ME_size + # Minimal memo table size. + sys.getsizeof(b'x'*4096)) # Minimal write buffer size. + for i in range(6): + p.dump(chr(i)) + check(p, basesize + + MT_size + 32 * ME_size + # Size of memo table required to + # save references to 6 objects. + 0) # Write buffer is cleared after every dump(). + + def test_unpickler(self): + basesize = support.calcobjsize('2Pn2P 2P2n2i5P 2P3n6P2n2i') + unpickler = _pickle.Unpickler + P = struct.calcsize('P') # Size of memo table entry. + n = struct.calcsize('n') # Size of mark table entry. + check = self.check_sizeof + for encoding in 'ASCII', 'UTF-16', 'latin-1': + for errors in 'strict', 'replace': + u = unpickler(io.BytesIO(), + encoding=encoding, errors=errors) + self.assertEqual(object.__sizeof__(u), basesize) + check(u, basesize + + 32 * P + # Minimal memo table size. + len(encoding) + 1 + len(errors) + 1) + + stdsize = basesize + len('ASCII') + 1 + len('strict') + 1 + def check_unpickler(data, memo_size, marks_size): + dump = pickle.dumps(data) + u = unpickler(io.BytesIO(dump), + encoding='ASCII', errors='strict') + u.load() + check(u, stdsize + memo_size * P + marks_size * n) + + check_unpickler(0, 32, 0) + # 20 is minimal non-empty mark stack size. + check_unpickler([0] * 100, 32, 20) + # 128 is memo table size required to save references to 100 objects. + check_unpickler([chr(i) for i in range(100)], 128, 20) + def recurse(deep): + data = 0 + for i in range(deep): + data = [data, data] + return data + check_unpickler(recurse(0), 32, 0) + check_unpickler(recurse(1), 32, 20) + check_unpickler(recurse(20), 32, 58) + check_unpickler(recurse(50), 64, 58) + check_unpickler(recurse(100), 128, 134) + + u = unpickler(io.BytesIO(pickle.dumps('a', 0)), + encoding='ASCII', errors='strict') + u.load() + check(u, stdsize + 32 * P + 2 + 1) + def test_main(): tests = [PickleTests, PyPicklerTests, PyPersPicklerTests, @@ -148,7 +216,7 @@ def test_main(): PyPicklerUnpicklerObjectTests, CPicklerUnpicklerObjectTests, CDispatchTableTests, CChainDispatchTableTests, - InMemoryPickleTests]) + InMemoryPickleTests, SizeofTests]) support.run_unittest(*tests) support.run_doctest(pickle) diff --git a/Misc/NEWS b/Misc/NEWS index 437753b8e6b..fcff454a66d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -196,7 +196,9 @@ Core and Builtins Library ------- -- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can +- Issue #15513: Added a __sizeof__ implementation for pickle classes. + +- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can produce more compact result and no longer produces invalid output if input data contains MEMOIZE opcodes together with PUT or BINPUT opcodes. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index ecf0c6ca953..6416233fc6e 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -375,7 +375,7 @@ static PyTypeObject Pdata_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.Pdata", /*tp_name*/ sizeof(Pdata), /*tp_basicsize*/ - 0, /*tp_itemsize*/ + sizeof(PyObject *), /*tp_itemsize*/ (destructor)Pdata_dealloc, /*tp_dealloc*/ }; @@ -3977,9 +3977,37 @@ _pickle_Pickler_dump(PicklerObject *self, PyObject *obj) Py_RETURN_NONE; } +/*[clinic input] + +_pickle.Pickler.__sizeof__ -> Py_ssize_t + +Returns size in memory, in bytes. +[clinic start generated code]*/ + +static Py_ssize_t +_pickle_Pickler___sizeof___impl(PicklerObject *self) +/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/ +{ + Py_ssize_t res, s; + + res = sizeof(PicklerObject); + if (self->memo != NULL) { + res += sizeof(PyMemoTable); + res += self->memo->mt_allocated * sizeof(PyMemoEntry); + } + if (self->output_buffer != NULL) { + s = _PySys_GetSizeOf(self->output_buffer); + if (s == -1) + return -1; + res += s; + } + return res; +} + static struct PyMethodDef Pickler_methods[] = { _PICKLE_PICKLER_DUMP_METHODDEF _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF + _PICKLE_PICKLER___SIZEOF___METHODDEF {NULL, NULL} /* sentinel */ }; @@ -6336,9 +6364,37 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name, return global; } +/*[clinic input] + +_pickle.Unpickler.__sizeof__ -> Py_ssize_t + +Returns size in memory, in bytes. +[clinic start generated code]*/ + +static Py_ssize_t +_pickle_Unpickler___sizeof___impl(UnpicklerObject *self) +/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/ +{ + Py_ssize_t res; + + res = sizeof(UnpicklerObject); + if (self->memo != NULL) + res += self->memo_size * sizeof(PyObject *); + if (self->marks != NULL) + res += self->marks_size * sizeof(Py_ssize_t); + if (self->input_line != NULL) + res += strlen(self->input_line) + 1; + if (self->encoding != NULL) + res += strlen(self->encoding) + 1; + if (self->errors != NULL) + res += strlen(self->errors) + 1; + return res; +} + static struct PyMethodDef Unpickler_methods[] = { _PICKLE_UNPICKLER_LOAD_METHODDEF _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF + _PICKLE_UNPICKLER___SIZEOF___METHODDEF {NULL, NULL} /* sentinel */ }; diff --git a/Modules/clinic/_pickle.c.h b/Modules/clinic/_pickle.c.h index a20a61e6f58..975298ce471 100644 --- a/Modules/clinic/_pickle.c.h +++ b/Modules/clinic/_pickle.c.h @@ -34,6 +34,33 @@ PyDoc_STRVAR(_pickle_Pickler_dump__doc__, #define _PICKLE_PICKLER_DUMP_METHODDEF \ {"dump", (PyCFunction)_pickle_Pickler_dump, METH_O, _pickle_Pickler_dump__doc__}, +PyDoc_STRVAR(_pickle_Pickler___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n" +"Returns size in memory, in bytes."); + +#define _PICKLE_PICKLER___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)_pickle_Pickler___sizeof__, METH_NOARGS, _pickle_Pickler___sizeof____doc__}, + +static Py_ssize_t +_pickle_Pickler___sizeof___impl(PicklerObject *self); + +static PyObject * +_pickle_Pickler___sizeof__(PicklerObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + Py_ssize_t _return_value; + + _return_value = _pickle_Pickler___sizeof___impl(self); + if ((_return_value == -1) && PyErr_Occurred()) + goto exit; + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(_pickle_Pickler___init____doc__, "Pickler(file, protocol=None, fix_imports=True)\n" "--\n" @@ -191,6 +218,33 @@ exit: return return_value; } +PyDoc_STRVAR(_pickle_Unpickler___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n" +"Returns size in memory, in bytes."); + +#define _PICKLE_UNPICKLER___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)_pickle_Unpickler___sizeof__, METH_NOARGS, _pickle_Unpickler___sizeof____doc__}, + +static Py_ssize_t +_pickle_Unpickler___sizeof___impl(UnpicklerObject *self); + +static PyObject * +_pickle_Unpickler___sizeof__(UnpicklerObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + Py_ssize_t _return_value; + + _return_value = _pickle_Unpickler___sizeof___impl(self); + if ((_return_value == -1) && PyErr_Occurred()) + goto exit; + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + PyDoc_STRVAR(_pickle_Unpickler___init____doc__, "Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n" "--\n" @@ -488,4 +542,4 @@ _pickle_loads(PyModuleDef *module, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=f965b6c7018c898d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3aba79576e240c62 input=a9049054013a1b77]*/