From a9f48a0d4f0bad22275b5feb78f63a8a8f00a6f8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 2 May 2009 21:41:14 +0000 Subject: [PATCH] Merged revisions 72223 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r72223 | antoine.pitrou | 2009-05-02 23:13:23 +0200 (sam., 02 mai 2009) | 5 lines Issue #5084: unpickling now interns the attribute names of pickled objects, saving memory and avoiding growth in size of subsequent pickles. Proposal and original patch by Jake McGuire. ........ --- Lib/pickle.py | 10 +++++++++- Lib/test/pickletester.py | 14 ++++++++++++++ Misc/NEWS | 4 ++++ Modules/_pickle.c | 16 +++++++++++++++- 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 409d4b2a1a4..b94b3058cd7 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1195,7 +1195,15 @@ class _Unpickler: if isinstance(state, tuple) and len(state) == 2: state, slotstate = state if state: - inst.__dict__.update(state) + d = inst.__dict__ + intern = sys.intern + try: + for k, v in state.items(): + d[intern(k)] = v + # keys in state don't have to be strings + # don't blow up, but don't go out of our way + except TypeError: + d.update(state) if slotstate: for k, v in slotstate.items(): setattr(inst, k, v) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index e3a929fb8d4..1585586d3de 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -932,6 +932,20 @@ class AbstractPickleTests(unittest.TestCase): "Failed protocol %d: %r != %r" % (proto, obj, loaded)) + def test_attribute_name_interning(self): + # Test that attribute names of pickled objects are interned when + # unpickling. 
+ for proto in protocols: + x = C() + x.foo = 42 + x.bar = "hello" + s = self.dumps(x, proto) + y = self.loads(s) + x_keys = sorted(x.__dict__) + y_keys = sorted(y.__dict__) + for x_key, y_key in zip(x_keys, y_keys): + self.assertIs(x_key, y_key) + # Test classes for reduce_ex class REX_one(object): diff --git a/Misc/NEWS b/Misc/NEWS index f4116ad8e78..00270fdcd55 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -109,6 +109,10 @@ Installation Library ------- +- Issue #5084: unpickling now interns the attribute names of pickled objects, + saving memory and avoiding growth in size of subsequent pickles. Proposal + and original patch by Jake McGuire. + - The json module now works exclusively with str and not bytes. - Issue #3959: The ipaddr module has been added to the standard library. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index e9c4a768cce..754d13275cc 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -4020,6 +4020,8 @@ load_build(UnpicklerObject *self) /* Set inst.__dict__ from the state dict (if any). */ if (state != Py_None) { PyObject *dict; + PyObject *d_key, *d_value; + Py_ssize_t i; if (!PyDict_Check(state)) { PyErr_SetString(UnpicklingError, "state is not a dictionary"); @@ -4029,7 +4031,19 @@ load_build(UnpicklerObject *self) if (dict == NULL) goto error; - PyDict_Update(dict, state); + i = 0; + while (PyDict_Next(state, &i, &d_key, &d_value)) { + /* normally the keys for instance attributes are + interned. we should try to do that here. */ + Py_INCREF(d_key); + if (PyUnicode_CheckExact(d_key)) + PyUnicode_InternInPlace(&d_key); + if (PyObject_SetItem(dict, d_key, d_value) < 0) { + Py_DECREF(d_key); + goto error; + } + Py_DECREF(d_key); + } Py_DECREF(dict); }