diff --git a/Lib/pickle.py b/Lib/pickle.py index abed1ca4fab..8c5d51fc15b 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1221,7 +1221,15 @@ class Unpickler: state, slotstate = state if state: try: - inst.__dict__.update(state) + d = inst.__dict__ + try: + for k, v in state.iteritems(): + d[intern(k)] = v + # keys in state don't have to be strings + # don't blow up, but don't go out of our way + except TypeError: + d.update(state) + except RuntimeError: # XXX In restricted execution, the instance's __dict__ # is not accessible. Use the old way of unpickling diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 4ffa7028400..bc0be1f845c 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -938,6 +938,20 @@ class AbstractPickleTests(unittest.TestCase): "Failed protocol %d: %r != %r" % (proto, obj, loaded)) + def test_attribute_name_interning(self): + # Test that attribute names of pickled objects are interned when + # unpickling. + for proto in protocols: + x = C() + x.foo = 42 + x.bar = "hello" + s = self.dumps(x, proto) + y = self.loads(s) + x_keys = sorted(x.__dict__) + y_keys = sorted(y.__dict__) + for x_key, y_key in zip(x_keys, y_keys): + self.assertIs(x_key, y_key) + # Test classes for reduce_ex diff --git a/Misc/NEWS b/Misc/NEWS index 1ccb739edc7..e7516bb0116 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -261,6 +261,10 @@ Core and Builtins Library ------- +- Issue #5084: unpickling now interns the attribute names of pickled objects, + saving memory and avoiding growth in size of subsequent pickles. Proposal + and original patch by Jake McGuire. + - Issue #3002: ``shutil.copyfile()`` and ``shutil.copytree()`` now raise an error when a named pipe is encountered, rather than blocking infinitely. diff --git a/Modules/cPickle.c b/Modules/cPickle.c index 6c7ed9987e1..a0e443ed940 100644 --- a/Modules/cPickle.c +++ b/Modules/cPickle.c @@ -4473,8 +4473,16 @@ load_build(Unpicklerobject *self) i = 0; while (PyDict_Next(state, &i, &d_key, &d_value)) { - if (PyObject_SetItem(dict, d_key, d_value) < 0) + /* normally the keys for instance attributes are + interned. we should try to do that here. */ + Py_INCREF(d_key); + if (PyString_CheckExact(d_key)) + PyString_InternInPlace(&d_key); + if (PyObject_SetItem(dict, d_key, d_value) < 0) { + Py_DECREF(d_key); goto finally; + } + Py_DECREF(d_key); } Py_DECREF(dict); }