mirror of https://github.com/python/cpython
Improve pickle's documentation.
Use double-space for ending a sentence. Add dbpickle.py example. Improve description about persistent IDs.
This commit is contained in:
parent
758bca6e36
commit
5f3b63ad6f
|
@ -0,0 +1,88 @@
|
||||||
|
# Simple example presenting how persistent IDs can be used to pickle
|
||||||
|
# external objects by reference.
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
import sqlite3
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
# Simple class representing a record in our database.
|
||||||
|
MemoRecord = namedtuple("MemoRecord", ("key", "task"))  # fields: key, task
|
||||||
|
|
||||||
|
class DBPickler(pickle.Pickler):
    """Pickler that writes MemoRecord objects by reference.

    Instead of serializing a MemoRecord's contents, a persistent ID made of
    a type tag and the record's key is emitted.  Every other object is
    pickled as usual.
    """

    def persistent_id(self, obj):
        """Return the persistent ID of *obj*, or None to pickle it normally."""
        # Instead of pickling MemoRecord as a regular class instance, we emit
        # a persistent ID.
        if isinstance(obj, MemoRecord):
            # Here, our persistent ID is simply a tuple containing a tag and
            # a key, which refers to a specific record in the database.
            return ("MemoRecord", obj.key)
        # If obj does not have a persistent ID, return None.  This means obj
        # needs to be pickled as usual.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class DBUnpickler(pickle.Unpickler):
    """Unpickler that resolves MemoRecord persistent IDs through a database.

    *file* is the binary stream to read the pickle from and *connection* is
    the database connection used to look up the referenced records.
    """

    def __init__(self, file, connection):
        super().__init__(file)
        self.connection = connection

    def persistent_load(self, pid):
        """Return the object referenced by the persistent ID *pid*.

        This method is invoked whenever a persistent ID is encountered.
        Here, *pid* is the ``(tag, key)`` tuple returned by DBPickler.

        Raises pickle.UnpicklingError if *pid* is malformed, carries an
        unknown tag, or refers to a record that is not in the database.
        """
        try:
            type_tag, key_id = pid
        except (TypeError, ValueError) as exc:
            # Not a two-item sequence: report it as an unpickling problem
            # rather than leaking an unrelated unpacking error.
            raise pickle.UnpicklingError("malformed persistent ID") from exc

        if type_tag != "MemoRecord":
            # Always raise an error if you cannot return the correct object.
            # Otherwise, the unpickler will think None is the object
            # referenced by the persistent ID.
            raise pickle.UnpicklingError("unsupported persistent object")

        # Fetch the referenced record from the database and return it.
        cursor = self.connection.cursor()
        cursor.execute("SELECT key, task FROM memos WHERE key=?",
                       (str(key_id),))
        row = cursor.fetchone()
        if row is None:
            # fetchone() returns None when no row matches; the record this
            # persistent ID points to no longer exists.
            raise pickle.UnpicklingError(
                "no MemoRecord with key %r in the database" % (key_id,))
        key, task = row
        return MemoRecord(key, task)
|
||||||
|
|
||||||
|
|
||||||
|
def main(verbose=True):
    """Round-trip database records through DBPickler and DBUnpickler.

    An in-memory SQLite database is populated with a few memos, the records
    are pickled by reference, one record is modified in the database, and
    the pickle is loaded again.  When *verbose* is true, the records are
    printed before pickling and after unpickling.
    """
    import io
    import pprint

    # Initialize and populate our database.
    conn = sqlite3.connect(":memory:")
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE memos(key INTEGER PRIMARY KEY, task TEXT)")
        tasks = (
            'give food to fish',
            'prepare group meeting',
            'fight with a zebra',
        )
        for task in tasks:
            cursor.execute("INSERT INTO memos VALUES(NULL, ?)", (task,))

        # Fetch the records to be pickled.
        cursor.execute("SELECT * FROM memos")
        memos = [MemoRecord(key, task) for key, task in cursor]
        # Save the records using our custom DBPickler.
        file = io.BytesIO()
        DBPickler(file).dump(memos)

        if verbose:
            print("Records to be pickled:")
            pprint.pprint(memos)

        # Update a record, just for good measure.
        cursor.execute("UPDATE memos SET task='learn italian' WHERE key=1")

        # Load the records from the pickle data stream.
        file.seek(0)
        memos = DBUnpickler(file, conn).load()

        if verbose:
            print("Unpickled records:")
            pprint.pprint(memos)
    finally:
        # Release the database connection even if a step above fails.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Run the demonstration only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|
@ -27,7 +27,7 @@ Relationship to other Python modules
|
||||||
------------------------------------
|
------------------------------------
|
||||||
|
|
||||||
The :mod:`pickle` module has a transparent optimizer (:mod:`_pickle`) written
|
The :mod:`pickle` module has a transparent optimizer (:mod:`_pickle`) written
|
||||||
in C. It is used whenever available. Otherwise the pure Python implementation is
|
in C. It is used whenever available. Otherwise the pure Python implementation is
|
||||||
used.
|
used.
|
||||||
|
|
||||||
Python has a more primitive serialization module called :mod:`marshal`, but in
|
Python has a more primitive serialization module called :mod:`marshal`, but in
|
||||||
|
@ -108,7 +108,7 @@ There are currently 4 different protocols which can be used for pickling.
|
||||||
efficient pickling of :term:`new-style class`\es.
|
efficient pickling of :term:`new-style class`\es.
|
||||||
|
|
||||||
* Protocol version 3 was added in Python 3.0. It has explicit support for
|
* Protocol version 3 was added in Python 3.0. It has explicit support for
|
||||||
bytes and cannot be unpickled by Python 2.x pickle modules. This is
|
bytes and cannot be unpickled by Python 2.x pickle modules. This is
|
||||||
the current recommended protocol, use it whenever it is possible.
|
the current recommended protocol, use it whenever it is possible.
|
||||||
|
|
||||||
Refer to :pep:`307` for more information.
|
Refer to :pep:`307` for more information.
|
||||||
|
@ -166,7 +166,7 @@ process more convenient:
|
||||||
Python needed to read the pickle produced.
|
Python needed to read the pickle produced.
|
||||||
|
|
||||||
The *file* argument must have a write() method that accepts a single bytes
|
The *file* argument must have a write() method that accepts a single bytes
|
||||||
argument. It can thus be a file object opened for binary writing, a
|
argument. It can thus be a file object opened for binary writing, a
|
||||||
io.BytesIO instance, or any other custom object that meets this interface.
|
io.BytesIO instance, or any other custom object that meets this interface.
|
||||||
|
|
||||||
.. function:: dumps(obj[, protocol])
|
.. function:: dumps(obj[, protocol])
|
||||||
|
@ -220,7 +220,7 @@ The :mod:`pickle` module defines three exceptions:
|
||||||
|
|
||||||
.. exception:: PickleError
|
.. exception:: PickleError
|
||||||
|
|
||||||
Common base class for the other pickling exceptions. It inherits
|
Common base class for the other pickling exceptions. It inherits
|
||||||
:exc:`Exception`.
|
:exc:`Exception`.
|
||||||
|
|
||||||
.. exception:: PicklingError
|
.. exception:: PicklingError
|
||||||
|
@ -228,10 +228,13 @@ The :mod:`pickle` module defines three exceptions:
|
||||||
Error raised when an unpicklable object is encountered by :class:`Pickler`.
|
Error raised when an unpicklable object is encountered by :class:`Pickler`.
|
||||||
It inherits :exc:`PickleError`.
|
It inherits :exc:`PickleError`.
|
||||||
|
|
||||||
|
Refer to :ref:`pickle-picklable` to learn what kinds of objects can be
|
||||||
|
pickled.
|
||||||
|
|
||||||
.. exception:: UnpicklingError
|
.. exception:: UnpicklingError
|
||||||
|
|
||||||
Error raised when there is a problem unpickling an object, such as a data
|
Error raised when there is a problem unpickling an object, such as a data
|
||||||
corruption or a security violation. It inherits :exc:`PickleError`.
|
corruption or a security violation. It inherits :exc:`PickleError`.
|
||||||
|
|
||||||
Note that other exceptions may also be raised during unpickling, including
|
Note that other exceptions may also be raised during unpickling, including
|
||||||
(but not necessarily limited to) AttributeError, EOFError, ImportError, and
|
(but not necessarily limited to) AttributeError, EOFError, ImportError, and
|
||||||
|
@ -254,7 +257,7 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
|
||||||
Python needed to read the pickle produced.
|
Python needed to read the pickle produced.
|
||||||
|
|
||||||
The *file* argument must have a write() method that accepts a single bytes
|
The *file* argument must have a write() method that accepts a single bytes
|
||||||
argument. It can thus be a file object opened for binary writing, a
|
argument. It can thus be a file object opened for binary writing, a
|
||||||
io.BytesIO instance, or any other custom object that meets this interface.
|
io.BytesIO instance, or any other custom object that meets this interface.
|
||||||
|
|
||||||
.. method:: dump(obj)
|
.. method:: dump(obj)
|
||||||
|
@ -276,8 +279,8 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
|
||||||
|
|
||||||
.. method:: clear_memo()
|
.. method:: clear_memo()
|
||||||
|
|
||||||
Deprecated. Use the :meth:`clear` method on the :attr:`memo`. Clear the
|
Deprecated. Use the :meth:`clear` method on :attr:`memo`, instead.
|
||||||
pickler's memo, useful when reusing picklers.
|
Clear the pickler's memo, useful when reusing picklers.
|
||||||
|
|
||||||
.. attribute:: fast
|
.. attribute:: fast
|
||||||
|
|
||||||
|
@ -329,24 +332,28 @@ return the old value, not the modified one.
|
||||||
|
|
||||||
Read a pickled object representation from the open file object given in
|
Read a pickled object representation from the open file object given in
|
||||||
the constructor, and return the reconstituted object hierarchy specified
|
the constructor, and return the reconstituted object hierarchy specified
|
||||||
therein. Bytes past the pickled object's representation are ignored.
|
therein. Bytes past the pickled object's representation are ignored.
|
||||||
|
|
||||||
.. method:: persistent_load(pid)
|
.. method:: persistent_load(pid)
|
||||||
|
|
||||||
Raise an :exc:`UnpicklingError` by default.
|
Raise an :exc:`UnpicklingError` by default.
|
||||||
|
|
||||||
If defined, :meth:`persistent_load` should return the object specified by
|
If defined, :meth:`persistent_load` should return the object specified by
|
||||||
the persistent ID *pid*. On errors, such as if an invalid persistent ID is
|
the persistent ID *pid*. If an invalid persistent ID is encountered, an
|
||||||
encountered, an :exc:`UnpicklingError` should be raised.
|
:exc:`UnpicklingError` should be raised.
|
||||||
|
|
||||||
See :ref:`pickle-persistent` for details and examples of uses.
|
See :ref:`pickle-persistent` for details and examples of uses.
|
||||||
|
|
||||||
.. method:: find_class(module, name)
|
.. method:: find_class(module, name)
|
||||||
|
|
||||||
Import *module* if necessary and return the object called *name* from it.
|
Import *module* if necessary and return the object called *name* from it,
|
||||||
Subclasses may override this to gain control over what type of objects can
|
where the *module* and *name* arguments are :class:`str` objects.
|
||||||
be loaded, potentially reducing security risks.
|
|
||||||
|
|
||||||
|
Subclasses may override this to gain control over what type of objects and
|
||||||
|
how they can be loaded, potentially reducing security risks.
|
||||||
|
|
||||||
|
|
||||||
|
.. _pickle-picklable:
|
||||||
|
|
||||||
What can be pickled and unpickled?
|
What can be pickled and unpickled?
|
||||||
----------------------------------
|
----------------------------------
|
||||||
|
@ -372,9 +379,9 @@ The following types can be pickled:
|
||||||
|
|
||||||
Attempts to pickle unpicklable objects will raise the :exc:`PicklingError`
|
Attempts to pickle unpicklable objects will raise the :exc:`PicklingError`
|
||||||
exception; when this happens, an unspecified number of bytes may have already
|
exception; when this happens, an unspecified number of bytes may have already
|
||||||
been written to the underlying file. Trying to pickle a highly recursive data
|
been written to the underlying file. Trying to pickle a highly recursive data
|
||||||
structure may exceed the maximum recursion depth, a :exc:`RuntimeError` will be
|
structure may exceed the maximum recursion depth, a :exc:`RuntimeError` will be
|
||||||
raised in this case. You can carefully raise this limit with
|
raised in this case. You can carefully raise this limit with
|
||||||
:func:`sys.setrecursionlimit`.
|
:func:`sys.setrecursionlimit`.
|
||||||
|
|
||||||
Note that functions (built-in and user-defined) are pickled by "fully qualified"
|
Note that functions (built-in and user-defined) are pickled by "fully qualified"
|
||||||
|
@ -390,7 +397,7 @@ pickled, so in the following example the class attribute ``attr`` is not
|
||||||
restored in the unpickling environment::
|
restored in the unpickling environment::
|
||||||
|
|
||||||
class Foo:
|
class Foo:
|
||||||
attr = 'a class attr'
|
attr = 'A class attribute'
|
||||||
|
|
||||||
picklestring = pickle.dumps(Foo)
|
picklestring = pickle.dumps(Foo)
|
||||||
|
|
||||||
|
@ -571,79 +578,30 @@ Pickling and unpickling external objects
|
||||||
|
|
||||||
For the benefit of object persistence, the :mod:`pickle` module supports the
|
For the benefit of object persistence, the :mod:`pickle` module supports the
|
||||||
notion of a reference to an object outside the pickled data stream. Such
|
notion of a reference to an object outside the pickled data stream. Such
|
||||||
objects are referenced by a "persistent id", which is just an arbitrary string
|
objects are referenced by a persistent ID, which should be either a string of
|
||||||
of printable ASCII characters. The resolution of such names is not defined by
|
alphanumeric characters (for protocol 0) [#]_ or just an arbitrary object (for
|
||||||
the :mod:`pickle` module; it will delegate this resolution to user defined
|
any newer protocol).
|
||||||
functions on the pickler and unpickler.
|
|
||||||
|
|
||||||
To define external persistent id resolution, you need to set the
|
The resolution of such persistent IDs is not defined by the :mod:`pickle`
|
||||||
:attr:`persistent_id` attribute of the pickler object and the
|
module; it will delegate this resolution to the user defined methods on the
|
||||||
:attr:`persistent_load` attribute of the unpickler object.
|
pickler and unpickler, :meth:`persistent_id` and :meth:`persistent_load`
|
||||||
|
respectively.
|
||||||
|
|
||||||
To pickle objects that have an external persistent id, the pickler must have a
|
To pickle objects that have an external persistent id, the pickler must have a
|
||||||
custom :func:`persistent_id` method that takes an object as an argument and
|
custom :meth:`persistent_id` method that takes an object as an argument and
|
||||||
returns either ``None`` or the persistent id for that object. When ``None`` is
|
returns either ``None`` or the persistent id for that object. When ``None`` is
|
||||||
returned, the pickler simply pickles the object as normal. When a persistent id
|
returned, the pickler simply pickles the object as normal. When a persistent ID
|
||||||
string is returned, the pickler will pickle that string, along with a marker so
|
is returned, the pickler will pickle that object, along with a marker so
|
||||||
that the unpickler will recognize the string as a persistent id.
|
that the unpickler will recognize it as a persistent ID.
|
||||||
|
|
||||||
To unpickle external objects, the unpickler must have a custom
|
To unpickle external objects, the unpickler must have a custom
|
||||||
:func:`persistent_load` function that takes a persistent id string and returns
|
:meth:`persistent_load` method that takes a persistent ID object and returns the
|
||||||
the referenced object.
|
referenced object.
|
||||||
|
|
||||||
Here's a silly example that *might* shed more light::
|
Example:
|
||||||
|
|
||||||
import pickle
|
.. highlightlang:: python
|
||||||
from io import StringIO
|
.. literalinclude:: ../includes/dbpickle.py
|
||||||
|
|
||||||
src = StringIO()
|
|
||||||
p = pickle.Pickler(src)
|
|
||||||
|
|
||||||
def persistent_id(obj):
|
|
||||||
if hasattr(obj, 'x'):
|
|
||||||
return 'the value %d' % obj.x
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
p.persistent_id = persistent_id
|
|
||||||
|
|
||||||
class Integer:
|
|
||||||
def __init__(self, x):
|
|
||||||
self.x = x
|
|
||||||
def __str__(self):
|
|
||||||
return 'My name is integer %d' % self.x
|
|
||||||
|
|
||||||
i = Integer(7)
|
|
||||||
print(i)
|
|
||||||
p.dump(i)
|
|
||||||
|
|
||||||
datastream = src.getvalue()
|
|
||||||
print(repr(datastream))
|
|
||||||
dst = StringIO(datastream)
|
|
||||||
|
|
||||||
up = pickle.Unpickler(dst)
|
|
||||||
|
|
||||||
class FancyInteger(Integer):
|
|
||||||
def __str__(self):
|
|
||||||
return 'I am the integer %d' % self.x
|
|
||||||
|
|
||||||
def persistent_load(persid):
|
|
||||||
if persid.startswith('the value '):
|
|
||||||
value = int(persid.split()[2])
|
|
||||||
return FancyInteger(value)
|
|
||||||
else:
|
|
||||||
raise pickle.UnpicklingError('Invalid persistent id')
|
|
||||||
|
|
||||||
up.persistent_load = persistent_load
|
|
||||||
|
|
||||||
j = up.load()
|
|
||||||
print(j)
|
|
||||||
|
|
||||||
|
|
||||||
.. BAW: pickle supports something called inst_persistent_id()
|
|
||||||
which appears to give unknown types a second shot at producing a persistent
|
|
||||||
id. Since Jim Fulton can't remember why it was added or what it's for, I'm
|
|
||||||
leaving it undocumented.
|
|
||||||
|
|
||||||
|
|
||||||
.. _pickle-sub:
|
.. _pickle-sub:
|
||||||
|
@ -808,5 +766,10 @@ the same process or a new process. ::
|
||||||
|
|
||||||
.. [#] These methods can also be used to implement copying class instances.
|
.. [#] These methods can also be used to implement copying class instances.
|
||||||
|
|
||||||
.. [#] This protocol is also used by the shallow and deep copying operations defined in
|
.. [#] This protocol is also used by the shallow and deep copying operations
|
||||||
the :mod:`copy` module.
|
defined in the :mod:`copy` module.
|
||||||
|
|
||||||
|
.. [#] The limitation on alphanumeric characters is due to the fact that
|
||||||
|
persistent IDs, in protocol 0, are delimited by the newline character.
|
||||||
|
Therefore, if any kind of newline character, such as \r or \n, occurs in
|
||||||
|
persistent IDs, the resulting pickle will become unreadable.
|
||||||
|
|
Loading…
Reference in New Issue