Patch #553171: Add writeback parameter. Also add protocol parameter.

This commit is contained in:
Martin v. Löwis 2003-04-19 20:59:03 +00:00
parent cf615b5275
commit 153c9e493e
4 changed files with 193 additions and 50 deletions

View File

@ -13,15 +13,30 @@ instances, recursive data types, and objects containing lots of shared
sub-objects. The keys are ordinary strings.
\refstmodindex{pickle}
\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,binary=\code{False}}}}
\begin{funcdesc}{open}{filename\optional{,flag='c'\optional{,protocol=\code{None}\optional{,writeback=\code{False}\optional{,binary=\code{None}}}}}}
Open a persistent dictionary. The filename specified is the base filename
for the underlying database. As a side-effect, an extension may be added to
the filename and more than one file may be created. By default, the
underlying database file is opened for reading and writing. The optional
{}\var{flag} parameter has the same interpretation as the \var{flag}
parameter of \function{anydbm.open}. By default, ASCII pickles are used to
serialize values. If the optional \var{binary} parameter is set to
{}\var{True}, binary pickles will be used instead.
parameter of \function{anydbm.open}.
By default, version 0 pickles are used to serialize values.
The version of the pickle protocol can be specified with the
\var{protocol} parameter. \versionchanged[The \var{protocol}
parameter was added. The \var{binary} parameter is deprecated
and provided for backwards compatibility only]{2.3}
By default, mutations to persistent-dictionary mutable entries are not
automatically written back. If the optional \var{writeback} parameter
is set to {}\var{True}, all entries accessed are cached in memory, and
written back at close time; this can make it handier to mutate mutable
entries in the persistent dictionary, but, if many entries are
accessed, it can consume vast amounts of memory for the cache, and it
can make the close operation very slow since all accessed entries are
written back (there is no way to determine which accessed entries are
mutable, nor which ones were actually mutated).
\end{funcdesc}
Shelve objects support all methods supported by dictionaries. This eases
@ -61,33 +76,47 @@ requires knowledge about the database implementation used.
\end{itemize}
\begin{classdesc}{Shelf}{dict\optional{, binary=False}}
\begin{classdesc}{Shelf}{dict\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}
A subclass of \class{UserDict.DictMixin} which stores pickled values in the
\var{dict} object. If the \var{binary} parameter is \code{True}, binary
pickles will be used. This can provide much more compact storage than plain
text pickles, depending on the nature of the objects stored in the database.
\var{dict} object.
By default, version 0 pickles are used to serialize values. The
version of the pickle protocol can be specified with the
\var{protocol} parameter. See the \module{pickle} documentation for a
discussion of the pickle protocols. \versionchanged[The \var{protocol}
parameter was added. The \var{binary} parameter is deprecated and
provided for backwards compatibility only]{2.3}
If the \var{writeback} parameter is \code{True}, the object will hold a
cache of all entries accessed and write them back to the \var{dict} at
sync and close times. This allows natural operations on mutable entries,
but can consume much more memory and make sync and close take a long time.
\end{classdesc}
\begin{classdesc}{BsdDbShelf}{dict\optional{, binary=False}}
A subclass of \class{Shelf} which exposes \method{first}, \method{next},
\method{previous}, \method{last} and \method{set_location} which are
available in the \module{bsddb} module but not in other database modules.
The \var{dict} object passed to the constructor must support those methods.
This is generally accomplished by calling one of \function{bsddb.hashopen},
\begin{classdesc}{BsdDbShelf}{dict\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}
A subclass of \class{Shelf} which exposes \method{first},
\method{next}, \method{previous}, \method{last} and
\method{set_location} which are available in the \module{bsddb} module
but not in other database modules. The \var{dict} object passed to
the constructor must support those methods. This is generally
accomplished by calling one of \function{bsddb.hashopen},
\function{bsddb.btopen} or \function{bsddb.rnopen}. The optional
\var{binary} parameter has the same interpretation as for the \class{Shelf}
class.
\var{protocol}, \var{writeback}, and \var{binary} parameters have the
same interpretation as for the \class{Shelf} class.
\end{classdesc}
\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, binary=False}}}
\begin{classdesc}{DbfilenameShelf}{filename\optional{, flag='c'\optional{, protocol=None\optional{, writeback=False\optional{, binary=None}}}}}
A subclass of \class{Shelf} which accepts a \var{filename} instead of
a dict-like object. The underlying file will be opened using
{}\function{anydbm.open}. By default, the file will be created and
opened for both read and write. The optional \var{flag} parameter has
the same interpretation as for the \function{open} function. The
optional \var{protocol}, \var{writeback}, and \var{binary} parameters
have the same interpretation as for the \class{Shelf} class.
A subclass of \class{Shelf} which accepts a \var{filename} instead of a
dict-like object. The underlying file will be opened using
{}\function{anydbm.open}. By default, the file will be created and opened
for both read and write. The optional \var{flag} parameter has the same
interpretation as for the \function{open} function. The optional
\var{binary} parameter has the same interpretation as for the
{}\class{Shelf} class.
\end{classdesc}
\subsection{Example}
@ -103,13 +132,24 @@ d = shelve.open(filename) # open -- file may get suffix added by low-level
d[key] = data # store data at key (overwrites old data if
# using an existing key)
data = d[key] # retrieve data at key (raise KeyError if no
data = d[key] # retrieve a COPY of data at key (raise KeyError if no
# such key)
del d[key] # delete data stored at key (raises KeyError
# if no such key)
flag = d.has_key(key) # true if the key exists
list = d.keys() # a list of all existing keys (slow!)
# as d was opened WITHOUT writeback=True, beware:
d['xx'] = range(4) # this works as expected, but...
d['xx'].append(5) # *this doesn't!* -- d['xx'] is STILL range(4)!!!
# having opened d without writeback=True, you need to code carefully:
temp = d['xx'] # extracts the copy
temp.append(5) # mutates the copy
d['xx'] = temp # stores the copy right back, to persist it
# or, d=shelve.open(filename,writeback=True) would let you just code
# d['xx'].append(5) and have it work as expected, BUT it would also
# consume more memory and make the d.close() operation slower.
d.close() # close it
\end{verbatim}

View File

@ -15,8 +15,9 @@ object):
d[key] = data # store data at key (overwrites old data if
# using an existing key)
data = d[key] # retrieve data at key (raise KeyError if no
# such key)
data = d[key] # retrieve a COPY of the data at key (raise
# KeyError if no such key) -- NOTE that this
# access returns a *copy* of the entry!
del d[key] # delete data stored at key (raises KeyError
# if no such key)
flag = d.has_key(key) # true if the key exists; same as "key in d"
@ -26,6 +27,33 @@ object):
Dependent on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.
Normally, d[key] returns a COPY of the entry. This needs care when
mutable entries are mutated: for example, if d[key] is a list,
d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever. To append an
item to d[key] in a way that will affect the persistent mapping, use:
data = d[key]
data.append(anitem)
d[key] = data
To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open. When you use:
d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close(). This ensures that
such usage as d[key].append(anitem) works as intended.
However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access. You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
"""
# Try using cPickle and cStringIO if available.
@ -41,6 +69,7 @@ except ImportError:
from StringIO import StringIO
import UserDict
import warnings
__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]
@ -51,9 +80,19 @@ class Shelf(UserDict.DictMixin):
See the module's __doc__ string for an overview of the interface.
"""
def __init__(self, dict, binary=False):
def __init__(self, dict, protocol=None, writeback=False, binary=None):
self.dict = dict
self._binary = binary
if protocol is not None and binary is not None:
raise ValueError, "can't specify both 'protocol' and 'binary'"
if binary is not None:
warnings.warn("The 'binary' argument to Shelf() is deprecated",
PendingDeprecationWarning)
protocol = int(binary)
if protocol is None:
protocol = 0
self._protocol = protocol
self.writeback = writeback
self.cache = {}
def keys(self):
return self.dict.keys()
@ -73,19 +112,32 @@ class Shelf(UserDict.DictMixin):
return default
def __getitem__(self, key):
try:
value = self.cache[key]
except KeyError:
f = StringIO(self.dict[key])
return Unpickler(f).load()
value = Unpickler(f).load()
if self.writeback:
self.cache[key] = value
return value
def __setitem__(self, key, value):
if self.writeback:
self.cache[key] = value
f = StringIO()
p = Pickler(f, self._binary)
p = Pickler(f, self._protocol)
p.dump(value)
self.dict[key] = f.getvalue()
def __delitem__(self, key):
del self.dict[key]
try:
del self.cache[key]
except KeyError:
pass
def close(self):
self.sync()
try:
self.dict.close()
except:
@ -96,6 +148,12 @@ class Shelf(UserDict.DictMixin):
self.close()
def sync(self):
    """Write cached entries back and synchronize the underlying database.

    When the shelf was opened with writeback enabled and the cache is
    non-empty, every cached entry is re-stored through __setitem__ and
    the cache is then emptied.  Afterwards, if the underlying dict
    object provides a sync() method, it is called as well.

    If storing an entry raises, the exception propagates; the writeback
    flag is restored and the cache is left intact so that no accessed
    entry is silently dropped.
    """
    if self.writeback and self.cache:
        # Turn write-back off while flushing so that __setitem__ only
        # pickles into self.dict instead of re-inserting into the cache.
        self.writeback = False
        try:
            # Iterate over a snapshot of the items; safe even if a
            # subclass's __setitem__ touches the cache.
            for key, entry in self.cache.items():
                self[key] = entry
        finally:
            # Always restore the flag: a failed flush must not leave the
            # shelf permanently (and silently) in non-writeback mode.
            self.writeback = True
        # Only reached when every entry was written successfully.
        self.cache = {}
    if hasattr(self.dict, 'sync'):
        self.dict.sync()
@ -113,8 +171,8 @@ class BsdDbShelf(Shelf):
See the module's __doc__ string for an overview of the interface.
"""
def __init__(self, dict, binary=False):
Shelf.__init__(self, dict, binary)
def __init__(self, dict, protocol=None, writeback=False, binary=None):
Shelf.__init__(self, dict, protocol, writeback, binary)
def set_location(self, key):
(key, value) = self.dict.set_location(key)
@ -149,22 +207,25 @@ class DbfilenameShelf(Shelf):
See the module's __doc__ string for an overview of the interface.
"""
def __init__(self, filename, flag='c', binary=False):
def __init__(self, filename, flag='c', protocol=None, writeback=False, binary=None):
import anydbm
Shelf.__init__(self, anydbm.open(filename, flag), binary)
Shelf.__init__(self, anydbm.open(filename, flag), protocol, writeback, binary)
def open(filename, flag='c', protocol=None, writeback=False, binary=None):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
    database.  As a side-effect, an extension may be added to the
    filename and more than one file may be created.  The optional flag
    parameter has the same interpretation as the flag parameter of
    anydbm.open().  The optional protocol parameter specifies the
    version of the pickle protocol (0, 1, or 2).

    If the optional writeback parameter is true, all entries accessed
    are cached in memory and written back when the shelf is synced or
    closed.

    The optional binary parameter is deprecated and may be set to True
    to force the use of binary pickles for serializing data values.  It
    cannot be combined with protocol.

    See the module's __doc__ string for an overview of the interface.
    """
    # Forward every argument in the order DbfilenameShelf.__init__ declares
    # them (filename, flag, protocol, writeback, binary); passing 'binary'
    # in the protocol slot would silently discard the caller's protocol.
    return DbfilenameShelf(filename, flag, protocol, writeback, binary)

View File

@ -28,6 +28,16 @@ class TestCase(unittest.TestCase):
for f in glob.glob(self.fn+"*"):
os.unlink(f)
def test_proto2_file_shelf(self):
try:
s = shelve.open(self.fn, protocol=2)
s['key1'] = (1,2,3,4)
self.assertEqual(s['key1'], (1,2,3,4))
s.close()
finally:
for f in glob.glob(self.fn+"*"):
os.unlink(f)
def test_in_memory_shelf(self):
d1 = {}
s = shelve.Shelf(d1, binary=False)
@ -43,6 +53,27 @@ class TestCase(unittest.TestCase):
self.assertEqual(len(d1), 1)
self.assertNotEqual(d1, d2)
def test_mutable_entry(self):
d1 = {}
s = shelve.Shelf(d1, protocol=2, writeback=False)
s['key1'] = [1,2,3,4]
self.assertEqual(s['key1'], [1,2,3,4])
s['key1'].append(5)
self.assertEqual(s['key1'], [1,2,3,4])
s.close()
d2 = {}
s = shelve.Shelf(d2, protocol=2, writeback=True)
s['key1'] = [1,2,3,4]
self.assertEqual(s['key1'], [1,2,3,4])
s['key1'].append(5)
self.assertEqual(s['key1'], [1,2,3,4,5])
s.close()
self.assertEqual(len(d1), 1)
self.assertEqual(len(d2), 1)
from test_userdict import TestMappingProtocol
class TestShelveBase(TestMappingProtocol):
@ -56,10 +87,10 @@ class TestShelveBase(TestMappingProtocol):
return {"key1":"value1", "key2":2, "key3":(1,2,3)}
def _empty_mapping(self):
if self._in_mem:
x= shelve.Shelf({}, binary = self._binary)
x= shelve.Shelf({}, **self._args)
else:
self.counter+=1
x= shelve.open(self.fn+str(self.counter), binary=self._binary)
x= shelve.open(self.fn+str(self.counter), **self._args)
self._db.append(x)
return x
def tearDown(self):
@ -71,24 +102,32 @@ class TestShelveBase(TestMappingProtocol):
os.unlink(f)
class TestAsciiFileShelve(TestShelveBase):
_binary = False
_args={'binary':False}
_in_mem = False
class TestBinaryFileShelve(TestShelveBase):
_binary = True
_args={'binary':True}
_in_mem = False
class TestProto2FileShelve(TestShelveBase):
_args={'protocol':2}
_in_mem = False
class TestAsciiMemShelve(TestShelveBase):
_binary = False
_args={'binary':False}
_in_mem = True
class TestBinaryMemShelve(TestShelveBase):
_binary = True
_args={'binary':True}
_in_mem = True
class TestProto2MemShelve(TestShelveBase):
_args={'protocol':2}
_in_mem = True
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestAsciiFileShelve))
suite.addTest(unittest.makeSuite(TestBinaryFileShelve))
suite.addTest(unittest.makeSuite(TestProto2FileShelve))
suite.addTest(unittest.makeSuite(TestAsciiMemShelve))
suite.addTest(unittest.makeSuite(TestBinaryMemShelve))
suite.addTest(unittest.makeSuite(TestProto2MemShelve))
suite.addTest(unittest.makeSuite(TestCase))
test_support.run_suite(suite)

View File

@ -123,6 +123,9 @@ Extension modules
Library
-------
- shelve now supports the optional writeback argument, and exposes
pickle protocol versions.
- Several methods of nntplib.NNTP have grown an optional file argument
which specifies a file where to divert the command's output
(already supported by the body() method). (SF patch #720468)