1995-08-10 16:29:28 -03:00
|
|
|
"""A dumb and slow but simple dbm clone.
|
|
|
|
|
|
|
|
For database spam, spam.dir contains the index (a text file),
|
|
|
|
spam.bak *may* contain a backup of the index (also a text file),
|
|
|
|
while spam.dat contains the data (a binary file).
|
|
|
|
|
|
|
|
XXX TO DO:
|
|
|
|
|
|
|
|
- seems to contain a bug when updating...
|
|
|
|
|
|
|
|
- reclaim free space (currently, space once occupied by deleted or expanded
|
|
|
|
items is never reused)
|
|
|
|
|
|
|
|
- support concurrent access (currently, if two processes take turns making
|
|
|
|
updates, they can mess up the index)
|
|
|
|
|
|
|
|
- support efficient access to large databases (currently, the whole index
|
|
|
|
is read when the database is opened, and some updates rewrite the whole index)
|
|
|
|
|
|
|
|
- support opening for read-only (flag = 'm')
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
2001-07-19 07:06:39 -03:00
|
|
|
import os as _os
|
1995-08-10 16:29:28 -03:00
|
|
|
import __builtin__
|
2003-06-28 04:08:39 -03:00
|
|
|
import UserDict
|
1995-08-10 16:29:28 -03:00
|
|
|
|
|
|
|
# Keep a reference to the builtin open(): this module defines its own
# open() function, which hides the builtin under that name.
_open = __builtin__.open
|
|
|
|
|
|
|
|
# Alignment unit, in bytes, for values in the data file: _Database._addval
# pads the file so that every appended value starts at a multiple of this.
_BLOCKSIZE = 512
|
|
|
|
|
2001-01-14 19:36:06 -04:00
|
|
|
# Module-level exception name; per the original note it exists for anydbm.
error = IOError # For anydbm
|
1996-05-28 19:58:40 -03:00
|
|
|
|
2003-06-28 04:08:39 -03:00
|
|
|
class _Database(UserDict.DictMixin):
    """Dictionary-like string store kept in files sharing one base name.

    For base name "spam":
      spam.dir -- text index, one "%r, (%d, %d)\\n" line per key
      spam.dat -- binary file holding the values
      spam.bak -- previous copy of spam.dir, made by _commit()

    The whole index is loaded into memory (self._index) when the object
    is created.  Adding a new key appends a line to the directory file;
    overwriting an existing key only changes the in-memory index, which
    is written back by _commit() (called from __delitem__(), close() and
    __del__()).

    Keys and values must be plain strings.
    """

    def __init__(self, filebasename, mode):
        """Open (creating if needed) the database named filebasename.

        mode is the permission mode used when a file has to be created.
        """
        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = self._open_for_write(self._datfile, 'w')
        f.close()
        self._update()

    def _open_for_write(self, path, how):
        """Open path for writing, creating it with permission self._mode.

        how is 'w' (truncate) or 'a' (append).  The builtin open() takes
        a buffer size, not a permission mode, as its third argument, so
        the file is opened through os.open(), which does accept a
        creation mode, and then wrapped in a file object.
        """
        flags = _os.O_WRONLY | _os.O_CREAT
        if how == 'a':
            flags = flags | _os.O_APPEND
        else:
            flags = flags | _os.O_TRUNC
        try:
            fd = _os.open(path, flags, self._mode)
        except _os.error:
            # Retry with the builtin open() so that a failure surfaces as
            # IOError, the exception callers of this module already expect.
            return _open(path, how)
        return _os.fdopen(fd, how)

    # Read directory file into the in-memory index dict.
    def _update(self):
        """Rebuild self._index from the directory file, if there is one."""
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            # No readable directory file: start with an empty index.
            return
        try:
            for line in f:
                # NOTE(security): eval() executes whatever expression the
                # line contains, so the directory file must be trusted.
                key, pos_and_siz_pair = eval(line)
                self._index[key] = pos_and_siz_pair
        finally:
            f.close()

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Write the in-memory index to the directory file.

        Does nothing once the database has been closed, so close() may
        be called more than once.
        """
        if self._index is None:
            return  # closed: the file names have been cleared as well

        try:
            _os.unlink(self._bakfile)
        except _os.error:
            pass

        try:
            _os.rename(self._dirfile, self._bakfile)
        except _os.error:
            pass

        f = self._open_for_write(self._dirfile, 'w')
        try:
            for key, (pos, siz) in self._index.items():
                f.write("%r, (%d, %d)\n" % (key, pos, siz))
        finally:
            f.close()

    def __getitem__(self, key):
        """Return the value stored for key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            return f.read(siz)
        finally:
            f.close()

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)                # 2 == seek relative to end of file
            pos = int(f.tell())
            # Round the end-of-file offset up to the next block boundary.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0' * (npos - pos))
            f.write(val)
        finally:
            f.close()
        return (npos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the index file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        f = self._open_for_write(self._dirfile, 'a')
        try:
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key; both must be plain strings.

        Raises TypeError otherwise.
        """
        # Exact type test on purpose: keys are written to the directory
        # file with %r and read back with eval().
        if not (type(key) is str and type(val) is str):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.

    def __delitem__(self, key):
        """Remove key (KeyError if absent) and rewrite the directory file."""
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys."""
        return self._index.keys()

    def has_key(self, key):
        """Return true if key is in the database."""
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Write the index out and mark the database closed.

        Calling close() again is harmless.
        """
        self._commit()
        self._index = None
        self._datfile = self._dirfile = self._bakfile = None

    def __del__(self):
        # _index is missing if __init__ failed early, and None after
        # close(); in both cases there is nothing to write.
        if getattr(self, '_index', None) is not None:
            self._commit()
|
2002-02-16 03:34:19 -04:00
|
|
|
|
2001-12-21 01:13:37 -04:00
|
|
|
|
1995-08-10 16:29:28 -03:00
|
|
|
|
2001-12-07 17:54:46 -04:00
|
|
|
def open(file, flag=None, mode=0666):
|
2002-05-29 13:18:42 -03:00
|
|
|
"""Open the database file, filename, and return corresponding object.
|
|
|
|
|
|
|
|
The flag argument, used to control how the database is opened in the
|
|
|
|
other DBM implementations, is ignored in the dumbdbm module; the
|
|
|
|
database is always opened for update, and will be created if it does
|
|
|
|
not exist.
|
|
|
|
|
|
|
|
The optional mode argument is the UNIX mode of the file, used only when
|
|
|
|
the database has to be created. It defaults to octal code 0666 (and
|
|
|
|
will be modified by the prevailing umask).
|
|
|
|
|
|
|
|
"""
|
2003-03-01 18:58:00 -04:00
|
|
|
# flag argument is currently ignored
|
2001-12-07 17:54:46 -04:00
|
|
|
return _Database(file, mode)
|