mirror of https://github.com/python/cpython
635 lines
23 KiB
Python
635 lines
23 KiB
Python
#-----------------------------------------------------------------------
|
|
#
|
|
# Copyright (C) 2000, 2001 by Autonomous Zone Industries
|
|
# Copyright (C) 2002 Gregory P. Smith
|
|
#
|
|
# License: This is free software. You may use this software for any
|
|
# purpose including modification/redistribution, so long as
|
|
# this header remains intact and that you do not claim any
|
|
# rights of ownership or authorship of this software. This
|
|
# software has been tested, but no warranty is expressed or
|
|
# implied.
|
|
#
|
|
# -- Gregory P. Smith <greg@electricrain.com>
|
|
|
|
# This provides a simple database table interface built on top of
|
|
# the Python BerkeleyDB 3 interface.
|
|
#
|
|
_cvsid = '$Id$'
|
|
|
|
import string
|
|
import sys
|
|
try:
|
|
import cPickle
|
|
pickle = cPickle
|
|
except ImportError:
|
|
import pickle
|
|
import whrandom
|
|
import xdrlib
|
|
import re
|
|
import copy
|
|
|
|
from bsddb.db import *
|
|
|
|
|
|
class TableDBError(StandardError): pass
|
|
class TableAlreadyExists(TableDBError):
    """Raised when a table is created under a name that is already in use."""
|
|
|
|
|
|
class Cond:
    """Base condition callable: accepts every value it is given."""

    def __call__(self, s):
        # No filtering at all; the argument is ignored and the result
        # is always true.
        return 1
|
|
|
|
class ExactCond(Cond):
    """Condition callable that accepts only values equal to a given string."""

    def __init__(self, strtomatch):
        # The value every candidate is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        wanted = self.strtomatch
        return s == wanted
|
|
|
|
class PrefixCond(Cond):
    """Condition callable that accepts values starting with a given prefix."""

    def __init__(self, prefix):
        self.prefix = prefix

    def __call__(self, s):
        # Compare the leading slice of the candidate with the prefix.
        wanted = self.prefix
        head = s[:len(wanted)]
        return head == wanted
|
|
|
|
class PostfixCond(Cond):
    """Condition callable that accepts values ending with a given postfix.

    An empty postfix accepts every string, mirroring what PrefixCond
    does with an empty prefix.
    """

    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        # endswith() rather than s[-len(postfix):]: for an empty postfix
        # the slice s[-0:] is the *whole* string, so the slice comparison
        # only ever accepted the empty string.
        return s.endswith(self.postfix)
|
|
|
|
class LikeCond(Cond):
    """
    Condition callable that matches using an SQL 'LIKE' style string.

    '%' is the wildcard (any run of characters); every other character
    is matched literally.  Matching is case insensitive unless other
    re_flags are supplied.

    This isn't perfect but it should work for the simple common cases.

    Attributes:
        likestr -- the regular expression text derived from the LIKE string
        re      -- the compiled, anchored pattern

    Calling the instance returns the match object (true) or None.
    """

    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # Escape every character that has a special meaning in a regular
        # expression so it is matched literally.  The backslash has to be
        # handled first, otherwise the backslashes inserted for the other
        # characters would themselves get doubled.
        chars_to_escape = '\\.*+()[]?^$|{}'
        for char in chars_to_escape:
            likestr = likestr.replace(char, '\\' + char)
        # convert %s to wildcards
        self.likestr = likestr.replace('%', '.*')
        self.re = re.compile('^' + self.likestr + '$', re_flags)

    def __call__(self, s):
        return self.re.match(s)
|
|
|
|
#
|
|
# keys used to store database metadata
|
|
#
|
|
_table_names_key = '__TABLE_NAMES__' # list of the tables in this db
|
|
_columns = '._COLUMNS__' # table_name+this key contains a list of columns
|
|
def _columns_key(table) : return table + _columns
|
|
|
|
#
|
|
# these keys are found within table sub databases
|
|
#
|
|
_data = '._DATA_.' # this+column+this+rowid key contains table data
|
|
_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
|
|
# row in the table. (no data is stored)
|
|
_rowid_str_len = 8 # length in bytes of the unique rowid strings
|
|
def _data_key(table, col, rowid) : return table + _data + col + _data + rowid
|
|
def _search_col_data_key(table, col) : return table + _data + col + _data
|
|
def _search_all_data_key(table) : return table + _data
|
|
def _rowid_key(table, rowid) : return table + _rowid + rowid + _rowid
|
|
def _search_rowid_key(table) : return table + _rowid
|
|
|
|
def contains_metastrings(s):
    """Tell whether the string contains any reserved metadata marker.

    Returns 1 when one of the key fragments used internally by dbtables
    occurs anywhere in s (such a name could interfere with database
    operation), otherwise 0.
    """
    reserved_markers = (_table_names_key, _columns, _data, _rowid)
    for marker in reserved_markers:
        if string.find(s, marker) >= 0:
            return 1
    return 0
|
|
|
|
|
|
class bsdTableDB :
|
|
|
|
# Save close() from bombing out if __init__() failed
|
|
db = None
|
|
env = None
|
|
|
|
def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0) :
|
|
"""bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
|
|
Open database name in the dbhome BerkeleyDB directory.
|
|
Use keyword arguments when calling this constructor.
|
|
"""
|
|
myflags = DB_THREAD
|
|
if create :
|
|
myflags = myflags | DB_CREATE
|
|
flagsforenv = DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | dbflags
|
|
if recover :
|
|
flagsforenv = flagsforenv | DB_RECOVER
|
|
self.env = DBEnv()
|
|
self.env.set_lk_detect(DB_LOCK_DEFAULT) # enable auto deadlock avoidance
|
|
self.env.open(dbhome, myflags | flagsforenv)
|
|
if truncate :
|
|
myflags = myflags | DB_TRUNCATE
|
|
self.db = DB(self.env)
|
|
self.db.set_flags(DB_DUP) # allow duplicate entries [warning: be careful w/ metadata]
|
|
self.db.open(filename, DB_BTREE, myflags, mode)
|
|
|
|
self.dbfilename = filename
|
|
|
|
# Initialize the table names list if this is a new database
|
|
if not self.db.has_key(_table_names_key) :
|
|
self.db.put(_table_names_key, pickle.dumps([], 1))
|
|
|
|
# TODO verify more of the database's metadata?
|
|
|
|
self.__tablecolumns = {}
|
|
|
|
def __del__(self):
    """Close the database and environment when the object is destroyed."""
    self.close()
|
|
|
|
def close(self):
    """Close the database and then its environment.

    Safe to call more than once and on a partially constructed object:
    handles that are None are skipped.  Both attributes are reset to
    None before anything is closed, so a close() that raises is not
    retried on the same handle (e.g. from __del__), and the environment
    is closed even when closing the database fails.
    """
    db, self.db = self.db, None
    env, self.env = self.env, None
    try:
        if db is not None:
            db.close()
    finally:
        # Runs whether or not db.close() raised.
        if env is not None:
            env.close()
|
|
|
|
def checkpoint(self, mins=0):
    """Ask the environment for a transaction checkpoint.

    `mins` is handed unchanged to DBEnv.txn_checkpoint().  A
    DBIncompleteError from the checkpoint is ignored.
    """
    environment = self.env
    try:
        environment.txn_checkpoint(mins)
    except DBIncompleteError:
        # deliberately swallowed; nothing further to do
        return
|
|
|
|
def sync(self):
    """Call sync() on the database; a DBIncompleteError is ignored."""
    database = self.db
    try:
        database.sync()
    except DBIncompleteError:
        # deliberately swallowed; nothing further to do
        return
|
|
|
|
def _db_print(self) :
|
|
"""Print the database to stdout for debugging"""
|
|
print "******** Printing raw database for debugging ********"
|
|
cur = self.db.cursor()
|
|
try:
|
|
key, data = cur.first()
|
|
while 1 :
|
|
print `{key: data}`
|
|
next = cur.next()
|
|
if next:
|
|
key, data = next
|
|
else:
|
|
cur.close()
|
|
return
|
|
except DBNotFoundError:
|
|
cur.close()
|
|
|
|
|
|
def CreateTable(self, table, columns) :
|
|
"""CreateTable(table, columns) - Create a new table in the database
|
|
raises TableDBError if it already exists or for other DB errors.
|
|
"""
|
|
assert type(columns) == type([])
|
|
txn = None
|
|
try:
|
|
# checking sanity of the table and column names here on
|
|
# table creation will prevent problems elsewhere.
|
|
if contains_metastrings(table) :
|
|
raise ValueError, "bad table name: contains reserved metastrings"
|
|
for column in columns :
|
|
if contains_metastrings(column) :
|
|
raise ValueError, "bad column name: contains reserved metastrings"
|
|
|
|
columnlist_key = _columns_key(table)
|
|
if self.db.has_key(columnlist_key) :
|
|
raise TableAlreadyExists, "table already exists"
|
|
|
|
txn = self.env.txn_begin()
|
|
# store the table's column info
|
|
self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
|
|
|
|
# add the table name to the tablelist
|
|
tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
|
|
tablelist.append(table)
|
|
self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
|
|
self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
|
|
|
|
txn.commit()
|
|
txn = None
|
|
|
|
except DBError, dberror:
|
|
if txn :
|
|
txn.abort()
|
|
raise TableDBError, dberror[1]
|
|
|
|
|
|
def ListTableColumns(self, table):
    """Return a list of columns in the given table.

    Returns [] if the table doesn't exist.  Raises ValueError when the
    table name contains a reserved metadata string.
    """
    assert type(table) == type('')
    if contains_metastrings(table):
        raise ValueError("bad table name: contains reserved metastrings")

    columnlist_key = _columns_key(table)
    if self.db.has_key(columnlist_key):
        pickled = self.db.get(columnlist_key)
        if pickled:
            return pickle.loads(pickled)
    # unknown table, or nothing stored under its column key
    return []
|
|
|
|
def ListTables(self):
    """Return a list of tables in this database ([] when none is stored)."""
    pickled = self.db.get(_table_names_key)
    if not pickled:
        return []
    return pickle.loads(pickled)
|
|
|
|
def CreateOrExtendTable(self, table, columns):
|
|
"""CreateOrExtendTable(table, columns) - Create a new table in the database.
|
|
If a table of this name already exists, extend it to have any
|
|
additional columns present in the given list as well as
|
|
all of its current columns.
|
|
"""
|
|
assert type(columns) == type([])
|
|
try:
|
|
self.CreateTable(table, columns)
|
|
except TableAlreadyExists:
|
|
# the table already existed, add any new columns
|
|
txn = None
|
|
try:
|
|
columnlist_key = _columns_key(table)
|
|
txn = self.env.txn_begin()
|
|
|
|
# load the current column list
|
|
oldcolumnlist = pickle.loads(self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
|
|
# create a hash table for fast lookups of column names in the loop below
|
|
oldcolumnhash = {}
|
|
for c in oldcolumnlist:
|
|
oldcolumnhash[c] = c
|
|
|
|
# create a new column list containing both the old and new column names
|
|
newcolumnlist = copy.copy(oldcolumnlist)
|
|
for c in columns:
|
|
if not oldcolumnhash.has_key(c):
|
|
newcolumnlist.append(c)
|
|
|
|
# store the table's new extended column list
|
|
if newcolumnlist != oldcolumnlist :
|
|
# delete the old one first since we opened with DB_DUP
|
|
self.db.delete(columnlist_key, txn)
|
|
self.db.put(columnlist_key, pickle.dumps(newcolumnlist, 1), txn=txn)
|
|
|
|
txn.commit()
|
|
txn = None
|
|
|
|
self.__load_column_info(table)
|
|
except DBError, dberror:
|
|
if txn:
|
|
txn.abort()
|
|
raise TableDBError, dberror[1]
|
|
|
|
|
|
def __load_column_info(self, table):
    """Load the column list of `table` into the self.__tablecolumns cache.

    Raises TableDBError if no column list is stored for the table.
    """
    try:
        pickled = self.db.get(_columns_key(table))
    except DBNotFoundError:
        pickled = None
    # a missing key and an empty value are both reported the same way
    if not pickled:
        raise TableDBError("unknown table: " + repr(table))
    self.__tablecolumns[table] = pickle.loads(pickled)
|
|
|
|
def __new_rowid(self, table, txn=None):
    """Create a new unique row identifier

    The identifier is the XDR packing of two random integers.  It is
    reserved by storing its row marker key with DB_NOOVERWRITE (using
    `txn` when given); on a collision a fresh identifier is tried.
    Returns the identifier string.
    """
    while 1:
        # Generate a random 64-bit row ID string
        # (note: this code has <64 bits of randomness
        # but it's plenty for our database id needs!)
        packer = xdrlib.Packer()
        for _half in (0, 1):
            packer.pack_int(int(whrandom.random() * 2147483647))
        newid = packer.get_buffer()

        # Guarantee uniqueness by adding this key to the database
        try:
            self.db.put(_rowid_key(table, newid), None,
                        txn=txn, flags=DB_NOOVERWRITE)
        except DBKeyExistsError:
            # identifier already taken: draw another one
            continue
        return newid
|
|
|
|
|
|
def Insert(self, table, rowdict) :
|
|
"""Insert(table, datadict) - Insert a new row into the table
|
|
using the keys+values from rowdict as the column values.
|
|
"""
|
|
txn = None
|
|
try:
|
|
if not self.db.has_key(_columns_key(table)) :
|
|
raise TableDBError, "unknown table"
|
|
|
|
# check the validity of each column name
|
|
if not self.__tablecolumns.has_key(table) :
|
|
self.__load_column_info(table)
|
|
for column in rowdict.keys() :
|
|
if not self.__tablecolumns[table].count(column) :
|
|
raise TableDBError, "unknown column: "+`column`
|
|
|
|
# get a unique row identifier for this row
|
|
rowid = self.__new_rowid(table)
|
|
|
|
txn = self.env.txn_begin()
|
|
|
|
# insert the row values into the table database
|
|
for column, dataitem in rowdict.items() :
|
|
# store the value
|
|
self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
|
|
|
|
txn.commit()
|
|
txn = None
|
|
|
|
except DBError, dberror:
|
|
if txn :
|
|
txn.abort()
|
|
self.db.delete(_rowid_key(table, rowid))
|
|
raise TableDBError, dberror[1]
|
|
|
|
|
|
def Modify(self, table, conditions={}, mappings={}) :
|
|
"""Modify(table, conditions) - Modify in rows matching 'conditions'
|
|
using mapping functions in 'mappings'
|
|
* conditions is a dictionary keyed on column names
|
|
containing condition functions expecting the data string as an
|
|
argument and returning a boolean.
|
|
* mappings is a dictionary keyed on column names containint condition
|
|
functions expecting the data string as an argument and returning the
|
|
new string for that column.
|
|
"""
|
|
try:
|
|
matching_rowids = self.__Select(table, [], conditions)
|
|
|
|
# modify only requested columns
|
|
columns = mappings.keys()
|
|
for rowid in matching_rowids.keys() :
|
|
txn = None
|
|
try:
|
|
for column in columns :
|
|
txn = self.env.txn_begin()
|
|
# modify the requested column
|
|
try:
|
|
dataitem = self.db.get(_data_key(table, column, rowid), txn)
|
|
self.db.delete(_data_key(table, column, rowid), txn)
|
|
except DBNotFoundError:
|
|
dataitem = None # XXXXXXX row key somehow didn't exist, assume no error
|
|
dataitem = mappings[column](dataitem)
|
|
if dataitem <> None:
|
|
self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
|
|
txn.commit()
|
|
txn = None
|
|
|
|
except DBError, dberror:
|
|
if txn :
|
|
txn.abort()
|
|
raise
|
|
|
|
except DBError, dberror:
|
|
raise TableDBError, dberror[1]
|
|
|
|
def Delete(self, table, conditions={}) :
|
|
"""Delete(table, conditions) - Delete items matching the given
|
|
conditions from the table.
|
|
* conditions is a dictionary keyed on column names
|
|
containing condition functions expecting the data string as an
|
|
argument and returning a boolean.
|
|
"""
|
|
try:
|
|
matching_rowids = self.__Select(table, [], conditions)
|
|
|
|
# delete row data from all columns
|
|
columns = self.__tablecolumns[table]
|
|
for rowid in matching_rowids.keys() :
|
|
txn = None
|
|
try:
|
|
txn = self.env.txn_begin()
|
|
for column in columns :
|
|
# delete the data key
|
|
try:
|
|
self.db.delete(_data_key(table, column, rowid), txn)
|
|
except DBNotFoundError:
|
|
pass # XXXXXXX column may not exist, assume no error
|
|
|
|
try:
|
|
self.db.delete(_rowid_key(table, rowid), txn)
|
|
except DBNotFoundError:
|
|
pass # XXXXXXX row key somehow didn't exist, assume no error
|
|
txn.commit()
|
|
txn = None
|
|
except DBError, dberror:
|
|
if txn :
|
|
txn.abort()
|
|
raise
|
|
|
|
except DBError, dberror:
|
|
raise TableDBError, dberror[1]
|
|
|
|
|
|
def Select(self, table, columns, conditions={}) :
|
|
"""Select(table, conditions) - retrieve specific row data
|
|
Returns a list of row column->value mapping dictionaries.
|
|
* columns is a list of which column data to return. If
|
|
columns is None, all columns will be returned.
|
|
* conditions is a dictionary keyed on column names
|
|
containing callable conditions expecting the data string as an
|
|
argument and returning a boolean.
|
|
"""
|
|
try:
|
|
if not self.__tablecolumns.has_key(table) :
|
|
self.__load_column_info(table)
|
|
if columns is None :
|
|
columns = self.__tablecolumns[table]
|
|
matching_rowids = self.__Select(table, columns, conditions)
|
|
except DBError, dberror:
|
|
raise TableDBError, dberror[1]
|
|
|
|
# return the matches as a list of dictionaries
|
|
return matching_rowids.values()
|
|
|
|
|
|
def __Select(self, table, columns, conditions) :
|
|
"""__Select() - Used to implement Select and Delete (above)
|
|
Returns a dictionary keyed on rowids containing dicts
|
|
holding the row data for columns listed in the columns param
|
|
that match the given conditions.
|
|
* conditions is a dictionary keyed on column names
|
|
containing callable conditions expecting the data string as an
|
|
argument and returning a boolean.
|
|
"""
|
|
# check the validity of each column name
|
|
if not self.__tablecolumns.has_key(table) :
|
|
self.__load_column_info(table)
|
|
if columns is None :
|
|
columns = self.tablecolumns[table]
|
|
for column in (columns + conditions.keys()) :
|
|
if not self.__tablecolumns[table].count(column) :
|
|
raise TableDBError, "unknown column: "+`column`
|
|
|
|
# keyed on rows that match so far, containings dicts keyed on
|
|
# column names containing the data for that row and column.
|
|
matching_rowids = {}
|
|
|
|
rejected_rowids = {} # keys are rowids that do not match
|
|
|
|
# attempt to sort the conditions in such a way as to minimize full column lookups
|
|
def cmp_conditions(atuple, btuple):
|
|
a = atuple[1]
|
|
b = btuple[1]
|
|
if type(a) == type(b) :
|
|
if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
|
|
return cmp(len(b.prefix), len(a.prefix)) # longest prefix first
|
|
if isinstance(a, LikeCond) and isinstance(b, LikeCond):
|
|
return cmp(len(b.likestr), len(a.likestr)) # longest likestr first
|
|
return 0
|
|
if isinstance(a, ExactCond):
|
|
return -1
|
|
if isinstance(b, ExactCond):
|
|
return 1
|
|
if isinstance(a, PrefixCond):
|
|
return -1
|
|
if isinstance(b, PrefixCond):
|
|
return 1
|
|
# leave all unknown condition callables alone as equals
|
|
return 0
|
|
|
|
conditionlist = conditions.items()
|
|
conditionlist.sort(cmp_conditions)
|
|
|
|
# Apply conditions to column data to find what we want
|
|
cur = self.db.cursor()
|
|
column_num = -1
|
|
for column, condition in conditionlist :
|
|
column_num = column_num + 1
|
|
searchkey = _search_col_data_key(table, column)
|
|
# speedup: don't linear search columns within loop
|
|
if column in columns :
|
|
savethiscolumndata = 1 # save the data for return
|
|
else :
|
|
savethiscolumndata = 0 # data only used for selection
|
|
|
|
try:
|
|
key, data = cur.set_range(searchkey)
|
|
while key[:len(searchkey)] == searchkey :
|
|
# extract the rowid from the key
|
|
rowid = key[-_rowid_str_len:]
|
|
|
|
if not rejected_rowids.has_key(rowid) :
|
|
# if no condition was specified or the condition
|
|
# succeeds, add row to our match list.
|
|
if not condition or condition(data) :
|
|
if not matching_rowids.has_key(rowid) :
|
|
matching_rowids[rowid] = {}
|
|
if savethiscolumndata :
|
|
matching_rowids[rowid][column] = data
|
|
else :
|
|
if matching_rowids.has_key(rowid) :
|
|
del matching_rowids[rowid]
|
|
rejected_rowids[rowid] = rowid
|
|
|
|
key, data = cur.next()
|
|
|
|
except DBError, dberror:
|
|
if dberror[0] != DB_NOTFOUND :
|
|
raise
|
|
continue
|
|
|
|
cur.close()
|
|
|
|
# we're done selecting rows, garbage collect the reject list
|
|
del rejected_rowids
|
|
|
|
# extract any remaining desired column data from the
|
|
# database for the matching rows.
|
|
if len(columns) > 0 :
|
|
for rowid, rowdata in matching_rowids.items() :
|
|
for column in columns :
|
|
if rowdata.has_key(column) :
|
|
continue
|
|
try:
|
|
rowdata[column] = self.db.get(_data_key(table, column, rowid))
|
|
except DBError, dberror:
|
|
if dberror[0] != DB_NOTFOUND :
|
|
raise
|
|
rowdata[column] = None
|
|
|
|
# return the matches
|
|
return matching_rowids
|
|
|
|
|
|
def Drop(self, table) :
|
|
"""Remove an entire table from the database
|
|
"""
|
|
txn = None
|
|
try:
|
|
txn = self.env.txn_begin()
|
|
|
|
# delete the column list
|
|
self.db.delete(_columns_key(table), txn)
|
|
|
|
cur = self.db.cursor(txn)
|
|
|
|
# delete all keys containing this tables column and row info
|
|
table_key = _search_all_data_key(table)
|
|
while 1 :
|
|
try:
|
|
key, data = cur.set_range(table_key)
|
|
except DBNotFoundError:
|
|
break
|
|
# only delete items in this table
|
|
if key[:len(table_key)] != table_key :
|
|
break
|
|
cur.delete()
|
|
|
|
# delete all rowids used by this table
|
|
table_key = _search_rowid_key(table)
|
|
while 1 :
|
|
try:
|
|
key, data = cur.set_range(table_key)
|
|
except DBNotFoundError:
|
|
break
|
|
# only delete items in this table
|
|
if key[:len(table_key)] != table_key :
|
|
break
|
|
cur.delete()
|
|
|
|
cur.close()
|
|
|
|
# delete the tablename from the table name list
|
|
tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
|
|
try:
|
|
tablelist.remove(table)
|
|
except ValueError:
|
|
pass # hmm, it wasn't there, oh well, that's what we want.
|
|
self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
|
|
self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
|
|
|
|
txn.commit()
|
|
txn = None
|
|
|
|
if self.__tablecolumns.has_key(table) :
|
|
del self.__tablecolumns[table]
|
|
|
|
except DBError, dberror:
|
|
if txn :
|
|
txn.abort()
|
|
raise TableDBError, dberror[1]
|