1 #-----------------------------------------------------------------------
3 # Copyright (C) 2000, 2001 by Autonomous Zone Industries
4 # Copyright (C) 2002 Gregory P. Smith
6 # License: This is free software. You may use this software for any
7 # purpose including modification/redistribution, so long as
8 # this header remains intact and that you do not claim any
9 # rights of ownership or authorship of this software. This
10 # software has been tested, but no warranty is expressed or
13 # -- Gregory P. Smith <greg@krypto.org>
15 # This provides a simple database table interface built on top of
16 # the Python Berkeley DB 3 interface.
25 import cPickle
as pickle
28 # For Pythons w/distutils pybsddb
34 # XXX(nnorwitz): is this correct? DBIncompleteError is conditional in _bsddb.c
35 if not hasattr(db
,"DBIncompleteError") :
36 class DBIncompleteError(Exception):
38 db
.DBIncompleteError
= DBIncompleteError
40 class TableDBError(StandardError):
42 class TableAlreadyExists(TableDBError
):
47 """This condition matches everything"""
48 def __call__(self
, s
):
class ExactCond(Cond):
    """Condition callable that accepts only an exact string match."""

    def __init__(self, strtomatch):
        # Reference value every candidate string is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        """Return True when *s* equals the stored reference string."""
        wanted = self.strtomatch
        return s == wanted
class PrefixCond(Cond):
    """Condition callable that accepts strings beginning with a prefix."""

    def __init__(self, prefix):
        # Must be stored: __call__ compares against it, and the condition
        # ordering in __Select reads `.prefix` to put longer prefixes first.
        self.prefix = prefix

    def __call__(self, s):
        """Return True when the leading len(prefix) characters of *s*
        equal the stored prefix."""
        return s[:len(self.prefix)] == self.prefix
class PostfixCond(Cond):
    """Condition callable that accepts strings ending with a postfix."""

    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        """Return True when *s* ends with the stored postfix."""
        postfix = self.postfix
        if not postfix:
            # s[-0:] is the whole string, so the slice comparison below
            # would reject every non-empty s.  An empty postfix matches
            # everything, mirroring PrefixCond with an empty prefix.
            return True
        return s[-len(postfix):] == postfix
74 Acts as a function that will match using an SQL 'LIKE' style
75 string. Case insensitive and % signs are wild cards.
76 This isn't perfect but it should work for the simple common cases.
78 def __init__(self
, likestr
, re_flags
=re
.IGNORECASE
):
79 # escape python re characters
80 chars_to_escape
= '.*+()[]?'
81 for char
in chars_to_escape
:
82 likestr
= likestr
.replace(char
, '\\'+char
)
83 # convert %s to wildcards
84 self
.likestr
= likestr
.replace('%', '.*')
85 self
.re
= re
.compile('^'+self
.likestr
+'$', re_flags
)
86 def __call__(self
, s
):
87 return self
.re
.match(s
)
# Keys used to store database-wide metadata.

# Key whose value is the pickled list of the table names in this db.
_table_names_key = '__TABLE_NAMES__'
# Suffix appended to a table name (see _columns_key) to form the key whose
# value is that table's pickled list of column names.
_columns = '._COLUMNS__'
def _columns_key(table):
    """Return the metadata key holding the column list of *table*."""
    key = table + _columns
    return key
# These markers are used to build the per-table keys.

# table + _data + column + _data + rowid is the key holding one cell of
# table data (see _data_key).
_data = '._DATA_.'
# table + _rowid + rowid + _rowid is the key holding a unique entry for
# each row in the table (see _rowid_key).  No data is stored under it.
_rowid = '._ROWID_.'
# Length in bytes of the unique rowid strings; keys built by _data_key
# end with exactly this many rowid characters.
_rowid_str_len = 8
def _data_key(table, col, rowid):
    """Return the key storing the value of column *col* in row *rowid*."""
    column_prefix = table + _data + col
    return column_prefix + _data + rowid
def _search_col_data_key(table, col):
    """Return the key prefix shared by every row's entry for column *col*.

    This is a _data_key() without its trailing rowid, suitable for a
    cursor range search over one column.
    """
    column_prefix = table + _data + col
    return column_prefix + _data
def _search_all_data_key(table):
    """Return the key prefix shared by all data entries of *table*.

    Every key built by _data_key() starts with table + _data, so a cursor
    range search on this prefix visits the data of every column and row.
    """
    return table + _data
def _rowid_key(table, rowid):
    """Return the key that marks the existence of row *rowid* in *table*."""
    opening = table + _rowid
    return opening + rowid + _rowid
def _search_rowid_key(table):
    """Return the key prefix shared by all rowid entries of *table*."""
    prefix = table + _rowid
    return prefix
def contains_metastrings(s):
    """Report whether *s* contains any reserved dbtables metadata string.

    Table and column names are checked with this, because a name that
    embeds one of the key markers might interfere with dbtables
    database operation.

    Returns 1 if any marker occurs in s, 0 otherwise.
    """
    reserved = (_table_names_key, _columns, _data, _rowid)
    for marker in reserved:
        if s.find(marker) >= 0:
            return 1
    return 0
137 def __init__(self
, filename
, dbhome
, create
=0, truncate
=0, mode
=0600,
138 recover
=0, dbflags
=0):
139 """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)
141 Open database name in the dbhome Berkeley DB directory.
142 Use keyword arguments when calling this constructor.
145 myflags
= db
.DB_THREAD
147 myflags |
= db
.DB_CREATE
148 flagsforenv
= (db
.DB_INIT_MPOOL | db
.DB_INIT_LOCK | db
.DB_INIT_LOG |
149 db
.DB_INIT_TXN | dbflags
)
150 # DB_AUTO_COMMIT isn't a valid flag for env.open()
152 dbflags |
= db
.DB_AUTO_COMMIT
153 except AttributeError:
156 flagsforenv
= flagsforenv | db
.DB_RECOVER
157 self
.env
= db
.DBEnv()
158 # enable auto deadlock avoidance
159 self
.env
.set_lk_detect(db
.DB_LOCK_DEFAULT
)
160 self
.env
.open(dbhome
, myflags | flagsforenv
)
162 myflags |
= db
.DB_TRUNCATE
163 self
.db
= db
.DB(self
.env
)
164 # this code relies on DBCursor.set* methods to raise exceptions
165 # rather than returning None
166 self
.db
.set_get_returns_none(1)
167 # allow duplicate entries [warning: be careful w/ metadata]
168 self
.db
.set_flags(db
.DB_DUP
)
169 self
.db
.open(filename
, db
.DB_BTREE
, dbflags | myflags
, mode
)
170 self
.dbfilename
= filename
172 if sys
.version_info
[0] >= 3 :
173 class cursor_py3k(object) :
174 def __init__(self
, dbcursor
) :
175 self
._dbcursor
= dbcursor
178 return self
._dbcursor
.close()
180 def set_range(self
, search
) :
181 v
= self
._dbcursor
.set_range(bytes(search
, "iso8859-1"))
183 v
= (v
[0].decode("iso8859-1"),
184 v
[1].decode("iso8859-1"))
188 v
= getattr(self
._dbcursor
, "next")()
190 v
= (v
[0].decode("iso8859-1"),
191 v
[1].decode("iso8859-1"))
194 class db_py3k(object) :
195 def __init__(self
, db
) :
198 def cursor(self
, txn
=None) :
199 return cursor_py3k(self
._db
.cursor(txn
=txn
))
201 def has_key(self
, key
, txn
=None) :
202 return getattr(self
._db
,"has_key")(bytes(key
, "iso8859-1"),
205 def put(self
, key
, value
, flags
=0, txn
=None) :
206 key
= bytes(key
, "iso8859-1")
208 value
= bytes(value
, "iso8859-1")
209 return self
._db
.put(key
, value
, flags
=flags
, txn
=txn
)
211 def put_bytes(self
, key
, value
, txn
=None) :
212 key
= bytes(key
, "iso8859-1")
213 return self
._db
.put(key
, value
, txn
=txn
)
215 def get(self
, key
, txn
=None, flags
=0) :
216 key
= bytes(key
, "iso8859-1")
217 v
= self
._db
.get(key
, txn
=txn
, flags
=flags
)
219 v
= v
.decode("iso8859-1")
222 def get_bytes(self
, key
, txn
=None, flags
=0) :
223 key
= bytes(key
, "iso8859-1")
224 return self
._db
.get(key
, txn
=txn
, flags
=flags
)
226 def delete(self
, key
, txn
=None) :
227 key
= bytes(key
, "iso8859-1")
228 return self
._db
.delete(key
, txn
=txn
)
231 return self
._db
.close()
233 self
.db
= db_py3k(self
.db
)
237 # Initialize the table names list if this is a new database
238 txn
= self
.env
.txn_begin()
240 if not getattr(self
.db
, "has_key")(_table_names_key
, txn
):
241 getattr(self
.db
, "put_bytes", self
.db
.put
) \
242 (_table_names_key
, pickle
.dumps([], 1), txn
=txn
)
249 # TODO verify more of the database's metadata?
250 self
.__tablecolumns
= {}
256 if self
.db
is not None:
259 if self
.env
is not None:
263 def checkpoint(self
, mins
=0):
265 self
.env
.txn_checkpoint(mins
)
266 except db
.DBIncompleteError
:
272 except db
.DBIncompleteError
:
275 def _db_print(self
) :
276 """Print the database to stdout for debugging"""
277 print "******** Printing raw database for debugging ********"
278 cur
= self
.db
.cursor()
280 key
, data
= cur
.first()
282 print repr({key
: data
})
289 except db
.DBNotFoundError
:
293 def CreateTable(self
, table
, columns
):
294 """CreateTable(table, columns) - Create a new table in the database.
296 raises TableDBError if it already exists or for other DB errors.
298 assert isinstance(columns
, list)
302 # checking sanity of the table and column names here on
303 # table creation will prevent problems elsewhere.
304 if contains_metastrings(table
):
306 "bad table name: contains reserved metastrings")
307 for column
in columns
:
308 if contains_metastrings(column
):
310 "bad column name: contains reserved metastrings")
312 columnlist_key
= _columns_key(table
)
313 if getattr(self
.db
, "has_key")(columnlist_key
):
314 raise TableAlreadyExists
, "table already exists"
316 txn
= self
.env
.txn_begin()
317 # store the table's column info
318 getattr(self
.db
, "put_bytes", self
.db
.put
)(columnlist_key
,
319 pickle
.dumps(columns
, 1), txn
=txn
)
321 # add the table name to the tablelist
322 tablelist
= pickle
.loads(getattr(self
.db
, "get_bytes",
323 self
.db
.get
) (_table_names_key
, txn
=txn
, flags
=db
.DB_RMW
))
324 tablelist
.append(table
)
325 # delete 1st, in case we opened with DB_DUP
326 self
.db
.delete(_table_names_key
, txn
=txn
)
327 getattr(self
.db
, "put_bytes", self
.db
.put
)(_table_names_key
,
328 pickle
.dumps(tablelist
, 1), txn
=txn
)
332 except db
.DBError
, dberror
:
335 if sys
.version_info
[0] < 3 :
336 raise TableDBError
, dberror
[1]
338 raise TableDBError
, dberror
.args
[1]
341 def ListTableColumns(self
, table
):
342 """Return a list of columns in the given table.
343 [] if the table doesn't exist.
345 assert isinstance(table
, str)
346 if contains_metastrings(table
):
347 raise ValueError, "bad table name: contains reserved metastrings"
349 columnlist_key
= _columns_key(table
)
350 if not getattr(self
.db
, "has_key")(columnlist_key
):
352 pickledcolumnlist
= getattr(self
.db
, "get_bytes",
353 self
.db
.get
)(columnlist_key
)
354 if pickledcolumnlist
:
355 return pickle
.loads(pickledcolumnlist
)
def ListTables(self):
    """Return a list of tables in this database.

    The names are read from the pickled list stored under
    _table_names_key.  Returns [] when nothing is stored there.
    """
    # Fetch the raw pickle the same way the other methods do: the
    # Python 3 wrapper exposes undecoded values as get_bytes(), while a
    # plain DB object only offers get().  The former call to `get_get`
    # named a method the wrapper in this file does not define.
    fetch = getattr(self.db, "get_bytes", self.db.get)
    pickledtablelist = fetch(_table_names_key)
    if pickledtablelist:
        return pickle.loads(pickledtablelist)
    return []
367 def CreateOrExtendTable(self
, table
, columns
):
368 """CreateOrExtendTable(table, columns)
370 Create a new table in the database.
372 If a table of this name already exists, extend it to have any
373 additional columns present in the given list as well as
374 all of its current columns.
376 assert isinstance(columns
, list)
379 self
.CreateTable(table
, columns
)
380 except TableAlreadyExists
:
381 # the table already existed, add any new columns
384 columnlist_key
= _columns_key(table
)
385 txn
= self
.env
.txn_begin()
387 # load the current column list
388 oldcolumnlist
= pickle
.loads(
389 getattr(self
.db
, "get_bytes",
390 self
.db
.get
)(columnlist_key
, txn
=txn
, flags
=db
.DB_RMW
))
391 # create a hash table for fast lookups of column names in the
394 for c
in oldcolumnlist
:
397 # create a new column list containing both the old and new
399 newcolumnlist
= copy
.copy(oldcolumnlist
)
401 if not c
in oldcolumnhash
:
402 newcolumnlist
.append(c
)
404 # store the table's new extended column list
405 if newcolumnlist
!= oldcolumnlist
:
406 # delete the old one first since we opened with DB_DUP
407 self
.db
.delete(columnlist_key
, txn
=txn
)
408 getattr(self
.db
, "put_bytes", self
.db
.put
)(columnlist_key
,
409 pickle
.dumps(newcolumnlist
, 1),
415 self
.__load
_column
_info
(table
)
416 except db
.DBError
, dberror
:
419 if sys
.version_info
[0] < 3 :
420 raise TableDBError
, dberror
[1]
422 raise TableDBError
, dberror
.args
[1]
425 def __load_column_info(self
, table
) :
426 """initialize the self.__tablecolumns dict"""
427 # check the column names
429 tcolpickles
= getattr(self
.db
, "get_bytes",
430 self
.db
.get
)(_columns_key(table
))
431 except db
.DBNotFoundError
:
432 raise TableDBError
, "unknown table: %r" % (table
,)
434 raise TableDBError
, "unknown table: %r" % (table
,)
435 self
.__tablecolumns
[table
] = pickle
.loads(tcolpickles
)
437 def __new_rowid(self
, table
, txn
) :
438 """Create a new unique row identifier"""
441 # Generate a random 64-bit row ID string
442 # (note: might have <64 bits of true randomness
443 # but it's plenty for our database id needs!)
445 for x
in xrange(_rowid_str_len
):
446 blist
.append(random
.randint(0,255))
447 newid
= struct
.pack('B'*_rowid_str_len
, *blist
)
449 if sys
.version_info
[0] >= 3 :
450 newid
= newid
.decode("iso8859-1") # 8 bits
452 # Guarantee uniqueness by adding this key to the database
454 self
.db
.put(_rowid_key(table
, newid
), None, txn
=txn
,
455 flags
=db
.DB_NOOVERWRITE
)
456 except db
.DBKeyExistError
:
464 def Insert(self
, table
, rowdict
) :
465 """Insert(table, datadict) - Insert a new row into the table
466 using the keys+values from rowdict as the column values.
471 if not getattr(self
.db
, "has_key")(_columns_key(table
)):
472 raise TableDBError
, "unknown table"
474 # check the validity of each column name
475 if not table
in self
.__tablecolumns
:
476 self
.__load
_column
_info
(table
)
477 for column
in rowdict
.keys() :
478 if not self
.__tablecolumns
[table
].count(column
):
479 raise TableDBError
, "unknown column: %r" % (column
,)
481 # get a unique row identifier for this row
482 txn
= self
.env
.txn_begin()
483 rowid
= self
.__new
_rowid
(table
, txn
=txn
)
485 # insert the row values into the table database
486 for column
, dataitem
in rowdict
.items():
488 self
.db
.put(_data_key(table
, column
, rowid
), dataitem
, txn
=txn
)
493 except db
.DBError
, dberror
:
494 # WIBNI we could just abort the txn and re-raise the exception?
495 # But no, because TableDBError is not related to DBError via
496 # inheritance, so it would be backwards incompatible. Do the next
498 info
= sys
.exc_info()
501 self
.db
.delete(_rowid_key(table
, rowid
))
502 if sys
.version_info
[0] < 3 :
503 raise TableDBError
, dberror
[1], info
[2]
505 raise TableDBError
, dberror
.args
[1], info
[2]
508 def Modify(self
, table
, conditions
={}, mappings
={}):
509 """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'
511 * table - the table name
512 * conditions - a dictionary keyed on column names containing
513 a condition callable expecting the data string as an
514 argument and returning a boolean.
515 * mappings - a dictionary keyed on column names containing a
516 condition callable expecting the data string as an argument and
517 returning the new string for that column.
521 matching_rowids
= self
.__Select
(table
, [], conditions
)
523 # modify only requested columns
524 columns
= mappings
.keys()
525 for rowid
in matching_rowids
.keys():
528 for column
in columns
:
529 txn
= self
.env
.txn_begin()
530 # modify the requested column
532 dataitem
= self
.db
.get(
533 _data_key(table
, column
, rowid
),
536 _data_key(table
, column
, rowid
),
538 except db
.DBNotFoundError
:
539 # XXXXXXX row key somehow didn't exist, assume no
542 dataitem
= mappings
[column
](dataitem
)
545 _data_key(table
, column
, rowid
),
550 # catch all exceptions here since we call unknown callables
556 except db
.DBError
, dberror
:
557 if sys
.version_info
[0] < 3 :
558 raise TableDBError
, dberror
[1]
560 raise TableDBError
, dberror
.args
[1]
562 def Delete(self
, table
, conditions
={}):
563 """Delete(table, conditions) - Delete items matching the given
564 conditions from the table.
566 * conditions - a dictionary keyed on column names containing
567 condition functions expecting the data string as an
568 argument and returning a boolean.
572 matching_rowids
= self
.__Select
(table
, [], conditions
)
574 # delete row data from all columns
575 columns
= self
.__tablecolumns
[table
]
576 for rowid
in matching_rowids
.keys():
579 txn
= self
.env
.txn_begin()
580 for column
in columns
:
581 # delete the data key
583 self
.db
.delete(_data_key(table
, column
, rowid
),
585 except db
.DBNotFoundError
:
586 # XXXXXXX column may not exist, assume no error
590 self
.db
.delete(_rowid_key(table
, rowid
), txn
=txn
)
591 except db
.DBNotFoundError
:
592 # XXXXXXX row key somehow didn't exist, assume no error
596 except db
.DBError
, dberror
:
600 except db
.DBError
, dberror
:
601 if sys
.version_info
[0] < 3 :
602 raise TableDBError
, dberror
[1]
604 raise TableDBError
, dberror
.args
[1]
607 def Select(self
, table
, columns
, conditions
={}):
608 """Select(table, columns, conditions) - retrieve specific row data
609 Returns a list of row column->value mapping dictionaries.
611 * columns - a list of which column data to return. If
612 columns is None, all columns will be returned.
613 * conditions - a dictionary keyed on column names
614 containing callable conditions expecting the data string as an
615 argument and returning a boolean.
618 if not table
in self
.__tablecolumns
:
619 self
.__load
_column
_info
(table
)
621 columns
= self
.__tablecolumns
[table
]
622 matching_rowids
= self
.__Select
(table
, columns
, conditions
)
623 except db
.DBError
, dberror
:
624 if sys
.version_info
[0] < 3 :
625 raise TableDBError
, dberror
[1]
627 raise TableDBError
, dberror
.args
[1]
628 # return the matches as a list of dictionaries
629 return matching_rowids
.values()
632 def __Select(self
, table
, columns
, conditions
):
633 """__Select() - Used to implement Select and Delete (above)
634 Returns a dictionary keyed on rowids containing dicts
635 holding the row data for columns listed in the columns param
636 that match the given conditions.
637 * conditions is a dictionary keyed on column names
638 containing callable conditions expecting the data string as an
639 argument and returning a boolean.
641 # check the validity of each column name
642 if not table
in self
.__tablecolumns
:
643 self
.__load
_column
_info
(table
)
645 columns
= self
.tablecolumns
[table
]
646 for column
in (columns
+ conditions
.keys()):
647 if not self
.__tablecolumns
[table
].count(column
):
648 raise TableDBError
, "unknown column: %r" % (column
,)
650 # keyed on rows that match so far, containing dicts keyed on
651 # column names containing the data for that row and column.
653 # keys are rowids that do not match
656 # attempt to sort the conditions in such a way as to minimize full
658 def cmp_conditions(atuple
, btuple
):
661 if type(a
) is type(b
):
662 if isinstance(a
, PrefixCond
) and isinstance(b
, PrefixCond
):
663 # longest prefix first
664 return cmp(len(b
.prefix
), len(a
.prefix
))
665 if isinstance(a
, LikeCond
) and isinstance(b
, LikeCond
):
666 # longest likestr first
667 return cmp(len(b
.likestr
), len(a
.likestr
))
669 if isinstance(a
, ExactCond
):
671 if isinstance(b
, ExactCond
):
673 if isinstance(a
, PrefixCond
):
675 if isinstance(b
, PrefixCond
):
677 # leave all unknown condition callables alone as equals
680 if sys
.version_info
[0] < 3 :
681 conditionlist
= conditions
.items()
682 conditionlist
.sort(cmp_conditions
)
683 else : # Insertion Sort. Please, improve
685 for i
in conditions
.items() :
686 for j
, k
in enumerate(conditionlist
) :
687 r
= cmp_conditions(k
, i
)
689 conditionlist
.insert(j
, i
)
692 conditionlist
.append(i
)
694 # Apply conditions to column data to find what we want
695 cur
= self
.db
.cursor()
697 for column
, condition
in conditionlist
:
698 column_num
= column_num
+ 1
699 searchkey
= _search_col_data_key(table
, column
)
700 # speedup: don't linear search columns within loop
701 if column
in columns
:
702 savethiscolumndata
= 1 # save the data for return
704 savethiscolumndata
= 0 # data only used for selection
707 key
, data
= cur
.set_range(searchkey
)
708 while key
[:len(searchkey
)] == searchkey
:
709 # extract the rowid from the key
710 rowid
= key
[-_rowid_str_len
:]
712 if not rowid
in rejected_rowids
:
713 # if no condition was specified or the condition
714 # succeeds, add row to our match list.
715 if not condition
or condition(data
):
716 if not rowid
in matching_rowids
:
717 matching_rowids
[rowid
] = {}
718 if savethiscolumndata
:
719 matching_rowids
[rowid
][column
] = data
721 if rowid
in matching_rowids
:
722 del matching_rowids
[rowid
]
723 rejected_rowids
[rowid
] = rowid
725 key
, data
= cur
.next()
727 except db
.DBError
, dberror
:
728 if dberror
.args
[0] != db
.DB_NOTFOUND
:
734 # we're done selecting rows, garbage collect the reject list
737 # extract any remaining desired column data from the
738 # database for the matching rows.
740 for rowid
, rowdata
in matching_rowids
.items():
741 for column
in columns
:
742 if column
in rowdata
:
745 rowdata
[column
] = self
.db
.get(
746 _data_key(table
, column
, rowid
))
747 except db
.DBError
, dberror
:
748 if sys
.version_info
[0] < 3 :
749 if dberror
[0] != db
.DB_NOTFOUND
:
752 if dberror
.args
[0] != db
.DB_NOTFOUND
:
754 rowdata
[column
] = None
757 return matching_rowids
760 def Drop(self
, table
):
761 """Remove an entire table from the database"""
764 txn
= self
.env
.txn_begin()
766 # delete the column list
767 self
.db
.delete(_columns_key(table
), txn
=txn
)
769 cur
= self
.db
.cursor(txn
)
771 # delete all keys containing this tables column and row info
772 table_key
= _search_all_data_key(table
)
775 key
, data
= cur
.set_range(table_key
)
776 except db
.DBNotFoundError
:
778 # only delete items in this table
779 if key
[:len(table_key
)] != table_key
:
783 # delete all rowids used by this table
784 table_key
= _search_rowid_key(table
)
787 key
, data
= cur
.set_range(table_key
)
788 except db
.DBNotFoundError
:
790 # only delete items in this table
791 if key
[:len(table_key
)] != table_key
:
797 # delete the tablename from the table name list
798 tablelist
= pickle
.loads(
799 getattr(self
.db
, "get_bytes", self
.db
.get
)(_table_names_key
,
800 txn
=txn
, flags
=db
.DB_RMW
))
802 tablelist
.remove(table
)
804 # hmm, it wasn't there, oh well, that's what we want.
806 # delete 1st, in case we opened with DB_DUP
807 self
.db
.delete(_table_names_key
, txn
=txn
)
808 getattr(self
.db
, "put_bytes", self
.db
.put
)(_table_names_key
,
809 pickle
.dumps(tablelist
, 1), txn
=txn
)
814 if table
in self
.__tablecolumns
:
815 del self
.__tablecolumns
[table
]
817 except db
.DBError
, dberror
:
820 raise TableDBError(dberror
.args
[1])