1 #-----------------------------------------------------------------------
3 # Copyright (C) 2000, 2001 by Autonomous Zone Industries
4 # Copyright (C) 2002 Gregory P. Smith
6 # License: This is free software. You may use this software for any
7 # purpose including modification/redistribution, so long as
8 # this header remains intact and that you do not claim any
9 # rights of ownership or authorship of this software. This
10 # software has been tested, but no warranty is expressed or
13 # -- Gregory P. Smith <greg@electricrain.com>
15 # This provides a simple database table interface built on top of
16 # the Python BerkeleyDB 3 interface.
25 from types
import ListType
, StringType
26 import cPickle
as pickle
29 # For Pythons w/distutils pybsddb
30 from bsddb3
.db
import *
33 from bsddb
.db
import *
36 class TableDBError(StandardError):
38 class TableAlreadyExists(TableDBError
):
43 """This condition matches everything"""
44 def __call__(self
, s
):
47 class ExactCond(Cond
):
48 """Acts as an exact match condition function"""
49 def __init__(self
, strtomatch
):
50 self
.strtomatch
= strtomatch
51 def __call__(self
, s
):
52 return s
== self
.strtomatch
54 class PrefixCond(Cond
):
55 """Acts as a condition function for matching a string prefix"""
56 def __init__(self
, prefix
):
58 def __call__(self
, s
):
59 return s
[:len(self
.prefix
)] == self
.prefix
61 class PostfixCond(Cond
):
62 """Acts as a condition function for matching a string postfix"""
63 def __init__(self
, postfix
):
64 self
.postfix
= postfix
65 def __call__(self
, s
):
66 return s
[-len(self
.postfix
):] == self
.postfix
70 Acts as a function that will match using an SQL 'LIKE' style
71 string. Case insensitive and % signs are wild cards.
72 This isn't perfect but it should work for the simple common cases.
74 def __init__(self
, likestr
, re_flags
=re
.IGNORECASE
):
75 # escape python re characters
76 chars_to_escape
= '.*+()[]?'
77 for char
in chars_to_escape
:
78 likestr
= likestr
.replace(char
, '\\'+char
)
79 # convert %s to wildcards
80 self
.likestr
= likestr
.replace('%', '.*')
81 self
.re
= re
.compile('^'+self
.likestr
+'$', re_flags
)
82 def __call__(self
, s
):
83 return self
.re
.match(s
)
86 # keys used to store database metadata
88 _table_names_key
= '__TABLE_NAMES__' # list of the tables in this db
89 _columns
= '._COLUMNS__' # table_name+this key contains a list of columns
91 def _columns_key(table
):
92 return table
+ _columns
95 # these keys are found within table sub databases
97 _data
= '._DATA_.' # this+column+this+rowid key contains table data
98 _rowid
= '._ROWID_.' # this+rowid+this key contains a unique entry for each
99 # row in the table. (no data is stored)
100 _rowid_str_len
= 8 # length in bytes of the unique rowid strings
102 def _data_key(table
, col
, rowid
):
103 return table
+ _data
+ col
+ _data
+ rowid
105 def _search_col_data_key(table
, col
):
106 return table
+ _data
+ col
+ _data
108 def _search_all_data_key(table
):
111 def _rowid_key(table
, rowid
):
112 return table
+ _rowid
+ rowid
+ _rowid
114 def _search_rowid_key(table
):
115 return table
+ _rowid
117 def contains_metastrings(s
) :
118 """Verify that the given string does not contain any
119 metadata strings that might interfere with dbtables database operation.
121 if (s
.find(_table_names_key
) >= 0 or
122 s
.find(_columns
) >= 0 or
123 s
.find(_data
) >= 0 or
124 s
.find(_rowid
) >= 0):
132 def __init__(self
, filename
, dbhome
, create
=0, truncate
=0, mode
=0600,
133 recover
=0, dbflags
=0):
134 """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
135 Open database name in the dbhome BerkeleyDB directory.
136 Use keyword arguments when calling this constructor.
142 flagsforenv
= (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
143 DB_INIT_TXN | dbflags
)
144 # DB_AUTO_COMMIT isn't a valid flag for env.open()
146 dbflags |
= DB_AUTO_COMMIT
147 except AttributeError:
150 flagsforenv
= flagsforenv | DB_RECOVER
152 # enable auto deadlock avoidance
153 self
.env
.set_lk_detect(DB_LOCK_DEFAULT
)
154 self
.env
.open(dbhome
, myflags | flagsforenv
)
156 myflags |
= DB_TRUNCATE
157 self
.db
= DB(self
.env
)
158 # this code relies on DBCursor.set* methods to raise exceptions
159 # rather than returning None
160 self
.db
.set_get_returns_none(1)
161 # allow duplicate entries [warning: be careful w/ metadata]
162 self
.db
.set_flags(DB_DUP
)
163 self
.db
.open(filename
, DB_BTREE
, dbflags | myflags
, mode
)
164 self
.dbfilename
= filename
165 # Initialize the table names list if this is a new database
166 txn
= self
.env
.txn_begin()
168 if not self
.db
.has_key(_table_names_key
, txn
):
169 self
.db
.put(_table_names_key
, pickle
.dumps([], 1), txn
=txn
)
176 # TODO verify more of the database's metadata?
177 self
.__tablecolumns
= {}
183 if self
.db
is not None:
186 if self
.env
is not None:
190 def checkpoint(self
, mins
=0):
192 self
.env
.txn_checkpoint(mins
)
193 except DBIncompleteError
:
199 except DBIncompleteError
:
202 def _db_print(self
) :
203 """Print the database to stdout for debugging"""
204 print "******** Printing raw database for debugging ********"
205 cur
= self
.db
.cursor()
207 key
, data
= cur
.first()
209 print repr({key
: data
})
216 except DBNotFoundError
:
220 def CreateTable(self
, table
, columns
):
221 """CreateTable(table, columns) - Create a new table in the database
222 raises TableDBError if it already exists or for other DB errors.
224 assert isinstance(columns
, ListType
)
227 # checking sanity of the table and column names here on
228 # table creation will prevent problems elsewhere.
229 if contains_metastrings(table
):
231 "bad table name: contains reserved metastrings")
232 for column
in columns
:
233 if contains_metastrings(column
):
235 "bad column name: contains reserved metastrings")
237 columnlist_key
= _columns_key(table
)
238 if self
.db
.has_key(columnlist_key
):
239 raise TableAlreadyExists
, "table already exists"
241 txn
= self
.env
.txn_begin()
242 # store the table's column info
243 self
.db
.put(columnlist_key
, pickle
.dumps(columns
, 1), txn
=txn
)
245 # add the table name to the tablelist
246 tablelist
= pickle
.loads(self
.db
.get(_table_names_key
, txn
=txn
,
248 tablelist
.append(table
)
249 # delete 1st, in case we opened with DB_DUP
250 self
.db
.delete(_table_names_key
, txn
)
251 self
.db
.put(_table_names_key
, pickle
.dumps(tablelist
, 1), txn
=txn
)
255 except DBError
, dberror
:
258 raise TableDBError
, dberror
[1]
261 def ListTableColumns(self
, table
):
262 """Return a list of columns in the given table.
263 [] if the table doesn't exist.
265 assert isinstance(table
, StringType
)
266 if contains_metastrings(table
):
267 raise ValueError, "bad table name: contains reserved metastrings"
269 columnlist_key
= _columns_key(table
)
270 if not self
.db
.has_key(columnlist_key
):
272 pickledcolumnlist
= self
.db
.get(columnlist_key
)
273 if pickledcolumnlist
:
274 return pickle
.loads(pickledcolumnlist
)
278 def ListTables(self
):
279 """Return a list of tables in this database."""
280 pickledtablelist
= self
.db
.get(_table_names_key
)
282 return pickle
.loads(pickledtablelist
)
286 def CreateOrExtendTable(self
, table
, columns
):
287 """CreateOrExtendTable(table, columns)
289 - Create a new table in the database.
290 If a table of this name already exists, extend it to have any
291 additional columns present in the given list as well as
292 all of its current columns.
294 assert isinstance(columns
, ListType
)
296 self
.CreateTable(table
, columns
)
297 except TableAlreadyExists
:
298 # the table already existed, add any new columns
301 columnlist_key
= _columns_key(table
)
302 txn
= self
.env
.txn_begin()
304 # load the current column list
305 oldcolumnlist
= pickle
.loads(
306 self
.db
.get(columnlist_key
, txn
=txn
, flags
=DB_RMW
))
307 # create a hash table for fast lookups of column names in the
310 for c
in oldcolumnlist
:
313 # create a new column list containing both the old and new
315 newcolumnlist
= copy
.copy(oldcolumnlist
)
317 if not oldcolumnhash
.has_key(c
):
318 newcolumnlist
.append(c
)
320 # store the table's new extended column list
321 if newcolumnlist
!= oldcolumnlist
:
322 # delete the old one first since we opened with DB_DUP
323 self
.db
.delete(columnlist_key
, txn
)
324 self
.db
.put(columnlist_key
,
325 pickle
.dumps(newcolumnlist
, 1),
331 self
.__load
_column
_info
(table
)
332 except DBError
, dberror
:
335 raise TableDBError
, dberror
[1]
338 def __load_column_info(self
, table
) :
339 """initialize the self.__tablecolumns dict"""
340 # check the column names
342 tcolpickles
= self
.db
.get(_columns_key(table
))
343 except DBNotFoundError
:
344 raise TableDBError
, "unknown table: %r" % (table
,)
346 raise TableDBError
, "unknown table: %r" % (table
,)
347 self
.__tablecolumns
[table
] = pickle
.loads(tcolpickles
)
349 def __new_rowid(self
, table
, txn
) :
350 """Create a new unique row identifier"""
353 # Generate a random 64-bit row ID string
354 # (note: this code has <64 bits of randomness
355 # but it's plenty for our database id needs!)
357 p
.pack_int(int(random
.random()*2147483647))
358 p
.pack_int(int(random
.random()*2147483647))
359 newid
= p
.get_buffer()
361 # Guarantee uniqueness by adding this key to the database
363 self
.db
.put(_rowid_key(table
, newid
), None, txn
=txn
,
364 flags
=DB_NOOVERWRITE
)
365 except DBKeyExistError
:
373 def Insert(self
, table
, rowdict
) :
374 """Insert(table, datadict) - Insert a new row into the table
375 using the keys+values from rowdict as the column values.
379 if not self
.db
.has_key(_columns_key(table
)):
380 raise TableDBError
, "unknown table"
382 # check the validity of each column name
383 if not self
.__tablecolumns
.has_key(table
):
384 self
.__load
_column
_info
(table
)
385 for column
in rowdict
.keys() :
386 if not self
.__tablecolumns
[table
].count(column
):
387 raise TableDBError
, "unknown column: %r" % (column
,)
389 # get a unique row identifier for this row
390 txn
= self
.env
.txn_begin()
391 rowid
= self
.__new
_rowid
(table
, txn
=txn
)
393 # insert the row values into the table database
394 for column
, dataitem
in rowdict
.items():
396 self
.db
.put(_data_key(table
, column
, rowid
), dataitem
, txn
=txn
)
401 except DBError
, dberror
:
402 # WIBNI we could just abort the txn and re-raise the exception?
403 # But no, because TableDBError is not related to DBError via
404 # inheritance, so it would be backwards incompatible. Do the next
406 info
= sys
.exc_info()
409 self
.db
.delete(_rowid_key(table
, rowid
))
410 raise TableDBError
, dberror
[1], info
[2]
413 def Modify(self
, table
, conditions
={}, mappings
={}):
414 """Modify(table, conditions) - Modify in rows matching 'conditions'
415 using mapping functions in 'mappings'
416 * conditions is a dictionary keyed on column names
417 containing condition functions expecting the data string as an
418 argument and returning a boolean.
419 * mappings is a dictionary keyed on column names containint condition
420 functions expecting the data string as an argument and returning the
421 new string for that column.
424 matching_rowids
= self
.__Select
(table
, [], conditions
)
426 # modify only requested columns
427 columns
= mappings
.keys()
428 for rowid
in matching_rowids
.keys():
431 for column
in columns
:
432 txn
= self
.env
.txn_begin()
433 # modify the requested column
435 dataitem
= self
.db
.get(
436 _data_key(table
, column
, rowid
),
439 _data_key(table
, column
, rowid
),
441 except DBNotFoundError
:
442 # XXXXXXX row key somehow didn't exist, assume no
445 dataitem
= mappings
[column
](dataitem
)
448 _data_key(table
, column
, rowid
),
453 except DBError
, dberror
:
458 except DBError
, dberror
:
459 raise TableDBError
, dberror
[1]
461 def Delete(self
, table
, conditions
={}):
462 """Delete(table, conditions) - Delete items matching the given
463 conditions from the table.
464 * conditions is a dictionary keyed on column names
465 containing condition functions expecting the data string as an
466 argument and returning a boolean.
469 matching_rowids
= self
.__Select
(table
, [], conditions
)
471 # delete row data from all columns
472 columns
= self
.__tablecolumns
[table
]
473 for rowid
in matching_rowids
.keys():
476 txn
= self
.env
.txn_begin()
477 for column
in columns
:
478 # delete the data key
480 self
.db
.delete(_data_key(table
, column
, rowid
),
482 except DBNotFoundError
:
483 # XXXXXXX column may not exist, assume no error
487 self
.db
.delete(_rowid_key(table
, rowid
), txn
)
488 except DBNotFoundError
:
489 # XXXXXXX row key somehow didn't exist, assume no error
493 except DBError
, dberror
:
497 except DBError
, dberror
:
498 raise TableDBError
, dberror
[1]
501 def Select(self
, table
, columns
, conditions
={}):
502 """Select(table, conditions) - retrieve specific row data
503 Returns a list of row column->value mapping dictionaries.
504 * columns is a list of which column data to return. If
505 columns is None, all columns will be returned.
506 * conditions is a dictionary keyed on column names
507 containing callable conditions expecting the data string as an
508 argument and returning a boolean.
511 if not self
.__tablecolumns
.has_key(table
):
512 self
.__load
_column
_info
(table
)
514 columns
= self
.__tablecolumns
[table
]
515 matching_rowids
= self
.__Select
(table
, columns
, conditions
)
516 except DBError
, dberror
:
517 raise TableDBError
, dberror
[1]
518 # return the matches as a list of dictionaries
519 return matching_rowids
.values()
522 def __Select(self
, table
, columns
, conditions
):
523 """__Select() - Used to implement Select and Delete (above)
524 Returns a dictionary keyed on rowids containing dicts
525 holding the row data for columns listed in the columns param
526 that match the given conditions.
527 * conditions is a dictionary keyed on column names
528 containing callable conditions expecting the data string as an
529 argument and returning a boolean.
531 # check the validity of each column name
532 if not self
.__tablecolumns
.has_key(table
):
533 self
.__load
_column
_info
(table
)
535 columns
= self
.tablecolumns
[table
]
536 for column
in (columns
+ conditions
.keys()):
537 if not self
.__tablecolumns
[table
].count(column
):
538 raise TableDBError
, "unknown column: %r" % (column
,)
540 # keyed on rows that match so far, containings dicts keyed on
541 # column names containing the data for that row and column.
543 # keys are rowids that do not match
546 # attempt to sort the conditions in such a way as to minimize full
548 def cmp_conditions(atuple
, btuple
):
551 if type(a
) is type(b
):
552 if isinstance(a
, PrefixCond
) and isinstance(b
, PrefixCond
):
553 # longest prefix first
554 return cmp(len(b
.prefix
), len(a
.prefix
))
555 if isinstance(a
, LikeCond
) and isinstance(b
, LikeCond
):
556 # longest likestr first
557 return cmp(len(b
.likestr
), len(a
.likestr
))
559 if isinstance(a
, ExactCond
):
561 if isinstance(b
, ExactCond
):
563 if isinstance(a
, PrefixCond
):
565 if isinstance(b
, PrefixCond
):
567 # leave all unknown condition callables alone as equals
570 conditionlist
= conditions
.items()
571 conditionlist
.sort(cmp_conditions
)
573 # Apply conditions to column data to find what we want
574 cur
= self
.db
.cursor()
576 for column
, condition
in conditionlist
:
577 column_num
= column_num
+ 1
578 searchkey
= _search_col_data_key(table
, column
)
579 # speedup: don't linear search columns within loop
580 if column
in columns
:
581 savethiscolumndata
= 1 # save the data for return
583 savethiscolumndata
= 0 # data only used for selection
586 key
, data
= cur
.set_range(searchkey
)
587 while key
[:len(searchkey
)] == searchkey
:
588 # extract the rowid from the key
589 rowid
= key
[-_rowid_str_len
:]
591 if not rejected_rowids
.has_key(rowid
):
592 # if no condition was specified or the condition
593 # succeeds, add row to our match list.
594 if not condition
or condition(data
):
595 if not matching_rowids
.has_key(rowid
):
596 matching_rowids
[rowid
] = {}
597 if savethiscolumndata
:
598 matching_rowids
[rowid
][column
] = data
600 if matching_rowids
.has_key(rowid
):
601 del matching_rowids
[rowid
]
602 rejected_rowids
[rowid
] = rowid
604 key
, data
= cur
.next()
606 except DBError
, dberror
:
607 if dberror
[0] != DB_NOTFOUND
:
613 # we're done selecting rows, garbage collect the reject list
616 # extract any remaining desired column data from the
617 # database for the matching rows.
619 for rowid
, rowdata
in matching_rowids
.items():
620 for column
in columns
:
621 if rowdata
.has_key(column
):
624 rowdata
[column
] = self
.db
.get(
625 _data_key(table
, column
, rowid
))
626 except DBError
, dberror
:
627 if dberror
[0] != DB_NOTFOUND
:
629 rowdata
[column
] = None
632 return matching_rowids
635 def Drop(self
, table
):
636 """Remove an entire table from the database"""
639 txn
= self
.env
.txn_begin()
641 # delete the column list
642 self
.db
.delete(_columns_key(table
), txn
)
644 cur
= self
.db
.cursor(txn
)
646 # delete all keys containing this tables column and row info
647 table_key
= _search_all_data_key(table
)
650 key
, data
= cur
.set_range(table_key
)
651 except DBNotFoundError
:
653 # only delete items in this table
654 if key
[:len(table_key
)] != table_key
:
658 # delete all rowids used by this table
659 table_key
= _search_rowid_key(table
)
662 key
, data
= cur
.set_range(table_key
)
663 except DBNotFoundError
:
665 # only delete items in this table
666 if key
[:len(table_key
)] != table_key
:
672 # delete the tablename from the table name list
673 tablelist
= pickle
.loads(
674 self
.db
.get(_table_names_key
, txn
=txn
, flags
=DB_RMW
))
676 tablelist
.remove(table
)
678 # hmm, it wasn't there, oh well, that's what we want.
680 # delete 1st, incase we opened with DB_DUP
681 self
.db
.delete(_table_names_key
, txn
)
682 self
.db
.put(_table_names_key
, pickle
.dumps(tablelist
, 1), txn
=txn
)
687 if self
.__tablecolumns
.has_key(table
):
688 del self
.__tablecolumns
[table
]
690 except DBError
, dberror
:
693 raise TableDBError
, dberror
[1]