1 #-----------------------------------------------------------------------
3 # Copyright (C) 2000, 2001 by Autonomous Zone Industries
4 # Copyright (C) 2002 Gregory P. Smith
6 # License: This is free software. You may use this software for any
7 # purpose including modification/redistribution, so long as
8 # this header remains intact and that you do not claim any
9 # rights of ownership or authorship of this software. This
10 # software has been tested, but no warranty is expressed or
13 # -- Gregory P. Smith <greg@electricrain.com>
15 # This provides a simple database table interface built on top of
16 # the Python BerkeleyDB 3 interface.
25 from types
import ListType
, StringType
26 import cPickle
as pickle
29 # For Pythons w/distutils pybsddb
30 from bsddb3
.db
import *
33 from bsddb
.db
import *
35 # XXX(nnorwitz): is this correct? DBIncompleteError is conditional in _bsddb.c
39 class DBIncompleteError(Exception):
42 class TableDBError(StandardError):
44 class TableAlreadyExists(TableDBError
):
49 """This condition matches everything"""
50 def __call__(self
, s
):
class ExactCond(Cond):
    """Condition callable that accepts only data equal to one fixed string."""

    def __init__(self, strtomatch):
        # The reference value every candidate is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        """Return the result of comparing candidate ``s`` with the stored string."""
        wanted = self.strtomatch
        return s == wanted
class PrefixCond(Cond):
    """Acts as a condition function for matching a string prefix"""

    def __init__(self, prefix):
        # Both __call__ below and the condition ordering used when selecting
        # rows read ``self.prefix``, so it has to be stored at construction.
        self.prefix = prefix

    def __call__(self, s):
        """Return true when ``s`` begins with the stored prefix."""
        return s[:len(self.prefix)] == self.prefix
class PostfixCond(Cond):
    """Acts as a condition function for matching a string postfix"""

    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        """Return true when ``s`` ends with the stored postfix.

        An empty postfix matches every string.  The slice-based form
        ``s[-len(postfix):]`` degenerates to ``s[0:]`` for an empty postfix
        and would then only match the empty string, so ``endswith`` is used.
        """
        return s.endswith(self.postfix)
76 Acts as a function that will match using an SQL 'LIKE' style
77 string. Case insensitive and % signs are wild cards.
78 This isn't perfect but it should work for the simple common cases.
def __init__(self, likestr, re_flags=re.IGNORECASE):
    """Compile an SQL 'LIKE' style string into an anchored regular expression.

    * likestr - the LIKE pattern; each % sign is a wildcard matching any
      run of characters, every other character is matched literally.
    * re_flags - flags handed to re.compile (case insensitive by default).

    Sets self.likestr (the regular expression source, without anchors) and
    self.re (the compiled, ^...$ anchored pattern).
    """
    # Escape each literal piece with re.escape so that *every* regular
    # expression metacharacter (including \ ^ $ { } |) is neutralised,
    # then join the pieces with the wildcard that replaces each % sign.
    literal_parts = [re.escape(part) for part in likestr.split('%')]
    self.likestr = '.*'.join(literal_parts)
    self.re = re.compile('^' + self.likestr + '$', re_flags)
def __call__(self, s):
    """Apply the compiled pattern to ``s``; gives the match object or None."""
    compiled = self.re
    return compiled.match(s)
# Keys under which database-wide metadata is stored.

# key holding the list of the tables in this db
_table_names_key = "__TABLE_NAMES__"
# suffix: table_name + this suffix is the key containing a list of columns
_columns = "._COLUMNS__"
def _columns_key(table):
    """Build the metadata key under which ``table``'s column list is stored."""
    key = table + _columns
    return key
# Keys found within the table sub databases.

# separator: table + this + column + this + rowid is a key containing table data
_data = "._DATA_."
# separator: table + this + rowid + this is a key containing a unique entry
# for each row in the table (no data is stored under it)
_rowid = "._ROWID_."
# length in bytes of the unique rowid strings
_rowid_str_len = 8
def _data_key(table, col, rowid):
    """Build the key holding the value of column ``col`` for row ``rowid``."""
    # Same concatenation as before, spelled as "column search prefix + rowid".
    column_prefix = table + _data + col + _data
    return column_prefix + rowid
def _search_col_data_key(table, col):
    """Build the key prefix shared by every data key of column ``col``."""
    table_part = table + _data
    return table_part + col + _data
def _search_all_data_key(table):
    """Build the key prefix shared by every data key of ``table``.

    Every data key starts with table + _data (see _data_key), so this
    prefix can be used to range over all column data of one table.
    """
    return table + _data
def _rowid_key(table, rowid):
    """Build the key marking the existence of row ``rowid`` in ``table``."""
    rowid_prefix = table + _rowid
    return rowid_prefix + rowid + _rowid
def _search_rowid_key(table):
    """Build the key prefix shared by every rowid key of ``table``."""
    prefix = table + _rowid
    return prefix
def contains_metastrings(s):
    """Check the given string for reserved metadata strings.

    Returns a true value (1) when ``s`` contains any of the metadata
    strings that might interfere with dbtables database operation, and a
    false value (0) when it contains none of them.
    """
    for reserved in (_table_names_key, _columns, _data, _rowid):
        if s.find(reserved) >= 0:
            return 1
    return 0
138 def __init__(self
, filename
, dbhome
, create
=0, truncate
=0, mode
=0600,
139 recover
=0, dbflags
=0):
140 """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)
142 Open database name in the dbhome BerkeleyDB directory.
143 Use keyword arguments when calling this constructor.
149 flagsforenv
= (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
150 DB_INIT_TXN | dbflags
)
151 # DB_AUTO_COMMIT isn't a valid flag for env.open()
153 dbflags |
= DB_AUTO_COMMIT
154 except AttributeError:
157 flagsforenv
= flagsforenv | DB_RECOVER
159 # enable auto deadlock avoidance
160 self
.env
.set_lk_detect(DB_LOCK_DEFAULT
)
161 self
.env
.open(dbhome
, myflags | flagsforenv
)
163 myflags |
= DB_TRUNCATE
164 self
.db
= DB(self
.env
)
165 # this code relies on DBCursor.set* methods to raise exceptions
166 # rather than returning None
167 self
.db
.set_get_returns_none(1)
168 # allow duplicate entries [warning: be careful w/ metadata]
169 self
.db
.set_flags(DB_DUP
)
170 self
.db
.open(filename
, DB_BTREE
, dbflags | myflags
, mode
)
171 self
.dbfilename
= filename
172 # Initialize the table names list if this is a new database
173 txn
= self
.env
.txn_begin()
175 if not self
.db
.has_key(_table_names_key
, txn
):
176 self
.db
.put(_table_names_key
, pickle
.dumps([], 1), txn
=txn
)
183 # TODO verify more of the database's metadata?
184 self
.__tablecolumns
= {}
190 if self
.db
is not None:
193 if self
.env
is not None:
197 def checkpoint(self
, mins
=0):
199 self
.env
.txn_checkpoint(mins
)
200 except DBIncompleteError
:
206 except DBIncompleteError
:
209 def _db_print(self
) :
210 """Print the database to stdout for debugging"""
211 print "******** Printing raw database for debugging ********"
212 cur
= self
.db
.cursor()
214 key
, data
= cur
.first()
216 print repr({key
: data
})
223 except DBNotFoundError
:
227 def CreateTable(self
, table
, columns
):
228 """CreateTable(table, columns) - Create a new table in the database.
230 raises TableDBError if it already exists or for other DB errors.
232 assert isinstance(columns
, ListType
)
235 # checking sanity of the table and column names here on
236 # table creation will prevent problems elsewhere.
237 if contains_metastrings(table
):
239 "bad table name: contains reserved metastrings")
240 for column
in columns
:
241 if contains_metastrings(column
):
243 "bad column name: contains reserved metastrings")
245 columnlist_key
= _columns_key(table
)
246 if self
.db
.has_key(columnlist_key
):
247 raise TableAlreadyExists
, "table already exists"
249 txn
= self
.env
.txn_begin()
250 # store the table's column info
251 self
.db
.put(columnlist_key
, pickle
.dumps(columns
, 1), txn
=txn
)
253 # add the table name to the tablelist
254 tablelist
= pickle
.loads(self
.db
.get(_table_names_key
, txn
=txn
,
256 tablelist
.append(table
)
257 # delete 1st, in case we opened with DB_DUP
258 self
.db
.delete(_table_names_key
, txn
)
259 self
.db
.put(_table_names_key
, pickle
.dumps(tablelist
, 1), txn
=txn
)
263 except DBError
, dberror
:
266 raise TableDBError
, dberror
[1]
def ListTableColumns(self, table):
    """Return a list of columns in the given table.

    [] if the table doesn't exist.
    Raises ValueError if the table name contains reserved metastrings.
    """
    assert isinstance(table, StringType)
    if contains_metastrings(table):
        raise ValueError("bad table name: contains reserved metastrings")

    columnlist_key = _columns_key(table)
    if not self.db.has_key(columnlist_key):
        # no column list stored under this name: the table doesn't exist
        return []
    pickledcolumnlist = self.db.get(columnlist_key)
    if pickledcolumnlist:
        # NOTE(review): unpickles data read from the database file; only
        # safe as long as that file is trusted.
        return pickle.loads(pickledcolumnlist)
    # key present but nothing usable stored under it
    return []
def ListTables(self):
    """Return a list of tables in this database.

    [] if no table name list is stored in the database.
    """
    pickledtablelist = self.db.get(_table_names_key)
    if pickledtablelist:
        # NOTE(review): unpickles data read from the database file; only
        # safe as long as that file is trusted.
        return pickle.loads(pickledtablelist)
    # get() produced nothing to unpickle; handing that to pickle.loads
    # would raise instead of reporting "no tables".
    return []
294 def CreateOrExtendTable(self
, table
, columns
):
295 """CreateOrExtendTable(table, columns)
297 Create a new table in the database.
299 If a table of this name already exists, extend it to have any
300 additional columns present in the given list as well as
301 all of its current columns.
303 assert isinstance(columns
, ListType
)
305 self
.CreateTable(table
, columns
)
306 except TableAlreadyExists
:
307 # the table already existed, add any new columns
310 columnlist_key
= _columns_key(table
)
311 txn
= self
.env
.txn_begin()
313 # load the current column list
314 oldcolumnlist
= pickle
.loads(
315 self
.db
.get(columnlist_key
, txn
=txn
, flags
=DB_RMW
))
316 # create a hash table for fast lookups of column names in the
319 for c
in oldcolumnlist
:
322 # create a new column list containing both the old and new
324 newcolumnlist
= copy
.copy(oldcolumnlist
)
326 if not oldcolumnhash
.has_key(c
):
327 newcolumnlist
.append(c
)
329 # store the table's new extended column list
330 if newcolumnlist
!= oldcolumnlist
:
331 # delete the old one first since we opened with DB_DUP
332 self
.db
.delete(columnlist_key
, txn
)
333 self
.db
.put(columnlist_key
,
334 pickle
.dumps(newcolumnlist
, 1),
340 self
.__load
_column
_info
(table
)
341 except DBError
, dberror
:
344 raise TableDBError
, dberror
[1]
def __load_column_info(self, table):
    """initialize the self.__tablecolumns dict

    Loads the pickled column list of ``table`` from the database and
    caches it in self.__tablecolumns[table].
    Raises TableDBError if no column list is stored for the table.
    """
    # check the column names
    try:
        tcolpickles = self.db.get(_columns_key(table))
    except DBNotFoundError:
        tcolpickles = None
    # a missing key may surface either as DBNotFoundError or as an empty
    # get() result; both mean the table is unknown
    if not tcolpickles:
        raise TableDBError("unknown table: %r" % (table,))
    self.__tablecolumns[table] = pickle.loads(tcolpickles)
358 def __new_rowid(self
, table
, txn
) :
359 """Create a new unique row identifier"""
362 # Generate a random 64-bit row ID string
363 # (note: this code has <64 bits of randomness
364 # but it's plenty for our database id needs!)
366 p
.pack_int(int(random
.random()*2147483647))
367 p
.pack_int(int(random
.random()*2147483647))
368 newid
= p
.get_buffer()
370 # Guarantee uniqueness by adding this key to the database
372 self
.db
.put(_rowid_key(table
, newid
), None, txn
=txn
,
373 flags
=DB_NOOVERWRITE
)
374 except DBKeyExistError
:
382 def Insert(self
, table
, rowdict
) :
383 """Insert(table, datadict) - Insert a new row into the table
384 using the keys+values from rowdict as the column values.
388 if not self
.db
.has_key(_columns_key(table
)):
389 raise TableDBError
, "unknown table"
391 # check the validity of each column name
392 if not self
.__tablecolumns
.has_key(table
):
393 self
.__load
_column
_info
(table
)
394 for column
in rowdict
.keys() :
395 if not self
.__tablecolumns
[table
].count(column
):
396 raise TableDBError
, "unknown column: %r" % (column
,)
398 # get a unique row identifier for this row
399 txn
= self
.env
.txn_begin()
400 rowid
= self
.__new
_rowid
(table
, txn
=txn
)
402 # insert the row values into the table database
403 for column
, dataitem
in rowdict
.items():
405 self
.db
.put(_data_key(table
, column
, rowid
), dataitem
, txn
=txn
)
410 except DBError
, dberror
:
411 # WIBNI we could just abort the txn and re-raise the exception?
412 # But no, because TableDBError is not related to DBError via
413 # inheritance, so it would be backwards incompatible. Do the next
415 info
= sys
.exc_info()
418 self
.db
.delete(_rowid_key(table
, rowid
))
419 raise TableDBError
, dberror
[1], info
[2]
422 def Modify(self
, table
, conditions
={}, mappings
={}):
423 """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'
425 * table - the table name
426 * conditions - a dictionary keyed on column names containing
427 a condition callable expecting the data string as an
428 argument and returning a boolean.
429 * mappings - a dictionary keyed on column names containing a
430 condition callable expecting the data string as an argument and
431 returning the new string for that column.
434 matching_rowids
= self
.__Select
(table
, [], conditions
)
436 # modify only requested columns
437 columns
= mappings
.keys()
438 for rowid
in matching_rowids
.keys():
441 for column
in columns
:
442 txn
= self
.env
.txn_begin()
443 # modify the requested column
445 dataitem
= self
.db
.get(
446 _data_key(table
, column
, rowid
),
449 _data_key(table
, column
, rowid
),
451 except DBNotFoundError
:
452 # XXXXXXX row key somehow didn't exist, assume no
455 dataitem
= mappings
[column
](dataitem
)
458 _data_key(table
, column
, rowid
),
463 # catch all exceptions here since we call unknown callables
469 except DBError
, dberror
:
470 raise TableDBError
, dberror
[1]
472 def Delete(self
, table
, conditions
={}):
473 """Delete(table, conditions) - Delete items matching the given
474 conditions from the table.
476 * conditions - a dictionary keyed on column names containing
477 condition functions expecting the data string as an
478 argument and returning a boolean.
481 matching_rowids
= self
.__Select
(table
, [], conditions
)
483 # delete row data from all columns
484 columns
= self
.__tablecolumns
[table
]
485 for rowid
in matching_rowids
.keys():
488 txn
= self
.env
.txn_begin()
489 for column
in columns
:
490 # delete the data key
492 self
.db
.delete(_data_key(table
, column
, rowid
),
494 except DBNotFoundError
:
495 # XXXXXXX column may not exist, assume no error
499 self
.db
.delete(_rowid_key(table
, rowid
), txn
)
500 except DBNotFoundError
:
501 # XXXXXXX row key somehow didn't exist, assume no error
505 except DBError
, dberror
:
509 except DBError
, dberror
:
510 raise TableDBError
, dberror
[1]
513 def Select(self
, table
, columns
, conditions
={}):
514 """Select(table, columns, conditions) - retrieve specific row data
515 Returns a list of row column->value mapping dictionaries.
517 * columns - a list of which column data to return. If
518 columns is None, all columns will be returned.
519 * conditions - a dictionary keyed on column names
520 containing callable conditions expecting the data string as an
521 argument and returning a boolean.
524 if not self
.__tablecolumns
.has_key(table
):
525 self
.__load
_column
_info
(table
)
527 columns
= self
.__tablecolumns
[table
]
528 matching_rowids
= self
.__Select
(table
, columns
, conditions
)
529 except DBError
, dberror
:
530 raise TableDBError
, dberror
[1]
531 # return the matches as a list of dictionaries
532 return matching_rowids
.values()
535 def __Select(self
, table
, columns
, conditions
):
536 """__Select() - Used to implement Select and Delete (above)
537 Returns a dictionary keyed on rowids containing dicts
538 holding the row data for columns listed in the columns param
539 that match the given conditions.
540 * conditions is a dictionary keyed on column names
541 containing callable conditions expecting the data string as an
542 argument and returning a boolean.
544 # check the validity of each column name
545 if not self
.__tablecolumns
.has_key(table
):
546 self
.__load
_column
_info
(table
)
548 columns
= self
.tablecolumns
[table
]
549 for column
in (columns
+ conditions
.keys()):
550 if not self
.__tablecolumns
[table
].count(column
):
551 raise TableDBError
, "unknown column: %r" % (column
,)
553 # keyed on rows that match so far, containings dicts keyed on
554 # column names containing the data for that row and column.
556 # keys are rowids that do not match
559 # attempt to sort the conditions in such a way as to minimize full
561 def cmp_conditions(atuple
, btuple
):
564 if type(a
) is type(b
):
565 if isinstance(a
, PrefixCond
) and isinstance(b
, PrefixCond
):
566 # longest prefix first
567 return cmp(len(b
.prefix
), len(a
.prefix
))
568 if isinstance(a
, LikeCond
) and isinstance(b
, LikeCond
):
569 # longest likestr first
570 return cmp(len(b
.likestr
), len(a
.likestr
))
572 if isinstance(a
, ExactCond
):
574 if isinstance(b
, ExactCond
):
576 if isinstance(a
, PrefixCond
):
578 if isinstance(b
, PrefixCond
):
580 # leave all unknown condition callables alone as equals
583 conditionlist
= conditions
.items()
584 conditionlist
.sort(cmp_conditions
)
586 # Apply conditions to column data to find what we want
587 cur
= self
.db
.cursor()
589 for column
, condition
in conditionlist
:
590 column_num
= column_num
+ 1
591 searchkey
= _search_col_data_key(table
, column
)
592 # speedup: don't linear search columns within loop
593 if column
in columns
:
594 savethiscolumndata
= 1 # save the data for return
596 savethiscolumndata
= 0 # data only used for selection
599 key
, data
= cur
.set_range(searchkey
)
600 while key
[:len(searchkey
)] == searchkey
:
601 # extract the rowid from the key
602 rowid
= key
[-_rowid_str_len
:]
604 if not rejected_rowids
.has_key(rowid
):
605 # if no condition was specified or the condition
606 # succeeds, add row to our match list.
607 if not condition
or condition(data
):
608 if not matching_rowids
.has_key(rowid
):
609 matching_rowids
[rowid
] = {}
610 if savethiscolumndata
:
611 matching_rowids
[rowid
][column
] = data
613 if matching_rowids
.has_key(rowid
):
614 del matching_rowids
[rowid
]
615 rejected_rowids
[rowid
] = rowid
617 key
, data
= cur
.next()
619 except DBError
, dberror
:
620 if dberror
[0] != DB_NOTFOUND
:
626 # we're done selecting rows, garbage collect the reject list
629 # extract any remaining desired column data from the
630 # database for the matching rows.
632 for rowid
, rowdata
in matching_rowids
.items():
633 for column
in columns
:
634 if rowdata
.has_key(column
):
637 rowdata
[column
] = self
.db
.get(
638 _data_key(table
, column
, rowid
))
639 except DBError
, dberror
:
640 if dberror
[0] != DB_NOTFOUND
:
642 rowdata
[column
] = None
645 return matching_rowids
648 def Drop(self
, table
):
649 """Remove an entire table from the database"""
652 txn
= self
.env
.txn_begin()
654 # delete the column list
655 self
.db
.delete(_columns_key(table
), txn
)
657 cur
= self
.db
.cursor(txn
)
659 # delete all keys containing this tables column and row info
660 table_key
= _search_all_data_key(table
)
663 key
, data
= cur
.set_range(table_key
)
664 except DBNotFoundError
:
666 # only delete items in this table
667 if key
[:len(table_key
)] != table_key
:
671 # delete all rowids used by this table
672 table_key
= _search_rowid_key(table
)
675 key
, data
= cur
.set_range(table_key
)
676 except DBNotFoundError
:
678 # only delete items in this table
679 if key
[:len(table_key
)] != table_key
:
685 # delete the tablename from the table name list
686 tablelist
= pickle
.loads(
687 self
.db
.get(_table_names_key
, txn
=txn
, flags
=DB_RMW
))
689 tablelist
.remove(table
)
691 # hmm, it wasn't there, oh well, that's what we want.
693 # delete 1st, incase we opened with DB_DUP
694 self
.db
.delete(_table_names_key
, txn
)
695 self
.db
.put(_table_names_key
, pickle
.dumps(tablelist
, 1), txn
=txn
)
700 if self
.__tablecolumns
.has_key(table
):
701 del self
.__tablecolumns
[table
]
703 except DBError
, dberror
:
706 raise TableDBError
, dberror
[1]