1 #-----------------------------------------------------------------------
3 # Copyright (C) 2000, 2001 by Autonomous Zone Industries
4 # Copyright (C) 2002 Gregory P. Smith
6 # License: This is free software. You may use this software for any
7 # purpose including modification/redistribution, so long as
8 # this header remains intact and that you do not claim any
9 # rights of ownership or authorship of this software. This
10 # software has been tested, but no warranty is expressed or
13 # -- Gregory P. Smith <greg@krypto.org>
15 # This provides a simple database table interface built on top of
16 # the Python Berkeley DB 3 interface.
27 if sys
.version_info
[0] >= 3 :
30 if sys
.version_info
< (2, 6) :
31 import cPickle
as pickle
33 # When we drop support for python 2.4
34 # we could use: (in 2.5 we need a __future__ statement)
36 # with warnings.catch_warnings():
37 # warnings.filterwarnings(...)
40 # We can not use "with" as is, because it would be invalid syntax
41 # in python 2.4 and (with no __future__) 2.5.
42 # Here we simulate "with" following PEP 343 :
44 w
= warnings
.catch_warnings()
47 warnings
.filterwarnings('ignore',
48 message
='the cPickle module has been removed in Python 3.0',
49 category
=DeprecationWarning)
50 import cPickle
as pickle
56 # For Pythons w/distutils pybsddb
62 class TableDBError(StandardError):
class TableAlreadyExists(TableDBError):
    """Raised by CreateTable when the table's column list key already exists."""
class Cond:
    """This condition matches everything"""
    def __call__(self, s):
        # The argument is ignored: every value is accepted.
        return 1
class ExactCond(Cond):
    """Condition callable that accepts only values equal to a fixed string."""

    def __init__(self, strtomatch):
        # Reference value every candidate is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        expected = self.strtomatch
        return s == expected
class PrefixCond(Cond):
    """Acts as a condition function for matching a string prefix"""
    def __init__(self, prefix):
        # __call__ reads this attribute, so it must be stored here.
        self.prefix = prefix
    def __call__(self, s):
        # Compare the leading len(prefix) characters of s with the prefix.
        return s[:len(self.prefix)] == self.prefix
class PostfixCond(Cond):
    """Acts as a condition function for matching a string postfix"""
    def __init__(self, postfix):
        self.postfix = postfix
    def __call__(self, s):
        postfix = self.postfix
        if not postfix:
            # s[-0:] is the whole string, not its empty tail, so the
            # generic slice below would reject every non-empty s.  Compare
            # against the empty leading slice instead: an empty postfix
            # matches any string.
            return s[:0] == postfix
        return s[-len(postfix):] == postfix
class LikeCond(Cond):
    """
    Acts as a function that will match using an SQL 'LIKE' style
    string.  Case insensitive and % signs are wild cards.
    This isn't perfect but it should work for the simple common cases.

    Every character other than % is matched literally.
    """
    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # Split on the wildcard first and escape each literal run on its
        # own.  This covers every regex metacharacter (including
        # \ ^ $ | { }) without ever having to un-escape a % sign.
        literal_runs = [re.escape(run) for run in likestr.split('%')]
        # convert %s to wildcards; self.likestr holds the regex body
        # without the anchors.
        self.likestr = '.*'.join(literal_runs)
        self.re = re.compile('^' + self.likestr + '$', re_flags)
    def __call__(self, s):
        # Returns the match object (truthy) or None.
        return self.re.match(s)
112 # keys used to store database metadata
# key under which the pickled list of the tables in this db is stored
_table_names_key = '__TABLE_NAMES__'
# suffix: table_name + this is the key holding that table's pickled
# list of columns
_columns = '._COLUMNS__'
def _columns_key(table):
    """Return the metadata key that holds the column list of *table*."""
    key = table + _columns
    return key
121 # these keys are found within table sub databases
# table+this+column+this+rowid key contains table data
_data = '._DATA_.'
# table+this+rowid+this key contains a unique entry for each
# row in the table.  (no data is stored)
_rowid = '._ROWID_.'
# length in bytes of the unique rowid strings
_rowid_str_len = 8
def _data_key(table, col, rowid):
    """Return the key storing the value of column *col* for row *rowid*."""
    # Layout: table, marker, column, marker, rowid.
    column_prefix = table + _data + col + _data
    return column_prefix + rowid
def _search_col_data_key(table, col):
    """Return the prefix shared by every data key of column *col*.

    This is the data key layout with the trailing rowid left off.
    """
    marker = _data
    table_part = table + marker
    return table_part + col + marker
def _search_all_data_key(table):
    """Return the prefix shared by every data key of *table*.

    Data keys are built as table + _data + column + _data + rowid, so
    table + _data is their common prefix regardless of the column.
    """
    return table + _data
def _rowid_key(table, rowid):
    """Return the key marking the existence of row *rowid* in *table*."""
    # The rowid is wrapped by the marker on both sides.
    marker = _rowid
    wrapped_rowid = marker + rowid + marker
    return table + wrapped_rowid
def _search_rowid_key(table):
    """Return the prefix shared by every rowid key of *table*."""
    prefix = table + _rowid
    return prefix
def contains_metastrings(s) :
    """Verify that the given string does not contain any
    metadata strings that might interfere with dbtables database operation.

    Returns 1 if any reserved metadata string occurs in s, 0 otherwise.
    """
    for reserved in (_table_names_key, _columns, _data, _rowid):
        if s.find(reserved) >= 0:
            return 1
    return 0
159 def __init__(self
, filename
, dbhome
, create
=0, truncate
=0, mode
=0600,
160 recover
=0, dbflags
=0):
161 """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)
163 Open database name in the dbhome Berkeley DB directory.
164 Use keyword arguments when calling this constructor.
167 myflags
= db
.DB_THREAD
169 myflags |
= db
.DB_CREATE
170 flagsforenv
= (db
.DB_INIT_MPOOL | db
.DB_INIT_LOCK | db
.DB_INIT_LOG |
171 db
.DB_INIT_TXN | dbflags
)
172 # DB_AUTO_COMMIT isn't a valid flag for env.open()
174 dbflags |
= db
.DB_AUTO_COMMIT
175 except AttributeError:
178 flagsforenv
= flagsforenv | db
.DB_RECOVER
179 self
.env
= db
.DBEnv()
180 # enable auto deadlock avoidance
181 self
.env
.set_lk_detect(db
.DB_LOCK_DEFAULT
)
182 self
.env
.open(dbhome
, myflags | flagsforenv
)
184 myflags |
= db
.DB_TRUNCATE
185 self
.db
= db
.DB(self
.env
)
186 # this code relies on DBCursor.set* methods to raise exceptions
187 # rather than returning None
188 self
.db
.set_get_returns_none(1)
189 # allow duplicate entries [warning: be careful w/ metadata]
190 self
.db
.set_flags(db
.DB_DUP
)
191 self
.db
.open(filename
, db
.DB_BTREE
, dbflags | myflags
, mode
)
192 self
.dbfilename
= filename
194 if sys
.version_info
[0] >= 3 :
195 class cursor_py3k(object) :
196 def __init__(self
, dbcursor
) :
197 self
._dbcursor
= dbcursor
200 return self
._dbcursor
.close()
202 def set_range(self
, search
) :
203 v
= self
._dbcursor
.set_range(bytes(search
, "iso8859-1"))
205 v
= (v
[0].decode("iso8859-1"),
206 v
[1].decode("iso8859-1"))
210 v
= getattr(self
._dbcursor
, "next")()
212 v
= (v
[0].decode("iso8859-1"),
213 v
[1].decode("iso8859-1"))
216 class db_py3k(object) :
217 def __init__(self
, db
) :
220 def cursor(self
, txn
=None) :
221 return cursor_py3k(self
._db
.cursor(txn
=txn
))
223 def has_key(self
, key
, txn
=None) :
224 return getattr(self
._db
,"has_key")(bytes(key
, "iso8859-1"),
227 def put(self
, key
, value
, flags
=0, txn
=None) :
228 key
= bytes(key
, "iso8859-1")
229 if value
is not None :
230 value
= bytes(value
, "iso8859-1")
231 return self
._db
.put(key
, value
, flags
=flags
, txn
=txn
)
233 def put_bytes(self
, key
, value
, txn
=None) :
234 key
= bytes(key
, "iso8859-1")
235 return self
._db
.put(key
, value
, txn
=txn
)
237 def get(self
, key
, txn
=None, flags
=0) :
238 key
= bytes(key
, "iso8859-1")
239 v
= self
._db
.get(key
, txn
=txn
, flags
=flags
)
241 v
= v
.decode("iso8859-1")
244 def get_bytes(self
, key
, txn
=None, flags
=0) :
245 key
= bytes(key
, "iso8859-1")
246 return self
._db
.get(key
, txn
=txn
, flags
=flags
)
248 def delete(self
, key
, txn
=None) :
249 key
= bytes(key
, "iso8859-1")
250 return self
._db
.delete(key
, txn
=txn
)
253 return self
._db
.close()
255 self
.db
= db_py3k(self
.db
)
259 # Initialize the table names list if this is a new database
260 txn
= self
.env
.txn_begin()
262 if not getattr(self
.db
, "has_key")(_table_names_key
, txn
):
263 getattr(self
.db
, "put_bytes", self
.db
.put
) \
264 (_table_names_key
, pickle
.dumps([], 1), txn
=txn
)
271 # TODO verify more of the database's metadata?
272 self
.__tablecolumns
= {}
278 if self
.db
is not None:
281 if self
.env
is not None:
def checkpoint(self, mins=0):
    """Forward a checkpoint request to the DB environment.

    mins is passed unchanged to the environment's txn_checkpoint().
    """
    environment = self.env
    environment.txn_checkpoint(mins)
291 def _db_print(self
) :
292 """Print the database to stdout for debugging"""
293 print "******** Printing raw database for debugging ********"
294 cur
= self
.db
.cursor()
296 key
, data
= cur
.first()
298 print repr({key
: data
})
305 except db
.DBNotFoundError
:
309 def CreateTable(self
, table
, columns
):
310 """CreateTable(table, columns) - Create a new table in the database.
312 raises TableDBError if it already exists or for other DB errors.
314 assert isinstance(columns
, list)
318 # checking sanity of the table and column names here on
319 # table creation will prevent problems elsewhere.
320 if contains_metastrings(table
):
322 "bad table name: contains reserved metastrings")
323 for column
in columns
:
324 if contains_metastrings(column
):
326 "bad column name: contains reserved metastrings")
328 columnlist_key
= _columns_key(table
)
329 if getattr(self
.db
, "has_key")(columnlist_key
):
330 raise TableAlreadyExists
, "table already exists"
332 txn
= self
.env
.txn_begin()
333 # store the table's column info
334 getattr(self
.db
, "put_bytes", self
.db
.put
)(columnlist_key
,
335 pickle
.dumps(columns
, 1), txn
=txn
)
337 # add the table name to the tablelist
338 tablelist
= pickle
.loads(getattr(self
.db
, "get_bytes",
339 self
.db
.get
) (_table_names_key
, txn
=txn
, flags
=db
.DB_RMW
))
340 tablelist
.append(table
)
341 # delete 1st, in case we opened with DB_DUP
342 self
.db
.delete(_table_names_key
, txn
=txn
)
343 getattr(self
.db
, "put_bytes", self
.db
.put
)(_table_names_key
,
344 pickle
.dumps(tablelist
, 1), txn
=txn
)
348 except db
.DBError
, dberror
:
351 if sys
.version_info
< (2, 6) :
352 raise TableDBError
, dberror
[1]
354 raise TableDBError
, dberror
.args
[1]
def ListTableColumns(self, table):
    """Return a list of columns in the given table.
    [] if the table doesn't exist.

    Raises ValueError if the table name contains a reserved metastring.
    """
    assert isinstance(table, str)
    if contains_metastrings(table):
        raise ValueError("bad table name: contains reserved metastrings")

    columnlist_key = _columns_key(table)
    if not getattr(self.db, "has_key")(columnlist_key):
        # unknown table: no column list was ever stored
        return []
    # Use the raw-bytes accessor when the db object provides one, so the
    # pickle is not passed through a text decoding step.
    pickledcolumnlist = getattr(self.db, "get_bytes",
                                self.db.get)(columnlist_key)
    if pickledcolumnlist:
        return pickle.loads(pickledcolumnlist)
    return []
def ListTables(self):
    """Return a list of tables in this database.

    [] if no table list is stored.
    """
    # Read the pickle through the same accessor the other metadata
    # readers use: get_bytes when the db object offers it, plain get
    # otherwise.
    pickledtablelist = getattr(self.db, "get_bytes",
                               self.db.get)(_table_names_key)
    if pickledtablelist:
        return pickle.loads(pickledtablelist)
    return []
383 def CreateOrExtendTable(self
, table
, columns
):
384 """CreateOrExtendTable(table, columns)
386 Create a new table in the database.
388 If a table of this name already exists, extend it to have any
389 additional columns present in the given list as well as
390 all of its current columns.
392 assert isinstance(columns
, list)
395 self
.CreateTable(table
, columns
)
396 except TableAlreadyExists
:
397 # the table already existed, add any new columns
400 columnlist_key
= _columns_key(table
)
401 txn
= self
.env
.txn_begin()
403 # load the current column list
404 oldcolumnlist
= pickle
.loads(
405 getattr(self
.db
, "get_bytes",
406 self
.db
.get
)(columnlist_key
, txn
=txn
, flags
=db
.DB_RMW
))
407 # create a hash table for fast lookups of column names in the
410 for c
in oldcolumnlist
:
413 # create a new column list containing both the old and new
415 newcolumnlist
= copy
.copy(oldcolumnlist
)
417 if not c
in oldcolumnhash
:
418 newcolumnlist
.append(c
)
420 # store the table's new extended column list
421 if newcolumnlist
!= oldcolumnlist
:
422 # delete the old one first since we opened with DB_DUP
423 self
.db
.delete(columnlist_key
, txn
=txn
)
424 getattr(self
.db
, "put_bytes", self
.db
.put
)(columnlist_key
,
425 pickle
.dumps(newcolumnlist
, 1),
431 self
.__load
_column
_info
(table
)
432 except db
.DBError
, dberror
:
435 if sys
.version_info
< (2, 6) :
436 raise TableDBError
, dberror
[1]
438 raise TableDBError
, dberror
.args
[1]
def __load_column_info(self, table) :
    """initialize the self.__tablecolumns dict

    Loads the pickled column list of *table* and caches it under
    self.__tablecolumns[table].  Raises TableDBError when no column
    list can be read for the table.
    """
    # check the column names
    try:
        tcolpickles = getattr(self.db, "get_bytes",
                              self.db.get)(_columns_key(table))
    except db.DBNotFoundError:
        raise TableDBError("unknown table: %r" % (table,))
    if not tcolpickles:
        # the lookup did not raise but produced nothing to unpickle
        raise TableDBError("unknown table: %r" % (table,))
    self.__tablecolumns[table] = pickle.loads(tcolpickles)
453 def __new_rowid(self
, table
, txn
) :
454 """Create a new unique row identifier"""
457 # Generate a random 64-bit row ID string
458 # (note: might have <64 bits of true randomness
459 # but it's plenty for our database id needs!)
461 for x
in xrange(_rowid_str_len
):
462 blist
.append(random
.randint(0,255))
463 newid
= struct
.pack('B'*_rowid_str_len
, *blist
)
465 if sys
.version_info
[0] >= 3 :
466 newid
= newid
.decode("iso8859-1") # 8 bits
468 # Guarantee uniqueness by adding this key to the database
470 self
.db
.put(_rowid_key(table
, newid
), None, txn
=txn
,
471 flags
=db
.DB_NOOVERWRITE
)
472 except db
.DBKeyExistError
:
480 def Insert(self
, table
, rowdict
) :
481 """Insert(table, datadict) - Insert a new row into the table
482 using the keys+values from rowdict as the column values.
487 if not getattr(self
.db
, "has_key")(_columns_key(table
)):
488 raise TableDBError
, "unknown table"
490 # check the validity of each column name
491 if not table
in self
.__tablecolumns
:
492 self
.__load
_column
_info
(table
)
493 for column
in rowdict
.keys() :
494 if not self
.__tablecolumns
[table
].count(column
):
495 raise TableDBError
, "unknown column: %r" % (column
,)
497 # get a unique row identifier for this row
498 txn
= self
.env
.txn_begin()
499 rowid
= self
.__new
_rowid
(table
, txn
=txn
)
501 # insert the row values into the table database
502 for column
, dataitem
in rowdict
.items():
504 self
.db
.put(_data_key(table
, column
, rowid
), dataitem
, txn
=txn
)
509 except db
.DBError
, dberror
:
510 # WIBNI we could just abort the txn and re-raise the exception?
511 # But no, because TableDBError is not related to DBError via
512 # inheritance, so it would be backwards incompatible. Do the next
514 info
= sys
.exc_info()
517 self
.db
.delete(_rowid_key(table
, rowid
))
518 if sys
.version_info
< (2, 6) :
519 raise TableDBError
, dberror
[1], info
[2]
521 raise TableDBError
, dberror
.args
[1], info
[2]
524 def Modify(self
, table
, conditions
={}, mappings
={}):
525 """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'
527 * table - the table name
528 * conditions - a dictionary keyed on column names containing
529 a condition callable expecting the data string as an
530 argument and returning a boolean.
531 * mappings - a dictionary keyed on column names containing a
532 condition callable expecting the data string as an argument and
533 returning the new string for that column.
537 matching_rowids
= self
.__Select
(table
, [], conditions
)
539 # modify only requested columns
540 columns
= mappings
.keys()
541 for rowid
in matching_rowids
.keys():
544 for column
in columns
:
545 txn
= self
.env
.txn_begin()
546 # modify the requested column
548 dataitem
= self
.db
.get(
549 _data_key(table
, column
, rowid
),
552 _data_key(table
, column
, rowid
),
554 except db
.DBNotFoundError
:
555 # XXXXXXX row key somehow didn't exist, assume no
558 dataitem
= mappings
[column
](dataitem
)
559 if dataitem
is not None:
561 _data_key(table
, column
, rowid
),
566 # catch all exceptions here since we call unknown callables
572 except db
.DBError
, dberror
:
573 if sys
.version_info
< (2, 6) :
574 raise TableDBError
, dberror
[1]
576 raise TableDBError
, dberror
.args
[1]
578 def Delete(self
, table
, conditions
={}):
579 """Delete(table, conditions) - Delete items matching the given
580 conditions from the table.
582 * conditions - a dictionary keyed on column names containing
583 condition functions expecting the data string as an
584 argument and returning a boolean.
588 matching_rowids
= self
.__Select
(table
, [], conditions
)
590 # delete row data from all columns
591 columns
= self
.__tablecolumns
[table
]
592 for rowid
in matching_rowids
.keys():
595 txn
= self
.env
.txn_begin()
596 for column
in columns
:
597 # delete the data key
599 self
.db
.delete(_data_key(table
, column
, rowid
),
601 except db
.DBNotFoundError
:
602 # XXXXXXX column may not exist, assume no error
606 self
.db
.delete(_rowid_key(table
, rowid
), txn
=txn
)
607 except db
.DBNotFoundError
:
608 # XXXXXXX row key somehow didn't exist, assume no error
612 except db
.DBError
, dberror
:
616 except db
.DBError
, dberror
:
617 if sys
.version_info
< (2, 6) :
618 raise TableDBError
, dberror
[1]
620 raise TableDBError
, dberror
.args
[1]
def Select(self, table, columns, conditions={}):
    """Select(table, columns, conditions) - retrieve specific row data
    Returns a list of row column->value mapping dictionaries.

    * columns - a list of which column data to return.  If
      columns is None, all columns will be returned.
    * conditions - a dictionary keyed on column names
      containing callable conditions expecting the data string as an
      argument and returning a boolean.

    Raises TableDBError carrying the message of any DBError raised
    while selecting.
    """
    try:
        if not table in self.__tablecolumns:
            self.__load_column_info(table)
        if columns is None:
            columns = self.__tablecolumns[table]
        matching_rowids = self.__Select(table, columns, conditions)
    except db.DBError:
        # Fetch the exception via sys.exc_info() so this source parses
        # on every supported Python; .args is available on all of them,
        # so no version switch is needed.
        dberror = sys.exc_info()[1]
        raise TableDBError(dberror.args[1])
    # return the matches as a list of dictionaries
    return list(matching_rowids.values())
648 def __Select(self
, table
, columns
, conditions
):
649 """__Select() - Used to implement Select and Delete (above)
650 Returns a dictionary keyed on rowids containing dicts
651 holding the row data for columns listed in the columns param
652 that match the given conditions.
653 * conditions is a dictionary keyed on column names
654 containing callable conditions expecting the data string as an
655 argument and returning a boolean.
657 # check the validity of each column name
658 if not table
in self
.__tablecolumns
:
659 self
.__load
_column
_info
(table
)
661 columns
= self
.tablecolumns
[table
]
662 for column
in (columns
+ conditions
.keys()):
663 if not self
.__tablecolumns
[table
].count(column
):
664 raise TableDBError
, "unknown column: %r" % (column
,)
666 # keyed on rows that match so far, containings dicts keyed on
667 # column names containing the data for that row and column.
669 # keys are rowids that do not match
672 # attempt to sort the conditions in such a way as to minimize full
674 def cmp_conditions(atuple
, btuple
):
677 if type(a
) is type(b
):
679 # Needed for python 3. "cmp" vanished in 3.0.1
685 if isinstance(a
, PrefixCond
) and isinstance(b
, PrefixCond
):
686 # longest prefix first
687 return cmp(len(b
.prefix
), len(a
.prefix
))
688 if isinstance(a
, LikeCond
) and isinstance(b
, LikeCond
):
689 # longest likestr first
690 return cmp(len(b
.likestr
), len(a
.likestr
))
692 if isinstance(a
, ExactCond
):
694 if isinstance(b
, ExactCond
):
696 if isinstance(a
, PrefixCond
):
698 if isinstance(b
, PrefixCond
):
700 # leave all unknown condition callables alone as equals
703 if sys
.version_info
< (2, 6) :
704 conditionlist
= conditions
.items()
705 conditionlist
.sort(cmp_conditions
)
706 else : # Insertion Sort. Please, improve
708 for i
in conditions
.items() :
709 for j
, k
in enumerate(conditionlist
) :
710 r
= cmp_conditions(k
, i
)
712 conditionlist
.insert(j
, i
)
715 conditionlist
.append(i
)
717 # Apply conditions to column data to find what we want
718 cur
= self
.db
.cursor()
720 for column
, condition
in conditionlist
:
721 column_num
= column_num
+ 1
722 searchkey
= _search_col_data_key(table
, column
)
723 # speedup: don't linear search columns within loop
724 if column
in columns
:
725 savethiscolumndata
= 1 # save the data for return
727 savethiscolumndata
= 0 # data only used for selection
730 key
, data
= cur
.set_range(searchkey
)
731 while key
[:len(searchkey
)] == searchkey
:
732 # extract the rowid from the key
733 rowid
= key
[-_rowid_str_len
:]
735 if not rowid
in rejected_rowids
:
736 # if no condition was specified or the condition
737 # succeeds, add row to our match list.
738 if not condition
or condition(data
):
739 if not rowid
in matching_rowids
:
740 matching_rowids
[rowid
] = {}
741 if savethiscolumndata
:
742 matching_rowids
[rowid
][column
] = data
744 if rowid
in matching_rowids
:
745 del matching_rowids
[rowid
]
746 rejected_rowids
[rowid
] = rowid
748 key
, data
= cur
.next()
750 except db
.DBError
, dberror
:
751 if dberror
.args
[0] != db
.DB_NOTFOUND
:
757 # we're done selecting rows, garbage collect the reject list
760 # extract any remaining desired column data from the
761 # database for the matching rows.
763 for rowid
, rowdata
in matching_rowids
.items():
764 for column
in columns
:
765 if column
in rowdata
:
768 rowdata
[column
] = self
.db
.get(
769 _data_key(table
, column
, rowid
))
770 except db
.DBError
, dberror
:
771 if sys
.version_info
< (2, 6) :
772 if dberror
[0] != db
.DB_NOTFOUND
:
775 if dberror
.args
[0] != db
.DB_NOTFOUND
:
777 rowdata
[column
] = None
780 return matching_rowids
783 def Drop(self
, table
):
784 """Remove an entire table from the database"""
787 txn
= self
.env
.txn_begin()
789 # delete the column list
790 self
.db
.delete(_columns_key(table
), txn
=txn
)
792 cur
= self
.db
.cursor(txn
)
794 # delete all keys containing this tables column and row info
795 table_key
= _search_all_data_key(table
)
798 key
, data
= cur
.set_range(table_key
)
799 except db
.DBNotFoundError
:
801 # only delete items in this table
802 if key
[:len(table_key
)] != table_key
:
806 # delete all rowids used by this table
807 table_key
= _search_rowid_key(table
)
810 key
, data
= cur
.set_range(table_key
)
811 except db
.DBNotFoundError
:
813 # only delete items in this table
814 if key
[:len(table_key
)] != table_key
:
820 # delete the tablename from the table name list
821 tablelist
= pickle
.loads(
822 getattr(self
.db
, "get_bytes", self
.db
.get
)(_table_names_key
,
823 txn
=txn
, flags
=db
.DB_RMW
))
825 tablelist
.remove(table
)
827 # hmm, it wasn't there, oh well, that's what we want.
829 # delete 1st, incase we opened with DB_DUP
830 self
.db
.delete(_table_names_key
, txn
=txn
)
831 getattr(self
.db
, "put_bytes", self
.db
.put
)(_table_names_key
,
832 pickle
.dumps(tablelist
, 1), txn
=txn
)
837 if table
in self
.__tablecolumns
:
838 del self
.__tablecolumns
[table
]
840 except db
.DBError
, dberror
:
843 raise TableDBError(dberror
.args
[1])