# Tweak wording
# [pytest.git] / Lib / bsddb / dbtables.py
# blob 492d5fdfac29c9f6c1702ab7c7b46e032e98022d
#-----------------------------------------------------------------------
#
# Copyright (C) 2000, 2001 by Autonomous Zone Industries
# Copyright (C) 2002 Gregory P. Smith
#
# License: This is free software. You may use this software for any
#          purpose including modification/redistribution, so long as
#          this header remains intact and that you do not claim any
#          rights of ownership or authorship of this software. This
#          software has been tested, but no warranty is expressed or
#          implied.
#
# -- Gregory P. Smith <greg@electricrain.com>

# This provides a simple database table interface built on top of
# the Python BerkeleyDB 3 interface.
#
18 _cvsid = '$Id$'
20 import re
21 import sys
22 import copy
23 import xdrlib
24 import random
25 from types import ListType, StringType
26 import cPickle as pickle
28 try:
29 # For Pythons w/distutils pybsddb
30 from bsddb3.db import *
31 except ImportError:
32 # For Python 2.3
33 from bsddb.db import *
# XXX(nnorwitz): is this correct? DBIncompleteError is conditional in _bsddb.c
# When the star-import above did not provide DBIncompleteError, define a
# local stand-in so the `except DBIncompleteError` clauses further down
# always refer to an existing name.
try:
    DBIncompleteError
except NameError:
    class DBIncompleteError(Exception):
        pass
class TableDBError(StandardError):
    """Base class for the errors raised by the table layer of this module."""
    pass
class TableAlreadyExists(TableDBError):
    """Raised when creating a table whose column list is already stored."""
    pass
class Cond:
    """This condition matches everything"""

    def __call__(self, s):
        # `s` is ignored: every value is accepted.
        return 1
class ExactCond(Cond):
    """Acts as an exact match condition function"""

    def __init__(self, strtomatch):
        self.strtomatch = strtomatch

    def __call__(self, s):
        # true only when `s` compares equal to the stored string
        return s == self.strtomatch
class PrefixCond(Cond):
    """Acts as a condition function for matching a string prefix"""

    def __init__(self, prefix):
        # also read by the condition-sorting code in bsdTableDB
        self.prefix = prefix

    def __call__(self, s):
        # compare the leading len(prefix) characters of `s`
        return s[:len(self.prefix)] == self.prefix
class PostfixCond(Cond):
    """Acts as a condition function for matching a string postfix"""

    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        # endswith() rather than s[-len(postfix):]: with an empty postfix
        # the slice s[-0:] is the whole string, so the old comparison only
        # matched the empty string instead of matching everything.
        return s.endswith(self.postfix)
class LikeCond(Cond):
    """
    Acts as a function that will match using an SQL 'LIKE' style
    string.  Case insensitive and % signs are wild cards.
    This isn't perfect but it should work for the simple common cases.

    * likestr  - the LIKE pattern; every '%' matches any run of characters,
                 all other characters are matched literally.
    * re_flags - flags handed to re.compile (default: re.IGNORECASE).

    Calling the instance returns the re match object (true) when the whole
    string matches the pattern, or None otherwise.
    """

    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # escape python re characters.  The backslash has to be handled
        # first, otherwise the backslashes inserted for the other
        # characters would themselves get escaped again.
        chars_to_escape = '\\.*+()[]?^$|{}'
        for char in chars_to_escape:
            likestr = likestr.replace(char, '\\' + char)
        # convert %s to wildcards
        self.likestr = likestr.replace('%', '.*')
        self.re = re.compile('^' + self.likestr + '$', re_flags)

    def __call__(self, s):
        return self.re.match(s)
92 # keys used to store database metadata
94 _table_names_key = '__TABLE_NAMES__' # list of the tables in this db
95 _columns = '._COLUMNS__' # table_name+this key contains a list of columns
def _columns_key(table):
    """Return the key under which the column list of `table` is stored."""
    return table + _columns
101 # these keys are found within table sub databases
103 _data = '._DATA_.' # this+column+this+rowid key contains table data
104 _rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
105 # row in the table. (no data is stored)
106 _rowid_str_len = 8 # length in bytes of the unique rowid strings
def _data_key(table, col, rowid):
    """Return the key holding the value of column `col` in row `rowid`."""
    return table + _data + col + _data + rowid
def _search_col_data_key(table, col):
    """Return the key prefix shared by all data keys of column `col`."""
    return table + _data + col + _data
def _search_all_data_key(table):
    """Return the key prefix shared by all data keys of `table`."""
    return table + _data
def _rowid_key(table, rowid):
    """Return the marker key that records the existence of row `rowid`."""
    return table + _rowid + rowid + _rowid
def _search_rowid_key(table):
    """Return the key prefix shared by all row marker keys of `table`."""
    return table + _rowid
def contains_metastrings(s):
    """Verify that the given string does not contain any
    metadata strings that might interfere with dbtables database operation.

    Returns 1 when `s` contains one of the reserved key fragments
    (_table_names_key, _columns, _data, _rowid) and 0 otherwise.
    """
    for metastring in (_table_names_key, _columns, _data, _rowid):
        if s.find(metastring) >= 0:
            return 1
    return 0
137 class bsdTableDB :
138 def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600,
139 recover=0, dbflags=0):
140 """bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)
142 Open database name in the dbhome BerkeleyDB directory.
143 Use keyword arguments when calling this constructor.
145 self.db = None
146 myflags = DB_THREAD
147 if create:
148 myflags |= DB_CREATE
149 flagsforenv = (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
150 DB_INIT_TXN | dbflags)
151 # DB_AUTO_COMMIT isn't a valid flag for env.open()
152 try:
153 dbflags |= DB_AUTO_COMMIT
154 except AttributeError:
155 pass
156 if recover:
157 flagsforenv = flagsforenv | DB_RECOVER
158 self.env = DBEnv()
159 # enable auto deadlock avoidance
160 self.env.set_lk_detect(DB_LOCK_DEFAULT)
161 self.env.open(dbhome, myflags | flagsforenv)
162 if truncate:
163 myflags |= DB_TRUNCATE
164 self.db = DB(self.env)
165 # this code relies on DBCursor.set* methods to raise exceptions
166 # rather than returning None
167 self.db.set_get_returns_none(1)
168 # allow duplicate entries [warning: be careful w/ metadata]
169 self.db.set_flags(DB_DUP)
170 self.db.open(filename, DB_BTREE, dbflags | myflags, mode)
171 self.dbfilename = filename
172 # Initialize the table names list if this is a new database
173 txn = self.env.txn_begin()
174 try:
175 if not self.db.has_key(_table_names_key, txn):
176 self.db.put(_table_names_key, pickle.dumps([], 1), txn=txn)
177 # Yes, bare except
178 except:
179 txn.abort()
180 raise
181 else:
182 txn.commit()
183 # TODO verify more of the database's metadata?
184 self.__tablecolumns = {}
186 def __del__(self):
187 self.close()
189 def close(self):
190 if self.db is not None:
191 self.db.close()
192 self.db = None
193 if self.env is not None:
194 self.env.close()
195 self.env = None
197 def checkpoint(self, mins=0):
198 try:
199 self.env.txn_checkpoint(mins)
200 except DBIncompleteError:
201 pass
203 def sync(self):
204 try:
205 self.db.sync()
206 except DBIncompleteError:
207 pass
209 def _db_print(self) :
210 """Print the database to stdout for debugging"""
211 print "******** Printing raw database for debugging ********"
212 cur = self.db.cursor()
213 try:
214 key, data = cur.first()
215 while 1:
216 print repr({key: data})
217 next = cur.next()
218 if next:
219 key, data = next
220 else:
221 cur.close()
222 return
223 except DBNotFoundError:
224 cur.close()
227 def CreateTable(self, table, columns):
228 """CreateTable(table, columns) - Create a new table in the database.
230 raises TableDBError if it already exists or for other DB errors.
232 assert isinstance(columns, ListType)
233 txn = None
234 try:
235 # checking sanity of the table and column names here on
236 # table creation will prevent problems elsewhere.
237 if contains_metastrings(table):
238 raise ValueError(
239 "bad table name: contains reserved metastrings")
240 for column in columns :
241 if contains_metastrings(column):
242 raise ValueError(
243 "bad column name: contains reserved metastrings")
245 columnlist_key = _columns_key(table)
246 if self.db.has_key(columnlist_key):
247 raise TableAlreadyExists, "table already exists"
249 txn = self.env.txn_begin()
250 # store the table's column info
251 self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
253 # add the table name to the tablelist
254 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn,
255 flags=DB_RMW))
256 tablelist.append(table)
257 # delete 1st, in case we opened with DB_DUP
258 self.db.delete(_table_names_key, txn)
259 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
261 txn.commit()
262 txn = None
263 except DBError, dberror:
264 if txn:
265 txn.abort()
266 raise TableDBError, dberror[1]
269 def ListTableColumns(self, table):
270 """Return a list of columns in the given table.
271 [] if the table doesn't exist.
273 assert isinstance(table, StringType)
274 if contains_metastrings(table):
275 raise ValueError, "bad table name: contains reserved metastrings"
277 columnlist_key = _columns_key(table)
278 if not self.db.has_key(columnlist_key):
279 return []
280 pickledcolumnlist = self.db.get(columnlist_key)
281 if pickledcolumnlist:
282 return pickle.loads(pickledcolumnlist)
283 else:
284 return []
286 def ListTables(self):
287 """Return a list of tables in this database."""
288 pickledtablelist = self.db.get(_table_names_key)
289 if pickledtablelist:
290 return pickle.loads(pickledtablelist)
291 else:
292 return []
294 def CreateOrExtendTable(self, table, columns):
295 """CreateOrExtendTable(table, columns)
297 Create a new table in the database.
299 If a table of this name already exists, extend it to have any
300 additional columns present in the given list as well as
301 all of its current columns.
303 assert isinstance(columns, ListType)
304 try:
305 self.CreateTable(table, columns)
306 except TableAlreadyExists:
307 # the table already existed, add any new columns
308 txn = None
309 try:
310 columnlist_key = _columns_key(table)
311 txn = self.env.txn_begin()
313 # load the current column list
314 oldcolumnlist = pickle.loads(
315 self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
316 # create a hash table for fast lookups of column names in the
317 # loop below
318 oldcolumnhash = {}
319 for c in oldcolumnlist:
320 oldcolumnhash[c] = c
322 # create a new column list containing both the old and new
323 # column names
324 newcolumnlist = copy.copy(oldcolumnlist)
325 for c in columns:
326 if not oldcolumnhash.has_key(c):
327 newcolumnlist.append(c)
329 # store the table's new extended column list
330 if newcolumnlist != oldcolumnlist :
331 # delete the old one first since we opened with DB_DUP
332 self.db.delete(columnlist_key, txn)
333 self.db.put(columnlist_key,
334 pickle.dumps(newcolumnlist, 1),
335 txn=txn)
337 txn.commit()
338 txn = None
340 self.__load_column_info(table)
341 except DBError, dberror:
342 if txn:
343 txn.abort()
344 raise TableDBError, dberror[1]
347 def __load_column_info(self, table) :
348 """initialize the self.__tablecolumns dict"""
349 # check the column names
350 try:
351 tcolpickles = self.db.get(_columns_key(table))
352 except DBNotFoundError:
353 raise TableDBError, "unknown table: %r" % (table,)
354 if not tcolpickles:
355 raise TableDBError, "unknown table: %r" % (table,)
356 self.__tablecolumns[table] = pickle.loads(tcolpickles)
358 def __new_rowid(self, table, txn) :
359 """Create a new unique row identifier"""
360 unique = 0
361 while not unique:
362 # Generate a random 64-bit row ID string
363 # (note: this code has <64 bits of randomness
364 # but it's plenty for our database id needs!)
365 p = xdrlib.Packer()
366 p.pack_int(int(random.random()*2147483647))
367 p.pack_int(int(random.random()*2147483647))
368 newid = p.get_buffer()
370 # Guarantee uniqueness by adding this key to the database
371 try:
372 self.db.put(_rowid_key(table, newid), None, txn=txn,
373 flags=DB_NOOVERWRITE)
374 except DBKeyExistError:
375 pass
376 else:
377 unique = 1
379 return newid
382 def Insert(self, table, rowdict) :
383 """Insert(table, datadict) - Insert a new row into the table
384 using the keys+values from rowdict as the column values.
386 txn = None
387 try:
388 if not self.db.has_key(_columns_key(table)):
389 raise TableDBError, "unknown table"
391 # check the validity of each column name
392 if not self.__tablecolumns.has_key(table):
393 self.__load_column_info(table)
394 for column in rowdict.keys() :
395 if not self.__tablecolumns[table].count(column):
396 raise TableDBError, "unknown column: %r" % (column,)
398 # get a unique row identifier for this row
399 txn = self.env.txn_begin()
400 rowid = self.__new_rowid(table, txn=txn)
402 # insert the row values into the table database
403 for column, dataitem in rowdict.items():
404 # store the value
405 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
407 txn.commit()
408 txn = None
410 except DBError, dberror:
411 # WIBNI we could just abort the txn and re-raise the exception?
412 # But no, because TableDBError is not related to DBError via
413 # inheritance, so it would be backwards incompatible. Do the next
414 # best thing.
415 info = sys.exc_info()
416 if txn:
417 txn.abort()
418 self.db.delete(_rowid_key(table, rowid))
419 raise TableDBError, dberror[1], info[2]
422 def Modify(self, table, conditions={}, mappings={}):
423 """Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'
425 * table - the table name
426 * conditions - a dictionary keyed on column names containing
427 a condition callable expecting the data string as an
428 argument and returning a boolean.
429 * mappings - a dictionary keyed on column names containing a
430 condition callable expecting the data string as an argument and
431 returning the new string for that column.
433 try:
434 matching_rowids = self.__Select(table, [], conditions)
436 # modify only requested columns
437 columns = mappings.keys()
438 for rowid in matching_rowids.keys():
439 txn = None
440 try:
441 for column in columns:
442 txn = self.env.txn_begin()
443 # modify the requested column
444 try:
445 dataitem = self.db.get(
446 _data_key(table, column, rowid),
447 txn)
448 self.db.delete(
449 _data_key(table, column, rowid),
450 txn)
451 except DBNotFoundError:
452 # XXXXXXX row key somehow didn't exist, assume no
453 # error
454 dataitem = None
455 dataitem = mappings[column](dataitem)
456 if dataitem <> None:
457 self.db.put(
458 _data_key(table, column, rowid),
459 dataitem, txn=txn)
460 txn.commit()
461 txn = None
463 # catch all exceptions here since we call unknown callables
464 except:
465 if txn:
466 txn.abort()
467 raise
469 except DBError, dberror:
470 raise TableDBError, dberror[1]
472 def Delete(self, table, conditions={}):
473 """Delete(table, conditions) - Delete items matching the given
474 conditions from the table.
476 * conditions - a dictionary keyed on column names containing
477 condition functions expecting the data string as an
478 argument and returning a boolean.
480 try:
481 matching_rowids = self.__Select(table, [], conditions)
483 # delete row data from all columns
484 columns = self.__tablecolumns[table]
485 for rowid in matching_rowids.keys():
486 txn = None
487 try:
488 txn = self.env.txn_begin()
489 for column in columns:
490 # delete the data key
491 try:
492 self.db.delete(_data_key(table, column, rowid),
493 txn)
494 except DBNotFoundError:
495 # XXXXXXX column may not exist, assume no error
496 pass
498 try:
499 self.db.delete(_rowid_key(table, rowid), txn)
500 except DBNotFoundError:
501 # XXXXXXX row key somehow didn't exist, assume no error
502 pass
503 txn.commit()
504 txn = None
505 except DBError, dberror:
506 if txn:
507 txn.abort()
508 raise
509 except DBError, dberror:
510 raise TableDBError, dberror[1]
513 def Select(self, table, columns, conditions={}):
514 """Select(table, columns, conditions) - retrieve specific row data
515 Returns a list of row column->value mapping dictionaries.
517 * columns - a list of which column data to return. If
518 columns is None, all columns will be returned.
519 * conditions - a dictionary keyed on column names
520 containing callable conditions expecting the data string as an
521 argument and returning a boolean.
523 try:
524 if not self.__tablecolumns.has_key(table):
525 self.__load_column_info(table)
526 if columns is None:
527 columns = self.__tablecolumns[table]
528 matching_rowids = self.__Select(table, columns, conditions)
529 except DBError, dberror:
530 raise TableDBError, dberror[1]
531 # return the matches as a list of dictionaries
532 return matching_rowids.values()
535 def __Select(self, table, columns, conditions):
536 """__Select() - Used to implement Select and Delete (above)
537 Returns a dictionary keyed on rowids containing dicts
538 holding the row data for columns listed in the columns param
539 that match the given conditions.
540 * conditions is a dictionary keyed on column names
541 containing callable conditions expecting the data string as an
542 argument and returning a boolean.
544 # check the validity of each column name
545 if not self.__tablecolumns.has_key(table):
546 self.__load_column_info(table)
547 if columns is None:
548 columns = self.tablecolumns[table]
549 for column in (columns + conditions.keys()):
550 if not self.__tablecolumns[table].count(column):
551 raise TableDBError, "unknown column: %r" % (column,)
553 # keyed on rows that match so far, containings dicts keyed on
554 # column names containing the data for that row and column.
555 matching_rowids = {}
556 # keys are rowids that do not match
557 rejected_rowids = {}
559 # attempt to sort the conditions in such a way as to minimize full
560 # column lookups
561 def cmp_conditions(atuple, btuple):
562 a = atuple[1]
563 b = btuple[1]
564 if type(a) is type(b):
565 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
566 # longest prefix first
567 return cmp(len(b.prefix), len(a.prefix))
568 if isinstance(a, LikeCond) and isinstance(b, LikeCond):
569 # longest likestr first
570 return cmp(len(b.likestr), len(a.likestr))
571 return 0
572 if isinstance(a, ExactCond):
573 return -1
574 if isinstance(b, ExactCond):
575 return 1
576 if isinstance(a, PrefixCond):
577 return -1
578 if isinstance(b, PrefixCond):
579 return 1
580 # leave all unknown condition callables alone as equals
581 return 0
583 conditionlist = conditions.items()
584 conditionlist.sort(cmp_conditions)
586 # Apply conditions to column data to find what we want
587 cur = self.db.cursor()
588 column_num = -1
589 for column, condition in conditionlist:
590 column_num = column_num + 1
591 searchkey = _search_col_data_key(table, column)
592 # speedup: don't linear search columns within loop
593 if column in columns:
594 savethiscolumndata = 1 # save the data for return
595 else:
596 savethiscolumndata = 0 # data only used for selection
598 try:
599 key, data = cur.set_range(searchkey)
600 while key[:len(searchkey)] == searchkey:
601 # extract the rowid from the key
602 rowid = key[-_rowid_str_len:]
604 if not rejected_rowids.has_key(rowid):
605 # if no condition was specified or the condition
606 # succeeds, add row to our match list.
607 if not condition or condition(data):
608 if not matching_rowids.has_key(rowid):
609 matching_rowids[rowid] = {}
610 if savethiscolumndata:
611 matching_rowids[rowid][column] = data
612 else:
613 if matching_rowids.has_key(rowid):
614 del matching_rowids[rowid]
615 rejected_rowids[rowid] = rowid
617 key, data = cur.next()
619 except DBError, dberror:
620 if dberror[0] != DB_NOTFOUND:
621 raise
622 continue
624 cur.close()
626 # we're done selecting rows, garbage collect the reject list
627 del rejected_rowids
629 # extract any remaining desired column data from the
630 # database for the matching rows.
631 if len(columns) > 0:
632 for rowid, rowdata in matching_rowids.items():
633 for column in columns:
634 if rowdata.has_key(column):
635 continue
636 try:
637 rowdata[column] = self.db.get(
638 _data_key(table, column, rowid))
639 except DBError, dberror:
640 if dberror[0] != DB_NOTFOUND:
641 raise
642 rowdata[column] = None
644 # return the matches
645 return matching_rowids
648 def Drop(self, table):
649 """Remove an entire table from the database"""
650 txn = None
651 try:
652 txn = self.env.txn_begin()
654 # delete the column list
655 self.db.delete(_columns_key(table), txn)
657 cur = self.db.cursor(txn)
659 # delete all keys containing this tables column and row info
660 table_key = _search_all_data_key(table)
661 while 1:
662 try:
663 key, data = cur.set_range(table_key)
664 except DBNotFoundError:
665 break
666 # only delete items in this table
667 if key[:len(table_key)] != table_key:
668 break
669 cur.delete()
671 # delete all rowids used by this table
672 table_key = _search_rowid_key(table)
673 while 1:
674 try:
675 key, data = cur.set_range(table_key)
676 except DBNotFoundError:
677 break
678 # only delete items in this table
679 if key[:len(table_key)] != table_key:
680 break
681 cur.delete()
683 cur.close()
685 # delete the tablename from the table name list
686 tablelist = pickle.loads(
687 self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
688 try:
689 tablelist.remove(table)
690 except ValueError:
691 # hmm, it wasn't there, oh well, that's what we want.
692 pass
693 # delete 1st, incase we opened with DB_DUP
694 self.db.delete(_table_names_key, txn)
695 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
697 txn.commit()
698 txn = None
700 if self.__tablecolumns.has_key(table):
701 del self.__tablecolumns[table]
703 except DBError, dberror:
704 if txn:
705 txn.abort()
706 raise TableDBError, dberror[1]