Lib/bsddb/dbtables.py

   1 #-----------------------------------------------------------------------
   2 #
   3 # Copyright (C) 2000, 2001 by Autonomous Zone Industries
   4 # Copyright (C) 2002 Gregory P. Smith
   5 #
   6 # License:      This is free software.  You may use this software for any
   7 #               purpose including modification/redistribution, so long as
   8 #               this header remains intact and that you do not claim any
   9 #               rights of ownership or authorship of this software.  This
  10 #               software has been tested, but no warranty is expressed or
  11 #               implied.
  12 #
  13 #   --  Gregory P. Smith <greg@electricrain.com>
  14
  15 # This provides a simple database table interface built on top of
  16 # the Python BerkeleyDB 3 interface.
  17 #
  18 _cvsid = '$Id$'
  19
  20 import re
  21 import sys
  22 import copy
  23 import xdrlib
  24 import random
  25 from types import ListType, StringType
  26 import cPickle as pickle
  27
  28 try:
  29     # For Pythons w/distutils pybsddb
  30     from bsddb3.db import *
  31 except ImportError:
  32     # For Python 2.3
  33     from bsddb.db import *
  34
  35
  36 class TableDBError(StandardError):
  37     pass
  38 class TableAlreadyExists(TableDBError):
  39     pass
  40
  41
  42 class Cond:
  43     """This condition matches everything"""
  44     def __call__(self, s):
  45         return 1
  46
  47 class ExactCond(Cond):
  48     """Acts as an exact match condition function"""
  49     def __init__(self, strtomatch):
  50         self.strtomatch = strtomatch
  51     def __call__(self, s):
  52         return s == self.strtomatch
  53
  54 class PrefixCond(Cond):
  55     """Acts as a condition function for matching a string prefix"""
  56     def __init__(self, prefix):
  57         self.prefix = prefix
  58     def __call__(self, s):
  59         return s[:len(self.prefix)] == self.prefix
  60
  61 class PostfixCond(Cond):
  62     """Acts as a condition function for matching a string postfix"""
  63     def __init__(self, postfix):
  64         self.postfix = postfix
  65     def __call__(self, s):
  66         return s[-len(self.postfix):] == self.postfix
  67
  68 class LikeCond(Cond):
  69     """
  70     Acts as a function that will match using an SQL 'LIKE' style
  71     string.  Case insensitive and % signs are wild cards.
  72     This isn't perfect but it should work for the simple common cases.
  73     """
  74     def __init__(self, likestr, re_flags=re.IGNORECASE):
  75         # escape python re characters
  76         chars_to_escape = '.*+()[]?'
  77         for char in chars_to_escape :
  78             likestr = likestr.replace(char, '\\'+char)
  79         # convert %s to wildcards
  80         self.likestr = likestr.replace('%', '.*')
  81         self.re = re.compile('^'+self.likestr+'$', re_flags)
  82     def __call__(self, s):
  83         return self.re.match(s)
  84
  85 #
  86 # keys used to store database metadata
  87 #
  88 _table_names_key = '__TABLE_NAMES__'  # list of the tables in this db
  89 _columns = '._COLUMNS__'  # table_name+this key contains a list of columns
  90
  91 def _columns_key(table):
  92     return table + _columns
  93
  94 #
  95 # these keys are found within table sub databases
  96 #
  97 _data =  '._DATA_.'  # this+column+this+rowid key contains table data
  98 _rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
  99                      # row in the table.  (no data is stored)
 100 _rowid_str_len = 8   # length in bytes of the unique rowid strings
 101
 102 def _data_key(table, col, rowid):
 103     return table + _data + col + _data + rowid
 104
 105 def _search_col_data_key(table, col):
 106     return table + _data + col + _data
 107
 108 def _search_all_data_key(table):
 109     return table + _data
 110
 111 def _rowid_key(table, rowid):
 112     return table + _rowid + rowid + _rowid
 113
 114 def _search_rowid_key(table):
 115     return table + _rowid
 116
 117 def contains_metastrings(s) :
 118     """Verify that the given string does not contain any
 119     metadata strings that might interfere with dbtables database operation.
 120     """
 121     if (s.find(_table_names_key) >= 0 or
 122         s.find(_columns) >= 0 or
 123         s.find(_data) >= 0 or
 124         s.find(_rowid) >= 0):
 125         # Then
 126         return 1
 127     else:
 128         return 0
 129
 130
 131 class bsdTableDB :
 132     def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600,
 133                  recover=0, dbflags=0):
 134         """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
 135         Open database name in the dbhome BerkeleyDB directory.
 136         Use keyword arguments when calling this constructor.
 137         """
 138         self.db = None
 139         myflags = DB_THREAD
 140         if create:
 141             myflags |= DB_CREATE
 142         flagsforenv = (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
 143                        DB_INIT_TXN | dbflags)
 144         # DB_AUTO_COMMIT isn't a valid flag for env.open()
 145         try:
 146             dbflags |= DB_AUTO_COMMIT
 147         except AttributeError:
 148             pass
 149         if recover:
 150             flagsforenv = flagsforenv | DB_RECOVER
 151         self.env = DBEnv()
 152         # enable auto deadlock avoidance
 153         self.env.set_lk_detect(DB_LOCK_DEFAULT)
 154         self.env.open(dbhome, myflags | flagsforenv)
 155         if truncate:
 156             myflags |= DB_TRUNCATE
 157         self.db = DB(self.env)
 158         # this code relies on DBCursor.set* methods to raise exceptions
 159         # rather than returning None
 160         self.db.set_get_returns_none(1)
 161         # allow duplicate entries [warning: be careful w/ metadata]
 162         self.db.set_flags(DB_DUP)
 163         self.db.open(filename, DB_BTREE, dbflags | myflags, mode)
 164         self.dbfilename = filename
 165         # Initialize the table names list if this is a new database
 166         txn = self.env.txn_begin()
 167         try:
 168             if not self.db.has_key(_table_names_key, txn):
 169                 self.db.put(_table_names_key, pickle.dumps([], 1), txn=txn)
 170         # Yes, bare except
 171         except:
 172             txn.abort()
 173             raise
 174         else:
 175             txn.commit()
 176         # TODO verify more of the database's metadata?
 177         self.__tablecolumns = {}
 178
 179     def __del__(self):
 180         self.close()
 181
 182     def close(self):
 183         if self.db is not None:
 184             self.db.close()
 185             self.db = None
 186         if self.env is not None:
 187             self.env.close()
 188             self.env = None
 189
 190     def checkpoint(self, mins=0):
 191         try:
 192             self.env.txn_checkpoint(mins)
 193         except DBIncompleteError:
 194             pass
 195
 196     def sync(self):
 197         try:
 198             self.db.sync()
 199         except DBIncompleteError:
 200             pass
 201
 202     def _db_print(self) :
 203         """Print the database to stdout for debugging"""
 204         print "******** Printing raw database for debugging ********"
 205         cur = self.db.cursor()
 206         try:
 207             key, data = cur.first()
 208             while 1:
 209                 print repr({key: data})
 210                 next = cur.next()
 211                 if next:
 212                     key, data = next
 213                 else:
 214                     cur.close()
 215                     return
 216         except DBNotFoundError:
 217             cur.close()
 218
 219
 220     def CreateTable(self, table, columns):
 221         """CreateTable(table, columns) - Create a new table in the database
 222         raises TableDBError if it already exists or for other DB errors.
 223         """
 224         assert isinstance(columns, ListType)
 225         txn = None
 226         try:
 227             # checking sanity of the table and column names here on
 228             # table creation will prevent problems elsewhere.
 229             if contains_metastrings(table):
 230                 raise ValueError(
 231                     "bad table name: contains reserved metastrings")
 232             for column in columns :
 233                 if contains_metastrings(column):
 234                     raise ValueError(
 235                         "bad column name: contains reserved metastrings")
 236
 237             columnlist_key = _columns_key(table)
 238             if self.db.has_key(columnlist_key):
 239                 raise TableAlreadyExists, "table already exists"
 240
 241             txn = self.env.txn_begin()
 242             # store the table's column info
 243             self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
 244
 245             # add the table name to the tablelist
 246             tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn,
 247                                                  flags=DB_RMW))
 248             tablelist.append(table)
 249             # delete 1st, in case we opened with DB_DUP
 250             self.db.delete(_table_names_key, txn)
 251             self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
 252
 253             txn.commit()
 254             txn = None
 255         except DBError, dberror:
 256             if txn:
 257                 txn.abort()
 258             raise TableDBError, dberror[1]
 259
 260
 261     def ListTableColumns(self, table):
 262         """Return a list of columns in the given table.
 263         [] if the table doesn't exist.
 264         """
 265         assert isinstance(table, StringType)
 266         if contains_metastrings(table):
 267             raise ValueError, "bad table name: contains reserved metastrings"
 268
 269         columnlist_key = _columns_key(table)
 270         if not self.db.has_key(columnlist_key):
 271             return []
 272         pickledcolumnlist = self.db.get(columnlist_key)
 273         if pickledcolumnlist:
 274             return pickle.loads(pickledcolumnlist)
 275         else:
 276             return []
 277
 278     def ListTables(self):
 279         """Return a list of tables in this database."""
 280         pickledtablelist = self.db.get(_table_names_key)
 281         if pickledtablelist:
 282             return pickle.loads(pickledtablelist)
 283         else:
 284             return []
 285
 286     def CreateOrExtendTable(self, table, columns):
 287         """CreateOrExtendTable(table, columns)
 288
 289         - Create a new table in the database.
 290         If a table of this name already exists, extend it to have any
 291         additional columns present in the given list as well as
 292         all of its current columns.
 293         """
 294         assert isinstance(columns, ListType)
 295         try:
 296             self.CreateTable(table, columns)
 297         except TableAlreadyExists:
 298             # the table already existed, add any new columns
 299             txn = None
 300             try:
 301                 columnlist_key = _columns_key(table)
 302                 txn = self.env.txn_begin()
 303
 304                 # load the current column list
 305                 oldcolumnlist = pickle.loads(
 306                     self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
 307                 # create a hash table for fast lookups of column names in the
 308                 # loop below
 309                 oldcolumnhash = {}
 310                 for c in oldcolumnlist:
 311                     oldcolumnhash[c] = c
 312
 313                 # create a new column list containing both the old and new
 314                 # column names
 315                 newcolumnlist = copy.copy(oldcolumnlist)
 316                 for c in columns:
 317                     if not oldcolumnhash.has_key(c):
 318                         newcolumnlist.append(c)
 319
 320                 # store the table's new extended column list
 321                 if newcolumnlist != oldcolumnlist :
 322                     # delete the old one first since we opened with DB_DUP
 323                     self.db.delete(columnlist_key, txn)
 324                     self.db.put(columnlist_key,
 325                                 pickle.dumps(newcolumnlist, 1),
 326                                 txn=txn)
 327
 328                 txn.commit()
 329                 txn = None
 330
 331                 self.__load_column_info(table)
 332             except DBError, dberror:
 333                 if txn:
 334                     txn.abort()
 335                 raise TableDBError, dberror[1]
 336
 337
 338     def __load_column_info(self, table) :
 339         """initialize the self.__tablecolumns dict"""
 340         # check the column names
 341         try:
 342             tcolpickles = self.db.get(_columns_key(table))
 343         except DBNotFoundError:
 344             raise TableDBError, "unknown table: %r" % (table,)
 345         if not tcolpickles:
 346             raise TableDBError, "unknown table: %r" % (table,)
 347         self.__tablecolumns[table] = pickle.loads(tcolpickles)
 348
 349     def __new_rowid(self, table, txn) :
 350         """Create a new unique row identifier"""
 351         unique = 0
 352         while not unique:
 353             # Generate a random 64-bit row ID string
 354             # (note: this code has <64 bits of randomness
 355             # but it's plenty for our database id needs!)
 356             p = xdrlib.Packer()
 357             p.pack_int(int(random.random()*2147483647))
 358             p.pack_int(int(random.random()*2147483647))
 359             newid = p.get_buffer()
 360
 361             # Guarantee uniqueness by adding this key to the database
 362             try:
 363                 self.db.put(_rowid_key(table, newid), None, txn=txn,
 364                             flags=DB_NOOVERWRITE)
 365             except DBKeyExistError:
 366                 pass
 367             else:
 368                 unique = 1
 369
 370         return newid
 371
 372
 373     def Insert(self, table, rowdict) :
 374         """Insert(table, datadict) - Insert a new row into the table
 375         using the keys+values from rowdict as the column values.
 376         """
 377         txn = None
 378         try:
 379             if not self.db.has_key(_columns_key(table)):
 380                 raise TableDBError, "unknown table"
 381
 382             # check the validity of each column name
 383             if not self.__tablecolumns.has_key(table):
 384                 self.__load_column_info(table)
 385             for column in rowdict.keys() :
 386                 if not self.__tablecolumns[table].count(column):
 387                     raise TableDBError, "unknown column: %r" % (column,)
 388
 389             # get a unique row identifier for this row
 390             txn = self.env.txn_begin()
 391             rowid = self.__new_rowid(table, txn=txn)
 392
 393             # insert the row values into the table database
 394             for column, dataitem in rowdict.items():
 395                 # store the value
 396                 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
 397
 398             txn.commit()
 399             txn = None
 400
 401         except DBError, dberror:
 402             # WIBNI we could just abort the txn and re-raise the exception?
 403             # But no, because TableDBError is not related to DBError via
 404             # inheritance, so it would be backwards incompatible.  Do the next
 405             # best thing.
 406             info = sys.exc_info()
 407             if txn:
 408                 txn.abort()
 409                 self.db.delete(_rowid_key(table, rowid))
 410             raise TableDBError, dberror[1], info[2]
 411
 412
 413     def Modify(self, table, conditions={}, mappings={}):
 414         """Modify(table, conditions) - Modify in rows matching 'conditions'
 415         using mapping functions in 'mappings'
 416         * conditions is a dictionary keyed on column names
 417         containing condition functions expecting the data string as an
 418         argument and returning a boolean.
 419         * mappings is a dictionary keyed on column names containint condition
 420         functions expecting the data string as an argument and returning the
 421         new string for that column.
 422         """
 423         try:
 424             matching_rowids = self.__Select(table, [], conditions)
 425
 426             # modify only requested columns
 427             columns = mappings.keys()
 428             for rowid in matching_rowids.keys():
 429                 txn = None
 430                 try:
 431                     for column in columns:
 432                         txn = self.env.txn_begin()
 433                         # modify the requested column
 434                         try:
 435                             dataitem = self.db.get(
 436                                 _data_key(table, column, rowid),
 437                                 txn)
 438                             self.db.delete(
 439                                 _data_key(table, column, rowid),
 440                                 txn)
 441                         except DBNotFoundError:
 442                              # XXXXXXX row key somehow didn't exist, assume no
 443                              # error
 444                             dataitem = None
 445                         dataitem = mappings[column](dataitem)
 446                         if dataitem <> None:
 447                             self.db.put(
 448                                 _data_key(table, column, rowid),
 449                                 dataitem, txn=txn)
 450                         txn.commit()
 451                         txn = None
 452
 453                 except DBError, dberror:
 454                     if txn:
 455                         txn.abort()
 456                     raise
 457
 458         except DBError, dberror:
 459             raise TableDBError, dberror[1]
 460
 461     def Delete(self, table, conditions={}):
 462         """Delete(table, conditions) - Delete items matching the given
 463         conditions from the table.
 464         * conditions is a dictionary keyed on column names
 465         containing condition functions expecting the data string as an
 466         argument and returning a boolean.
 467         """
 468         try:
 469             matching_rowids = self.__Select(table, [], conditions)
 470
 471             # delete row data from all columns
 472             columns = self.__tablecolumns[table]
 473             for rowid in matching_rowids.keys():
 474                 txn = None
 475                 try:
 476                     txn = self.env.txn_begin()
 477                     for column in columns:
 478                         # delete the data key
 479                         try:
 480                             self.db.delete(_data_key(table, column, rowid),
 481                                            txn)
 482                         except DBNotFoundError:
 483                             # XXXXXXX column may not exist, assume no error
 484                             pass
 485
 486                     try:
 487                         self.db.delete(_rowid_key(table, rowid), txn)
 488                     except DBNotFoundError:
 489                         # XXXXXXX row key somehow didn't exist, assume no error
 490                         pass
 491                     txn.commit()
 492                     txn = None
 493                 except DBError, dberror:
 494                     if txn:
 495                         txn.abort()
 496                     raise
 497         except DBError, dberror:
 498             raise TableDBError, dberror[1]
 499
 500
 501     def Select(self, table, columns, conditions={}):
 502         """Select(table, conditions) - retrieve specific row data
 503         Returns a list of row column->value mapping dictionaries.
 504         * columns is a list of which column data to return.  If
 505           columns is None, all columns will be returned.
 506         * conditions is a dictionary keyed on column names
 507           containing callable conditions expecting the data string as an
 508           argument and returning a boolean.
 509         """
 510         try:
 511             if not self.__tablecolumns.has_key(table):
 512                 self.__load_column_info(table)
 513             if columns is None:
 514                 columns = self.__tablecolumns[table]
 515             matching_rowids = self.__Select(table, columns, conditions)
 516         except DBError, dberror:
 517             raise TableDBError, dberror[1]
 518         # return the matches as a list of dictionaries
 519         return matching_rowids.values()
 520
 521
 522     def __Select(self, table, columns, conditions):
 523         """__Select() - Used to implement Select and Delete (above)
 524         Returns a dictionary keyed on rowids containing dicts
 525         holding the row data for columns listed in the columns param
 526         that match the given conditions.
 527         * conditions is a dictionary keyed on column names
 528         containing callable conditions expecting the data string as an
 529         argument and returning a boolean.
 530         """
 531         # check the validity of each column name
 532         if not self.__tablecolumns.has_key(table):
 533             self.__load_column_info(table)
 534         if columns is None:
 535             columns = self.tablecolumns[table]
 536         for column in (columns + conditions.keys()):
 537             if not self.__tablecolumns[table].count(column):
 538                 raise TableDBError, "unknown column: %r" % (column,)
 539
 540         # keyed on rows that match so far, containings dicts keyed on
 541         # column names containing the data for that row and column.
 542         matching_rowids = {}
 543         # keys are rowids that do not match
 544         rejected_rowids = {}
 545
 546         # attempt to sort the conditions in such a way as to minimize full
 547         # column lookups
 548         def cmp_conditions(atuple, btuple):
 549             a = atuple[1]
 550             b = btuple[1]
 551             if type(a) is type(b):
 552                 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
 553                     # longest prefix first
 554                     return cmp(len(b.prefix), len(a.prefix))
 555                 if isinstance(a, LikeCond) and isinstance(b, LikeCond):
 556                     # longest likestr first
 557                     return cmp(len(b.likestr), len(a.likestr))
 558                 return 0
 559             if isinstance(a, ExactCond):
 560                 return -1
 561             if isinstance(b, ExactCond):
 562                 return 1
 563             if isinstance(a, PrefixCond):
 564                 return -1
 565             if isinstance(b, PrefixCond):
 566                 return 1
 567             # leave all unknown condition callables alone as equals
 568             return 0
 569
 570         conditionlist = conditions.items()
 571         conditionlist.sort(cmp_conditions)
 572
 573         # Apply conditions to column data to find what we want
 574         cur = self.db.cursor()
 575         column_num = -1
 576         for column, condition in conditionlist:
 577             column_num = column_num + 1
 578             searchkey = _search_col_data_key(table, column)
 579             # speedup: don't linear search columns within loop
 580             if column in columns:
 581                 savethiscolumndata = 1  # save the data for return
 582             else:
 583                 savethiscolumndata = 0  # data only used for selection
 584
 585             try:
 586                 key, data = cur.set_range(searchkey)
 587                 while key[:len(searchkey)] == searchkey:
 588                     # extract the rowid from the key
 589                     rowid = key[-_rowid_str_len:]
 590
 591                     if not rejected_rowids.has_key(rowid):
 592                         # if no condition was specified or the condition
 593                         # succeeds, add row to our match list.
 594                         if not condition or condition(data):
 595                             if not matching_rowids.has_key(rowid):
 596                                 matching_rowids[rowid] = {}
 597                             if savethiscolumndata:
 598                                 matching_rowids[rowid][column] = data
 599                         else:
 600                             if matching_rowids.has_key(rowid):
 601                                 del matching_rowids[rowid]
 602                             rejected_rowids[rowid] = rowid
 603
 604                     key, data = cur.next()
 605
 606             except DBError, dberror:
 607                 if dberror[0] != DB_NOTFOUND:
 608                     raise
 609                 continue
 610
 611         cur.close()
 612
 613         # we're done selecting rows, garbage collect the reject list
 614         del rejected_rowids
 615
 616         # extract any remaining desired column data from the
 617         # database for the matching rows.
 618         if len(columns) > 0:
 619             for rowid, rowdata in matching_rowids.items():
 620                 for column in columns:
 621                     if rowdata.has_key(column):
 622                         continue
 623                     try:
 624                         rowdata[column] = self.db.get(
 625                             _data_key(table, column, rowid))
 626                     except DBError, dberror:
 627                         if dberror[0] != DB_NOTFOUND:
 628                             raise
 629                         rowdata[column] = None
 630
 631         # return the matches
 632         return matching_rowids
 633
 634
 635     def Drop(self, table):
 636         """Remove an entire table from the database"""
 637         txn = None
 638         try:
 639             txn = self.env.txn_begin()
 640
 641             # delete the column list
 642             self.db.delete(_columns_key(table), txn)
 643
 644             cur = self.db.cursor(txn)
 645
 646             # delete all keys containing this tables column and row info
 647             table_key = _search_all_data_key(table)
 648             while 1:
 649                 try:
 650                     key, data = cur.set_range(table_key)
 651                 except DBNotFoundError:
 652                     break
 653                 # only delete items in this table
 654                 if key[:len(table_key)] != table_key:
 655                     break
 656                 cur.delete()
 657
 658             # delete all rowids used by this table
 659             table_key = _search_rowid_key(table)
 660             while 1:
 661                 try:
 662                     key, data = cur.set_range(table_key)
 663                 except DBNotFoundError:
 664                     break
 665                 # only delete items in this table
 666                 if key[:len(table_key)] != table_key:
 667                     break
 668                 cur.delete()
 669
 670             cur.close()
 671
 672             # delete the tablename from the table name list
 673             tablelist = pickle.loads(
 674                 self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
 675             try:
 676                 tablelist.remove(table)
 677             except ValueError:
 678                 # hmm, it wasn't there, oh well, that's what we want.
 679                 pass
 680             # delete 1st, incase we opened with DB_DUP
 681             self.db.delete(_table_names_key, txn)
 682             self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
 683
 684             txn.commit()
 685             txn = None
 686
 687             if self.__tablecolumns.has_key(table):
 688                 del self.__tablecolumns[table]
 689
 690         except DBError, dberror:
 691             if txn:
 692                 txn.abort()
 693             raise TableDBError, dberror[1]