Import svntest.main.run_tests explicitly.
[cvs2svn.git] / cvs2svn_lib / database.py
blob2332452511e3ffcff6139c5d713f340e88f414e8
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains database facilities used by cvs2svn."""
20 import sys
21 import os
22 import cPickle
24 from cvs2svn_lib.common import DB_OPEN_READ
25 from cvs2svn_lib.common import DB_OPEN_WRITE
26 from cvs2svn_lib.common import DB_OPEN_NEW
27 from cvs2svn_lib.common import warning_prefix
28 from cvs2svn_lib.common import error_prefix
29 from cvs2svn_lib.log import Log
30 from cvs2svn_lib.record_table import FileOffsetPacker
31 from cvs2svn_lib.record_table import RecordTable
# DBM module selection.
#
# This runs at import time and decides which low-level dbm backend the
# anydbm package will use for all databases in this module.

# 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
# so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
  # Alias the module under the old name so dbhash/anydbm pick it up:
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ['dumbdbm', 'dbm']:
  # Refuse to run at all with a known-problematic backend:
  Log().error(
      '%s: cvs2svn uses the anydbm package, which depends on lower level '
      'dbm\n'
      'libraries. Your system has %s, with which cvs2svn is known to have\n'
      'problems. To use cvs2svn, you must install a Python dbm library '
      'other than\n'
      'dumbdbm or dbm. See '
      'http://python.org/doc/current/lib/module-anydbm.html\n'
      'for more information.\n'
      % (error_prefix, anydbm._defaultmod.__name__,)
      )
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try prefer gdbm instead.
# Unfortunately, gdbm appears not to be trouble free, either.
# (bsddb185 is detected by the absence of a __version__ attribute on the
# underlying bsddb module.)
if hasattr(anydbm._defaultmod, 'bsddb') \
   and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    # gdbm unavailable; warn but continue with the suspect bsddb:
    Log().warn(
        '%s: The version of the bsddb module found on your computer '
        'has been\n'
        'reported to malfunction on some datasets, causing KeyError '
        'exceptions.\n'
        % (warning_prefix,)
        )
  else:
    anydbm._defaultmod = gdbm
class AbstractDatabase:
  """An abstract base class for anydbm-based databases."""

  def __init__(self, filename, mode):
    """Open the anydbm database FILENAME in the given MODE.

    MODE is an anydbm-style mode character ('r', 'w', 'c', or 'n')."""

    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb -- but *not* anydbm in general -- the
    # database consists of one file with the name we specify, rather
    # than several based on that name).
    if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
      if os.path.isfile(filename):
        os.unlink(filename)
      mode = 'c'

    self.db = anydbm.open(filename, mode)

    # Borrow mapping-interface methods directly from the dbm object
    # where it provides them.  We deliberately skip any method that
    # touches *values*: derived classes override __getitem__ and
    # __setitem__ to (de)serialize values, and grabbing those methods
    # from the dbm object would bypass that.
    for name in (
        '__delitem__',
        '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear',
        ):
      method = getattr(self.db, name, None)
      if method is not None:
        setattr(self, name, method)

  def __delitem__(self, key):
    # gdbm defines a __delitem__ method, but it cannot be assigned, so
    # this fallback delegates explicitly:
    del self.db[key]

  def keys(self):
    return self.db.keys()

  def __iter__(self):
    for k in self.keys():
      yield k

  def has_key(self, key):
    try:
      self.db[key]
    except KeyError:
      return False
    return True

  def __contains__(self, key):
    return self.has_key(key)

  def iterkeys(self):
    return self.__iter__()

  def clear(self):
    for k in self.keys():
      del self[k]

  def items(self):
    return [(key, self[key]) for key in self.keys()]

  def values(self):
    return [self[key] for key in self.keys()]

  def get(self, key, default=None):
    try:
      return self[key]
    except KeyError:
      return default

  def close(self):
    self.db.close()
    self.db = None
class Database(AbstractDatabase):
  """A database that uses a Serializer to store objects of a certain type.

  Since the database entry with the key self.serializer_key is used to
  store the serializer, self.serializer_key may not be used as a key for
  normal entries."""

  # Reserved key under which the pickled serializer itself is stored:
  serializer_key = '_.%$1\t;_ '

  def __init__(self, filename, mode, serializer=None):
    """Constructor.

    The database stores its Serializer, so none needs to be supplied
    when opening an existing database."""

    AbstractDatabase.__init__(self, filename, mode)

    if mode != DB_OPEN_NEW:
      # Existing database; recover the serializer that was stored when
      # the database was created:
      self.serializer = cPickle.loads(self.db[self.serializer_key])
    else:
      self.serializer = serializer
      self.db[self.serializer_key] = cPickle.dumps(serializer)

  def __getitem__(self, key):
    data = self.db[key]
    return self.serializer.loads(data)

  def __setitem__(self, key, value):
    data = self.serializer.dumps(value)
    self.db[key] = data

  def keys(self): # TODO: Once needed, handle iterators as well.
    all_keys = self.db.keys()
    # Hide the reserved serializer entry from callers:
    all_keys.remove(self.serializer_key)
    return all_keys
class IndexedDatabase:
  """A file of objects that are written sequentially and read randomly.

  The objects are indexed by small non-negative integers, and a
  RecordTable is used to store the index -> fileoffset map.
  fileoffset=0 is used to represent an empty record. (An offset of 0
  cannot occur for a legitimate record because the serializer is
  written there.)

  The main file consists of a sequence of pickles (or other serialized
  data format). The zeroth record is a pickled Serializer.
  Subsequent ones are objects serialized using the serializer. The
  offset of each object in the file is stored to an index table so
  that the data can later be retrieved randomly.

  Objects are always stored to the end of the file. If an object is
  deleted or overwritten, the fact is recorded in the index_table but
  the space in the pickle file is not garbage collected. This has the
  advantage that one can create a modified version of a database that
  shares the main data file with an old version by copying the index
  file. But it has the disadvantage that space is wasted whenever
  objects are written multiple times."""

  def __init__(self, filename, index_filename, mode, serializer=None):
    """Initialize an IndexedDatabase, writing the serializer if necessary.

    SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
    serializer is read from the file."""

    self.filename = filename
    self.index_filename = index_filename
    self.mode = mode
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

    # Map index -> offset of the record within self.f:
    self.index_table = RecordTable(
        self.index_filename, self.mode, FileOffsetPacker()
        )

    if self.mode == DB_OPEN_NEW:
      assert serializer is not None
      self.serializer = serializer
      # The serializer itself is the zeroth record in the file:
      cPickle.dump(self.serializer, self.f, -1)
    else:
      # Read the memo from the first pickle:
      self.serializer = cPickle.load(self.f)

    # Seek to the end of the file, and record that position:
    self.f.seek(0, 2)
    # self.fp tracks the current file position (None when unknown);
    # self.eofp is the end-of-file offset, where new records are written:
    self.fp = self.f.tell()
    self.eofp = self.fp

  def __setitem__(self, index, item):
    """Write ITEM into the database indexed by INDEX."""

    # Make sure we're at the end of the file:
    if self.fp != self.eofp:
      self.f.seek(self.eofp)
    self.index_table[index] = self.eofp
    s = self.serializer.dumps(item)
    self.f.write(s)
    self.eofp += len(s)
    self.fp = self.eofp

  def _fetch(self, offset):
    # Read and deserialize the record stored at OFFSET in self.f.
    if self.fp != offset:
      self.f.seek(offset)

    # There is no easy way to tell how much data will be read, so just
    # indicate that we don't know the current file pointer:
    self.fp = None

    return self.serializer.loadf(self.f)

  def iterkeys(self):
    return self.index_table.iterkeys()

  def itervalues(self):
    for offset in self.index_table.itervalues():
      yield self._fetch(offset)

  def __getitem__(self, index):
    offset = self.index_table[index]
    return self._fetch(offset)

  def get(self, item, default=None):
    # Like __getitem__, but return DEFAULT for a missing index instead
    # of raising KeyError.
    try:
      return self[item]
    except KeyError:
      return default

  def get_many(self, indexes, default=None):
    """Yield (index,item) tuples for INDEXES, in arbitrary order.

    Yield (index,default) for indexes with no defined values."""

    offsets = []
    for (index, offset) in self.index_table.get_many(indexes):
      if offset is None:
        yield (index, default)
      else:
        offsets.append((offset, index))

    # Sort the offsets to reduce disk seeking:
    offsets.sort()
    for (offset,index) in offsets:
      yield (index, self._fetch(offset))

  def __delitem__(self, index):
    # We don't actually free the data in self.f.
    del self.index_table[index]

  def close(self):
    self.index_table.close()
    self.index_table = None
    self.f.close()
    self.f = None

  def __str__(self):
    return 'IndexedDatabase(%r)' % (self.filename,)
class IndexedStore(IndexedDatabase):
  """A file of items that is written sequentially and read randomly.

  Identical to IndexedDatabase except for the additional add() method,
  which assumes that the object to be written has an 'id' member and
  uses it as the database index.  See IndexedDatabase for more
  information."""

  def add(self, item):
    """Write ITEM into the database indexed by ITEM.id."""

    index = item.id
    self[index] = item