run-tests.py: Only pass the --svnadmin option to cvs2svn when needed.
[cvs2svn.git] / cvs2svn_lib / indexed_database.py
blob4a8cbdd44f23e1f6e3f33cd10f34bd477792262c
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains database facilities used by cvs2svn."""
20 import cPickle
22 from cvs2svn_lib.common import DB_OPEN_READ
23 from cvs2svn_lib.common import DB_OPEN_WRITE
24 from cvs2svn_lib.common import DB_OPEN_NEW
25 from cvs2svn_lib.record_table import FileOffsetPacker
26 from cvs2svn_lib.record_table import RecordTable
class IndexedDatabase:
  """A file of objects that are written sequentially and read randomly.

  The objects are indexed by small non-negative integers, and a
  RecordTable is used to store the index -> fileoffset map.
  fileoffset=0 is used to represent an empty record.  (An offset of 0
  cannot occur for a legitimate record because the serializer is
  written there.)

  The main file consists of a sequence of pickles (or other serialized
  data format).  The zeroth record is a pickled Serializer.
  Subsequent ones are objects serialized using the serializer.  The
  offset of each object in the file is stored to an index table so
  that the data can later be retrieved randomly.

  Objects are always stored to the end of the file.  If an object is
  deleted or overwritten, the fact is recorded in the index_table but
  the space in the pickle file is not garbage collected.  This has the
  advantage that one can create a modified version of a database that
  shares the main data file with an old version by copying the index
  file.  But it has the disadvantage that space is wasted whenever
  objects are written multiple times."""

  def __init__(self, filename, index_filename, mode, serializer=None):
    """Initialize an IndexedDatabase, writing the serializer if necessary.

    SERIALIZER is only used if MODE is DB_OPEN_NEW; otherwise the
    serializer is read from the file.

    Raises RuntimeError if MODE is not one of DB_OPEN_NEW,
    DB_OPEN_WRITE, or DB_OPEN_READ."""

    self.filename = filename
    self.index_filename = index_filename
    self.mode = mode
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

    # BUG FIX: the closing parenthesis of this constructor call was
    # missing, which was a syntax error.
    self.index_table = RecordTable(
        self.index_filename, self.mode, FileOffsetPacker()
        )

    if self.mode == DB_OPEN_NEW:
      assert serializer is not None
      self.serializer = serializer
      # Protocol -1 means "the highest protocol available":
      cPickle.dump(self.serializer, self.f, -1)
    else:
      # Read the memo from the first pickle:
      self.serializer = cPickle.load(self.f)

    # Seek to the end of the file, and record that position.  self.fp
    # tracks the current file position (or None when unknown);
    # self.eofp is always the end of file, where new records go:
    self.f.seek(0, 2)
    self.fp = self.f.tell()
    self.eofp = self.fp

  def __setitem__(self, index, item):
    """Write ITEM into the database indexed by INDEX."""

    # Make sure we're at the end of the file:
    if self.fp != self.eofp:
      self.f.seek(self.eofp)
    self.index_table[index] = self.eofp
    s = self.serializer.dumps(item)
    self.f.write(s)
    self.eofp += len(s)
    self.fp = self.eofp

  def _fetch(self, offset):
    """Seek to OFFSET (if necessary) and deserialize the record there."""

    if self.fp != offset:
      self.f.seek(offset)

    # There is no easy way to tell how much data will be read, so just
    # indicate that we don't know the current file pointer:
    self.fp = None

    return self.serializer.loadf(self.f)

  def iterkeys(self):
    """Iterate over the indexes that have defined values."""

    return self.index_table.iterkeys()

  def itervalues(self):
    """Iterate over the stored items, in index-table order."""

    for offset in self.index_table.itervalues():
      yield self._fetch(offset)

  def __getitem__(self, index):
    """Return the item stored at INDEX; raise KeyError if undefined."""

    offset = self.index_table[index]
    return self._fetch(offset)

  def get(self, item, default=None):
    """Return the item stored at index ITEM, or DEFAULT if undefined."""

    try:
      return self[item]
    except KeyError:
      return default

  def get_many(self, indexes, default=None):
    """Yield (index,item) tuples for INDEXES, in arbitrary order.

    Yield (index,default) for indexes with no defined values."""

    offsets = []
    for (index, offset) in self.index_table.get_many(indexes):
      if offset is None:
        yield (index, default)
      else:
        offsets.append((offset, index))

    # Sort the offsets to reduce disk seeking:
    offsets.sort()
    for (offset,index) in offsets:
      yield (index, self._fetch(offset))

  def __delitem__(self, index):
    """Remove INDEX from the index table.

    We don't actually free the data in self.f."""

    del self.index_table[index]

  def close(self):
    """Close the index table and the data file, discarding references."""

    self.index_table.close()
    self.index_table = None
    self.f.close()
    self.f = None

  def __str__(self):
    return 'IndexedDatabase(%r)' % (self.filename,)
class IndexedStore(IndexedDatabase):
  """A file of items that is written sequentially and read randomly.

  This class behaves exactly like IndexedDatabase, adding only an
  add() method.  add() expects each stored object to carry an 'id'
  member, which it uses as the object's database index.  See
  IndexedDatabase for the full description of the storage format."""

  def add(self, item):
    """Store ITEM in the database, using ITEM.id as its index."""

    index = item.id
    self[index] = item