# (Be in -*- python -*- mode.)

# ====================================================================
# Copyright (c) 2000-2007 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

17 """Classes to manage Databases of fixed-length records.
19 The databases map small, non-negative integers to fixed-size records.
20 The records are written in index order to a disk file. Gaps in the
21 index sequence leave gaps in the data file, so for best space
22 efficiency the indexes of existing records should be approximately
25 To use a RecordTable, you need a class derived from Packer which can
26 serialize/deserialize your records into fixed-size strings. Deriving
27 classes have to specify how to pack records into strings and unpack
28 strings into records by overwriting the pack() and unpack() methods
31 Note that these classes keep track of gaps in the records that have
32 been written by filling them with packer.empty_value. If a record is
33 read which contains packer.empty_value, then a KeyError is raised."""
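# A minimal sketch of deriving a Packer, as described above.  The
# PointPacker class below is a hypothetical example, not part of this
# module:
#
#   class PointPacker(Packer):
#     """Pack records that are (x, y) pairs of unsigned integers."""
#
#     def __init__(self):
#       Packer.__init__(self, struct.calcsize('=II'))
#
#     def pack(self, v):
#       return struct.pack('=II', *v)
#
#     def unpack(self, s):
#       return struct.unpack('=II', s)
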
import os
import types
import struct
import mmap

from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import DB_OPEN_WRITE
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.log import Log

# A unique value that can be used to stand for "unset" without
# preventing the use of None.
_unset = object()

class Packer:
  def __init__(self, record_len, empty_value=None):
    self.record_len = record_len
    if empty_value is None:
      self.empty_value = '\0' * self.record_len
    else:
      assert type(empty_value) is types.StringType
      assert len(empty_value) == self.record_len
      self.empty_value = empty_value

63 """Pack record V into a string of length self.record_len."""
65 raise NotImplementedError()
68 """Unpack string S into a record."""
70 raise NotImplementedError()
class StructPacker(Packer):
  def __init__(self, format, empty_value=_unset):
    self.format = format
    if empty_value is not _unset:
      empty_value = self.pack(empty_value)
    else:
      empty_value = None

    Packer.__init__(self, struct.calcsize(self.format),
                    empty_value=empty_value)

  def pack(self, v):
    return struct.pack(self.format, v)

  def unpack(self, v):
    return struct.unpack(self.format, v)[0]

class UnsignedIntegerPacker(StructPacker):
  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=I', empty_value)

class SignedIntegerPacker(StructPacker):
  def __init__(self, empty_value=0):
    StructPacker.__init__(self, '=i', empty_value)

class FileOffsetPacker(Packer):
  """A packer suitable for file offsets.

  We store the 5 least significant bytes of the file offset. This is
  enough bits to represent 1 TiB. Of course if the computer
  doesn't have large file support, only the lowest 31 bits can be
  nonzero, and the offsets are limited to 2 GiB."""

  # Convert file offsets to 8-byte little-endian unsigned integers...
  INDEX_FORMAT = '<Q'
  # ...but then truncate to 5 bytes.
  INDEX_FORMAT_LEN = 5

  PAD = '\0' * (struct.calcsize(INDEX_FORMAT) - INDEX_FORMAT_LEN)

  def __init__(self):
    Packer.__init__(self, self.INDEX_FORMAT_LEN)

  def pack(self, v):
    return struct.pack(self.INDEX_FORMAT, v)[:self.INDEX_FORMAT_LEN]

  def unpack(self, s):
    return struct.unpack(self.INDEX_FORMAT, s + self.PAD)[0]

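# For illustration (the offset value is just an example):
# FileOffsetPacker().pack(1234567890) yields the 5-byte string
# '\xd2\x02\x96\x49\x00', and unpack() of that string returns 1234567890
# again; offsets up to 2**40 - 1 round-trip this way.
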
class RecordTableAccessError(RuntimeError):
  pass

class AbstractRecordTable:
  def __init__(self, filename, mode, packer):
    self.filename = filename
    self.mode = mode
    self.packer = packer
    # Simplify and speed access to this oft-needed quantity:
    self._record_len = self.packer.record_len

  def __str__(self):
    return '%s(%r)' % (self.__class__.__name__, self.filename,)

  def _set_packed_record(self, i, s):
    """Set the value for index I to the packed value S."""

    raise NotImplementedError()

  def __setitem__(self, i, v):
    self._set_packed_record(i, self.packer.pack(v))

  def _get_packed_record(self, i):
    """Return the packed record for index I.

    Raise KeyError if it is not present."""

    raise NotImplementedError()

  def __getitem__(self, i):
    """Return the item for index I.

    Raise KeyError if that item has never been set (or if it was set
    to self.packer.empty_value)."""

    s = self._get_packed_record(i)

    if s == self.packer.empty_value:
      raise KeyError(i)

    return self.packer.unpack(s)

  def get_many(self, indexes, default=None):
    """Yield (index, item) tuples for INDEXES in arbitrary order.

    Yield (index, default) for indexes for which no item is defined."""

    indexes = list(indexes)
    # Sort the indexes to reduce disk seeking:
    indexes.sort()
    for i in indexes:
      yield (i, self.get(i, default))

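    # For example (illustrative only): dict(table.get_many([8, 2, 5]))
    # produces {2: <item 2>, 5: <item 5>, 8: <item 8>}, with any index
    # that was never set mapped to the default value.
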
  def get(self, i, default=None):
    try:
      return self[i]
    except KeyError:
      return default

  def __delitem__(self, i):
    """Delete the item for index I.

    Raise KeyError if that item has never been set (or if it was set
    to self.packer.empty_value)."""

    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()

    # Check that the value was set (otherwise raise KeyError):
    self[i]
    self._set_packed_record(i, self.packer.empty_value)

200 """Yield the keys in the map in key order."""
202 for i
in xrange(0, self
._limit
):
  def itervalues(self):
    """Yield the values in the map in key order.

    Skip over values that haven't been defined."""

    for i in xrange(0, self._limit):
      try:
        yield self[i]
      except KeyError:
        pass

class RecordTable(AbstractRecordTable):
  # The approximate amount of memory that should be used for the cache
  # for each instance of this class:
  CACHE_MEMORY = 4 * 1024 * 1024

  # Empirically, each entry in the cache table has an overhead of
  # about 96 bytes on a 32-bit computer.
  CACHE_OVERHEAD_PER_ENTRY = 96

  def __init__(self, filename, mode, packer, cache_memory=CACHE_MEMORY):
    AbstractRecordTable.__init__(self, filename, mode, packer)
    if self.mode == DB_OPEN_NEW:
      self.f = open(self.filename, 'wb+')
    elif self.mode == DB_OPEN_WRITE:
      self.f = open(self.filename, 'rb+')
    elif self.mode == DB_OPEN_READ:
      self.f = open(self.filename, 'rb')
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)
    self.cache_memory = cache_memory

    # Number of items that can be stored in the write cache.
    self._max_memory_cache = (
        self.cache_memory
        // (self.CACHE_OVERHEAD_PER_ENTRY + self._record_len))

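    # For example (illustrative only): with the default 4 MiB cache and
    # 4-byte records this comes to 4194304 // (96 + 4) = 41943 cache
    # entries.
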
    # Read and write cache; a map {i : (dirty, s)}, where i is an
    # index, dirty indicates whether the value has to be written to
    # disk, and s is the packed value for the index. Up to
    # self._max_memory_cache items can be stored here. When the cache
    # fills up, it is written to disk in one go and then cleared.
    self._cache = {}

    # The index just beyond the last record ever written:
    self._limit = os.path.getsize(self.filename) // self._record_len

    # The index just beyond the last record ever written to disk:
    self._limit_written = self._limit

  def flush(self):
    Log().debug('Flushing cache for %s' % (self,))

    pairs = [(i, s) for (i, (dirty, s)) in self._cache.items() if dirty]

    if pairs:
      pairs.sort()
      old_i = None
      f = self.f
      for (i, s) in pairs:
        if i == old_i:
          # No seeking needed
          pass
        elif i <= self._limit_written:
          # Just jump there:
          f.seek(i * self._record_len)
        else:
          # Jump to the end of the file then write _empty_values until
          # we reach the correct location:
          f.seek(self._limit_written * self._record_len)
          while self._limit_written < i:
            f.write(self.packer.empty_value)
            self._limit_written += 1
        f.write(s)
        old_i = i + 1
      self._limit_written = max(self._limit_written, old_i)
      self.f.flush()

    self._cache.clear()

  def _set_packed_record(self, i, s):
    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    self._cache[i] = (True, s)
    if len(self._cache) >= self._max_memory_cache:
      self.flush()
    self._limit = max(self._limit, i + 1)

  def _get_packed_record(self, i):
    try:
      return self._cache[i][1]
    except KeyError:
      if not 0 <= i < self._limit_written:
        raise KeyError(i)
      self.f.seek(i * self._record_len)
      s = self.f.read(self._record_len)
      self._cache[i] = (False, s)
      if len(self._cache) >= self._max_memory_cache:
        self.flush()

      return s

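# A usage sketch (the 'offsets.dat' filename is hypothetical; the classes
# and constants are the ones defined in this module):
#
#   table = RecordTable('offsets.dat', DB_OPEN_NEW, UnsignedIntegerPacker())
#   table[3] = 42
#   table.flush()                  # write cached records through to disk
#   assert table[3] == 42
#   assert table.get(99) is None   # index 99 was never set
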
class MmapRecordTable(AbstractRecordTable):
  GROWTH_INCREMENT = 65536

  def __init__(self, filename, mode, packer):
    AbstractRecordTable.__init__(self, filename, mode, packer)
    if self.mode == DB_OPEN_NEW:
      self.python_file = open(self.filename, 'wb+')
      self.python_file.write('\0' * self.GROWTH_INCREMENT)
      self.python_file.flush()
      self._filesize = self.GROWTH_INCREMENT
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_WRITE
          )

      # The index just beyond the last record ever written:
      self._limit = 0
    elif self.mode == DB_OPEN_WRITE:
      self.python_file = open(self.filename, 'rb+')
      self._filesize = os.path.getsize(self.filename)
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_WRITE
          )

      # The index just beyond the last record ever written:
      self._limit = os.path.getsize(self.filename) // self._record_len
    elif self.mode == DB_OPEN_READ:
      self.python_file = open(self.filename, 'rb')
      self._filesize = os.path.getsize(self.filename)
      self.f = mmap.mmap(
          self.python_file.fileno(), self._filesize,
          access=mmap.ACCESS_READ
          )

      # The index just beyond the last record ever written:
      self._limit = os.path.getsize(self.filename) // self._record_len
    else:
      raise RuntimeError('Invalid mode %r' % self.mode)

  def _set_packed_record(self, i, s):
    if self.mode == DB_OPEN_READ:
      raise RecordTableAccessError()
    if i < 0:
      raise KeyError()
    if i >= self._limit:
      # This write extends the range of valid indices. First check
      # whether the file has to be enlarged:
      new_size = (i + 1) * self._record_len
      if new_size > self._filesize:
        self._filesize = (
            (new_size + self.GROWTH_INCREMENT - 1)
            // self.GROWTH_INCREMENT
            * self.GROWTH_INCREMENT
            )
        self.f.resize(self._filesize)
      if i > self._limit:
        # Pad up to the new record with empty_value:
        self.f[self._limit * self._record_len:i * self._record_len] = \
            self.packer.empty_value * (i - self._limit)
      self._limit = i + 1

    self.f[i * self._record_len:(i + 1) * self._record_len] = s

  def _get_packed_record(self, i):
    if not 0 <= i < self._limit:
      raise KeyError(i)
    return self.f[i * self._record_len:(i + 1) * self._record_len]

  def close(self):
    self.f.close()
    self.python_file.close()