Rename InternalRevisionExcluder to InternalRevisionCollector.
[cvs2svn.git] / cvs2svn_lib / checkout_internal.py
blobbc2d535ec3ded244be43c7cfdbdc9d09ce921fb1
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains classes that implement the --use-internal-co option.
19 The idea is to patch up the revisions' contents incrementally, thus
20 avoiding the huge number of process spawns and the O(n^2) overhead of
21 using 'co' and 'cvs'.
23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
24 to databases. Notably, deltas from the trunk need to be reversed, as
25 CVS stores them so they apply from HEAD backwards.
27 InternalRevisionReader produces the revisions' contents on demand. To
28 generate the text for a typical revision, we need the revision's delta
29 text plus the fulltext of the previous revision. Therefore, we
30 maintain a checkout database containing a copy of the fulltext of any
31 revision for which subsequent revisions still need to be retrieved.
32 It is crucial to remove text from this database as soon as it is no
33 longer needed, to prevent it from growing enormous.
35 There are two reasons that the text from a revision can be needed: (1)
36 because the revision itself still needs to be output to a dumpfile;
37 (2) because another revision needs it as the base of its delta. We
38 maintain a reference count for each revision, which includes *both*
39 possibilities. The first time a revision's text is needed, it is
40 generated by applying the revision's deltatext to the previous
41 revision's fulltext, and the resulting fulltext is stored in the
42 checkout database. Each time a revision's fulltext is retrieved, its
43 reference count is decremented. When the reference count goes to
44 zero, then the fulltext is deleted from the checkout database.
46 The administrative data for managing this consists of one TextRecord
47 entry for each revision. Each TextRecord has an id, which is the same
48 id as used for the corresponding CVSRevision instance. It also
49 maintains a count of the times it is expected to be retrieved.
50 TextRecords come in several varieties:
52 FullTextRecord -- Used for revisions whose fulltext is contained
53 directly in the RCS file, and therefore available during
54 CollectRevsPass (i.e., typically revision 1.1 of each file).
56 DeltaTextRecord -- Used for revisions that are defined via a delta
57 relative to some other TextRecord. These records record the id of
58 the TextRecord that holds the base text against which the delta is
59 defined. When the text for a DeltaTextRecord is retrieved, the
60 DeltaTextRecord instance is deleted and a CheckedOutTextRecord
61 instance is created to take its place.
63 CheckedOutTextRecord -- Used during OutputPass for a revision that
64 started out as a DeltaTextRecord, but has already been retrieved
65 (and therefore its fulltext is stored in the checkout database).
67 While a file is being processed during CollectRevsPass, the fulltext
68 and deltas are stored to the delta database, and TextRecord instances
69 are created to keep track of things. The reference counts are all
70 initialized to zero.
72 After CollectRevsPass has done any preliminary tree mangling, its
73 _FileDataCollector.parse_completed() method calls
74 RevisionRecorder.finish_file(), passing it the CVSFileItems instance
75 that describes the revisions in the file. At this point the reference
76 counts for the file's TextRecords are updated: each record referred to
77 by a delta has its refcount incremented, and each record that
78 corresponds to a non-delete CVSRevision is incremented. After that,
79 any records with refcount==0 are removed. When one record is removed,
80 that can cause another record's reference count to go to zero and be
81 removed too, recursively. When a TextRecord is deleted at this stage,
82 its deltatext is also deleted from the delta database.
84 In FilterSymbolsPass, the exact same procedure (described in the
85 previous paragraph) is repeated, but this time using the CVSFileItems
86 after it has been updated for excluded symbols, symbol
87 preferred-parent grafting, etc."""
90 from cStringIO import StringIO
91 import re
92 import time
94 from cvs2svn_lib import config
95 from cvs2svn_lib.common import DB_OPEN_NEW
96 from cvs2svn_lib.common import DB_OPEN_READ
97 from cvs2svn_lib.common import warning_prefix
98 from cvs2svn_lib.common import FatalError
99 from cvs2svn_lib.common import InternalError
100 from cvs2svn_lib.common import is_trunk_revision
101 from cvs2svn_lib.context import Ctx
102 from cvs2svn_lib.log import Log
103 from cvs2svn_lib.artifact_manager import artifact_manager
104 from cvs2svn_lib.symbol import Trunk
105 from cvs2svn_lib.cvs_item import CVSRevisionModification
106 from cvs2svn_lib.database import Database
107 from cvs2svn_lib.database import IndexedDatabase
108 from cvs2svn_lib.rcs_stream import RCSStream
109 from cvs2svn_lib.rcs_stream import MalformedDeltaException
110 from cvs2svn_lib.revision_manager import RevisionCollector
111 from cvs2svn_lib.revision_manager import RevisionReader
112 from cvs2svn_lib.serializer import MarshalSerializer
113 from cvs2svn_lib.serializer import CompressingSerializer
114 from cvs2svn_lib.serializer import PrimedPickleSerializer
116 import cvs2svn_rcsparse
class TextRecord(object):
    """Bookkeeping data for the text of a single CVSRevision.

    This is the base class of the text record varieties.  It tracks the
    record id and a reference count; subclasses supply checkout() and
    free()."""

    __slots__ = ['id', 'refcount']

    def __init__(self, id):
        # The cvs_rev_id of the revision whose text this is.
        self.id = id

        # How many more times the text of this revision is expected to
        # be retrieved.
        self.refcount = 0

    def __getstate__(self):
        """Return the pickled form: the tuple (id, refcount)."""

        return (self.id, self.refcount)

    def __setstate__(self, state):
        """Restore the fields from a tuple produced by __getstate__()."""

        self.id, self.refcount = state

    def increment_dependency_refcounts(self, text_record_db):
        """Increment the refcounts of any records that this one depends on.

        The base implementation has no dependencies and does nothing."""

        pass

    def decrement_refcount(self, text_record_db):
        """Note that our text has to be checked out one time fewer.

        If the reference count drops to zero, ask TEXT_RECORD_DB to
        discard this record."""

        remaining = self.refcount - 1
        self.refcount = remaining
        if remaining == 0:
            text_record_db.discard(self.id)

    def checkout(self, text_record_db):
        """Workhorse of the checkout process.

        Return the text for this revision, decrement our reference count,
        and update the databases depending on whether there will be
        future checkouts.  Must be implemented by subclasses."""

        raise NotImplementedError()

    def free(self, text_record_db):
        """This instance will never again be checked out; free it.

        Also free any associated resources and decrement the refcounts of
        any other TextRecords that this one depends on.  Must be
        implemented by subclasses."""

        raise NotImplementedError()
class FullTextRecord(TextRecord):
    """A TextRecord whose text is read directly from the delta database.

    The entry stored under our id in the delta database is returned
    as-is by checkout(); no delta is applied."""

    __slots__ = []

    def __getstate__(self):
        return (self.id, self.refcount)

    def __setstate__(self, state):
        self.id, self.refcount = state

    def checkout(self, text_record_db):
        """Return our text from the delta database and drop one reference."""

        fulltext = text_record_db.delta_db[self.id]
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Delete our text from the delta database."""

        del text_record_db.delta_db[self.id]

    def __str__(self):
        return 'FullTextRecord(%x, %d)' % (self.id, self.refcount)
class DeltaTextRecord(TextRecord):
    """A TextRecord whose text is defined as a delta against another record.

    PRED_ID names the record holding the base text; the delta itself is
    stored in the delta database under our own id."""

    __slots__ = ['pred_id']

    def __init__(self, id, pred_id):
        TextRecord.__init__(self, id)

        # The cvs_rev_id of the revision relative to which this delta is
        # defined.
        self.pred_id = pred_id

    def __getstate__(self):
        return (self.id, self.refcount, self.pred_id)

    def __setstate__(self, state):
        self.id, self.refcount, self.pred_id = state

    def increment_dependency_refcounts(self, text_record_db):
        """Count our need for the predecessor's text as one reference."""

        text_record_db[self.pred_id].refcount += 1

    def checkout(self, text_record_db):
        """Rebuild our fulltext from the predecessor's text plus our delta.

        Checking out the predecessor consumes one of its references.  If
        we will be needed again afterwards, the fulltext is cached in the
        checkout database and we are replaced by a CheckedOutTextRecord;
        otherwise we simply remove ourselves from TEXT_RECORD_DB."""

        predecessor = text_record_db[self.pred_id]
        stream = RCSStream(predecessor.checkout(text_record_db))
        stream.apply_diff(text_record_db.delta_db[self.id])
        text = stream.get_text()
        del stream

        self.refcount -= 1
        if self.refcount != 0:
            # Store a new CheckedOutTextRecord in place of ourselves:
            text_record_db.checkout_db['%x' % self.id] = text
            replacement = CheckedOutTextRecord(self.id)
            replacement.refcount = self.refcount
            text_record_db.replace(replacement)
        else:
            # This text will never be needed again; just delete ourselves
            # without ever having stored the fulltext to the checkout
            # database:
            del text_record_db[self.id]

        return text

    def free(self, text_record_db):
        """Delete our delta and release our reference to the predecessor."""

        del text_record_db.delta_db[self.id]
        text_record_db[self.pred_id].decrement_refcount(text_record_db)

    def __str__(self):
        return 'DeltaTextRecord(%x -> %x, %d)' % (
            self.pred_id, self.id, self.refcount,
        )
class CheckedOutTextRecord(TextRecord):
    """A TextRecord whose fulltext is cached in the checkout database.

    The checkout database is keyed by the record id formatted as a
    hexadecimal string."""

    __slots__ = []

    def __getstate__(self):
        return (self.id, self.refcount)

    def __setstate__(self, state):
        self.id, self.refcount = state

    def checkout(self, text_record_db):
        """Return the cached fulltext and drop one reference."""

        fulltext = text_record_db.checkout_db['%x' % self.id]
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Remove the cached fulltext from the checkout database."""

        del text_record_db.checkout_db['%x' % self.id]

    def __str__(self):
        return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount)
class NullDatabase(object):
    """A do-nothing database that can be used with TextRecordDatabase.

    Use this when you don't actually want to allow anything to be
    deleted: deletion requests are accepted and silently ignored."""

    def __delitem__(self, id):
        """Ignore the request to delete the entry for ID."""

        return None
class TextRecordDatabase:
    """Holds the TextRecord instances that are currently live.

    During CollectRevsPass and FilterSymbolsPass, files are processed
    one by one and a new TextRecordDatabase instance is used for each
    file.  During OutputPass, a single TextRecordDatabase instance is
    used for the duration of OutputPass; individual records are added
    and removed when they are active."""

    def __init__(self, delta_db, checkout_db):
        # A map { cvs_rev_id -> TextRecord }.
        self.text_records = {}

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext/deltatext strings as values.  Its __getitem__() method
        # is used to retrieve deltas when they are needed, and its
        # __delitem__() method is used to delete deltas when they can be
        # freed.  The modifiability of the delta database varies from
        # pass to pass, so the object stored here varies as well:
        #
        # CollectRevsPass: a fully-functional IndexedDatabase.  This
        #     allows deltas that will not be needed to be deleted.
        #
        # FilterSymbolsPass: a NullDatabase.  The delta database cannot
        #     be modified during this pass, and we have no need to
        #     retrieve deltas, so we just use a dummy object here.
        #
        # OutputPass: a disabled IndexedDatabase.  During this pass we
        #     need to retrieve deltas, but we are not allowed to modify
        #     the delta database.  So we use an IndexedDatabase whose
        #     __delitem__() method has been disabled to do nothing.
        self.delta_db = delta_db

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext strings as values.  This database is only set during
        # OutputPass.
        self.checkout_db = checkout_db

        # If this is set to a list, then the list holds the ids of
        # text_records that have to be deleted; when discard() is called,
        # it adds the requested id to the list but does not delete it.
        # If this member is set to None, then text_records are deleted
        # immediately when discard() is called.
        self.deferred_deletes = None

    def __getstate__(self):
        """Pickle only the text records; the databases are not stored."""

        return (self.text_records.values(),)

    def __setstate__(self, state):
        """Rebuild from pickled records, attaching NullDatabase stand-ins."""

        (text_records,) = state
        self.text_records = {}
        for text_record in text_records:
            self.add(text_record)
        self.delta_db = NullDatabase()
        self.checkout_db = NullDatabase()
        self.deferred_deletes = None

    def add(self, text_record):
        """Add TEXT_RECORD to our database.

        There must not already be a record with the same id."""

        assert text_record.id not in self.text_records

        self.text_records[text_record.id] = text_record

    def __getitem__(self, id):
        """Return the record with the specified ID (KeyError if absent)."""

        return self.text_records[id]

    def __delitem__(self, id):
        """Remove the record with the specified ID from SELF.

        The record's free() method is not called here."""

        del self.text_records[id]

    def replace(self, text_record):
        """Store TEXT_RECORD in place of the existing record with the same id.

        Do not do anything with the old record."""

        assert text_record.id in self.text_records
        self.text_records[text_record.id] = text_record

    def discard(self, *ids):
        """The text records with IDS are no longer needed; discard them.

        This involves calling their free() methods and also removing them
        from SELF.

        If SELF.deferred_deletes is not None, then the ids to be deleted
        are added to the list instead of deleted immediately.  This
        mechanism is to prevent a stack overflow from the avalanche of
        deletes that can result from deleting a long chain of revisions.

        Raise InternalError if a record to be discarded still has a
        nonzero refcount.  Whatever exception escapes, the deferral state
        is reset so that later discard() calls are not silently queued
        forever."""

        if self.deferred_deletes is not None:
            # We were called (indirectly) from the loop below; just queue
            # the ids and let the outer-level call process them.
            self.deferred_deletes.extend(ids)
            return

        # This is an outer-level delete.
        self.deferred_deletes = list(ids)
        try:
            while self.deferred_deletes:
                id = self.deferred_deletes.pop()
                text_record = self[id]
                if text_record.refcount != 0:
                    raise InternalError(
                        'TextRecordDatabase.discard(%s) called with refcount = %d'
                        % (text_record, text_record.refcount,)
                    )
                # This call might cause other text_record ids to be added
                # to self.deferred_deletes:
                text_record.free(self)
                del self[id]
        finally:
            # Always leave deferral mode, even if free() or the refcount
            # check raised; otherwise every subsequent discard() would
            # only append to a list that nobody processes.
            self.deferred_deletes = None

    def itervalues(self):
        """Return an iterator over the contained TextRecords."""

        return self.text_records.itervalues()

    def recompute_refcounts(self, cvs_file_items):
        """Recompute the refcounts of the contained TextRecords.

        Use CVS_FILE_ITEMS to determine which records will be needed by
        cvs2svn."""

        # First clear all of the refcounts:
        for text_record in self.itervalues():
            text_record.refcount = 0

        # Now increment the reference count of records that are needed as
        # the source of another record's deltas.  (The plain id->record
        # map is passed; the records only index into it.)
        for text_record in self.itervalues():
            text_record.increment_dependency_refcounts(self.text_records)

        # Now increment the reference count of records that will be
        # needed by cvs2svn:
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                if isinstance(cvs_rev, CVSRevisionModification):
                    self[cvs_rev.id].refcount += 1

    def free_unused(self):
        """Free any TextRecords whose reference counts are zero."""

        # The deletion of some of these text records might cause others
        # to be unused, in which case they will be deleted automatically.
        # But since the initially-unused records are not referred to by
        # any others, we don't have to be afraid that they will be
        # deleted before we get to them.  But it *is* crucial that we
        # create the whole unused list before starting the loop.

        unused = [
            text_record.id
            for text_record in self.itervalues()
            if text_record.refcount == 0
        ]

        self.discard(*unused)

    def log_leftovers(self):
        """If any TextRecords still exist, log them."""

        if self.text_records:
            Log().warn(
                "%s: internal problem: leftover revisions in the checkout cache:"
                % warning_prefix)
            for text_record in self.itervalues():
                Log().warn('  %s' % (text_record,))

    def __repr__(self):
        """Debugging output of the current contents of the TextRecordDatabase."""

        retval = ['TextRecordDatabase:']
        for text_record in self.itervalues():
            retval.append('  %s' % (text_record,))
        return '\n'.join(retval)
445 class _Sink(cvs2svn_rcsparse.Sink):
446 def __init__(self, revision_recorder, cvs_file_items):
447 self.revision_recorder = revision_recorder
448 self.cvs_file_items = cvs_file_items
450 # A map {rev : base_rev} indicating that the text for rev is
451 # stored in CVS as a delta relative to base_rev.
452 self.base_revisions = {}
454 # The revision that is stored with its fulltext in CVS (usually
455 # the oldest revision on trunk):
456 self.head_revision = None
458 # The first logical revision on trunk (usually '1.1'):
459 self.revision_1_1 = None
461 # Keep track of the revisions whose revision info has been seen so
462 # far (to avoid repeated revision info blocks):
463 self.revisions_seen = set()
465 def set_head_revision(self, revision):
466 self.head_revision = revision
468 def define_revision(
469 self, revision, timestamp, author, state, branches, next
471 if next:
472 self.base_revisions[next] = revision
473 else:
474 if is_trunk_revision(revision):
475 self.revision_1_1 = revision
477 for branch in branches:
478 self.base_revisions[branch] = revision
480 def set_revision_info(self, revision, log, text):
481 if revision in self.revisions_seen:
482 # One common form of CVS repository corruption is that the
483 # Deltatext block for revision 1.1 appears twice. CollectData
484 # has already warned about this problem; here we can just ignore
485 # it.
486 return
487 else:
488 self.revisions_seen.add(revision)
490 cvs_rev_id = self.cvs_file_items.original_ids[revision]
491 if is_trunk_revision(revision):
492 # On trunk, revisions are encountered in reverse order (1.<N>
493 # ... 1.1) and deltas are inverted. The first text that we see
494 # is the fulltext for the HEAD revision. After that, the text
495 # corresponding to revision 1.N is the delta (1.<N+1> ->
496 # 1.<N>)). We have to invert the deltas here so that we can
497 # read the revisions out in dependency order; that is, for
498 # revision 1.1 we want the fulltext, and for revision 1.<N> we
499 # want the delta (1.<N-1> -> 1.<N>). This means that we can't
500 # compute the delta for a revision until we see its logical
501 # parent. When we finally see revision 1.1 (which is recognized
502 # because it doesn't have a parent), we can record the diff (1.1
503 # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
505 if revision == self.head_revision:
506 # This is HEAD, as fulltext. Initialize the RCSStream so
507 # that we can compute deltas backwards in time.
508 self._stream = RCSStream(text)
509 self._stream_revision = revision
510 else:
511 # Any other trunk revision is a backward delta. Apply the
512 # delta to the RCSStream to mutate it to the contents of this
513 # revision, and also to get the reverse delta, which we store
514 # as the forward delta of our child revision.
515 try:
516 text = self._stream.invert_diff(text)
517 except MalformedDeltaException, e:
518 Log().error(
519 'Malformed RCS delta in %s, revision %s: %s'
520 % (self.cvs_file_items.cvs_file.filename, revision, e)
522 raise RuntimeError()
523 text_record = DeltaTextRecord(
524 self.cvs_file_items.original_ids[self._stream_revision],
525 cvs_rev_id
527 self.revision_recorder._writeout(text_record, text)
528 self._stream_revision = revision
530 if revision == self.revision_1_1:
531 # This is revision 1.1. Write its fulltext:
532 text_record = FullTextRecord(cvs_rev_id)
533 self.revision_recorder._writeout(text_record, self._stream.get_text())
535 # There will be no more trunk revisions delivered, so free the
536 # RCSStream.
537 del self._stream
538 del self._stream_revision
540 else:
541 # On branches, revisions are encountered in logical order
542 # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
543 # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
544 # <BRANCH>.<N>). That's what we need, so just store it.
546 # FIXME: It would be nice to avoid writing out branch deltas
547 # when --trunk-only. (They will be deleted when finish_file()
548 # is called, but if the delta db is in an IndexedDatabase the
549 # deletions won't actually recover any disk space.)
550 text_record = DeltaTextRecord(
551 cvs_rev_id,
552 self.cvs_file_items.original_ids[self.base_revisions[revision]]
554 self.revision_recorder._writeout(text_record, text)
556 return None
class InternalRevisionCollector(RevisionCollector):
    """The RevisionCollector used by InternalRevisionReader.

    It stores each file's RCS deltas in one database and the pickled
    TextRecordDatabase describing the file's revision tree in another."""

    def __init__(self, compress):
        RevisionCollector.__init__(self)

        # If true, the stored delta texts are wrapped in a
        # CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the temporary files written by this collector."""

        artifact_manager.register_temp_file(
            config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
        artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
        artifact_manager.register_temp_file(
            config.RCS_TREES_INDEX_TABLE, which_pass
        )
        artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

    def start(self):
        """Create the delta database and the revision tree database."""

        ser = MarshalSerializer()
        if self._compress:
            ser = CompressingSerializer(ser)
        self._rcs_deltas = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_NEW, ser
        )
        primer = (FullTextRecord, DeltaTextRecord)
        self._rcs_trees = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_NEW, PrimedPickleSerializer(primer)
        )

    def _writeout(self, text_record, text):
        """Record TEXT_RECORD for the current file and store its TEXT."""

        self.text_record_db.add(text_record)
        self._rcs_deltas[text_record.id] = text

    def process_file(self, cvs_file_items):
        """Read revision information for the file described by CVS_FILE_ITEMS.

        Compute the text record refcounts, discard any records that are
        unneeded, and store the text records for the file to the
        _rcs_trees database."""

        # A map from cvs_rev_id to TextRecord instance:
        self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

        # Close the RCS file explicitly instead of relying on garbage
        # collection; a conversion can process a very large number of
        # files.
        rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
        try:
            cvs2svn_rcsparse.parse(rcs_file, _Sink(self, cvs_file_items))
        finally:
            rcs_file.close()

        self.text_record_db.recompute_refcounts(cvs_file_items)
        self.text_record_db.free_unused()
        self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
        del self.text_record_db

    def finish(self):
        """Close both databases."""

        self._rcs_deltas.close()
        self._rcs_trees.close()
class _KeywordExpander:
    """A class whose instances provide substitutions for CVS keywords.

    This class is used via its __call__() method, which should be called
    with a match object representing a match for a CVS keyword string.
    The method returns the replacement for the matched text.

    The __call__() method works by calling the method with the same name
    as that of the CVS keyword (converted to lower case).

    Instances of this class can be passed as the REPL argument to
    re.sub()."""

    date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
    date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12

    date_fmt = date_fmt_new

    @classmethod
    def use_old_date_format(klass):
        """Class method to ensure exact compatibility with CVS 1.11
        output.  Use this if you want to verify your conversion and you're
        using CVS 1.11."""

        klass.date_fmt = klass.date_fmt_old

    def __init__(self, cvs_rev):
        self.cvs_rev = cvs_rev

    def __call__(self, match):
        keyword = match.group(1)
        expand = getattr(self, keyword.lower())
        return '$%s: %s $' % (keyword, expand())

    def _describe(self, name):
        """Return the shared 'NAME REV DATE AUTHOR Exp' keyword value."""

        return '%s %s %s %s Exp' % (
            name, self.cvs_rev.rev, self.date(), self.author(),
        )

    def author(self):
        metadata = Ctx()._metadata_db[self.cvs_rev.metadata_id]
        return metadata.original_author

    def date(self):
        when = time.gmtime(self.cvs_rev.timestamp)
        return time.strftime(self.date_fmt, when)

    def header(self):
        return self._describe(self.source())

    def id(self):
        return self._describe(self.rcsfile())

    def locker(self):
        # Handle kvl like kv, as a converted repo is supposed to have no
        # locks.
        return ''

    def log(self):
        # Would need some special handling.
        return 'not supported by cvs2svn'

    def name(self):
        # Cannot work, as just creating a new symbol does not check out
        # the revision again.
        return 'not supported by cvs2svn'

    def rcsfile(self):
        return self.cvs_rev.cvs_file.basename + ",v"

    def revision(self):
        return self.cvs_rev.rev

    def source(self):
        project = self.cvs_rev.cvs_file.project
        prefix = project.cvs_repository_root + '/' + project.cvs_module
        return prefix + self.cvs_rev.cvs_file.cvs_path + ",v"

    def state(self):
        # We check out only live revisions.
        return 'Exp'
698 class InternalRevisionReader(RevisionReader):
699 """A RevisionReader that reads the contents from an own delta store."""
701 _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
702 _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
703 _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
705 def __init__(self, compress):
706 self._compress = compress
708 def register_artifacts(self, which_pass):
709 artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
710 artifact_manager.register_temp_file_needed(
711 config.RCS_DELTAS_STORE, which_pass
713 artifact_manager.register_temp_file_needed(
714 config.RCS_DELTAS_INDEX_TABLE, which_pass
716 artifact_manager.register_temp_file_needed(
717 config.RCS_TREES_STORE, which_pass
719 artifact_manager.register_temp_file_needed(
720 config.RCS_TREES_INDEX_TABLE, which_pass
723 def start(self):
724 self._delta_db = IndexedDatabase(
725 artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
726 artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
727 DB_OPEN_READ)
728 self._delta_db.__delitem__ = lambda id: None
729 self._tree_db = IndexedDatabase(
730 artifact_manager.get_temp_file(config.RCS_TREES_STORE),
731 artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
732 DB_OPEN_READ)
733 ser = MarshalSerializer()
734 if self._compress:
735 ser = CompressingSerializer(ser)
736 self._co_db = Database(
737 artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
738 ser)
740 # The set of CVSFile instances whose TextRecords have already been
741 # read:
742 self._loaded_files = set()
744 # A map { CVSFILE : _FileTree } for files that currently have live
745 # revisions:
746 self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
748 def _get_text_record(self, cvs_rev):
749 """Return the TextRecord instance for CVS_REV.
751 If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
752 do so now."""
754 if cvs_rev.cvs_file not in self._loaded_files:
755 for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
756 self._text_record_db.add(text_record)
757 self._loaded_files.add(cvs_rev.cvs_file)
759 return self._text_record_db[cvs_rev.id]
761 def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
762 """Check out the text for revision C_REV from the repository.
764 Return the text wrapped in a readable file object. If
765 SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
766 _un_expanded prior to returning the file content. Note that $Log$
767 never actually generates a log (which makes test 'requires_cvs()'
768 fail).
770 Revisions may be requested in any order, but if they are not
771 requested in dependency order the checkout database will become
772 very large. Revisions may be skipped. Each revision may be
773 requested only once."""
775 try:
776 text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
777 except MalformedDeltaException, (msg):
778 raise FatalError('Malformed RCS delta in %s, revision %s: %s'
779 % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
780 if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
781 if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
782 text = self._kw_re.sub(r'$\1$', text)
783 else:
784 text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
786 return StringIO(text)
788 def finish(self):
789 self._text_record_db.log_leftovers()
791 del self._text_record_db
792 self._delta_db.close()
793 self._tree_db.close()
794 self._co_db.close()