Extract functions generate_edits_from_blocks() and write_edits().
[cvs2svn.git] / cvs2svn_lib / checkout_internal.py
blob4197809defdcfccb959e774ef82b6218695feca7
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains classes that implement the --use-internal-co option.
19 The idea is to patch up the revisions' contents incrementally, thus
20 avoiding the huge number of process spawns and the O(n^2) overhead of
21 using 'co' and 'cvs'.
23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
24 to databases. Notably, deltas from the trunk need to be reversed, as
25 CVS stores them so they apply from HEAD backwards.
27 InternalRevisionReader produces the revisions' contents on demand. To
28 generate the text for a typical revision, we need the revision's delta
29 text plus the fulltext of the previous revision. Therefore, we
30 maintain a checkout database containing a copy of the fulltext of any
31 revision for which subsequent revisions still need to be retrieved.
32 It is crucial to remove text from this database as soon as it is no
33 longer needed, to prevent it from growing enormous.
35 There are two reasons that the text from a revision can be needed: (1)
36 because the revision itself still needs to be output to a dumpfile;
37 (2) because another revision needs it as the base of its delta. We
38 maintain a reference count for each revision, which includes *both*
39 possibilities. The first time a revision's text is needed, it is
40 generated by applying the revision's deltatext to the previous
41 revision's fulltext, and the resulting fulltext is stored in the
42 checkout database. Each time a revision's fulltext is retrieved, its
43 reference count is decremented. When the reference count goes to
44 zero, then the fulltext is deleted from the checkout database.
46 The administrative data for managing this consists of one TextRecord
47 entry for each revision. Each TextRecord has an id, which is the same
48 id as used for the corresponding CVSRevision instance. It also
49 maintains a count of the times it is expected to be retrieved.
50 TextRecords come in several varieties:
52 FullTextRecord -- Used for revisions whose fulltext is contained
53 directly in the RCS file, and therefore available during
54 CollectRevsPass (i.e., typically revision 1.1 of each file).
56 DeltaTextRecord -- Used for revisions that are defined via a delta
57 relative to some other TextRecord. These records record the id of
58 the TextRecord that holds the base text against which the delta is
59 defined. When the text for a DeltaTextRecord is retrieved, the
60 DeltaTextRecord instance is deleted and a CheckedOutTextRecord
61 instance is created to take its place.
63 CheckedOutTextRecord -- Used during OutputPass for a revision that
64 started out as a DeltaTextRecord, but has already been retrieved
65 (and therefore its fulltext is stored in the checkout database).
67 While a file is being processed during FilterSymbolsPass, the fulltext
68 and deltas are stored to the delta database, and TextRecord instances
69 are created to keep track of things. The reference counts are all
70 initialized: each record referred to by a delta has its refcount
71 incremented, and each record that corresponds to a non-delete
72 CVSRevision is incremented. After that, any records with refcount==0
73 are removed. When one record is removed, that can cause another
74 record's reference count to go to zero and be removed too,
75 recursively. When a TextRecord is deleted at this stage, its
76 deltatext is also deleted from the delta database."""
79 from cStringIO import StringIO
80 import re
81 import time
83 from cvs2svn_lib import config
84 from cvs2svn_lib.common import DB_OPEN_NEW
85 from cvs2svn_lib.common import DB_OPEN_READ
86 from cvs2svn_lib.common import warning_prefix
87 from cvs2svn_lib.common import FatalError
88 from cvs2svn_lib.common import InternalError
89 from cvs2svn_lib.common import is_trunk_revision
90 from cvs2svn_lib.context import Ctx
91 from cvs2svn_lib.log import Log
92 from cvs2svn_lib.artifact_manager import artifact_manager
93 from cvs2svn_lib.symbol import Trunk
94 from cvs2svn_lib.cvs_item import CVSRevisionModification
95 from cvs2svn_lib.database import Database
96 from cvs2svn_lib.database import IndexedDatabase
97 from cvs2svn_lib.rcs_stream import RCSStream
98 from cvs2svn_lib.rcs_stream import MalformedDeltaException
99 from cvs2svn_lib.revision_manager import RevisionCollector
100 from cvs2svn_lib.revision_manager import RevisionReader
101 from cvs2svn_lib.serializer import MarshalSerializer
102 from cvs2svn_lib.serializer import CompressingSerializer
103 from cvs2svn_lib.serializer import PrimedPickleSerializer
105 import cvs2svn_rcsparse
class TextRecord(object):
    """Bookkeeping data for the text of a single CVSRevision.

    This base class only manages the id and the reference count;
    checkout() and free() have to be supplied by subclasses."""

    __slots__ = ['id', 'refcount']

    def __init__(self, id):
        # The cvs_rev_id of the revision whose text this is.
        self.id = id

        # How many more times the text of this revision is going to be
        # retrieved.  Nothing has asked for it yet.
        self.refcount = 0

    def __getstate__(self):
        state = (self.id, self.refcount)
        return state

    def __setstate__(self, state):
        record_id, refcount = state
        self.id = record_id
        self.refcount = refcount

    def increment_dependency_refcounts(self, text_record_db):
        """Increment the refcounts of any records that this one depends on.

        A plain TextRecord depends on nothing, so this is a no-op."""

        return None

    def decrement_refcount(self, text_record_db):
        """Note that our text has to be checked out one time fewer.

        When the reference count reaches zero, ask TEXT_RECORD_DB to
        discard() this record."""

        self.refcount -= 1
        if self.refcount != 0:
            return
        text_record_db.discard(self.id)

    def checkout(self, text_record_db):
        """Workhorse of the checkout process.

        Return the text for this revision, decrement our reference count,
        and update the databases depending on whether there will be future
        checkouts.  Must be implemented by subclasses."""

        raise NotImplementedError()

    def free(self, text_record_db):
        """This instance will never again be checked out; free it.

        Also free any associated resources and decrement the refcounts of
        any other TextRecords that this one depends on.  Must be
        implemented by subclasses."""

        raise NotImplementedError()
class FullTextRecord(TextRecord):
    """A TextRecord whose text is stored as a fulltext in the delta database."""

    __slots__ = []

    def __getstate__(self):
        state = (self.id, self.refcount)
        return state

    def __setstate__(self, state):
        record_id, refcount = state
        self.id = record_id
        self.refcount = refcount

    def checkout(self, text_record_db):
        """Return our fulltext, read from the delta database under our id.

        One reference is consumed; decrement_refcount() discards this
        record when that was the last one."""

        fulltext = text_record_db.delta_db[self.id]
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Delete our fulltext from the delta database."""

        del text_record_db.delta_db[self.id]

    def __str__(self):
        fields = (self.id, self.refcount)
        return 'FullTextRecord(%x, %d)' % fields
class DeltaTextRecord(TextRecord):
    """A TextRecord whose text is defined as a delta against another record."""

    __slots__ = ['pred_id']

    def __init__(self, id, pred_id):
        TextRecord.__init__(self, id)

        # The cvs_rev_id of the revision relative to which this delta is
        # defined.
        self.pred_id = pred_id

    def __getstate__(self):
        state = (self.id, self.refcount, self.pred_id)
        return state

    def __setstate__(self, state):
        record_id, refcount, pred_id = state
        self.id = record_id
        self.refcount = refcount
        self.pred_id = pred_id

    def increment_dependency_refcounts(self, text_record_db):
        """Count our need for the predecessor's text as one reference to it."""

        predecessor = text_record_db[self.pred_id]
        predecessor.refcount += 1

    def checkout(self, text_record_db):
        """Reconstruct and return our fulltext.

        The predecessor's text is checked out (which uses up one of its
        references) and our delta from the delta database is applied to
        it.  If our text will be needed again, it is stored in the
        checkout database and a CheckedOutTextRecord takes our place;
        otherwise we are simply removed from TEXT_RECORD_DB."""

        base_text = text_record_db[self.pred_id].checkout(text_record_db)
        stream = RCSStream(base_text)
        stream.apply_diff(text_record_db.delta_db[self.id])
        text = stream.get_text()
        # The stream is not needed any more; let go of it right away.
        del stream

        remaining = self.refcount - 1
        self.refcount = remaining
        if remaining != 0:
            # Store a new CheckedOutTextRecord in place of ourselves:
            text_record_db.checkout_db['%x' % self.id] = text
            replacement = CheckedOutTextRecord(self.id)
            replacement.refcount = remaining
            text_record_db.replace(replacement)
        else:
            # This text will never be needed again; just delete ourselves
            # without ever having stored the fulltext to the checkout
            # database:
            del text_record_db[self.id]

        return text

    def free(self, text_record_db):
        """Delete our delta and give up our reference to the predecessor."""

        del text_record_db.delta_db[self.id]
        predecessor = text_record_db[self.pred_id]
        predecessor.decrement_refcount(text_record_db)

    def __str__(self):
        fields = (self.pred_id, self.id, self.refcount)
        return 'DeltaTextRecord(%x -> %x, %d)' % fields
class CheckedOutTextRecord(TextRecord):
    """A TextRecord whose fulltext is stored in the checkout database.

    The checkout database is keyed by the hexadecimal form of our id."""

    __slots__ = []

    def __getstate__(self):
        state = (self.id, self.refcount)
        return state

    def __setstate__(self, state):
        record_id, refcount = state
        self.id = record_id
        self.refcount = refcount

    def checkout(self, text_record_db):
        """Return our stored fulltext and use up one reference."""

        key = '%x' % self.id
        fulltext = text_record_db.checkout_db[key]
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Delete our fulltext from the checkout database."""

        key = '%x' % self.id
        del text_record_db.checkout_db[key]

    def __str__(self):
        fields = (self.id, self.refcount)
        return 'CheckedOutTextRecord(%x, %d)' % fields
class NullDatabase(object):
    """A do-nothing database that can be used with TextRecordDatabase.

    Its only operation is item deletion, which is silently ignored.  Use
    this when you don't actually want to allow anything to be deleted."""

    def __delitem__(self, id):
        # Deliberately ignore the request; nothing is stored here.
        return None
class TextRecordDatabase:
    """Holds the TextRecord instances that are currently live.

    During CollectRevsPass and FilterSymbolsPass, files are processed
    one by one and a new TextRecordDatabase instance is used for each
    file.  During OutputPass, a single TextRecordDatabase instance is
    used for the duration of OutputPass; individual records are added
    and removed when they are active."""

    def __init__(self, delta_db, checkout_db):
        # A map { cvs_rev_id -> TextRecord }.
        self.text_records = {}

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext/deltatext strings as values.  Its __getitem__() method
        # is used to retrieve deltas when they are needed, and its
        # __delitem__() method is used to delete deltas when they can be
        # freed.  The modifiability of the delta database varies from pass
        # to pass, so the object stored here varies as well:
        #
        # CollectRevsPass: a fully-functional IndexedDatabase.  This
        #     allows deltas that will not be needed to be deleted.
        #
        # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
        #     modified during this pass, and we have no need to retrieve
        #     deltas, so we just use a dummy object here.
        #
        # OutputPass: a disabled IndexedDatabase.  During this pass we
        #     need to retrieve deltas, but we are not allowed to modify
        #     the delta database.  So we use an IndexedDatabase whose
        #     __delitem__() method has been disabled to do nothing.
        self.delta_db = delta_db

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext strings as values.  This database is only set during
        # OutputPass.
        self.checkout_db = checkout_db

        # If this is set to a list, then the list holds the ids of
        # text_records that have to be deleted; when discard() is called,
        # it adds the requested id to the list but does not delete it.  If
        # this member is set to None, then text_records are deleted
        # immediately when discard() is called.
        self.deferred_deletes = None

    def __getstate__(self):
        # Only the records themselves are persisted, not the databases.
        return (self.text_records.values(),)

    def __setstate__(self, state):
        """Restore the records; both databases become NullDatabases."""

        (text_records,) = state
        self.text_records = {}
        for text_record in text_records:
            self.add(text_record)
        self.delta_db = NullDatabase()
        self.checkout_db = NullDatabase()
        self.deferred_deletes = None

    def add(self, text_record):
        """Add TEXT_RECORD to our database.

        There must not already be a record with the same id."""

        assert text_record.id not in self.text_records

        self.text_records[text_record.id] = text_record

    def __getitem__(self, id):
        """Return the record with the specified ID (KeyError if absent)."""

        return self.text_records[id]

    def __delitem__(self, id):
        """Remove the record with the specified ID from SELF.

        The record's free() method is not called here; see discard()."""

        del self.text_records[id]

    def replace(self, text_record):
        """Store TEXT_RECORD in place of the existing record with the same id.

        Do not do anything with the old record."""

        assert text_record.id in self.text_records
        self.text_records[text_record.id] = text_record

    def discard(self, *ids):
        """The text records with IDS are no longer needed; discard them.

        This involves calling their free() methods and also removing them
        from SELF.

        If SELF.deferred_deletes is not None, then the ids to be deleted
        are added to the list instead of deleted immediately.  This
        mechanism is to prevent a stack overflow from the avalanche of
        deletes that can result from deleting a long chain of revisions.

        Raise InternalError if a record to be discarded still has a
        nonzero refcount."""

        if self.deferred_deletes is not None:
            # An outer-level discard() is in progress; let it do the work.
            self.deferred_deletes.extend(ids)
            return

        # This is an outer-level delete.
        self.deferred_deletes = list(ids)
        try:
            while self.deferred_deletes:
                id = self.deferred_deletes.pop()
                text_record = self[id]
                if text_record.refcount != 0:
                    raise InternalError(
                        'TextRecordDatabase.discard(%s) called with refcount = %d'
                        % (text_record, text_record.refcount,)
                        )
                # This call might cause other text_record ids to be added to
                # self.deferred_deletes:
                text_record.free(self)
                del self[id]
        finally:
            # Always leave deferred mode, even if something above raised.
            # Otherwise every later discard() would only queue its ids and
            # nothing would ever be freed again.
            self.deferred_deletes = None

    def itervalues(self):
        """Return an iterator over the contained TextRecords."""

        return self.text_records.itervalues()

    def recompute_refcounts(self, cvs_file_items):
        """Recompute the refcounts of the contained TextRecords.

        Use CVS_FILE_ITEMS to determine which records will be needed by
        cvs2svn."""

        # First clear all of the refcounts:
        for text_record in self.itervalues():
            text_record.refcount = 0

        # Now increment the reference count of records that are needed as
        # the source of another record's deltas.  The records only index
        # into the object they are given, so SELF serves as well as the
        # underlying dictionary and matches what the other TextRecord
        # methods receive:
        for text_record in self.itervalues():
            text_record.increment_dependency_refcounts(self)

        # Now increment the reference count of records that will be needed
        # by cvs2svn:
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                if isinstance(cvs_rev, CVSRevisionModification):
                    self[cvs_rev.id].refcount += 1

    def free_unused(self):
        """Free any TextRecords whose reference counts are zero."""

        # The deletion of some of these text records might cause others to
        # be unused, in which case they will be deleted automatically.
        # But since the initially-unused records are not referred to by
        # any others, we don't have to be afraid that they will be deleted
        # before we get to them.  But it *is* crucial that we create the
        # whole unused list before starting the loop.

        unused = [
            text_record.id
            for text_record in self.itervalues()
            if text_record.refcount == 0
            ]

        self.discard(*unused)

    def log_leftovers(self):
        """If any TextRecords still exist, log them."""

        if self.text_records:
            Log().warn(
                "%s: internal problem: leftover revisions in the checkout cache:"
                % warning_prefix)
            for text_record in self.itervalues():
                Log().warn(' %s' % (text_record,))

    def __repr__(self):
        """Debugging output of the current contents of the TextRecordDatabase."""

        retval = ['TextRecordDatabase:']
        for text_record in self.itervalues():
            retval.append(' %s' % (text_record,))
        return '\n'.join(retval)
434 class _Sink(cvs2svn_rcsparse.Sink):
435 def __init__(self, revision_collector, cvs_file_items):
436 self.revision_collector = revision_collector
437 self.cvs_file_items = cvs_file_items
439 # A map {rev : base_rev} indicating that the text for rev is
440 # stored in CVS as a delta relative to base_rev.
441 self.base_revisions = {}
443 # The revision that is stored with its fulltext in CVS (usually
444 # the oldest revision on trunk):
445 self.head_revision = None
447 # The first logical revision on trunk (usually '1.1'):
448 self.revision_1_1 = None
450 # Keep track of the revisions whose revision info has been seen so
451 # far (to avoid repeated revision info blocks):
452 self.revisions_seen = set()
454 def set_head_revision(self, revision):
455 self.head_revision = revision
457 def define_revision(
458 self, revision, timestamp, author, state, branches, next
460 if next:
461 self.base_revisions[next] = revision
462 else:
463 if is_trunk_revision(revision):
464 self.revision_1_1 = revision
466 for branch in branches:
467 self.base_revisions[branch] = revision
469 def set_revision_info(self, revision, log, text):
470 if revision in self.revisions_seen:
471 # One common form of CVS repository corruption is that the
472 # Deltatext block for revision 1.1 appears twice. CollectData
473 # has already warned about this problem; here we can just ignore
474 # it.
475 return
476 else:
477 self.revisions_seen.add(revision)
479 cvs_rev_id = self.cvs_file_items.original_ids[revision]
480 if is_trunk_revision(revision):
481 # On trunk, revisions are encountered in reverse order (1.<N>
482 # ... 1.1) and deltas are inverted. The first text that we see
483 # is the fulltext for the HEAD revision. After that, the text
484 # corresponding to revision 1.N is the delta (1.<N+1> ->
485 # 1.<N>)). We have to invert the deltas here so that we can
486 # read the revisions out in dependency order; that is, for
487 # revision 1.1 we want the fulltext, and for revision 1.<N> we
488 # want the delta (1.<N-1> -> 1.<N>). This means that we can't
489 # compute the delta for a revision until we see its logical
490 # parent. When we finally see revision 1.1 (which is recognized
491 # because it doesn't have a parent), we can record the diff (1.1
492 # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
494 if revision == self.head_revision:
495 # This is HEAD, as fulltext. Initialize the RCSStream so
496 # that we can compute deltas backwards in time.
497 self._stream = RCSStream(text)
498 self._stream_revision = revision
499 else:
500 # Any other trunk revision is a backward delta. Apply the
501 # delta to the RCSStream to mutate it to the contents of this
502 # revision, and also to get the reverse delta, which we store
503 # as the forward delta of our child revision.
504 try:
505 text = self._stream.invert_diff(text)
506 except MalformedDeltaException, e:
507 Log().error(
508 'Malformed RCS delta in %s, revision %s: %s'
509 % (self.cvs_file_items.cvs_file.filename, revision, e)
511 raise RuntimeError()
512 text_record = DeltaTextRecord(
513 self.cvs_file_items.original_ids[self._stream_revision],
514 cvs_rev_id
516 self.revision_collector._writeout(text_record, text)
517 self._stream_revision = revision
519 if revision == self.revision_1_1:
520 # This is revision 1.1. Write its fulltext:
521 text_record = FullTextRecord(cvs_rev_id)
522 self.revision_collector._writeout(
523 text_record, self._stream.get_text()
526 # There will be no more trunk revisions delivered, so free the
527 # RCSStream.
528 del self._stream
529 del self._stream_revision
531 else:
532 # On branches, revisions are encountered in logical order
533 # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
534 # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
535 # <BRANCH>.<N>). That's what we need, so just store it.
537 # FIXME: It would be nice to avoid writing out branch deltas
538 # when --trunk-only. (They will be deleted when finish_file()
539 # is called, but if the delta db is in an IndexedDatabase the
540 # deletions won't actually recover any disk space.)
541 text_record = DeltaTextRecord(
542 cvs_rev_id,
543 self.cvs_file_items.original_ids[self.base_revisions[revision]]
545 self.revision_collector._writeout(text_record, text)
547 return None
class InternalRevisionCollector(RevisionCollector):
    """The RevisionCollector used by InternalRevisionReader.

    It stores the RCS texts/deltas in the _rcs_deltas database and, for
    each file, the TextRecordDatabase describing them in the _rcs_trees
    database."""

    def __init__(self, compress):
        RevisionCollector.__init__(self)

        # If true, the stored deltas are wrapped in a
        # CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the temporary files that this collector creates."""

        artifact_manager.register_temp_file(
            config.RCS_DELTAS_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
        artifact_manager.register_temp_file(
            config.RCS_TREES_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

    def start(self):
        """Create the (new, empty) delta and tree databases."""

        ser = MarshalSerializer()
        if self._compress:
            ser = CompressingSerializer(ser)
        self._rcs_deltas = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_NEW, ser
            )
        primer = (FullTextRecord, DeltaTextRecord)
        self._rcs_trees = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_NEW, PrimedPickleSerializer(primer)
            )

    def _writeout(self, text_record, text):
        """Register TEXT_RECORD for the current file and store its TEXT."""

        self.text_record_db.add(text_record)
        self._rcs_deltas[text_record.id] = text

    def process_file(self, cvs_file_items):
        """Read revision information for the file described by CVS_FILE_ITEMS.

        Compute the text record refcounts, discard any records that are
        unneeded, and store the text records for the file to the
        _rcs_trees database."""

        # A map from cvs_rev_id to TextRecord instance:
        self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

        rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
        try:
            cvs2svn_rcsparse.parse(
                rcs_file,
                _Sink(self, cvs_file_items),
                )
        finally:
            # Close the RCS file explicitly, whether or not parsing
            # succeeded, instead of leaving the handle open until it is
            # garbage collected.
            rcs_file.close()

        self.text_record_db.recompute_refcounts(cvs_file_items)
        self.text_record_db.free_unused()
        self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
        del self.text_record_db

    def finish(self):
        """Close both databases."""

        self._rcs_deltas.close()
        self._rcs_trees.close()
class _KeywordExpander:
    """A class whose instances provide substitutions for CVS keywords.

    An instance is used by calling it with a match object whose first
    group is the name of a CVS keyword; the call returns the expanded
    form of the keyword.

    The expansion is computed by the method whose name is the keyword
    name converted to lower case.

    Instances of this class can be passed as the REPL argument to
    re.sub()."""

    date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
    date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12

    date_fmt = date_fmt_new

    @classmethod
    def use_old_date_format(klass):
        """Class method to ensure exact compatibility with CVS 1.11
        output.  Use this if you want to verify your conversion and you're
        using CVS 1.11."""
        klass.date_fmt = klass.date_fmt_old

    def __init__(self, cvs_rev):
        self.cvs_rev = cvs_rev

    def __call__(self, match):
        keyword = match.group(1)
        expand = getattr(self, keyword.lower())
        return '$%s: %s $' % (keyword, expand(),)

    def author(self):
        metadata = Ctx()._metadata_db[self.cvs_rev.metadata_id]
        return metadata.original_author

    def date(self):
        utc_time = time.gmtime(self.cvs_rev.timestamp)
        return time.strftime(self.date_fmt, utc_time)

    def header(self):
        fields = (self.source(), self.cvs_rev.rev, self.date(), self.author())
        return '%s %s %s %s Exp' % fields

    def id(self):
        fields = (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
        return '%s %s %s %s Exp' % fields

    def locker(self):
        # Handle kvl like kv, as a converted repo is supposed to have no
        # locks.
        return ''

    def log(self):
        # Would need some special handling.
        return 'not supported by cvs2svn'

    def name(self):
        # Cannot work, as just creating a new symbol does not check out
        # the revision again.
        return 'not supported by cvs2svn'

    def rcsfile(self):
        cvs_file = self.cvs_rev.cvs_file
        return cvs_file.basename + ",v"

    def revision(self):
        return self.cvs_rev.rev

    def source(self):
        cvs_file = self.cvs_rev.cvs_file
        project = cvs_file.project
        module_root = project.cvs_repository_root + '/' + project.cvs_module
        return module_root + cvs_file.cvs_path + ",v"

    def state(self):
        # We check out only live revisions.
        return 'Exp'
689 class InternalRevisionReader(RevisionReader):
690 """A RevisionReader that reads the contents from an own delta store."""
692 _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
693 _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
694 _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
696 def __init__(self, compress):
697 self._compress = compress
699 def register_artifacts(self, which_pass):
700 artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
701 artifact_manager.register_temp_file_needed(
702 config.RCS_DELTAS_STORE, which_pass
704 artifact_manager.register_temp_file_needed(
705 config.RCS_DELTAS_INDEX_TABLE, which_pass
707 artifact_manager.register_temp_file_needed(
708 config.RCS_TREES_STORE, which_pass
710 artifact_manager.register_temp_file_needed(
711 config.RCS_TREES_INDEX_TABLE, which_pass
714 def start(self):
715 self._delta_db = IndexedDatabase(
716 artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
717 artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
718 DB_OPEN_READ)
719 self._delta_db.__delitem__ = lambda id: None
720 self._tree_db = IndexedDatabase(
721 artifact_manager.get_temp_file(config.RCS_TREES_STORE),
722 artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
723 DB_OPEN_READ)
724 ser = MarshalSerializer()
725 if self._compress:
726 ser = CompressingSerializer(ser)
727 self._co_db = Database(
728 artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
729 ser)
731 # The set of CVSFile instances whose TextRecords have already been
732 # read:
733 self._loaded_files = set()
735 # A map { CVSFILE : _FileTree } for files that currently have live
736 # revisions:
737 self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
739 def _get_text_record(self, cvs_rev):
740 """Return the TextRecord instance for CVS_REV.
742 If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
743 do so now."""
745 if cvs_rev.cvs_file not in self._loaded_files:
746 for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
747 self._text_record_db.add(text_record)
748 self._loaded_files.add(cvs_rev.cvs_file)
750 return self._text_record_db[cvs_rev.id]
752 def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
753 """Check out the text for revision C_REV from the repository.
755 Return the text wrapped in a readable file object. If
756 SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
757 _un_expanded prior to returning the file content. Note that $Log$
758 never actually generates a log (which makes test 'requires_cvs()'
759 fail).
761 Revisions may be requested in any order, but if they are not
762 requested in dependency order the checkout database will become
763 very large. Revisions may be skipped. Each revision may be
764 requested only once."""
766 try:
767 text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
768 except MalformedDeltaException, (msg):
769 raise FatalError('Malformed RCS delta in %s, revision %s: %s'
770 % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
771 if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
772 if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
773 text = self._kw_re.sub(r'$\1$', text)
774 else:
775 text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
777 return StringIO(text)
779 def finish(self):
780 self._text_record_db.log_leftovers()
782 del self._text_record_db
783 self._delta_db.close()
784 self._tree_db.close()
785 self._co_db.close()