Formatting.
[cvs2svn.git] / cvs2svn_lib / checkout_internal.py
blob74ca2a6f1008683658a30074479b6ff397770a56
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains classes that implement the --use-internal-co option.
19 The idea is to patch up the revisions' contents incrementally, thus
20 avoiding the huge number of process spawns and the O(n^2) overhead of
21 using 'co' and 'cvs'.
23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
24 to databases. Notably, deltas from the trunk need to be reversed, as
25 CVS stores them so they apply from HEAD backwards.
27 InternalRevisionReader produces the revisions' contents on demand. To
28 generate the text for a typical revision, we need the revision's delta
29 text plus the fulltext of the previous revision. Therefore, we
30 maintain a checkout database containing a copy of the fulltext of any
31 revision for which subsequent revisions still need to be retrieved.
32 It is crucial to remove text from this database as soon as it is no
33 longer needed, to prevent it from growing enormous.
35 There are two reasons that the text from a revision can be needed: (1)
36 because the revision itself still needs to be output to a dumpfile;
37 (2) because another revision needs it as the base of its delta. We
38 maintain a reference count for each revision, which includes *both*
39 possibilities. The first time a revision's text is needed, it is
40 generated by applying the revision's deltatext to the previous
41 revision's fulltext, and the resulting fulltext is stored in the
42 checkout database. Each time a revision's fulltext is retrieved, its
43 reference count is decremented. When the reference count goes to
44 zero, then the fulltext is deleted from the checkout database.
46 The administrative data for managing this consists of one TextRecord
47 entry for each revision. Each TextRecord has an id, which is the same
48 id as used for the corresponding CVSRevision instance. It also
49 maintains a count of the times it is expected to be retrieved.
50 TextRecords come in several varieties:
52 FullTextRecord -- Used for revisions whose fulltext is contained
53 directly in the RCS file, and therefore available during
54 CollectRevsPass (i.e., typically revision 1.1 of each file).
56 DeltaTextRecord -- Used for revisions that are defined via a delta
57 relative to some other TextRecord. These records record the id of
58 the TextRecord that holds the base text against which the delta is
59 defined. When the text for a DeltaTextRecord is retrieved, the
60 DeltaTextRecord instance is deleted and a CheckedOutTextRecord
61 instance is created to take its place.
63 CheckedOutTextRecord -- Used during OutputPass for a revision that
64 started out as a DeltaTextRecord, but has already been retrieved
65 (and therefore its fulltext is stored in the checkout database).
67 While a file is being processed during FilterSymbolsPass, the fulltext
68 and deltas are stored to the delta database, and TextRecord instances
69 are created to keep track of things. The reference counts are all
70 initialized: each record referred to by a delta has its refcount
71 incremented, and each record that corresponds to a non-delete
72 CVSRevision has its refcount incremented. After that, any records with refcount==0
73 are removed. When one record is removed, that can cause another
74 record's reference count to go to zero and be removed too,
75 recursively. When a TextRecord is deleted at this stage, its
76 deltatext is also deleted from the delta database."""
79 from cStringIO import StringIO
80 import re
81 import time
83 from cvs2svn_lib import config
84 from cvs2svn_lib.common import DB_OPEN_NEW
85 from cvs2svn_lib.common import DB_OPEN_READ
86 from cvs2svn_lib.common import warning_prefix
87 from cvs2svn_lib.common import FatalError
88 from cvs2svn_lib.common import InternalError
89 from cvs2svn_lib.common import canonicalize_eol
90 from cvs2svn_lib.common import is_trunk_revision
91 from cvs2svn_lib.context import Ctx
92 from cvs2svn_lib.log import Log
93 from cvs2svn_lib.artifact_manager import artifact_manager
94 from cvs2svn_lib.symbol import Trunk
95 from cvs2svn_lib.cvs_item import CVSRevisionModification
96 from cvs2svn_lib.database import Database
97 from cvs2svn_lib.database import IndexedDatabase
98 from cvs2svn_lib.rcs_stream import RCSStream
99 from cvs2svn_lib.rcs_stream import MalformedDeltaException
100 from cvs2svn_lib.revision_manager import RevisionCollector
101 from cvs2svn_lib.revision_manager import RevisionReader
102 from cvs2svn_lib.serializer import MarshalSerializer
103 from cvs2svn_lib.serializer import CompressingSerializer
104 from cvs2svn_lib.serializer import PrimedPickleSerializer
105 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
107 import cvs2svn_rcsparse
class TextRecord(object):
    """Base class for the bookkeeping record of one CVSRevision's text.

    A record knows which revision it belongs to and how many more times
    that revision's text is expected to be retrieved.  Subclasses
    provide checkout() and free()."""

    __slots__ = ['id', 'refcount']

    def __init__(self, id):
        # Number of retrievals of this text that are still outstanding.
        self.refcount = 0

        # The cvs_rev_id of the revision that this text belongs to.
        self.id = id

    def __getstate__(self):
        state = (self.id, self.refcount)
        return state

    def __setstate__(self, state):
        self.id, self.refcount = state

    def increment_dependency_refcounts(self, text_record_db):
        """Bump the refcount of every record that this one is built from.

        The base implementation depends on nothing, so it does nothing."""

    def decrement_refcount(self, text_record_db):
        """Note that one of the expected retrievals has happened.

        When no retrievals remain, ask TEXT_RECORD_DB to discard this
        record."""

        remaining = self.refcount - 1
        self.refcount = remaining
        if remaining == 0:
            text_record_db.discard(self.id)

    def checkout(self, text_record_db):
        """Return this revision's text and account for the retrieval.

        Implementations are expected to decrement the reference count and
        to update the databases according to whether further checkouts
        are still pending.  Must be overridden."""

        raise NotImplementedError()

    def free(self, text_record_db):
        """Release this record, which will never be checked out again.

        Implementations are expected to release associated resources and
        to decrement the refcounts of the records this one depends on.
        Must be overridden."""

        raise NotImplementedError()
class FullTextRecord(TextRecord):
    """TextRecord for a revision whose fulltext is stored directly.

    The text is kept in the delta database of the TextRecordDatabase,
    under this record's id."""

    __slots__ = []

    def __getstate__(self):
        state = (self.id, self.refcount)
        return state

    def __setstate__(self, state):
        self.id, self.refcount = state

    def checkout(self, text_record_db):
        """Return the stored fulltext and count this retrieval."""

        # Read the text first: the decrement below may discard this record.
        fulltext = text_record_db.delta_db[self.id]
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Delete the stored fulltext from the delta database."""

        del text_record_db.delta_db[self.id]

    def __str__(self):
        fields = (self.id, self.refcount)
        return 'FullTextRecord(%x, %d)' % fields
class DeltaTextRecord(TextRecord):
    """TextRecord for a revision stored as a delta against another record.

    The deltatext is kept in the delta database under this record's id;
    PRED_ID names the record whose text the delta applies to."""

    __slots__ = ['pred_id']

    def __init__(self, id, pred_id):
        TextRecord.__init__(self, id)

        # The cvs_rev_id of the revision that this delta is relative to.
        self.pred_id = pred_id

    def __getstate__(self):
        state = (self.id, self.refcount, self.pred_id)
        return state

    def __setstate__(self, state):
        self.id, self.refcount, self.pred_id = state

    def increment_dependency_refcounts(self, text_record_db):
        """Count one pending retrieval of the base record's text."""

        base_record = text_record_db[self.pred_id]
        base_record.refcount += 1

    def checkout(self, text_record_db):
        """Rebuild and return this revision's text.

        The base record is checked out, the delta is applied to it, and
        this record's refcount is decremented.  If more retrievals are
        pending, the fulltext is saved in the checkout database and a
        CheckedOutTextRecord takes this record's place; otherwise this
        record is simply removed from TEXT_RECORD_DB."""

        base_record = text_record_db[self.pred_id]
        stream = RCSStream(base_record.checkout(text_record_db))
        stream.apply_diff(text_record_db.delta_db[self.id])
        text = stream.get_text()
        del stream

        self.refcount -= 1
        if self.refcount != 0:
            # The text will be wanted again, so cache it and hand the
            # remaining refcount over to a CheckedOutTextRecord:
            text_record_db.checkout_db['%x' % self.id] = text
            replacement = CheckedOutTextRecord(self.id)
            replacement.refcount = self.refcount
            text_record_db.replace(replacement)
        else:
            # Nobody will ask for this text again; drop the record without
            # ever writing the fulltext to the checkout database:
            del text_record_db[self.id]

        return text

    def free(self, text_record_db):
        """Delete the deltatext and release the reference on the base."""

        del text_record_db.delta_db[self.id]
        base_record = text_record_db[self.pred_id]
        base_record.decrement_refcount(text_record_db)

    def __str__(self):
        fields = (self.pred_id, self.id, self.refcount)
        return 'DeltaTextRecord(%x -> %x, %d)' % fields
class CheckedOutTextRecord(TextRecord):
    """TextRecord for a revision whose fulltext is in the checkout database.

    The text is stored under the hexadecimal form of this record's id."""

    __slots__ = []

    def __getstate__(self):
        state = (self.id, self.refcount)
        return state

    def __setstate__(self, state):
        self.id, self.refcount = state

    def checkout(self, text_record_db):
        """Return the cached fulltext and count this retrieval."""

        # Read the text first: the decrement below may discard this record.
        key = '%x' % self.id
        fulltext = text_record_db.checkout_db[key]
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Delete the cached fulltext from the checkout database."""

        key = '%x' % self.id
        del text_record_db.checkout_db[key]

    def __str__(self):
        fields = (self.id, self.refcount)
        return 'CheckedOutTextRecord(%x, %d)' % fields
class NullDatabase(object):
    """Stand-in database whose deletions are silently ignored.

    Hand one of these to TextRecordDatabase when nothing should really
    be deleted."""

    def __delitem__(self, id):
        # Deliberately a no-op.
        return None
class TextRecordDatabase:
    """Holds the TextRecord instances that are currently live.

    During CollectRevsPass and FilterSymbolsPass, files are processed
    one by one and a new TextRecordDatabase instance is used for each
    file.  During OutputPass, a single TextRecordDatabase instance is
    used for the duration of OutputPass; individual records are added
    and removed when they are active."""

    def __init__(self, delta_db, checkout_db):
        # A map { cvs_rev_id -> TextRecord }.
        self.text_records = {}

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext/deltatext strings as values.  Its __getitem__() method
        # is used to retrieve deltas when they are needed, and its
        # __delitem__() method is used to delete deltas when they can be
        # freed.  The modifiability of the delta database varies from pass
        # to pass, so the object stored here varies as well:
        #
        # CollectRevsPass: a fully-functional IndexedDatabase.  This
        #     allows deltas that will not be needed to be deleted.
        #
        # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
        #     modified during this pass, and we have no need to retrieve
        #     deltas, so we just use a dummy object here.
        #
        # OutputPass: a disabled IndexedDatabase.  During this pass we
        #     need to retrieve deltas, but we are not allowed to modify
        #     the delta database.  So we use an IndexedDatabase whose
        #     __delitem__() method has been disabled to do nothing.
        self.delta_db = delta_db

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext strings as values.  This database is only set during
        # OutputPass.
        self.checkout_db = checkout_db

        # If this is set to a list, then the list holds the ids of
        # text_records that have to be deleted; when discard() is called,
        # it adds the requested id to the list but does not delete it.  If
        # this member is set to None, then text_records are deleted
        # immediately when discard() is called.
        self.deferred_deletes = None

    def __getstate__(self):
        # Only the records themselves are pickled; the databases are not.
        return (self.text_records.values(),)

    def __setstate__(self, state):
        (text_records,) = state
        self.text_records = {}
        for text_record in text_records:
            self.add(text_record)
        # An unpickled instance gets do-nothing databases.
        self.delta_db = NullDatabase()
        self.checkout_db = NullDatabase()
        self.deferred_deletes = None

    def add(self, text_record):
        """Add TEXT_RECORD to our database.

        There must not already be a record with the same id."""

        assert text_record.id not in self.text_records

        self.text_records[text_record.id] = text_record

    def __getitem__(self, id):
        """Return the record with the specified ID (KeyError if absent)."""

        return self.text_records[id]

    def __delitem__(self, id):
        """Remove the record with the specified ID from SELF.

        The record's free() method is not called here; see discard()."""

        del self.text_records[id]

    def replace(self, text_record):
        """Store TEXT_RECORD in place of the existing record with the same id.

        Do not do anything with the old record."""

        assert text_record.id in self.text_records
        self.text_records[text_record.id] = text_record

    def discard(self, *ids):
        """The text records with IDS are no longer needed; discard them.

        This involves calling their free() methods and also removing them
        from SELF.

        If SELF.deferred_deletes is not None, then the ids to be deleted
        are added to the list instead of deleted immediately.  This
        mechanism is to prevent a stack overflow from the avalanche of
        deletes that can result from deleting a long chain of revisions.

        Raise InternalError if a record to be discarded still has a
        nonzero refcount."""

        if self.deferred_deletes is not None:
            # An outer discard() is already running; let it do the work.
            self.deferred_deletes.extend(ids)
            return

        # This is an outer-level delete.
        self.deferred_deletes = list(ids)
        try:
            while self.deferred_deletes:
                doomed_id = self.deferred_deletes.pop()
                text_record = self[doomed_id]
                if text_record.refcount != 0:
                    raise InternalError(
                        'TextRecordDatabase.discard(%s) called with refcount = %d'
                        % (text_record, text_record.refcount,)
                        )
                # This call might cause other text_record ids to be added to
                # self.deferred_deletes:
                text_record.free(self)
                del self[doomed_id]
        finally:
            # Always leave deferred mode, even if something above raised;
            # otherwise every later discard() would only queue its ids and
            # never free anything.
            self.deferred_deletes = None

    def itervalues(self):
        """Iterate over the TextRecords currently held."""

        return self.text_records.itervalues()

    def recompute_refcounts(self, cvs_file_items):
        """Recompute the refcounts of the contained TextRecords.

        Use CVS_FILE_ITEMS to determine which records will be needed by
        cvs2svn."""

        # First clear all of the refcounts:
        for text_record in self.itervalues():
            text_record.refcount = 0

        # Now increment the reference count of records that are needed as
        # the source of another record's deltas:
        for text_record in self.itervalues():
            text_record.increment_dependency_refcounts(self)

        # Now increment the reference count of records that will be needed
        # by cvs2svn:
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                if isinstance(cvs_rev, CVSRevisionModification):
                    self[cvs_rev.id].refcount += 1

    def free_unused(self):
        """Free any TextRecords whose reference counts are zero."""

        # The deletion of some of these text records might cause others to
        # be unused, in which case they will be deleted automatically.
        # But since the initially-unused records are not referred to by
        # any others, we don't have to be afraid that they will be deleted
        # before we get to them.  But it *is* crucial that we create the
        # whole unused list before starting the loop.

        unused = [
            text_record.id
            for text_record in self.itervalues()
            if text_record.refcount == 0
            ]

        self.discard(*unused)

    def log_leftovers(self):
        """If any TextRecords still exist, log them."""

        if self.text_records:
            Log().warn(
                "%s: internal problem: leftover revisions in the checkout cache:"
                % warning_prefix)
            for text_record in self.itervalues():
                Log().warn('  %s' % (text_record,))

    def __repr__(self):
        """Debugging output of the current contents of the TextRecordDatabase."""

        retval = ['TextRecordDatabase:']
        for text_record in self.itervalues():
            retval.append('  %s' % (text_record,))
        return '\n'.join(retval)
437 class _Sink(cvs2svn_rcsparse.Sink):
438 def __init__(self, revision_collector, cvs_file_items):
439 self.revision_collector = revision_collector
440 self.cvs_file_items = cvs_file_items
442 # A map {rev : base_rev} indicating that the text for rev is
443 # stored in CVS as a delta relative to base_rev.
444 self.base_revisions = {}
446 # The revision that is stored with its fulltext in CVS (usually
447 # the oldest revision on trunk):
448 self.head_revision = None
450 # The first logical revision on trunk (usually '1.1'):
451 self.revision_1_1 = None
453 # Keep track of the revisions whose revision info has been seen so
454 # far (to avoid repeated revision info blocks):
455 self.revisions_seen = set()
457 def set_head_revision(self, revision):
458 self.head_revision = revision
460 def define_revision(
461 self, revision, timestamp, author, state, branches, next
463 if next:
464 self.base_revisions[next] = revision
465 else:
466 if is_trunk_revision(revision):
467 self.revision_1_1 = revision
469 for branch in branches:
470 self.base_revisions[branch] = revision
472 def set_revision_info(self, revision, log, text):
473 if revision in self.revisions_seen:
474 # One common form of CVS repository corruption is that the
475 # Deltatext block for revision 1.1 appears twice. CollectData
476 # has already warned about this problem; here we can just ignore
477 # it.
478 return
479 else:
480 self.revisions_seen.add(revision)
482 cvs_rev_id = self.cvs_file_items.original_ids[revision]
483 if is_trunk_revision(revision):
484 # On trunk, revisions are encountered in reverse order (1.<N>
485 # ... 1.1) and deltas are inverted. The first text that we see
486 # is the fulltext for the HEAD revision. After that, the text
487 # corresponding to revision 1.N is the delta (1.<N+1> ->
488 # 1.<N>)). We have to invert the deltas here so that we can
489 # read the revisions out in dependency order; that is, for
490 # revision 1.1 we want the fulltext, and for revision 1.<N> we
491 # want the delta (1.<N-1> -> 1.<N>). This means that we can't
492 # compute the delta for a revision until we see its logical
493 # parent. When we finally see revision 1.1 (which is recognized
494 # because it doesn't have a parent), we can record the diff (1.1
495 # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
497 if revision == self.head_revision:
498 # This is HEAD, as fulltext. Initialize the RCSStream so
499 # that we can compute deltas backwards in time.
500 self._stream = RCSStream(text)
501 self._stream_revision = revision
502 else:
503 # Any other trunk revision is a backward delta. Apply the
504 # delta to the RCSStream to mutate it to the contents of this
505 # revision, and also to get the reverse delta, which we store
506 # as the forward delta of our child revision.
507 try:
508 text = self._stream.invert_diff(text)
509 except MalformedDeltaException, e:
510 Log().error(
511 'Malformed RCS delta in %s, revision %s: %s'
512 % (self.cvs_file_items.cvs_file.filename, revision, e)
514 raise RuntimeError()
515 text_record = DeltaTextRecord(
516 self.cvs_file_items.original_ids[self._stream_revision],
517 cvs_rev_id
519 self.revision_collector._writeout(text_record, text)
520 self._stream_revision = revision
522 if revision == self.revision_1_1:
523 # This is revision 1.1. Write its fulltext:
524 text_record = FullTextRecord(cvs_rev_id)
525 self.revision_collector._writeout(
526 text_record, self._stream.get_text()
529 # There will be no more trunk revisions delivered, so free the
530 # RCSStream.
531 del self._stream
532 del self._stream_revision
534 else:
535 # On branches, revisions are encountered in logical order
536 # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
537 # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
538 # <BRANCH>.<N>). That's what we need, so just store it.
540 # FIXME: It would be nice to avoid writing out branch deltas
541 # when --trunk-only. (They will be deleted when finish_file()
542 # is called, but if the delta db is in an IndexedDatabase the
543 # deletions won't actually recover any disk space.)
544 text_record = DeltaTextRecord(
545 cvs_rev_id,
546 self.cvs_file_items.original_ids[self.base_revisions[revision]]
548 self.revision_collector._writeout(text_record, text)
550 return None
class InternalRevisionCollector(RevisionCollector):
    """The RevisionCollector used by InternalRevisionReader.

    It writes every revision's fulltext or deltatext to the RCS deltas
    database and stores one TextRecordDatabase per file in the RCS trees
    database."""

    def __init__(self, compress):
        RevisionCollector.__init__(self)

        # Whether the stored texts should go through a
        # CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the temporary files that this collector creates."""

        for artifact in (
              config.RCS_DELTAS_INDEX_TABLE,
              config.RCS_DELTAS_STORE,
              config.RCS_TREES_INDEX_TABLE,
              config.RCS_TREES_STORE,
              ):
            artifact_manager.register_temp_file(artifact, which_pass)

    def start(self):
        """Create the (new, empty) deltas and trees databases."""

        ser = MarshalSerializer()
        if self._compress:
            ser = CompressingSerializer(ser)
        self._rcs_deltas = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_NEW, ser
            )
        primer = (FullTextRecord, DeltaTextRecord)
        self._rcs_trees = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_NEW, PrimedPickleSerializer(primer)
            )

    def _writeout(self, text_record, text):
        """Register TEXT_RECORD for the current file and store TEXT for it."""

        self.text_record_db.add(text_record)
        self._rcs_deltas[text_record.id] = text

    def process_file(self, cvs_file_items):
        """Read revision information for the file described by CVS_FILE_ITEMS.

        Compute the text record refcounts, discard any records that are
        unneeded, and store the text records for the file to the
        _rcs_trees database."""

        # A map from cvs_rev_id to TextRecord instance:
        self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

        # Close the RCS file explicitly (also when parsing fails) instead
        # of leaving the handle open until it happens to be collected.
        rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
        try:
            cvs2svn_rcsparse.parse(
                rcs_file,
                _Sink(self, cvs_file_items),
                )
        finally:
            rcs_file.close()

        self.text_record_db.recompute_refcounts(cvs_file_items)
        self.text_record_db.free_unused()
        self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
        del self.text_record_db

    def finish(self):
        """Close both databases."""

        self._rcs_deltas.close()
        self._rcs_trees.close()
615 class _KeywordExpander:
616 """A class whose instances provide substitutions for CVS keywords.
618 This class is used via its __call__() method, which should be called
619 with a match object representing a match for a CVS keyword string.
620 The method returns the replacement for the matched text.
622 The __call__() method works by calling the method with the same name
623 as that of the CVS keyword (converted to lower case).
625 Instances of this class can be passed as the REPL argument to
626 re.sub()."""
628 date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
629 date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12
631 date_fmt = date_fmt_new
633 @classmethod
634 def use_old_date_format(klass):
635 """Class method to ensure exact compatibility with CVS 1.11
636 output. Use this if you want to verify your conversion and you're
637 using CVS 1.11."""
638 klass.date_fmt = klass.date_fmt_old
640 def __init__(self, cvs_rev):
641 self.cvs_rev = cvs_rev
643 def __call__(self, match):
644 return '$%s: %s $' % (
645 match.group(1), getattr(self, match.group(1).lower())(),
648 def author(self):
649 return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
651 def date(self):
652 return time.strftime(self.date_fmt, time.gmtime(self.cvs_rev.timestamp))
654 def header(self):
655 return '%s %s %s %s Exp' % (
656 self.source(), self.cvs_rev.rev, self.date(), self.author(),
659 def id(self):
660 return '%s %s %s %s Exp' % (
661 self.rcsfile(), self.cvs_rev.rev, self.date(), self.author(),
664 def locker(self):
665 # Handle kvl like kv, as a converted repo is supposed to have no
666 # locks.
667 return ''
669 def log(self):
670 # Would need some special handling.
671 return 'not supported by cvs2svn'
673 def name(self):
674 # Cannot work, as just creating a new symbol does not check out
675 # the revision again.
676 return 'not supported by cvs2svn'
678 def rcsfile(self):
679 return self.cvs_rev.cvs_file.basename + ",v"
681 def revision(self):
682 return self.cvs_rev.rev
684 def source(self):
685 project = self.cvs_rev.cvs_file.project
686 return project.cvs_repository_root + '/' + project.cvs_module + \
687 self.cvs_rev.cvs_file.cvs_path + ",v"
689 def state(self):
690 # We check out only live revisions.
691 return 'Exp'
694 class InternalRevisionReader(RevisionReader):
695 """A RevisionReader that reads the contents from an own delta store."""
697 _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
698 _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
699 _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
701 def __init__(self, compress):
702 self._compress = compress
704 def register_artifacts(self, which_pass):
705 artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
706 artifact_manager.register_temp_file_needed(
707 config.RCS_DELTAS_STORE, which_pass
709 artifact_manager.register_temp_file_needed(
710 config.RCS_DELTAS_INDEX_TABLE, which_pass
712 artifact_manager.register_temp_file_needed(
713 config.RCS_TREES_STORE, which_pass
715 artifact_manager.register_temp_file_needed(
716 config.RCS_TREES_INDEX_TABLE, which_pass
719 def start(self):
720 self._delta_db = IndexedDatabase(
721 artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
722 artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
723 DB_OPEN_READ)
724 self._delta_db.__delitem__ = lambda id: None
725 self._tree_db = IndexedDatabase(
726 artifact_manager.get_temp_file(config.RCS_TREES_STORE),
727 artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
728 DB_OPEN_READ)
729 ser = MarshalSerializer()
730 if self._compress:
731 ser = CompressingSerializer(ser)
732 self._co_db = Database(
733 artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
734 ser)
736 # The set of CVSFile instances whose TextRecords have already been
737 # read:
738 self._loaded_files = set()
740 # A map { CVSFILE : _FileTree } for files that currently have live
741 # revisions:
742 self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
744 def _get_text_record(self, cvs_rev):
745 """Return the TextRecord instance for CVS_REV.
747 If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
748 do so now."""
750 if cvs_rev.cvs_file not in self._loaded_files:
751 for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
752 self._text_record_db.add(text_record)
753 self._loaded_files.add(cvs_rev.cvs_file)
755 return self._text_record_db[cvs_rev.id]
757 def get_content(self, cvs_rev):
758 """Check out the text for revision C_REV from the repository.
760 Return the text. If CVS_REV has a property _keyword_handling, use
761 it to determine how to handle RCS keywords in the output:
763 'collapsed' -- collapse keywords
765 'expanded' -- expand keywords
767 'untouched' -- output keywords in the form they are found in
768 the RCS file
770 Note that $Log$ never actually generates a log (which makes test
771 'requires_cvs()' fail).
773 Revisions may be requested in any order, but if they are not
774 requested in dependency order the checkout database will become
775 very large. Revisions may be skipped. Each revision may be
776 requested only once."""
778 try:
779 text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
780 except MalformedDeltaException, (msg):
781 raise FatalError('Malformed RCS delta in %s, revision %s: %s'
782 % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
784 keyword_handling = cvs_rev.get_property('_keyword_handling')
786 if keyword_handling == 'untouched':
787 # Leave keywords in the form that they were checked in.
788 pass
789 elif keyword_handling == 'collapsed':
790 text = self._kw_re.sub(r'$\1$', text)
791 elif keyword_handling == 'expanded':
792 text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
793 else:
794 raise FatalError(
795 'Undefined _keyword_handling property (%r) for %s'
796 % (keyword_handling, cvs_rev,)
799 if Ctx().decode_apple_single:
800 # Insert a filter to decode any files that are in AppleSingle
801 # format:
802 text = get_maybe_apple_single(text)
804 eol_fix = cvs_rev.get_property('_eol_fix')
805 if eol_fix:
806 text = canonicalize_eol(text, eol_fix)
808 return text
810 def finish(self):
811 self._text_record_db.log_leftovers()
813 del self._text_record_db
814 self._delta_db.close()
815 self._tree_db.close()
816 self._co_db.close()