Teach RevisionReader.get_content() to handle AppleSingle content.
[cvs2svn.git] / cvs2svn_lib / checkout_internal.py
blob399b8e1c9a16ad97ccfd21941992717580153856
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains classes that implement the --use-internal-co option.
19 The idea is to patch up the revisions' contents incrementally, thus
20 avoiding the huge number of process spawns and the O(n^2) overhead of
21 using 'co' and 'cvs'.
23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
24 to databases. Notably, deltas from the trunk need to be reversed, as
25 CVS stores them so they apply from HEAD backwards.
27 InternalRevisionReader produces the revisions' contents on demand. To
28 generate the text for a typical revision, we need the revision's delta
29 text plus the fulltext of the previous revision. Therefore, we
30 maintain a checkout database containing a copy of the fulltext of any
31 revision for which subsequent revisions still need to be retrieved.
32 It is crucial to remove text from this database as soon as it is no
33 longer needed, to prevent it from growing enormous.
35 There are two reasons that the text from a revision can be needed: (1)
36 because the revision itself still needs to be output to a dumpfile;
37 (2) because another revision needs it as the base of its delta. We
38 maintain a reference count for each revision, which includes *both*
39 possibilities. The first time a revision's text is needed, it is
40 generated by applying the revision's deltatext to the previous
41 revision's fulltext, and the resulting fulltext is stored in the
42 checkout database. Each time a revision's fulltext is retrieved, its
43 reference count is decremented. When the reference count goes to
44 zero, then the fulltext is deleted from the checkout database.
46 The administrative data for managing this consists of one TextRecord
47 entry for each revision. Each TextRecord has an id, which is the same
48 id as used for the corresponding CVSRevision instance. It also
49 maintains a count of the times it is expected to be retrieved.
50 TextRecords come in several varieties:
52 FullTextRecord -- Used for revisions whose fulltext is contained
53 directly in the RCS file, and therefore available during
54 CollectRevsPass (i.e., typically revision 1.1 of each file).
56 DeltaTextRecord -- Used for revisions that are defined via a delta
57 relative to some other TextRecord. These records record the id of
58 the TextRecord that holds the base text against which the delta is
59 defined. When the text for a DeltaTextRecord is retrieved, the
60 DeltaTextRecord instance is deleted and a CheckedOutTextRecord
61 instance is created to take its place.
63 CheckedOutTextRecord -- Used during OutputPass for a revision that
64 started out as a DeltaTextRecord, but has already been retrieved
65 (and therefore its fulltext is stored in the checkout database).
67 While a file is being processed during FilterSymbolsPass, the fulltext
68 and deltas are stored to the delta database, and TextRecord instances
69 are created to keep track of things. The reference counts are all
70 initialized: each record referred to by a delta has its refcount
71 incremented, and each record that corresponds to a non-delete
72 CVSRevision is incremented. After that, any records with refcount==0
73 are removed. When one record is removed, that can cause another
74 record's reference count to go to zero and be removed too,
75 recursively. When a TextRecord is deleted at this stage, its
76 deltatext is also deleted from the delta database."""
79 from cStringIO import StringIO
80 import re
81 import time
83 from cvs2svn_lib import config
84 from cvs2svn_lib.common import DB_OPEN_NEW
85 from cvs2svn_lib.common import DB_OPEN_READ
86 from cvs2svn_lib.common import warning_prefix
87 from cvs2svn_lib.common import FatalError
88 from cvs2svn_lib.common import InternalError
89 from cvs2svn_lib.common import is_trunk_revision
90 from cvs2svn_lib.context import Ctx
91 from cvs2svn_lib.log import Log
92 from cvs2svn_lib.artifact_manager import artifact_manager
93 from cvs2svn_lib.symbol import Trunk
94 from cvs2svn_lib.cvs_item import CVSRevisionModification
95 from cvs2svn_lib.database import Database
96 from cvs2svn_lib.database import IndexedDatabase
97 from cvs2svn_lib.rcs_stream import RCSStream
98 from cvs2svn_lib.rcs_stream import MalformedDeltaException
99 from cvs2svn_lib.revision_manager import RevisionCollector
100 from cvs2svn_lib.revision_manager import RevisionReader
101 from cvs2svn_lib.serializer import MarshalSerializer
102 from cvs2svn_lib.serializer import CompressingSerializer
103 from cvs2svn_lib.serializer import PrimedPickleSerializer
104 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
106 import cvs2svn_rcsparse
class TextRecord(object):
  """Reference-counted bookkeeping entry for one CVSRevision's text.

  This is the abstract base of the record types; subclasses supply
  checkout() and free()."""

  __slots__ = ['id', 'refcount']

  def __init__(self, id):
    # Key of this record: the cvs_rev_id of the revision whose text
    # it describes.
    self.id = id

    # How many more times the text of this revision is expected to be
    # retrieved.
    self.refcount = 0

  def __getstate__(self):
    """Return the picklable state as the tuple (id, refcount)."""

    return (self.id, self.refcount)

  def __setstate__(self, state):
    """Restore the attributes from a tuple made by __getstate__()."""

    self.id, self.refcount = state

  def increment_dependency_refcounts(self, text_record_db):
    """Bump the refcount of every record that this one depends on.

    The base implementation has no dependencies and does nothing."""

    return None

  def decrement_refcount(self, text_record_db):
    """Note that our text has to be checked out one time fewer.

    When the count reaches zero, ask TEXT_RECORD_DB to discard this
    record."""

    remaining = self.refcount - 1
    self.refcount = remaining
    if remaining != 0:
      return
    text_record_db.discard(self.id)

  def checkout(self, text_record_db):
    """Workhorse of the checkout process.

    Return the text for this revision, decrement our reference count,
    and update the databases depending on whether there will be future
    checkouts.  Must be implemented by subclasses."""

    raise NotImplementedError()

  def free(self, text_record_db):
    """Release this record, which will never be checked out again.

    Also release any associated resources and decrement the refcounts
    of any other TextRecords that this one depends on.  Must be
    implemented by subclasses."""

    raise NotImplementedError()
class FullTextRecord(TextRecord):
  """TextRecord whose text is read as-is from the delta database."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount)

  def __setstate__(self, state):
    self.id, self.refcount = state

  def checkout(self, text_record_db):
    """Return our text from the delta database and drop one reference."""

    fulltext = text_record_db.delta_db[self.id]
    self.decrement_refcount(text_record_db)
    return fulltext

  def free(self, text_record_db):
    """Delete our text from the delta database."""

    del text_record_db.delta_db[self.id]

  def __str__(self):
    description = (self.id, self.refcount)
    return 'FullTextRecord(%x, %d)' % description
class DeltaTextRecord(TextRecord):
  """TextRecord whose text is stored as a delta against another record."""

  __slots__ = ['pred_id']

  def __init__(self, id, pred_id):
    TextRecord.__init__(self, id)

    # The cvs_rev_id of the revision relative to which this delta is
    # defined.
    self.pred_id = pred_id

  def __getstate__(self):
    return (self.id, self.refcount, self.pred_id)

  def __setstate__(self, state):
    self.id, self.refcount, self.pred_id = state

  def increment_dependency_refcounts(self, text_record_db):
    """Count our need for the predecessor's text as one reference."""

    predecessor = text_record_db[self.pred_id]
    predecessor.refcount += 1

  def checkout(self, text_record_db):
    """Build our fulltext by applying our delta to the predecessor's text.

    If further checkouts of this revision are expected, the fulltext is
    saved in the checkout database and this record is replaced by a
    CheckedOutTextRecord; otherwise this record is simply removed."""

    predecessor = text_record_db[self.pred_id]
    stream = RCSStream(predecessor.checkout(text_record_db))
    stream.apply_diff(text_record_db.delta_db[self.id])
    text = stream.get_text()
    del stream

    self.refcount -= 1
    if self.refcount != 0:
      # The text will be wanted again, so store it and put a
      # CheckedOutTextRecord in our place:
      text_record_db.checkout_db['%x' % self.id] = text
      replacement = CheckedOutTextRecord(self.id)
      replacement.refcount = self.refcount
      text_record_db.replace(replacement)
    else:
      # Nobody will ask for this text again; delete ourselves without
      # ever having stored the fulltext to the checkout database:
      del text_record_db[self.id]

    return text

  def free(self, text_record_db):
    """Delete our delta and release our reference to the predecessor."""

    del text_record_db.delta_db[self.id]
    text_record_db[self.pred_id].decrement_refcount(text_record_db)

  def __str__(self):
    description = (self.pred_id, self.id, self.refcount)
    return 'DeltaTextRecord(%x -> %x, %d)' % description
class CheckedOutTextRecord(TextRecord):
  """TextRecord whose fulltext is held in the checkout database.

  The checkout database is keyed by the record id formatted as a
  hexadecimal string."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount)

  def __setstate__(self, state):
    self.id, self.refcount = state

  def checkout(self, text_record_db):
    """Return the stored fulltext and drop one reference."""

    fulltext = text_record_db.checkout_db['%x' % self.id]
    self.decrement_refcount(text_record_db)
    return fulltext

  def free(self, text_record_db):
    """Delete the stored fulltext from the checkout database."""

    del text_record_db.checkout_db['%x' % self.id]

  def __str__(self):
    description = (self.id, self.refcount)
    return 'CheckedOutTextRecord(%x, %d)' % description
class NullDatabase(object):
  """Stand-in database for TextRecordDatabase that ignores deletions.

  Pass an instance wherever a database is required but nothing should
  actually be deleted.  Only item deletion is supported."""

  def __delitem__(self, id):
    # Deliberately a no-op.
    return None
class TextRecordDatabase:
  """Holds the TextRecord instances that are currently live.

  During CollectRevsPass and FilterSymbolsPass, files are processed
  one by one and a new TextRecordDatabase instance is used for each
  file.  During OutputPass, a single TextRecordDatabase instance is
  used for the duration of OutputPass; individual records are added
  and removed when they are active.

  Only the TextRecords themselves are pickled; an unpickled instance
  has NullDatabase objects in place of its delta and checkout
  databases."""

  def __init__(self, delta_db, checkout_db):
    # A map { cvs_rev_id -> TextRecord }.
    self.text_records = {}

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext/deltatext strings as values.  Its __getitem__() method
    # is used to retrieve deltas when they are needed, and its
    # __delitem__() method is used to delete deltas when they can be
    # freed.  The modifiability of the delta database varies from pass
    # to pass, so the object stored here varies as well:
    #
    # CollectRevsPass: a fully-functional IndexedDatabase.  This
    #     allows deltas that will not be needed to be deleted.
    #
    # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
    #     modified during this pass, and we have no need to retrieve
    #     deltas, so we just use a dummy object here.
    #
    # OutputPass: a disabled IndexedDatabase.  During this pass we
    #     need to retrieve deltas, but we are not allowed to modify
    #     the delta database.  So we use an IndexedDatabase whose
    #     __delitem__() method has been disabled to do nothing.
    self.delta_db = delta_db

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext strings as values.  This database is only set during
    # OutputPass.
    self.checkout_db = checkout_db

    # If this is set to a list, then the list holds the ids of
    # text_records that have to be deleted; when discard() is called,
    # it adds the requested id to the list but does not delete it.  If
    # this member is set to None, then text_records are deleted
    # immediately when discard() is called.
    self.deferred_deletes = None

  def __getstate__(self):
    """Return the picklable state: a 1-tuple holding the TextRecords."""

    return (self.text_records.values(),)

  def __setstate__(self, state):
    """Rebuild the record map from STATE; the databases become dummies."""

    (text_records,) = state
    self.text_records = {}
    for text_record in text_records:
      self.add(text_record)
    self.delta_db = NullDatabase()
    self.checkout_db = NullDatabase()
    self.deferred_deletes = None

  def add(self, text_record):
    """Add TEXT_RECORD to our database.

    There must not already be a record with the same id."""

    assert text_record.id not in self.text_records

    self.text_records[text_record.id] = text_record

  def __getitem__(self, id):
    """Return the record with the specified ID (KeyError if unknown)."""

    return self.text_records[id]

  def __delitem__(self, id):
    """Remove the record with the specified ID from SELF.

    The record's free() method is not called; use discard() for that."""

    del self.text_records[id]

  def replace(self, text_record):
    """Store TEXT_RECORD in place of the existing record with the same id.

    Do not do anything with the old record."""

    assert text_record.id in self.text_records
    self.text_records[text_record.id] = text_record

  def discard(self, *ids):
    """The text records with IDS are no longer needed; discard them.

    This involves calling their free() methods and also removing them
    from SELF.

    If SELF.deferred_deletes is not None, then the ids to be deleted
    are added to the list instead of deleted immediately.  This
    mechanism is to prevent a stack overflow from the avalanche of
    deletes that can result from deleting a long chain of revisions.

    Raise InternalError if a record to be discarded still has a
    nonzero refcount.  SELF.deferred_deletes is reset to None even if
    an exception escapes, so that later calls are not silently queued
    forever."""

    if self.deferred_deletes is not None:
      # We are being called (indirectly) from the loop of an
      # outer-level discard(); just queue the work for that loop.
      self.deferred_deletes.extend(ids)
      return

    # This is an outer-level delete.
    self.deferred_deletes = list(ids)
    try:
      while self.deferred_deletes:
        record_id = self.deferred_deletes.pop()
        text_record = self[record_id]
        if text_record.refcount != 0:
          raise InternalError(
              'TextRecordDatabase.discard(%s) called with refcount = %d'
              % (text_record, text_record.refcount,)
              )
        # This call might cause other text_record ids to be added to
        # self.deferred_deletes:
        text_record.free(self)
        del self[record_id]
    finally:
      self.deferred_deletes = None

  def itervalues(self):
    """Return an iterator over the contained TextRecords."""

    return self.text_records.itervalues()

  def recompute_refcounts(self, cvs_file_items):
    """Recompute the refcounts of the contained TextRecords.

    Use CVS_FILE_ITEMS to determine which records will be needed by
    cvs2svn."""

    # First clear all of the refcounts:
    for text_record in self.itervalues():
      text_record.refcount = 0

    # Now increment the reference count of records that are needed as
    # the source of another record's deltas:
    for text_record in self.itervalues():
      text_record.increment_dependency_refcounts(self)

    # Now increment the reference count of records that will be needed
    # by cvs2svn:
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if isinstance(cvs_rev, CVSRevisionModification):
          self[cvs_rev.id].refcount += 1

  def free_unused(self):
    """Free any TextRecords whose reference counts are zero."""

    # The deletion of some of these text records might cause others to
    # be unused, in which case they will be deleted automatically.
    # But since the initially-unused records are not referred to by
    # any others, we don't have to be afraid that they will be deleted
    # before we get to them.  But it *is* crucial that we create the
    # whole unused list before starting the loop.

    unused = [
        text_record.id
        for text_record in self.itervalues()
        if text_record.refcount == 0
        ]

    self.discard(*unused)

  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      Log().warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix)
      for text_record in self.itervalues():
        Log().warn(' %s' % (text_record,))

  def __repr__(self):
    """Debugging output of the current contents of the TextRecordDatabase."""

    retval = ['TextRecordDatabase:']
    for text_record in self.itervalues():
      retval.append(' %s' % (text_record,))
    return '\n'.join(retval)
435 class _Sink(cvs2svn_rcsparse.Sink):
436 def __init__(self, revision_collector, cvs_file_items):
437 self.revision_collector = revision_collector
438 self.cvs_file_items = cvs_file_items
440 # A map {rev : base_rev} indicating that the text for rev is
441 # stored in CVS as a delta relative to base_rev.
442 self.base_revisions = {}
444 # The revision that is stored with its fulltext in CVS (usually
445 # the oldest revision on trunk):
446 self.head_revision = None
448 # The first logical revision on trunk (usually '1.1'):
449 self.revision_1_1 = None
451 # Keep track of the revisions whose revision info has been seen so
452 # far (to avoid repeated revision info blocks):
453 self.revisions_seen = set()
455 def set_head_revision(self, revision):
456 self.head_revision = revision
458 def define_revision(
459 self, revision, timestamp, author, state, branches, next
461 if next:
462 self.base_revisions[next] = revision
463 else:
464 if is_trunk_revision(revision):
465 self.revision_1_1 = revision
467 for branch in branches:
468 self.base_revisions[branch] = revision
470 def set_revision_info(self, revision, log, text):
471 if revision in self.revisions_seen:
472 # One common form of CVS repository corruption is that the
473 # Deltatext block for revision 1.1 appears twice. CollectData
474 # has already warned about this problem; here we can just ignore
475 # it.
476 return
477 else:
478 self.revisions_seen.add(revision)
480 cvs_rev_id = self.cvs_file_items.original_ids[revision]
481 if is_trunk_revision(revision):
482 # On trunk, revisions are encountered in reverse order (1.<N>
483 # ... 1.1) and deltas are inverted. The first text that we see
484 # is the fulltext for the HEAD revision. After that, the text
485 # corresponding to revision 1.N is the delta (1.<N+1> ->
486 # 1.<N>)). We have to invert the deltas here so that we can
487 # read the revisions out in dependency order; that is, for
488 # revision 1.1 we want the fulltext, and for revision 1.<N> we
489 # want the delta (1.<N-1> -> 1.<N>). This means that we can't
490 # compute the delta for a revision until we see its logical
491 # parent. When we finally see revision 1.1 (which is recognized
492 # because it doesn't have a parent), we can record the diff (1.1
493 # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
495 if revision == self.head_revision:
496 # This is HEAD, as fulltext. Initialize the RCSStream so
497 # that we can compute deltas backwards in time.
498 self._stream = RCSStream(text)
499 self._stream_revision = revision
500 else:
501 # Any other trunk revision is a backward delta. Apply the
502 # delta to the RCSStream to mutate it to the contents of this
503 # revision, and also to get the reverse delta, which we store
504 # as the forward delta of our child revision.
505 try:
506 text = self._stream.invert_diff(text)
507 except MalformedDeltaException, e:
508 Log().error(
509 'Malformed RCS delta in %s, revision %s: %s'
510 % (self.cvs_file_items.cvs_file.filename, revision, e)
512 raise RuntimeError()
513 text_record = DeltaTextRecord(
514 self.cvs_file_items.original_ids[self._stream_revision],
515 cvs_rev_id
517 self.revision_collector._writeout(text_record, text)
518 self._stream_revision = revision
520 if revision == self.revision_1_1:
521 # This is revision 1.1. Write its fulltext:
522 text_record = FullTextRecord(cvs_rev_id)
523 self.revision_collector._writeout(
524 text_record, self._stream.get_text()
527 # There will be no more trunk revisions delivered, so free the
528 # RCSStream.
529 del self._stream
530 del self._stream_revision
532 else:
533 # On branches, revisions are encountered in logical order
534 # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
535 # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
536 # <BRANCH>.<N>). That's what we need, so just store it.
538 # FIXME: It would be nice to avoid writing out branch deltas
539 # when --trunk-only. (They will be deleted when finish_file()
540 # is called, but if the delta db is in an IndexedDatabase the
541 # deletions won't actually recover any disk space.)
542 text_record = DeltaTextRecord(
543 cvs_rev_id,
544 self.cvs_file_items.original_ids[self.base_revisions[revision]]
546 self.revision_collector._writeout(text_record, text)
548 return None
class InternalRevisionCollector(RevisionCollector):
  """The RevisionCollector used by InternalRevisionReader.

  It stores the RCS fulltexts/deltas in one IndexedDatabase and the
  per-file TextRecordDatabases in another."""

  def __init__(self, compress):
    RevisionCollector.__init__(self)

    # Whether the stored deltas should go through a
    # CompressingSerializer.
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the temporary files that this collector creates."""

    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    """Create the (new, empty) delta and tree databases."""

    ser = MarshalSerializer()
    if self._compress:
      ser = CompressingSerializer(ser)
    self._rcs_deltas = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, ser
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer)
        )

  def _writeout(self, text_record, text):
    """Register TEXT_RECORD for the current file and store its TEXT."""

    self.text_record_db.add(text_record)
    self._rcs_deltas[text_record.id] = text

  def process_file(self, cvs_file_items):
    """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

    # Close the RCS file explicitly instead of leaving that to the
    # garbage collector; one file is opened per file in the repository.
    rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
    try:
      cvs2svn_rcsparse.parse(
          rcs_file,
          _Sink(self, cvs_file_items),
          )
    finally:
      rcs_file.close()

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    """Close both databases."""

    self._rcs_deltas.close()
    self._rcs_trees.close()
class _KeywordExpander:
  """Callable that computes the expansion of a matched CVS keyword.

  An instance is bound to one CVSRevision and is meant to be passed as
  the REPL argument to re.sub().  When called with a match object
  whose first group is a CVS keyword name, it looks up the method
  named like the keyword in lower case and uses that method's result
  as the keyword's value."""

  date_fmt_old = "%Y/%m/%d %H:%M:%S"  # CVS 1.11, rcs
  date_fmt_new = "%Y-%m-%d %H:%M:%S"  # CVS 1.12

  date_fmt = date_fmt_new

  @classmethod
  def use_old_date_format(klass):
    """Switch all expanders to the CVS 1.11 date format.

    Use this if you want to verify your conversion and you're using
    CVS 1.11, to ensure exact compatibility with its output."""

    klass.date_fmt = klass.date_fmt_old

  def __init__(self, cvs_rev):
    self.cvs_rev = cvs_rev

  def __call__(self, match):
    keyword = match.group(1)
    expand = getattr(self, keyword.lower())
    return '$%s: %s $' % (keyword, expand())

  def author(self):
    metadata = Ctx()._metadata_db[self.cvs_rev.metadata_id]
    return metadata.original_author

  def date(self):
    moment = time.gmtime(self.cvs_rev.timestamp)
    return time.strftime(self.date_fmt, moment)

  def header(self):
    fields = (self.source(), self.cvs_rev.rev, self.date(), self.author())
    return '%s %s %s %s Exp' % fields

  def id(self):
    fields = (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
    return '%s %s %s %s Exp' % fields

  def locker(self):
    # A converted repo is supposed to have no locks, so kvl is
    # handled like kv.
    return ''

  def log(self):
    # Would need some special handling.
    return 'not supported by cvs2svn'

  def name(self):
    # Cannot work, as just creating a new symbol does not check out
    # the revision again.
    return 'not supported by cvs2svn'

  def rcsfile(self):
    return self.cvs_rev.cvs_file.basename + ",v"

  def revision(self):
    return self.cvs_rev.rev

  def source(self):
    cvs_file = self.cvs_rev.cvs_file
    project = cvs_file.project
    pieces = [
        project.cvs_repository_root,
        '/',
        project.cvs_module,
        cvs_file.cvs_path,
        ",v",
        ]
    return ''.join(pieces)

  def state(self):
    # We check out only live revisions.
    return 'Exp'
690 class InternalRevisionReader(RevisionReader):
691 """A RevisionReader that reads the contents from an own delta store."""
693 _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
694 _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
695 _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
697 def __init__(self, compress):
698 self._compress = compress
700 def register_artifacts(self, which_pass):
701 artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
702 artifact_manager.register_temp_file_needed(
703 config.RCS_DELTAS_STORE, which_pass
705 artifact_manager.register_temp_file_needed(
706 config.RCS_DELTAS_INDEX_TABLE, which_pass
708 artifact_manager.register_temp_file_needed(
709 config.RCS_TREES_STORE, which_pass
711 artifact_manager.register_temp_file_needed(
712 config.RCS_TREES_INDEX_TABLE, which_pass
715 def start(self):
716 self._delta_db = IndexedDatabase(
717 artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
718 artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
719 DB_OPEN_READ)
720 self._delta_db.__delitem__ = lambda id: None
721 self._tree_db = IndexedDatabase(
722 artifact_manager.get_temp_file(config.RCS_TREES_STORE),
723 artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
724 DB_OPEN_READ)
725 ser = MarshalSerializer()
726 if self._compress:
727 ser = CompressingSerializer(ser)
728 self._co_db = Database(
729 artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
730 ser)
732 # The set of CVSFile instances whose TextRecords have already been
733 # read:
734 self._loaded_files = set()
736 # A map { CVSFILE : _FileTree } for files that currently have live
737 # revisions:
738 self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
740 def _get_text_record(self, cvs_rev):
741 """Return the TextRecord instance for CVS_REV.
743 If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
744 do so now."""
746 if cvs_rev.cvs_file not in self._loaded_files:
747 for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
748 self._text_record_db.add(text_record)
749 self._loaded_files.add(cvs_rev.cvs_file)
751 return self._text_record_db[cvs_rev.id]
753 def get_content(self, cvs_rev):
754 """Check out the text for revision C_REV from the repository.
756 Return the text. If CVS_REV has a property _keyword_handling, use
757 it to determine how to handle RCS keywords in the output:
759 'collapsed' -- collapse keywords
761 'expanded' -- expand keywords
763 'untouched' -- output keywords in the form they are found in
764 the RCS file
766 Note that $Log$ never actually generates a log (which makes test
767 'requires_cvs()' fail).
769 Revisions may be requested in any order, but if they are not
770 requested in dependency order the checkout database will become
771 very large. Revisions may be skipped. Each revision may be
772 requested only once."""
774 try:
775 text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
776 except MalformedDeltaException, (msg):
777 raise FatalError('Malformed RCS delta in %s, revision %s: %s'
778 % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
780 keyword_handling = cvs_rev.get_property('_keyword_handling')
782 if keyword_handling == 'untouched':
783 # Leave keywords in the form that they were checked in.
784 pass
785 elif keyword_handling == 'collapsed':
786 text = self._kw_re.sub(r'$\1$', text)
787 elif keyword_handling == 'expanded':
788 text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
789 else:
790 raise FatalError(
791 'Undefined _keyword_handling property (%r) for %s'
792 % (keyword_handling, cvs_rev,)
795 if Ctx().decode_apple_single:
796 # Insert a filter to decode any files that are in AppleSingle
797 # format:
798 text = get_maybe_apple_single(text)
800 return text
802 def finish(self):
803 self._text_record_db.log_leftovers()
805 del self._text_record_db
806 self._delta_db.close()
807 self._tree_db.close()
808 self._co_db.close()