cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionReader produces the revisions' contents on demand.  To
  28 generate the text for a typical revision, we need the revision's delta
  29 text plus the fulltext of the previous revision.  Therefore, we
  30 maintain a checkout database containing a copy of the fulltext of any
  31 revision for which subsequent revisions still need to be retrieved.
  32 It is crucial to remove text from this database as soon as it is no
  33 longer needed, to prevent it from growing enormous.
  34
  35 There are two reasons that the text from a revision can be needed: (1)
  36 because the revision itself still needs to be output to a dumpfile;
  37 (2) because another revision needs it as the base of its delta.  We
  38 maintain a reference count for each revision, which includes *both*
  39 possibilities.  The first time a revision's text is needed, it is
  40 generated by applying the revision's deltatext to the previous
  41 revision's fulltext, and the resulting fulltext is stored in the
  42 checkout database.  Each time a revision's fulltext is retrieved, its
  43 reference count is decremented.  When the reference count goes to
  44 zero, then the fulltext is deleted from the checkout database.
  45
  46 The administrative data for managing this consists of one TextRecord
  47 entry for each revision.  Each TextRecord has an id, which is the same
  48 id as used for the corresponding CVSRevision instance.  It also
  49 maintains a count of the times it is expected to be retrieved.
  50 TextRecords come in several varieties:
  51
  52 FullTextRecord -- Used for revisions whose fulltext is contained
  53     directly in the RCS file, and therefore available during
  54     CollectRevsPass (i.e., typically revision 1.1 of each file).
  55
  56 DeltaTextRecord -- Used for revisions that are defined via a delta
  57     relative to some other TextRecord.  These records record the id of
  58     the TextRecord that holds the base text against which the delta is
  59     defined.  When the text for a DeltaTextRecord is retrieved, the
  60     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  61     instance is created to take its place.
  62
  63 CheckedOutTextRecord -- Used during OutputPass for a revision that
  64     started out as a DeltaTextRecord, but has already been retrieved
  65     (and therefore its fulltext is stored in the checkout database).
  66
  67 While a file is being processed during FilterSymbolsPass, the fulltext
  68 and deltas are stored to the delta database, and TextRecord instances
  69 are created to keep track of things.  The reference counts are all
  70 initialized: each record referred to by a delta has its refcount
  71 incremented, and each record that corresponds to a non-delete
  72 CVSRevision is incremented.  After that, any records with refcount==0
  73 are removed.  When one record is removed, that can cause another
  74 record's reference count to go to zero and be removed too,
  75 recursively.  When a TextRecord is deleted at this stage, its
  76 deltatext is also deleted from the delta database."""
  77
  78
  79 from cStringIO import StringIO
  80 import re
  81 import time
  82
  83 from cvs2svn_lib import config
  84 from cvs2svn_lib.common import DB_OPEN_NEW
  85 from cvs2svn_lib.common import DB_OPEN_READ
  86 from cvs2svn_lib.common import warning_prefix
  87 from cvs2svn_lib.common import FatalError
  88 from cvs2svn_lib.common import InternalError
  89 from cvs2svn_lib.common import is_trunk_revision
  90 from cvs2svn_lib.context import Ctx
  91 from cvs2svn_lib.log import Log
  92 from cvs2svn_lib.artifact_manager import artifact_manager
  93 from cvs2svn_lib.symbol import Trunk
  94 from cvs2svn_lib.cvs_item import CVSRevisionModification
  95 from cvs2svn_lib.database import Database
  96 from cvs2svn_lib.database import IndexedDatabase
  97 from cvs2svn_lib.rcs_stream import RCSStream
  98 from cvs2svn_lib.rcs_stream import MalformedDeltaException
  99 from cvs2svn_lib.revision_manager import RevisionCollector
 100 from cvs2svn_lib.revision_manager import RevisionReader
 101 from cvs2svn_lib.serializer import MarshalSerializer
 102 from cvs2svn_lib.serializer import CompressingSerializer
 103 from cvs2svn_lib.serializer import PrimedPickleSerializer
 104 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
 105
 106 import cvs2svn_rcsparse
 107
 108
 109 class TextRecord(object):
 110   """Bookkeeping data for the text of a single CVSRevision."""
 111
 112   __slots__ = ['id', 'refcount']
 113
 114   def __init__(self, id):
 115     # The cvs_rev_id of the revision whose text this is.
 116     self.id = id
 117
 118     # The number of times that the text of this revision will be
 119     # retrieved.
 120     self.refcount = 0
 121
 122   def __getstate__(self):
 123     return (self.id, self.refcount,)
 124
 125   def __setstate__(self, state):
 126     (self.id, self.refcount,) = state
 127
 128   def increment_dependency_refcounts(self, text_record_db):
 129     """Increment the refcounts of any records that this one depends on."""
 130
 131     pass
 132
 133   def decrement_refcount(self, text_record_db):
 134     """Decrement the number of times our text still has to be checked out.
 135
 136     If the reference count goes to zero, call discard()."""
 137
 138     self.refcount -= 1
 139     if self.refcount == 0:
 140       text_record_db.discard(self.id)
 141
 142   def checkout(self, text_record_db):
 143     """Workhorse of the checkout process.
 144
 145     Return the text for this revision, decrement our reference count,
 146     and update the databases depending on whether there will be future
 147     checkouts."""
 148
 149     raise NotImplementedError()
 150
 151   def free(self, text_record_db):
 152     """This instance will never again be checked out; free it.
 153
 154     Also free any associated resources and decrement the refcounts of
 155     any other TextRecords that this one depends on."""
 156
 157     raise NotImplementedError()
 158
 159
 160 class FullTextRecord(TextRecord):
 161   __slots__ = []
 162
 163   def __getstate__(self):
 164     return (self.id, self.refcount,)
 165
 166   def __setstate__(self, state):
 167     (self.id, self.refcount,) = state
 168
 169   def checkout(self, text_record_db):
 170     text = text_record_db.delta_db[self.id]
 171     self.decrement_refcount(text_record_db)
 172     return text
 173
 174   def free(self, text_record_db):
 175     del text_record_db.delta_db[self.id]
 176
 177   def __str__(self):
 178     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 179
 180
 181 class DeltaTextRecord(TextRecord):
 182   __slots__ = ['pred_id']
 183
 184   def __init__(self, id, pred_id):
 185     TextRecord.__init__(self, id)
 186
 187     # The cvs_rev_id of the revision relative to which this delta is
 188     # defined.
 189     self.pred_id = pred_id
 190
 191   def __getstate__(self):
 192     return (self.id, self.refcount, self.pred_id,)
 193
 194   def __setstate__(self, state):
 195     (self.id, self.refcount, self.pred_id,) = state
 196
 197   def increment_dependency_refcounts(self, text_record_db):
 198     text_record_db[self.pred_id].refcount += 1
 199
 200   def checkout(self, text_record_db):
 201     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 202     co = RCSStream(base_text)
 203     delta_text = text_record_db.delta_db[self.id]
 204     co.apply_diff(delta_text)
 205     text = co.get_text()
 206     del co
 207     self.refcount -= 1
 208     if self.refcount == 0:
 209       # This text will never be needed again; just delete ourselves
 210       # without ever having stored the fulltext to the checkout
 211       # database:
 212       del text_record_db[self.id]
 213     else:
 214       # Store a new CheckedOutTextRecord in place of ourselves:
 215       text_record_db.checkout_db['%x' % self.id] = text
 216       new_text_record = CheckedOutTextRecord(self.id)
 217       new_text_record.refcount = self.refcount
 218       text_record_db.replace(new_text_record)
 219     return text
 220
 221   def free(self, text_record_db):
 222     del text_record_db.delta_db[self.id]
 223     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 224
 225   def __str__(self):
 226     return 'DeltaTextRecord(%x -> %x, %d)' \
 227            % (self.pred_id, self.id, self.refcount,)
 228
 229
 230 class CheckedOutTextRecord(TextRecord):
 231   __slots__ = []
 232
 233   def __getstate__(self):
 234     return (self.id, self.refcount,)
 235
 236   def __setstate__(self, state):
 237     (self.id, self.refcount,) = state
 238
 239   def checkout(self, text_record_db):
 240     text = text_record_db.checkout_db['%x' % self.id]
 241     self.decrement_refcount(text_record_db)
 242     return text
 243
 244   def free(self, text_record_db):
 245     del text_record_db.checkout_db['%x' % self.id]
 246
 247   def __str__(self):
 248     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 249
 250
 251 class NullDatabase(object):
 252   """A do-nothing database that can be used with TextRecordDatabase.
 253
 254   Use this when you don't actually want to allow anything to be
 255   deleted."""
 256
 257   def __delitem__(self, id):
 258     pass
 259
 260
 261 class TextRecordDatabase:
 262   """Holds the TextRecord instances that are currently live.
 263
 264   During CollectRevsPass and FilterSymbolsPass, files are processed
 265   one by one and a new TextRecordDatabase instance is used for each
 266   file.  During OutputPass, a single TextRecordDatabase instance is
 267   used for the duration of OutputPass; individual records are added
 268   and removed when they are active."""
 269
 270   def __init__(self, delta_db, checkout_db):
 271     # A map { cvs_rev_id -> TextRecord }.
 272     self.text_records = {}
 273
 274     # A database-like object using cvs_rev_ids as keys and containing
 275     # fulltext/deltatext strings as values.  Its __getitem__() method
 276     # is used to retrieve deltas when they are needed, and its
 277     # __delitem__() method is used to delete deltas when they can be
 278     # freed.  The modifiability of the delta database varies from pass
 279     # to pass, so the object stored here varies as well:
 280     #
 281     # CollectRevsPass: a fully-functional IndexedDatabase.  This
 282     #     allows deltas that will not be needed to be deleted.
 283     #
 284     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 285     #     modified during this pass, and we have no need to retrieve
 286     #     deltas, so we just use a dummy object here.
 287     #
 288     # OutputPass: a disabled IndexedDatabase.  During this pass we
 289     #     need to retrieve deltas, but we are not allowed to modify
 290     #     the delta database.  So we use an IndexedDatabase whose
 291     #     __del__() method has been disabled to do nothing.
 292     self.delta_db = delta_db
 293
 294     # A database-like object using cvs_rev_ids as keys and containing
 295     # fulltext strings as values.  This database is only set during
 296     # OutputPass.
 297     self.checkout_db = checkout_db
 298
 299     # If this is set to a list, then the list holds the ids of
 300     # text_records that have to be deleted; when discard() is called,
 301     # it adds the requested id to the list but does not delete it.  If
 302     # this member is set to None, then text_records are deleted
 303     # immediately when discard() is called.
 304     self.deferred_deletes = None
 305
 306   def __getstate__(self):
 307     return (self.text_records.values(),)
 308
 309   def __setstate__(self, state):
 310     (text_records,) = state
 311     self.text_records = {}
 312     for text_record in text_records:
 313       self.add(text_record)
 314     self.delta_db = NullDatabase()
 315     self.checkout_db = NullDatabase()
 316     self.deferred_deletes = None
 317
 318   def add(self, text_record):
 319     """Add TEXT_RECORD to our database.
 320
 321     There must not already be a record with the same id."""
 322
 323     assert not self.text_records.has_key(text_record.id)
 324
 325     self.text_records[text_record.id] = text_record
 326
 327   def __getitem__(self, id):
 328     return self.text_records[id]
 329
 330   def __delitem__(self, id):
 331     """Free the record with the specified ID."""
 332
 333     del self.text_records[id]
 334
 335   def replace(self, text_record):
 336     """Store TEXT_RECORD in place of the existing record with the same id.
 337
 338     Do not do anything with the old record."""
 339
 340     assert self.text_records.has_key(text_record.id)
 341     self.text_records[text_record.id] = text_record
 342
 343   def discard(self, *ids):
 344     """The text records with IDS are no longer needed; discard them.
 345
 346     This involves calling their free() methods and also removing them
 347     from SELF.
 348
 349     If SELF.deferred_deletes is not None, then the ids to be deleted
 350     are added to the list instead of deleted immediately.  This
 351     mechanism is to prevent a stack overflow from the avalanche of
 352     deletes that can result from deleting a long chain of revisions."""
 353
 354     if self.deferred_deletes is None:
 355       # This is an outer-level delete.
 356       self.deferred_deletes = list(ids)
 357       while self.deferred_deletes:
 358         id = self.deferred_deletes.pop()
 359         text_record = self[id]
 360         if text_record.refcount != 0:
 361           raise InternalError(
 362               'TextRecordDatabase.discard(%s) called with refcount = %d'
 363               % (text_record, text_record.refcount,)
 364               )
 365         # This call might cause other text_record ids to be added to
 366         # self.deferred_deletes:
 367         text_record.free(self)
 368         del self[id]
 369       self.deferred_deletes = None
 370     else:
 371       self.deferred_deletes.extend(ids)
 372
 373   def itervalues(self):
 374     return self.text_records.itervalues()
 375
 376   def recompute_refcounts(self, cvs_file_items):
 377     """Recompute the refcounts of the contained TextRecords.
 378
 379     Use CVS_FILE_ITEMS to determine which records will be needed by
 380     cvs2svn."""
 381
 382     # First clear all of the refcounts:
 383     for text_record in self.itervalues():
 384       text_record.refcount = 0
 385
 386     # Now increment the reference count of records that are needed as
 387     # the source of another record's deltas:
 388     for text_record in self.itervalues():
 389       text_record.increment_dependency_refcounts(self.text_records)
 390
 391     # Now increment the reference count of records that will be needed
 392     # by cvs2svn:
 393     for lod_items in cvs_file_items.iter_lods():
 394       for cvs_rev in lod_items.cvs_revisions:
 395         if isinstance(cvs_rev, CVSRevisionModification):
 396           self[cvs_rev.id].refcount += 1
 397
 398   def free_unused(self):
 399     """Free any TextRecords whose reference counts are zero."""
 400
 401     # The deletion of some of these text records might cause others to
 402     # be unused, in which case they will be deleted automatically.
 403     # But since the initially-unused records are not referred to by
 404     # any others, we don't have to be afraid that they will be deleted
 405     # before we get to them.  But it *is* crucial that we create the
 406     # whole unused list before starting the loop.
 407
 408     unused = [
 409         text_record.id
 410         for text_record in self.itervalues()
 411         if text_record.refcount == 0
 412         ]
 413
 414     self.discard(*unused)
 415
 416   def log_leftovers(self):
 417     """If any TextRecords still exist, log them."""
 418
 419     if self.text_records:
 420       Log().warn(
 421           "%s: internal problem: leftover revisions in the checkout cache:"
 422           % warning_prefix)
 423       for text_record in self.itervalues():
 424         Log().warn('    %s' % (text_record,))
 425
 426   def __repr__(self):
 427     """Debugging output of the current contents of the TextRecordDatabase."""
 428
 429     retval = ['TextRecordDatabase:']
 430     for text_record in self.itervalues():
 431       retval.append('    %s' % (text_record,))
 432     return '\n'.join(retval)
 433
 434
 435 class _Sink(cvs2svn_rcsparse.Sink):
 436   def __init__(self, revision_collector, cvs_file_items):
 437     self.revision_collector = revision_collector
 438     self.cvs_file_items = cvs_file_items
 439
 440     # A map {rev : base_rev} indicating that the text for rev is
 441     # stored in CVS as a delta relative to base_rev.
 442     self.base_revisions = {}
 443
 444     # The revision that is stored with its fulltext in CVS (usually
 445     # the oldest revision on trunk):
 446     self.head_revision = None
 447
 448     # The first logical revision on trunk (usually '1.1'):
 449     self.revision_1_1 = None
 450
 451     # Keep track of the revisions whose revision info has been seen so
 452     # far (to avoid repeated revision info blocks):
 453     self.revisions_seen = set()
 454
 455   def set_head_revision(self, revision):
 456     self.head_revision = revision
 457
 458   def define_revision(
 459         self, revision, timestamp, author, state, branches, next
 460         ):
 461     if next:
 462       self.base_revisions[next] = revision
 463     else:
 464       if is_trunk_revision(revision):
 465         self.revision_1_1 = revision
 466
 467     for branch in branches:
 468       self.base_revisions[branch] = revision
 469
 470   def set_revision_info(self, revision, log, text):
 471     if revision in self.revisions_seen:
 472       # One common form of CVS repository corruption is that the
 473       # Deltatext block for revision 1.1 appears twice.  CollectData
 474       # has already warned about this problem; here we can just ignore
 475       # it.
 476       return
 477     else:
 478       self.revisions_seen.add(revision)
 479
 480     cvs_rev_id = self.cvs_file_items.original_ids[revision]
 481     if is_trunk_revision(revision):
 482       # On trunk, revisions are encountered in reverse order (1.<N>
 483       # ... 1.1) and deltas are inverted.  The first text that we see
 484       # is the fulltext for the HEAD revision.  After that, the text
 485       # corresponding to revision 1.N is the delta (1.<N+1> ->
 486       # 1.<N>)).  We have to invert the deltas here so that we can
 487       # read the revisions out in dependency order; that is, for
 488       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 489       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 490       # compute the delta for a revision until we see its logical
 491       # parent.  When we finally see revision 1.1 (which is recognized
 492       # because it doesn't have a parent), we can record the diff (1.1
 493       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 494
 495       if revision == self.head_revision:
 496         # This is HEAD, as fulltext.  Initialize the RCSStream so
 497         # that we can compute deltas backwards in time.
 498         self._stream = RCSStream(text)
 499         self._stream_revision = revision
 500       else:
 501         # Any other trunk revision is a backward delta.  Apply the
 502         # delta to the RCSStream to mutate it to the contents of this
 503         # revision, and also to get the reverse delta, which we store
 504         # as the forward delta of our child revision.
 505         try:
 506           text = self._stream.invert_diff(text)
 507         except MalformedDeltaException, e:
 508           Log().error(
 509               'Malformed RCS delta in %s, revision %s: %s'
 510               % (self.cvs_file_items.cvs_file.filename, revision, e)
 511               )
 512           raise RuntimeError()
 513         text_record = DeltaTextRecord(
 514             self.cvs_file_items.original_ids[self._stream_revision],
 515             cvs_rev_id
 516             )
 517         self.revision_collector._writeout(text_record, text)
 518         self._stream_revision = revision
 519
 520       if revision == self.revision_1_1:
 521         # This is revision 1.1.  Write its fulltext:
 522         text_record = FullTextRecord(cvs_rev_id)
 523         self.revision_collector._writeout(
 524             text_record, self._stream.get_text()
 525             )
 526
 527         # There will be no more trunk revisions delivered, so free the
 528         # RCSStream.
 529         del self._stream
 530         del self._stream_revision
 531
 532     else:
 533       # On branches, revisions are encountered in logical order
 534       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 535       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 536       # <BRANCH>.<N>).  That's what we need, so just store it.
 537
 538       # FIXME: It would be nice to avoid writing out branch deltas
 539       # when --trunk-only.  (They will be deleted when finish_file()
 540       # is called, but if the delta db is in an IndexedDatabase the
 541       # deletions won't actually recover any disk space.)
 542       text_record = DeltaTextRecord(
 543           cvs_rev_id,
 544           self.cvs_file_items.original_ids[self.base_revisions[revision]]
 545           )
 546       self.revision_collector._writeout(text_record, text)
 547
 548     return None
 549
 550
 551 class InternalRevisionCollector(RevisionCollector):
 552   """The RevisionCollector used by InternalRevisionReader."""
 553
 554   def __init__(self, compress):
 555     RevisionCollector.__init__(self)
 556     self._compress = compress
 557
 558   def register_artifacts(self, which_pass):
 559     artifact_manager.register_temp_file(
 560         config.RCS_DELTAS_INDEX_TABLE, which_pass
 561         )
 562     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 563     artifact_manager.register_temp_file(
 564         config.RCS_TREES_INDEX_TABLE, which_pass
 565         )
 566     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 567
 568   def start(self):
 569     ser = MarshalSerializer()
 570     if self._compress:
 571       ser = CompressingSerializer(ser)
 572     self._rcs_deltas = IndexedDatabase(
 573         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 574         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 575         DB_OPEN_NEW, ser
 576         )
 577     primer = (FullTextRecord, DeltaTextRecord)
 578     self._rcs_trees = IndexedDatabase(
 579         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 580         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 581         DB_OPEN_NEW, PrimedPickleSerializer(primer)
 582         )
 583
 584   def _writeout(self, text_record, text):
 585     self.text_record_db.add(text_record)
 586     self._rcs_deltas[text_record.id] = text
 587
 588   def process_file(self, cvs_file_items):
 589     """Read revision information for the file described by CVS_FILE_ITEMS.
 590
 591     Compute the text record refcounts, discard any records that are
 592     unneeded, and store the text records for the file to the
 593     _rcs_trees database."""
 594
 595     # A map from cvs_rev_id to TextRecord instance:
 596     self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
 597
 598     cvs2svn_rcsparse.parse(
 599         open(cvs_file_items.cvs_file.filename, 'rb'),
 600         _Sink(self, cvs_file_items),
 601         )
 602
 603     self.text_record_db.recompute_refcounts(cvs_file_items)
 604     self.text_record_db.free_unused()
 605     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 606     del self.text_record_db
 607
 608   def finish(self):
 609     self._rcs_deltas.close()
 610     self._rcs_trees.close()
 611
 612
 613 class _KeywordExpander:
 614   """A class whose instances provide substitutions for CVS keywords.
 615
 616   This class is used via its __call__() method, which should be called
 617   with a match object representing a match for a CVS keyword string.
 618   The method returns the replacement for the matched text.
 619
 620   The __call__() method works by calling the method with the same name
 621   as that of the CVS keyword (converted to lower case).
 622
 623   Instances of this class can be passed as the REPL argument to
 624   re.sub()."""
 625
 626   date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
 627   date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12
 628
 629   date_fmt = date_fmt_new
 630
 631   @classmethod
 632   def use_old_date_format(klass):
 633       """Class method to ensure exact compatibility with CVS 1.11
 634       output.  Use this if you want to verify your conversion and you're
 635       using CVS 1.11."""
 636       klass.date_fmt = klass.date_fmt_old
 637
 638   def __init__(self, cvs_rev):
 639     self.cvs_rev = cvs_rev
 640
 641   def __call__(self, match):
 642     return '$%s: %s $' % \
 643            (match.group(1), getattr(self, match.group(1).lower())(),)
 644
 645   def author(self):
 646     return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
 647
 648   def date(self):
 649     return time.strftime(self.date_fmt,
 650                          time.gmtime(self.cvs_rev.timestamp))
 651
 652   def header(self):
 653     return '%s %s %s %s Exp' % \
 654            (self.source(), self.cvs_rev.rev, self.date(), self.author())
 655
 656   def id(self):
 657     return '%s %s %s %s Exp' % \
 658            (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
 659
 660   def locker(self):
 661     # Handle kvl like kv, as a converted repo is supposed to have no
 662     # locks.
 663     return ''
 664
 665   def log(self):
 666     # Would need some special handling.
 667     return 'not supported by cvs2svn'
 668
 669   def name(self):
 670     # Cannot work, as just creating a new symbol does not check out
 671     # the revision again.
 672     return 'not supported by cvs2svn'
 673
 674   def rcsfile(self):
 675     return self.cvs_rev.cvs_file.basename + ",v"
 676
 677   def revision(self):
 678     return self.cvs_rev.rev
 679
 680   def source(self):
 681     project = self.cvs_rev.cvs_file.project
 682     return project.cvs_repository_root + '/' + project.cvs_module + \
 683         self.cvs_rev.cvs_file.cvs_path + ",v"
 684
 685   def state(self):
 686     # We check out only live revisions.
 687     return 'Exp'
 688
 689
 690 class InternalRevisionReader(RevisionReader):
 691   """A RevisionReader that reads the contents from an own delta store."""
 692
 693   _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
 694   _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
 695   _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
 696
 697   def __init__(self, compress):
 698     self._compress = compress
 699
 700   def register_artifacts(self, which_pass):
 701     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 702     artifact_manager.register_temp_file_needed(
 703         config.RCS_DELTAS_STORE, which_pass
 704         )
 705     artifact_manager.register_temp_file_needed(
 706         config.RCS_DELTAS_INDEX_TABLE, which_pass
 707         )
 708     artifact_manager.register_temp_file_needed(
 709         config.RCS_TREES_STORE, which_pass
 710         )
 711     artifact_manager.register_temp_file_needed(
 712         config.RCS_TREES_INDEX_TABLE, which_pass
 713         )
 714
 715   def start(self):
 716     self._delta_db = IndexedDatabase(
 717         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 718         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 719         DB_OPEN_READ)
 720     self._delta_db.__delitem__ = lambda id: None
 721     self._tree_db = IndexedDatabase(
 722         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 723         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 724         DB_OPEN_READ)
 725     ser = MarshalSerializer()
 726     if self._compress:
 727       ser = CompressingSerializer(ser)
 728     self._co_db = Database(
 729         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
 730         ser)
 731
 732     # The set of CVSFile instances whose TextRecords have already been
 733     # read:
 734     self._loaded_files = set()
 735
 736     # A map { CVSFILE : _FileTree } for files that currently have live
 737     # revisions:
 738     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 739
 740   def _get_text_record(self, cvs_rev):
 741     """Return the TextRecord instance for CVS_REV.
 742
 743     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 744     do so now."""
 745
 746     if cvs_rev.cvs_file not in self._loaded_files:
 747       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 748         self._text_record_db.add(text_record)
 749       self._loaded_files.add(cvs_rev.cvs_file)
 750
 751     return self._text_record_db[cvs_rev.id]
 752
 753   def get_content(self, cvs_rev):
 754     """Check out the text for revision C_REV from the repository.
 755
 756     Return the text.  If CVS_REV has a property _keyword_handling, use
 757     it to determine how to handle RCS keywords in the output:
 758
 759         'collapsed' -- collapse keywords
 760
 761         'expanded' -- expand keywords
 762
 763         'untouched' -- output keywords in the form they are found in
 764             the RCS file
 765
 766     Note that $Log$ never actually generates a log (which makes test
 767     'requires_cvs()' fail).
 768
 769     Revisions may be requested in any order, but if they are not
 770     requested in dependency order the checkout database will become
 771     very large.  Revisions may be skipped.  Each revision may be
 772     requested only once."""
 773
 774     try:
 775       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 776     except MalformedDeltaException, (msg):
 777       raise FatalError('Malformed RCS delta in %s, revision %s: %s'
 778                        % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
 779
 780     keyword_handling = cvs_rev.get_property('_keyword_handling')
 781
 782     if keyword_handling == 'untouched':
 783       # Leave keywords in the form that they were checked in.
 784       pass
 785     elif keyword_handling == 'collapsed':
 786       text = self._kw_re.sub(r'$\1$', text)
 787     elif keyword_handling == 'expanded':
 788       text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
 789     else:
 790       raise FatalError(
 791           'Undefined _keyword_handling property (%r) for %s'
 792           % (keyword_handling, cvs_rev,)
 793           )
 794
 795     if Ctx().decode_apple_single:
 796       # Insert a filter to decode any files that are in AppleSingle
 797       # format:
 798       text = get_maybe_apple_single(text)
 799
 800     return text
 801
 802   def finish(self):
 803     self._text_record_db.log_leftovers()
 804
 805     del self._text_record_db
 806     self._delta_db.close()
 807     self._tree_db.close()
 808     self._co_db.close()
 809