cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionReader produces the revisions' contents on demand.  To
  28 generate the text for a typical revision, we need the revision's delta
  29 text plus the fulltext of the previous revision.  Therefore, we
  30 maintain a checkout database containing a copy of the fulltext of any
  31 revision for which subsequent revisions still need to be retrieved.
  32 It is crucial to remove text from this database as soon as it is no
  33 longer needed, to prevent it from growing enormous.
  34
  35 There are two reasons that the text from a revision can be needed: (1)
  36 because the revision itself still needs to be output to a dumpfile;
  37 (2) because another revision needs it as the base of its delta.  We
  38 maintain a reference count for each revision, which includes *both*
  39 possibilities.  The first time a revision's text is needed, it is
  40 generated by applying the revision's deltatext to the previous
  41 revision's fulltext, and the resulting fulltext is stored in the
  42 checkout database.  Each time a revision's fulltext is retrieved, its
  43 reference count is decremented.  When the reference count goes to
  44 zero, then the fulltext is deleted from the checkout database.
  45
  46 The administrative data for managing this consists of one TextRecord
  47 entry for each revision.  Each TextRecord has an id, which is the same
  48 id as used for the corresponding CVSRevision instance.  It also
  49 maintains a count of the times it is expected to be retrieved.
  50 TextRecords come in several varieties:
  51
  52 FullTextRecord -- Used for revisions whose fulltext is contained
  53     directly in the RCS file, and therefore available during
  54     CollectRevsPass (i.e., typically revision 1.1 of each file).
  55
  56 DeltaTextRecord -- Used for revisions that are defined via a delta
  57     relative to some other TextRecord.  These records record the id of
  58     the TextRecord that holds the base text against which the delta is
  59     defined.  When the text for a DeltaTextRecord is retrieved, the
  60     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  61     instance is created to take its place.
  62
  63 CheckedOutTextRecord -- Used during OutputPass for a revision that
  64     started out as a DeltaTextRecord, but has already been retrieved
  65     (and therefore its fulltext is stored in the checkout database).
  66
  67 While a file is being processed during FilterSymbolsPass, the fulltext
  68 and deltas are stored to the delta database, and TextRecord instances
  69 are created to keep track of things.  The reference counts are all
  70 initialized: each record referred to by a delta has its refcount
  71 incremented, and each record that corresponds to a non-delete
  72 CVSRevision is incremented.  After that, any records with refcount==0
  73 are removed.  When one record is removed, that can cause another
  74 record's reference count to go to zero and be removed too,
  75 recursively.  When a TextRecord is deleted at this stage, its
  76 deltatext is also deleted from the delta database."""
  77
  78
  79 from cStringIO import StringIO
  80 import re
  81 import time
  82
  83 from cvs2svn_lib import config
  84 from cvs2svn_lib.common import DB_OPEN_NEW
  85 from cvs2svn_lib.common import DB_OPEN_READ
  86 from cvs2svn_lib.common import warning_prefix
  87 from cvs2svn_lib.common import FatalError
  88 from cvs2svn_lib.common import InternalError
  89 from cvs2svn_lib.common import is_trunk_revision
  90 from cvs2svn_lib.context import Ctx
  91 from cvs2svn_lib.log import Log
  92 from cvs2svn_lib.artifact_manager import artifact_manager
  93 from cvs2svn_lib.symbol import Trunk
  94 from cvs2svn_lib.cvs_item import CVSRevisionModification
  95 from cvs2svn_lib.database import Database
  96 from cvs2svn_lib.database import IndexedDatabase
  97 from cvs2svn_lib.rcs_stream import RCSStream
  98 from cvs2svn_lib.rcs_stream import MalformedDeltaException
  99 from cvs2svn_lib.revision_manager import RevisionCollector
 100 from cvs2svn_lib.revision_manager import RevisionReader
 101 from cvs2svn_lib.serializer import MarshalSerializer
 102 from cvs2svn_lib.serializer import CompressingSerializer
 103 from cvs2svn_lib.serializer import PrimedPickleSerializer
 104
 105 import cvs2svn_rcsparse
 106
 107
 108 class TextRecord(object):
 109   """Bookkeeping data for the text of a single CVSRevision."""
 110
 111   __slots__ = ['id', 'refcount']
 112
 113   def __init__(self, id):
 114     # The cvs_rev_id of the revision whose text this is.
 115     self.id = id
 116
 117     # The number of times that the text of this revision will be
 118     # retrieved.
 119     self.refcount = 0
 120
 121   def __getstate__(self):
 122     return (self.id, self.refcount,)
 123
 124   def __setstate__(self, state):
 125     (self.id, self.refcount,) = state
 126
 127   def increment_dependency_refcounts(self, text_record_db):
 128     """Increment the refcounts of any records that this one depends on."""
 129
 130     pass
 131
 132   def decrement_refcount(self, text_record_db):
 133     """Decrement the number of times our text still has to be checked out.
 134
 135     If the reference count goes to zero, call discard()."""
 136
 137     self.refcount -= 1
 138     if self.refcount == 0:
 139       text_record_db.discard(self.id)
 140
 141   def checkout(self, text_record_db):
 142     """Workhorse of the checkout process.
 143
 144     Return the text for this revision, decrement our reference count,
 145     and update the databases depending on whether there will be future
 146     checkouts."""
 147
 148     raise NotImplementedError()
 149
 150   def free(self, text_record_db):
 151     """This instance will never again be checked out; free it.
 152
 153     Also free any associated resources and decrement the refcounts of
 154     any other TextRecords that this one depends on."""
 155
 156     raise NotImplementedError()
 157
 158
 159 class FullTextRecord(TextRecord):
 160   __slots__ = []
 161
 162   def __getstate__(self):
 163     return (self.id, self.refcount,)
 164
 165   def __setstate__(self, state):
 166     (self.id, self.refcount,) = state
 167
 168   def checkout(self, text_record_db):
 169     text = text_record_db.delta_db[self.id]
 170     self.decrement_refcount(text_record_db)
 171     return text
 172
 173   def free(self, text_record_db):
 174     del text_record_db.delta_db[self.id]
 175
 176   def __str__(self):
 177     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 178
 179
 180 class DeltaTextRecord(TextRecord):
 181   __slots__ = ['pred_id']
 182
 183   def __init__(self, id, pred_id):
 184     TextRecord.__init__(self, id)
 185
 186     # The cvs_rev_id of the revision relative to which this delta is
 187     # defined.
 188     self.pred_id = pred_id
 189
 190   def __getstate__(self):
 191     return (self.id, self.refcount, self.pred_id,)
 192
 193   def __setstate__(self, state):
 194     (self.id, self.refcount, self.pred_id,) = state
 195
 196   def increment_dependency_refcounts(self, text_record_db):
 197     text_record_db[self.pred_id].refcount += 1
 198
 199   def checkout(self, text_record_db):
 200     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 201     co = RCSStream(base_text)
 202     delta_text = text_record_db.delta_db[self.id]
 203     co.apply_diff(delta_text)
 204     text = co.get_text()
 205     del co
 206     self.refcount -= 1
 207     if self.refcount == 0:
 208       # This text will never be needed again; just delete ourselves
 209       # without ever having stored the fulltext to the checkout
 210       # database:
 211       del text_record_db[self.id]
 212     else:
 213       # Store a new CheckedOutTextRecord in place of ourselves:
 214       text_record_db.checkout_db['%x' % self.id] = text
 215       new_text_record = CheckedOutTextRecord(self.id)
 216       new_text_record.refcount = self.refcount
 217       text_record_db.replace(new_text_record)
 218     return text
 219
 220   def free(self, text_record_db):
 221     del text_record_db.delta_db[self.id]
 222     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 223
 224   def __str__(self):
 225     return 'DeltaTextRecord(%x -> %x, %d)' \
 226            % (self.pred_id, self.id, self.refcount,)
 227
 228
 229 class CheckedOutTextRecord(TextRecord):
 230   __slots__ = []
 231
 232   def __getstate__(self):
 233     return (self.id, self.refcount,)
 234
 235   def __setstate__(self, state):
 236     (self.id, self.refcount,) = state
 237
 238   def checkout(self, text_record_db):
 239     text = text_record_db.checkout_db['%x' % self.id]
 240     self.decrement_refcount(text_record_db)
 241     return text
 242
 243   def free(self, text_record_db):
 244     del text_record_db.checkout_db['%x' % self.id]
 245
 246   def __str__(self):
 247     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 248
 249
 250 class NullDatabase(object):
 251   """A do-nothing database that can be used with TextRecordDatabase.
 252
 253   Use this when you don't actually want to allow anything to be
 254   deleted."""
 255
 256   def __delitem__(self, id):
 257     pass
 258
 259
 260 class TextRecordDatabase:
 261   """Holds the TextRecord instances that are currently live.
 262
 263   During CollectRevsPass and FilterSymbolsPass, files are processed
 264   one by one and a new TextRecordDatabase instance is used for each
 265   file.  During OutputPass, a single TextRecordDatabase instance is
 266   used for the duration of OutputPass; individual records are added
 267   and removed when they are active."""
 268
 269   def __init__(self, delta_db, checkout_db):
 270     # A map { cvs_rev_id -> TextRecord }.
 271     self.text_records = {}
 272
 273     # A database-like object using cvs_rev_ids as keys and containing
 274     # fulltext/deltatext strings as values.  Its __getitem__() method
 275     # is used to retrieve deltas when they are needed, and its
 276     # __delitem__() method is used to delete deltas when they can be
 277     # freed.  The modifiability of the delta database varies from pass
 278     # to pass, so the object stored here varies as well:
 279     #
 280     # CollectRevsPass: a fully-functional IndexedDatabase.  This
 281     #     allows deltas that will not be needed to be deleted.
 282     #
 283     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 284     #     modified during this pass, and we have no need to retrieve
 285     #     deltas, so we just use a dummy object here.
 286     #
 287     # OutputPass: a disabled IndexedDatabase.  During this pass we
 288     #     need to retrieve deltas, but we are not allowed to modify
 289     #     the delta database.  So we use an IndexedDatabase whose
 290     #     __del__() method has been disabled to do nothing.
 291     self.delta_db = delta_db
 292
 293     # A database-like object using cvs_rev_ids as keys and containing
 294     # fulltext strings as values.  This database is only set during
 295     # OutputPass.
 296     self.checkout_db = checkout_db
 297
 298     # If this is set to a list, then the list holds the ids of
 299     # text_records that have to be deleted; when discard() is called,
 300     # it adds the requested id to the list but does not delete it.  If
 301     # this member is set to None, then text_records are deleted
 302     # immediately when discard() is called.
 303     self.deferred_deletes = None
 304
 305   def __getstate__(self):
 306     return (self.text_records.values(),)
 307
 308   def __setstate__(self, state):
 309     (text_records,) = state
 310     self.text_records = {}
 311     for text_record in text_records:
 312       self.add(text_record)
 313     self.delta_db = NullDatabase()
 314     self.checkout_db = NullDatabase()
 315     self.deferred_deletes = None
 316
 317   def add(self, text_record):
 318     """Add TEXT_RECORD to our database.
 319
 320     There must not already be a record with the same id."""
 321
 322     assert not self.text_records.has_key(text_record.id)
 323
 324     self.text_records[text_record.id] = text_record
 325
 326   def __getitem__(self, id):
 327     return self.text_records[id]
 328
 329   def __delitem__(self, id):
 330     """Free the record with the specified ID."""
 331
 332     del self.text_records[id]
 333
 334   def replace(self, text_record):
 335     """Store TEXT_RECORD in place of the existing record with the same id.
 336
 337     Do not do anything with the old record."""
 338
 339     assert self.text_records.has_key(text_record.id)
 340     self.text_records[text_record.id] = text_record
 341
 342   def discard(self, *ids):
 343     """The text records with IDS are no longer needed; discard them.
 344
 345     This involves calling their free() methods and also removing them
 346     from SELF.
 347
 348     If SELF.deferred_deletes is not None, then the ids to be deleted
 349     are added to the list instead of deleted immediately.  This
 350     mechanism is to prevent a stack overflow from the avalanche of
 351     deletes that can result from deleting a long chain of revisions."""
 352
 353     if self.deferred_deletes is None:
 354       # This is an outer-level delete.
 355       self.deferred_deletes = list(ids)
 356       while self.deferred_deletes:
 357         id = self.deferred_deletes.pop()
 358         text_record = self[id]
 359         if text_record.refcount != 0:
 360           raise InternalError(
 361               'TextRecordDatabase.discard(%s) called with refcount = %d'
 362               % (text_record, text_record.refcount,)
 363               )
 364         # This call might cause other text_record ids to be added to
 365         # self.deferred_deletes:
 366         text_record.free(self)
 367         del self[id]
 368       self.deferred_deletes = None
 369     else:
 370       self.deferred_deletes.extend(ids)
 371
 372   def itervalues(self):
 373     return self.text_records.itervalues()
 374
 375   def recompute_refcounts(self, cvs_file_items):
 376     """Recompute the refcounts of the contained TextRecords.
 377
 378     Use CVS_FILE_ITEMS to determine which records will be needed by
 379     cvs2svn."""
 380
 381     # First clear all of the refcounts:
 382     for text_record in self.itervalues():
 383       text_record.refcount = 0
 384
 385     # Now increment the reference count of records that are needed as
 386     # the source of another record's deltas:
 387     for text_record in self.itervalues():
 388       text_record.increment_dependency_refcounts(self.text_records)
 389
 390     # Now increment the reference count of records that will be needed
 391     # by cvs2svn:
 392     for lod_items in cvs_file_items.iter_lods():
 393       for cvs_rev in lod_items.cvs_revisions:
 394         if isinstance(cvs_rev, CVSRevisionModification):
 395           self[cvs_rev.id].refcount += 1
 396
 397   def free_unused(self):
 398     """Free any TextRecords whose reference counts are zero."""
 399
 400     # The deletion of some of these text records might cause others to
 401     # be unused, in which case they will be deleted automatically.
 402     # But since the initially-unused records are not referred to by
 403     # any others, we don't have to be afraid that they will be deleted
 404     # before we get to them.  But it *is* crucial that we create the
 405     # whole unused list before starting the loop.
 406
 407     unused = [
 408         text_record.id
 409         for text_record in self.itervalues()
 410         if text_record.refcount == 0
 411         ]
 412
 413     self.discard(*unused)
 414
 415   def log_leftovers(self):
 416     """If any TextRecords still exist, log them."""
 417
 418     if self.text_records:
 419       Log().warn(
 420           "%s: internal problem: leftover revisions in the checkout cache:"
 421           % warning_prefix)
 422       for text_record in self.itervalues():
 423         Log().warn('    %s' % (text_record,))
 424
 425   def __repr__(self):
 426     """Debugging output of the current contents of the TextRecordDatabase."""
 427
 428     retval = ['TextRecordDatabase:']
 429     for text_record in self.itervalues():
 430       retval.append('    %s' % (text_record,))
 431     return '\n'.join(retval)
 432
 433
 434 class _Sink(cvs2svn_rcsparse.Sink):
 435   def __init__(self, revision_collector, cvs_file_items):
 436     self.revision_collector = revision_collector
 437     self.cvs_file_items = cvs_file_items
 438
 439     # A map {rev : base_rev} indicating that the text for rev is
 440     # stored in CVS as a delta relative to base_rev.
 441     self.base_revisions = {}
 442
 443     # The revision that is stored with its fulltext in CVS (usually
 444     # the oldest revision on trunk):
 445     self.head_revision = None
 446
 447     # The first logical revision on trunk (usually '1.1'):
 448     self.revision_1_1 = None
 449
 450     # Keep track of the revisions whose revision info has been seen so
 451     # far (to avoid repeated revision info blocks):
 452     self.revisions_seen = set()
 453
 454   def set_head_revision(self, revision):
 455     self.head_revision = revision
 456
 457   def define_revision(
 458         self, revision, timestamp, author, state, branches, next
 459         ):
 460     if next:
 461       self.base_revisions[next] = revision
 462     else:
 463       if is_trunk_revision(revision):
 464         self.revision_1_1 = revision
 465
 466     for branch in branches:
 467       self.base_revisions[branch] = revision
 468
 469   def set_revision_info(self, revision, log, text):
 470     if revision in self.revisions_seen:
 471       # One common form of CVS repository corruption is that the
 472       # Deltatext block for revision 1.1 appears twice.  CollectData
 473       # has already warned about this problem; here we can just ignore
 474       # it.
 475       return
 476     else:
 477       self.revisions_seen.add(revision)
 478
 479     cvs_rev_id = self.cvs_file_items.original_ids[revision]
 480     if is_trunk_revision(revision):
 481       # On trunk, revisions are encountered in reverse order (1.<N>
 482       # ... 1.1) and deltas are inverted.  The first text that we see
 483       # is the fulltext for the HEAD revision.  After that, the text
 484       # corresponding to revision 1.N is the delta (1.<N+1> ->
 485       # 1.<N>)).  We have to invert the deltas here so that we can
 486       # read the revisions out in dependency order; that is, for
 487       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 488       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 489       # compute the delta for a revision until we see its logical
 490       # parent.  When we finally see revision 1.1 (which is recognized
 491       # because it doesn't have a parent), we can record the diff (1.1
 492       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 493
 494       if revision == self.head_revision:
 495         # This is HEAD, as fulltext.  Initialize the RCSStream so
 496         # that we can compute deltas backwards in time.
 497         self._stream = RCSStream(text)
 498         self._stream_revision = revision
 499       else:
 500         # Any other trunk revision is a backward delta.  Apply the
 501         # delta to the RCSStream to mutate it to the contents of this
 502         # revision, and also to get the reverse delta, which we store
 503         # as the forward delta of our child revision.
 504         try:
 505           text = self._stream.invert_diff(text)
 506         except MalformedDeltaException, e:
 507           Log().error(
 508               'Malformed RCS delta in %s, revision %s: %s'
 509               % (self.cvs_file_items.cvs_file.filename, revision, e)
 510               )
 511           raise RuntimeError()
 512         text_record = DeltaTextRecord(
 513             self.cvs_file_items.original_ids[self._stream_revision],
 514             cvs_rev_id
 515             )
 516         self.revision_collector._writeout(text_record, text)
 517         self._stream_revision = revision
 518
 519       if revision == self.revision_1_1:
 520         # This is revision 1.1.  Write its fulltext:
 521         text_record = FullTextRecord(cvs_rev_id)
 522         self.revision_collector._writeout(
 523             text_record, self._stream.get_text()
 524             )
 525
 526         # There will be no more trunk revisions delivered, so free the
 527         # RCSStream.
 528         del self._stream
 529         del self._stream_revision
 530
 531     else:
 532       # On branches, revisions are encountered in logical order
 533       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 534       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 535       # <BRANCH>.<N>).  That's what we need, so just store it.
 536
 537       # FIXME: It would be nice to avoid writing out branch deltas
 538       # when --trunk-only.  (They will be deleted when finish_file()
 539       # is called, but if the delta db is in an IndexedDatabase the
 540       # deletions won't actually recover any disk space.)
 541       text_record = DeltaTextRecord(
 542           cvs_rev_id,
 543           self.cvs_file_items.original_ids[self.base_revisions[revision]]
 544           )
 545       self.revision_collector._writeout(text_record, text)
 546
 547     return None
 548
 549
 550 class InternalRevisionCollector(RevisionCollector):
 551   """The RevisionCollector used by InternalRevisionReader."""
 552
 553   def __init__(self, compress):
 554     RevisionCollector.__init__(self)
 555     self._compress = compress
 556
 557   def register_artifacts(self, which_pass):
 558     artifact_manager.register_temp_file(
 559         config.RCS_DELTAS_INDEX_TABLE, which_pass
 560         )
 561     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 562     artifact_manager.register_temp_file(
 563         config.RCS_TREES_INDEX_TABLE, which_pass
 564         )
 565     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 566
 567   def start(self):
 568     ser = MarshalSerializer()
 569     if self._compress:
 570       ser = CompressingSerializer(ser)
 571     self._rcs_deltas = IndexedDatabase(
 572         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 573         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 574         DB_OPEN_NEW, ser
 575         )
 576     primer = (FullTextRecord, DeltaTextRecord)
 577     self._rcs_trees = IndexedDatabase(
 578         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 579         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 580         DB_OPEN_NEW, PrimedPickleSerializer(primer)
 581         )
 582
 583   def _writeout(self, text_record, text):
 584     self.text_record_db.add(text_record)
 585     self._rcs_deltas[text_record.id] = text
 586
 587   def process_file(self, cvs_file_items):
 588     """Read revision information for the file described by CVS_FILE_ITEMS.
 589
 590     Compute the text record refcounts, discard any records that are
 591     unneeded, and store the text records for the file to the
 592     _rcs_trees database."""
 593
 594     # A map from cvs_rev_id to TextRecord instance:
 595     self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
 596
 597     cvs2svn_rcsparse.parse(
 598         open(cvs_file_items.cvs_file.filename, 'rb'),
 599         _Sink(self, cvs_file_items),
 600         )
 601
 602     self.text_record_db.recompute_refcounts(cvs_file_items)
 603     self.text_record_db.free_unused()
 604     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 605     del self.text_record_db
 606
 607   def finish(self):
 608     self._rcs_deltas.close()
 609     self._rcs_trees.close()
 610
 611
 612 class _KeywordExpander:
 613   """A class whose instances provide substitutions for CVS keywords.
 614
 615   This class is used via its __call__() method, which should be called
 616   with a match object representing a match for a CVS keyword string.
 617   The method returns the replacement for the matched text.
 618
 619   The __call__() method works by calling the method with the same name
 620   as that of the CVS keyword (converted to lower case).
 621
 622   Instances of this class can be passed as the REPL argument to
 623   re.sub()."""
 624
 625   date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
 626   date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12
 627
 628   date_fmt = date_fmt_new
 629
 630   @classmethod
 631   def use_old_date_format(klass):
 632       """Class method to ensure exact compatibility with CVS 1.11
 633       output.  Use this if you want to verify your conversion and you're
 634       using CVS 1.11."""
 635       klass.date_fmt = klass.date_fmt_old
 636
 637   def __init__(self, cvs_rev):
 638     self.cvs_rev = cvs_rev
 639
 640   def __call__(self, match):
 641     return '$%s: %s $' % \
 642            (match.group(1), getattr(self, match.group(1).lower())(),)
 643
 644   def author(self):
 645     return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
 646
 647   def date(self):
 648     return time.strftime(self.date_fmt,
 649                          time.gmtime(self.cvs_rev.timestamp))
 650
 651   def header(self):
 652     return '%s %s %s %s Exp' % \
 653            (self.source(), self.cvs_rev.rev, self.date(), self.author())
 654
 655   def id(self):
 656     return '%s %s %s %s Exp' % \
 657            (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
 658
 659   def locker(self):
 660     # Handle kvl like kv, as a converted repo is supposed to have no
 661     # locks.
 662     return ''
 663
 664   def log(self):
 665     # Would need some special handling.
 666     return 'not supported by cvs2svn'
 667
 668   def name(self):
 669     # Cannot work, as just creating a new symbol does not check out
 670     # the revision again.
 671     return 'not supported by cvs2svn'
 672
 673   def rcsfile(self):
 674     return self.cvs_rev.cvs_file.basename + ",v"
 675
 676   def revision(self):
 677     return self.cvs_rev.rev
 678
 679   def source(self):
 680     project = self.cvs_rev.cvs_file.project
 681     return project.cvs_repository_root + '/' + project.cvs_module + \
 682         self.cvs_rev.cvs_file.cvs_path + ",v"
 683
 684   def state(self):
 685     # We check out only live revisions.
 686     return 'Exp'
 687
 688
 689 class InternalRevisionReader(RevisionReader):
 690   """A RevisionReader that reads the contents from an own delta store."""
 691
 692   _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
 693   _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
 694   _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
 695
 696   def __init__(self, compress):
 697     self._compress = compress
 698
 699   def register_artifacts(self, which_pass):
 700     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 701     artifact_manager.register_temp_file_needed(
 702         config.RCS_DELTAS_STORE, which_pass
 703         )
 704     artifact_manager.register_temp_file_needed(
 705         config.RCS_DELTAS_INDEX_TABLE, which_pass
 706         )
 707     artifact_manager.register_temp_file_needed(
 708         config.RCS_TREES_STORE, which_pass
 709         )
 710     artifact_manager.register_temp_file_needed(
 711         config.RCS_TREES_INDEX_TABLE, which_pass
 712         )
 713
 714   def start(self):
 715     self._delta_db = IndexedDatabase(
 716         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 717         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 718         DB_OPEN_READ)
 719     self._delta_db.__delitem__ = lambda id: None
 720     self._tree_db = IndexedDatabase(
 721         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 722         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 723         DB_OPEN_READ)
 724     ser = MarshalSerializer()
 725     if self._compress:
 726       ser = CompressingSerializer(ser)
 727     self._co_db = Database(
 728         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
 729         ser)
 730
 731     # The set of CVSFile instances whose TextRecords have already been
 732     # read:
 733     self._loaded_files = set()
 734
 735     # A map { CVSFILE : _FileTree } for files that currently have live
 736     # revisions:
 737     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 738
 739   def _get_text_record(self, cvs_rev):
 740     """Return the TextRecord instance for CVS_REV.
 741
 742     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 743     do so now."""
 744
 745     if cvs_rev.cvs_file not in self._loaded_files:
 746       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 747         self._text_record_db.add(text_record)
 748       self._loaded_files.add(cvs_rev.cvs_file)
 749
 750     return self._text_record_db[cvs_rev.id]
 751
 752   def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
 753     """Check out the text for revision C_REV from the repository.
 754
 755     Return the text wrapped in a readable file object.  If
 756     SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
 757     _un_expanded prior to returning the file content.  Note that $Log$
 758     never actually generates a log (which makes test 'requires_cvs()'
 759     fail).
 760
 761     Revisions may be requested in any order, but if they are not
 762     requested in dependency order the checkout database will become
 763     very large.  Revisions may be skipped.  Each revision may be
 764     requested only once."""
 765
 766     try:
 767       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 768     except MalformedDeltaException, (msg):
 769       raise FatalError('Malformed RCS delta in %s, revision %s: %s'
 770                        % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
 771     if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
 772       if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
 773         text = self._kw_re.sub(r'$\1$', text)
 774       else:
 775         text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
 776
 777     return StringIO(text)
 778
 779   def finish(self):
 780     self._text_record_db.log_leftovers()
 781
 782     del self._text_record_db
 783     self._delta_db.close()
 784     self._tree_db.close()
 785     self._co_db.close()
 786