cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionRecorder saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionExcluder copies the revision trees to a new database,
  28 omitting excluded branches.
  29
  30 InternalRevisionReader produces the revisions' contents on demand.  To
  31 generate the text for a typical revision, we need the revision's delta
  32 text plus the fulltext of the previous revision.  Therefore, we
  33 maintain a checkout database containing a copy of the fulltext of any
  34 revision for which subsequent revisions still need to be retrieved.
  35 It is crucial to remove text from this database as soon as it is no
  36 longer needed, to prevent it from growing enormous.
  37
  38 There are two reasons that the text from a revision can be needed: (1)
  39 because the revision itself still needs to be output to a dumpfile;
  40 (2) because another revision needs it as the base of its delta.  We
  41 maintain a reference count for each revision, which includes *both*
  42 possibilities.  The first time a revision's text is needed, it is
  43 generated by applying the revision's deltatext to the previous
  44 revision's fulltext, and the resulting fulltext is stored in the
  45 checkout database.  Each time a revision's fulltext is retrieved, its
  46 reference count is decremented.  When the reference count goes to
  47 zero, then the fulltext is deleted from the checkout database.
  48
  49 The administrative data for managing this consists of one TextRecord
  50 entry for each revision.  Each TextRecord has an id, which is the same
  51 id as used for the corresponding CVSRevision instance.  It also
  52 maintains a count of the times it is expected to be retrieved.
  53 TextRecords come in several varieties:
  54
  55 FullTextRecord -- Used for revisions whose fulltext is contained
  56     directly in the RCS file, and therefore available during
  57     CollectRevsPass (i.e., typically revision 1.1 of each file).
  58
  59 DeltaTextRecord -- Used for revisions that are defined via a delta
  60     relative to some other TextRecord.  These records record the id of
  61     the TextRecord that holds the base text against which the delta is
  62     defined.  When the text for a DeltaTextRecord is retrieved, the
  63     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  64     instance is created to take its place.
  65
  66 CheckedOutTextRecord -- Used during OutputPass for a revision that
  67     started out as a DeltaTextRecord, but has already been retrieved
  68     (and therefore its fulltext is stored in the checkout database).
  69
  70 While a file is being processed during CollectRevsPass, the fulltext
  71 and deltas are stored to the delta database, and TextRecord instances
  72 are created to keep track of things.  The reference counts are all
  73 initialized to zero.
  74
After CollectRevsPass has done any preliminary tree mangling, its
_FileDataCollector.parse_completed() method calls
RevisionRecorder.finish_file(), passing it the CVSFileItems instance
that describes the revisions in the file.  At this point the reference
counts for the file's TextRecords are updated: each record referred to
by a delta has its refcount incremented, and each record that
corresponds to a non-delete CVSRevision is incremented.  After that,
any records with refcount==0 are removed.  When one record is removed,
that can cause another record's reference count to go to zero and be
removed too, recursively.  When a TextRecord is deleted at this stage,
its deltatext is also deleted from the delta database.
  86
  87 In FilterSymbolsPass, the exact same procedure (described in the
  88 previous paragraph) is repeated, but this time using the CVSFileItems
  89 after it has been updated for excluded symbols, symbol
  90 preferred-parent grafting, etc."""
  91
  92
  93 import cStringIO
  94 import re
  95 import time
  96
  97 from cvs2svn_lib import config
  98 from cvs2svn_lib.common import DB_OPEN_NEW
  99 from cvs2svn_lib.common import DB_OPEN_READ
 100 from cvs2svn_lib.common import warning_prefix
 101 from cvs2svn_lib.common import FatalError
 102 from cvs2svn_lib.common import InternalError
 103 from cvs2svn_lib.context import Ctx
 104 from cvs2svn_lib.log import Log
 105 from cvs2svn_lib.artifact_manager import artifact_manager
 106 from cvs2svn_lib.symbol import Trunk
 107 from cvs2svn_lib.cvs_item import CVSRevisionModification
 108 from cvs2svn_lib.database import Database
 109 from cvs2svn_lib.database import IndexedDatabase
 110 from cvs2svn_lib.rcs_stream import RCSStream
 111 from cvs2svn_lib.rcs_stream import MalformedDeltaException
 112 from cvs2svn_lib.revision_manager import RevisionRecorder
 113 from cvs2svn_lib.revision_manager import RevisionExcluder
 114 from cvs2svn_lib.revision_manager import RevisionReader
 115 from cvs2svn_lib.serializer import MarshalSerializer
 116 from cvs2svn_lib.serializer import CompressingSerializer
 117 from cvs2svn_lib.serializer import PrimedPickleSerializer
 118
 119
 120 class TextRecord(object):
 121   """Bookkeeping data for the text of a single CVSRevision."""
 122
 123   __slots__ = ['id', 'refcount']
 124
 125   def __init__(self, id):
 126     # The cvs_rev_id of the revision whose text this is.
 127     self.id = id
 128
 129     # The number of times that the text of this revision will be
 130     # retrieved.
 131     self.refcount = 0
 132
 133   def __getstate__(self):
 134     return (self.id, self.refcount,)
 135
 136   def __setstate__(self, state):
 137     (self.id, self.refcount,) = state
 138
 139   def increment_dependency_refcounts(self, text_record_db):
 140     """Increment the refcounts of any records that this one depends on."""
 141
 142     pass
 143
 144   def decrement_refcount(self, text_record_db):
 145     """Decrement the number of times our text still has to be checked out.
 146
 147     If the reference count goes to zero, call discard()."""
 148
 149     self.refcount -= 1
 150     if self.refcount == 0:
 151       text_record_db.discard(self.id)
 152
 153   def checkout(self, text_record_db):
 154     """Workhorse of the checkout process.
 155
 156     Return the text for this revision, decrement our reference count,
 157     and update the databases depending on whether there will be future
 158     checkouts."""
 159
 160     raise NotImplementedError()
 161
 162   def free(self, text_record_db):
 163     """This instance will never again be checked out; free it.
 164
 165     Also free any associated resources and decrement the refcounts of
 166     any other TextRecords that this one depends on."""
 167
 168     raise NotImplementedError()
 169
 170
 171 class FullTextRecord(TextRecord):
 172   __slots__ = []
 173
 174   def __getstate__(self):
 175     return (self.id, self.refcount,)
 176
 177   def __setstate__(self, state):
 178     (self.id, self.refcount,) = state
 179
 180   def checkout(self, text_record_db):
 181     text = text_record_db.delta_db[self.id]
 182     self.decrement_refcount(text_record_db)
 183     return text
 184
 185   def free(self, text_record_db):
 186     del text_record_db.delta_db[self.id]
 187
 188   def __str__(self):
 189     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 190
 191
 192 class DeltaTextRecord(TextRecord):
 193   __slots__ = ['pred_id']
 194
 195   def __init__(self, id, pred_id):
 196     TextRecord.__init__(self, id)
 197
 198     # The cvs_rev_id of the revision relative to which this delta is
 199     # defined.
 200     self.pred_id = pred_id
 201
 202   def __getstate__(self):
 203     return (self.id, self.refcount, self.pred_id,)
 204
 205   def __setstate__(self, state):
 206     (self.id, self.refcount, self.pred_id,) = state
 207
 208   def increment_dependency_refcounts(self, text_record_db):
 209     text_record_db[self.pred_id].refcount += 1
 210
 211   def checkout(self, text_record_db):
 212     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 213     co = RCSStream(base_text)
 214     delta_text = text_record_db.delta_db[self.id]
 215     co.apply_diff(delta_text)
 216     text = co.get_text()
 217     del co
 218     self.refcount -= 1
 219     if self.refcount == 0:
 220       # This text will never be needed again; just delete ourselves
 221       # without ever having stored the fulltext to the checkout
 222       # database:
 223       del text_record_db[self.id]
 224     else:
 225       # Store a new CheckedOutTextRecord in place of ourselves:
 226       text_record_db.checkout_db['%x' % self.id] = text
 227       new_text_record = CheckedOutTextRecord(self.id)
 228       new_text_record.refcount = self.refcount
 229       text_record_db.replace(new_text_record)
 230     return text
 231
 232   def free(self, text_record_db):
 233     del text_record_db.delta_db[self.id]
 234     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 235
 236   def __str__(self):
 237     return 'DeltaTextRecord(%x -> %x, %d)' \
 238            % (self.pred_id, self.id, self.refcount,)
 239
 240
 241 class CheckedOutTextRecord(TextRecord):
 242   __slots__ = []
 243
 244   def __getstate__(self):
 245     return (self.id, self.refcount,)
 246
 247   def __setstate__(self, state):
 248     (self.id, self.refcount,) = state
 249
 250   def checkout(self, text_record_db):
 251     text = text_record_db.checkout_db['%x' % self.id]
 252     self.decrement_refcount(text_record_db)
 253     return text
 254
 255   def free(self, text_record_db):
 256     del text_record_db.checkout_db['%x' % self.id]
 257
 258   def __str__(self):
 259     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 260
 261
 262 class NullDatabase(object):
 263   """A do-nothing database that can be used with TextRecordDatabase.
 264
 265   Use this when you don't actually want to allow anything to be
 266   deleted."""
 267
 268   def __delitem__(self, id):
 269     pass
 270
 271
 272 class TextRecordDatabase:
 273   """Holds the TextRecord instances that are currently live.
 274
 275   During CollectRevsPass and FilterSymbolsPass, files are processed
 276   one by one and a new TextRecordDatabase instance is used for each
 277   file.  During OutputPass, a single TextRecordDatabase instance is
 278   used for the duration of OutputPass; individual records are added
 279   and removed when they are active."""
 280
 281   def __init__(self, delta_db, checkout_db):
 282     # A map { cvs_rev_id -> TextRecord }.
 283     self.text_records = {}
 284
 285     # A database-like object using cvs_rev_ids as keys and containing
 286     # fulltext/deltatext strings as values.  Its __getitem__() method
 287     # is used to retrieve deltas when they are needed, and its
 288     # __delitem__() method is used to delete deltas when they can be
 289     # freed.  The modifiability of the delta database varies from pass
 290     # to pass, so the object stored here varies as well:
 291     #
 292     # CollectRevsPass: a fully-functional IndexedDatabase.  This
 293     #     allows deltas that will not be needed to be deleted.
 294     #
 295     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 296     #     modified during this pass, and we have no need to retrieve
 297     #     deltas, so we just use a dummy object here.
 298     #
 299     # OutputPass: a disabled IndexedDatabase.  During this pass we
 300     # need to retrieve deltas, but we are not allowed to modify the
 301     # delta database.  So we use an IndexedDatabase whose __del__()
 302     # method has been disabled to do nothing.
 303     self.delta_db = delta_db
 304
 305     # A database-like object using cvs_rev_ids as keys and containing
 306     # fulltext strings as values.  This database is only set during
 307     # OutputPass.
 308     self.checkout_db = checkout_db
 309
 310     # If this is set to a list, then the list holds the ids of
 311     # text_records that have to be deleted; when discard() is called,
 312     # it adds the requested id to the list but does not delete it.  If
 313     # this member is set to None, then text_records are deleted
 314     # immediately when discard() is called.
 315     self.deferred_deletes = None
 316
 317   def __getstate__(self):
 318     return (self.text_records.values(),)
 319
 320   def __setstate__(self, state):
 321     (text_records,) = state
 322     self.text_records = {}
 323     for text_record in text_records:
 324       self.add(text_record)
 325     self.delta_db = NullDatabase()
 326     self.checkout_db = NullDatabase()
 327     self.deferred_deletes = None
 328
 329   def add(self, text_record):
 330     """Add TEXT_RECORD to our database.
 331
 332     There must not already be a record with the same id."""
 333
 334     assert not self.text_records.has_key(text_record.id)
 335
 336     self.text_records[text_record.id] = text_record
 337
 338   def __getitem__(self, id):
 339     return self.text_records[id]
 340
 341   def __delitem__(self, id):
 342     """Free the record with the specified ID."""
 343
 344     del self.text_records[id]
 345
 346   def replace(self, text_record):
 347     """Store TEXT_RECORD in place of the existing record with the same id.
 348
 349     Do not do anything with the old record."""
 350
 351     assert self.text_records.has_key(text_record.id)
 352     self.text_records[text_record.id] = text_record
 353
 354   def discard(self, *ids):
 355     """The text records with IDS are no longer needed; discard them.
 356
 357     This involves calling their free() methods and also removing them
 358     from SELF.
 359
 360     If SELF.deferred_deletes is not None, then the ids to be deleted
 361     are added to the list instead of deleted immediately.  This
 362     mechanism is to prevent a stack overflow from the avalanche of
 363     deletes that can result from deleting a long chain of revisions."""
 364
 365     if self.deferred_deletes is None:
 366       # This is an outer-level delete.
 367       self.deferred_deletes = list(ids)
 368       while self.deferred_deletes:
 369         id = self.deferred_deletes.pop()
 370         text_record = self[id]
 371         if text_record.refcount != 0:
 372           raise InternalError(
 373               'TextRecordDatabase.discard(%s) called with refcount = %d'
 374               % (text_record, text_record.refcount,)
 375               )
 376         # This call might cause other text_record ids to be added to
 377         # self.deferred_deletes:
 378         text_record.free(self)
 379         del self[id]
 380       self.deferred_deletes = None
 381     else:
 382       self.deferred_deletes.extend(ids)
 383
 384   def itervalues(self):
 385     return self.text_records.itervalues()
 386
 387   def recompute_refcounts(self, cvs_file_items):
 388     """Recompute the refcounts of the contained TextRecords.
 389
 390     Use CVS_FILE_ITEMS to determine which records will be needed by
 391     cvs2svn."""
 392
 393     # First clear all of the refcounts:
 394     for text_record in self.itervalues():
 395       text_record.refcount = 0
 396
 397     # Now increment the reference count of records that are needed as
 398     # the source of another record's deltas:
 399     for text_record in self.itervalues():
 400       text_record.increment_dependency_refcounts(self.text_records)
 401
 402     # Now increment the reference count of records that will be needed
 403     # by cvs2svn:
 404     for lod_items in cvs_file_items.iter_lods():
 405       for cvs_rev in lod_items.cvs_revisions:
 406         if isinstance(cvs_rev, CVSRevisionModification):
 407           self[cvs_rev.id].refcount += 1
 408
 409   def free_unused(self):
 410     """Free any TextRecords whose reference counts are zero."""
 411
 412     # The deletion of some of these text records might cause others to
 413     # be unused, in which case they will be deleted automatically.
 414     # But since the initially-unused records are not referred to by
 415     # any others, we don't have to be afraid that they will be deleted
 416     # before we get to them.  But it *is* crucial that we create the
 417     # whole unused list before starting the loop.
 418
 419     unused = [
 420         text_record.id
 421         for text_record in self.itervalues()
 422         if text_record.refcount == 0
 423         ]
 424
 425     self.discard(*unused)
 426
 427   def log_leftovers(self):
 428     """If any TextRecords still exist, log them."""
 429
 430     if self.text_records:
 431       Log().warn(
 432           "%s: internal problem: leftover revisions in the checkout cache:"
 433           % warning_prefix)
 434       for text_record in self.itervalues():
 435         Log().warn('    %s' % (text_record,))
 436
 437   def __repr__(self):
 438     """Debugging output of the current contents of the TextRecordDatabase."""
 439
 440     retval = ['TextRecordDatabase:']
 441     for text_record in self.itervalues():
 442       retval.append('    %s' % (text_record,))
 443     return '\n'.join(retval)
 444
 445
 446 class InternalRevisionRecorder(RevisionRecorder):
 447   """A RevisionRecorder that reconstructs the fulltext internally."""
 448
 449   def __init__(self, compress):
 450     RevisionRecorder.__init__(self)
 451     self._compress = compress
 452
 453   def register_artifacts(self, which_pass):
 454     artifact_manager.register_temp_file(
 455         config.RCS_DELTAS_INDEX_TABLE, which_pass
 456         )
 457     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 458     artifact_manager.register_temp_file(
 459         config.RCS_TREES_INDEX_TABLE, which_pass
 460         )
 461     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 462
 463   def start(self):
 464     ser = MarshalSerializer()
 465     if self._compress:
 466       ser = CompressingSerializer(ser)
 467     self._rcs_deltas = IndexedDatabase(
 468         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 469         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 470         DB_OPEN_NEW, ser)
 471     primer = (FullTextRecord, DeltaTextRecord)
 472     self._rcs_trees = IndexedDatabase(
 473         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 474         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 475         DB_OPEN_NEW, PrimedPickleSerializer(primer))
 476
 477   def start_file(self, cvs_file_items):
 478     self._cvs_file_items = cvs_file_items
 479
 480     # A map from cvs_rev_id to TextRecord instance:
 481     self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
 482
 483   def record_text(self, cvs_rev, log, text):
 484     if isinstance(cvs_rev.lod, Trunk):
 485       # On trunk, revisions are encountered in reverse order (1.<N>
 486       # ... 1.1) and deltas are inverted.  The first text that we see
 487       # is the fulltext for the HEAD revision.  After that, the text
 488       # corresponding to revision 1.N is the delta (1.<N+1> ->
 489       # 1.<N>)).  We have to invert the deltas here so that we can
 490       # read the revisions out in dependency order; that is, for
 491       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 492       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 493       # compute the delta for a revision until we see its logical
 494       # parent.  When we finally see revision 1.1 (which is recognized
 495       # because it doesn't have a parent), we can record the diff (1.1
 496       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 497
 498       if cvs_rev.next_id is None:
 499         # This is HEAD, as fulltext.  Initialize the RCSStream so
 500         # that we can compute deltas backwards in time.
 501         self._stream = RCSStream(text)
 502       else:
 503         # Any other trunk revision is a backward delta.  Apply the
 504         # delta to the RCSStream to mutate it to the contents of this
 505         # revision, and also to get the reverse delta, which we store
 506         # as the forward delta of our child revision.
 507         try:
 508           text = self._stream.invert_diff(text)
 509         except MalformedDeltaException, (msg):
 510           Log().error('Malformed RCS delta in %s, revision %s: %s'
 511                       % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
 512                          msg))
 513           raise RuntimeError
 514         text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
 515         self._writeout(text_record, text)
 516
 517       if cvs_rev.prev_id is None:
 518         # This is revision 1.1.  Write its fulltext:
 519         text_record = FullTextRecord(cvs_rev.id)
 520         self._writeout(text_record, self._stream.get_text())
 521
 522         # There will be no more trunk revisions delivered, so free the
 523         # RCSStream.
 524         del self._stream
 525
 526     else:
 527       # On branches, revisions are encountered in logical order
 528       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 529       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 530       # <BRANCH>.<N>).  That's what we need, so just store it.
 531
 532       # FIXME: It would be nice to avoid writing out branch deltas
 533       # when --trunk-only.  (They will be deleted when finish_file()
 534       # is called, but if the delta db is in an IndexedDatabase the
 535       # deletions won't actually recover any disk space.)
 536       text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
 537       self._writeout(text_record, text)
 538
 539     return None
 540
 541   def _writeout(self, text_record, text):
 542     self.text_record_db.add(text_record)
 543     self._rcs_deltas[text_record.id] = text
 544
 545   def finish_file(self, cvs_file_items):
 546     """Finish processing of the current file.
 547
 548     Compute the initial text record refcounts, discard any records
 549     that are unneeded, and store the text records for the file to the
 550     _rcs_trees database."""
 551
 552     # Delete our copy of the preliminary CVSFileItems:
 553     del self._cvs_file_items
 554
 555     self.text_record_db.recompute_refcounts(cvs_file_items)
 556     self.text_record_db.free_unused()
 557     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 558     del self.text_record_db
 559
 560   def finish(self):
 561     self._rcs_deltas.close()
 562     self._rcs_trees.close()
 563
 564
 565 class InternalRevisionExcluder(RevisionExcluder):
 566   """The RevisionExcluder used by InternalRevisionReader."""
 567
 568   def register_artifacts(self, which_pass):
 569     artifact_manager.register_temp_file_needed(
 570         config.RCS_TREES_STORE, which_pass
 571         )
 572     artifact_manager.register_temp_file_needed(
 573         config.RCS_TREES_INDEX_TABLE, which_pass
 574         )
 575     artifact_manager.register_temp_file(
 576         config.RCS_TREES_FILTERED_STORE, which_pass
 577         )
 578     artifact_manager.register_temp_file(
 579         config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
 580         )
 581
 582   def start(self):
 583     self._tree_db = IndexedDatabase(
 584         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 585         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 586         DB_OPEN_READ)
 587     primer = (FullTextRecord, DeltaTextRecord)
 588     self._new_tree_db = IndexedDatabase(
 589         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
 590         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
 591         DB_OPEN_NEW, PrimedPickleSerializer(primer))
 592
 593   def process_file(self, cvs_file_items):
 594     text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
 595     text_record_db.recompute_refcounts(cvs_file_items)
 596     text_record_db.free_unused()
 597     self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db
 598
 599   def finish(self):
 600     self._tree_db.close()
 601     self._new_tree_db.close()
 602
 603
 604 class _KeywordExpander:
 605   """A class whose instances provide substitutions for CVS keywords.
 606
 607   This class is used via its __call__() method, which should be called
 608   with a match object representing a match for a CVS keyword string.
 609   The method returns the replacement for the matched text.
 610
 611   The __call__() method works by calling the method with the same name
 612   as that of the CVS keyword (converted to lower case).
 613
 614   Instances of this class can be passed as the REPL argument to
 615   re.sub()."""
 616
 617   date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
 618   date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12
 619
 620   date_fmt = date_fmt_new
 621
 622   @classmethod
 623   def use_old_date_format(klass):
 624       """Class method to ensure exact compatibility with CVS 1.11
 625       output.  Use this if you want to verify your conversion and you're
 626       using CVS 1.11."""
 627       klass.date_fmt = klass.date_fmt_old
 628
 629   def __init__(self, cvs_rev):
 630     self.cvs_rev = cvs_rev
 631
 632   def __call__(self, match):
 633     return '$%s: %s $' % \
 634            (match.group(1), getattr(self, match.group(1).lower())(),)
 635
 636   def author(self):
 637     return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
 638
 639   def date(self):
 640     return time.strftime(self.date_fmt,
 641                          time.gmtime(self.cvs_rev.timestamp))
 642
 643   def header(self):
 644     return '%s %s %s %s Exp' % \
 645            (self.source(), self.cvs_rev.rev, self.date(), self.author())
 646
 647   def id(self):
 648     return '%s %s %s %s Exp' % \
 649            (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
 650
 651   def locker(self):
 652     # Handle kvl like kv, as a converted repo is supposed to have no
 653     # locks.
 654     return ''
 655
 656   def log(self):
 657     # Would need some special handling.
 658     return 'not supported by cvs2svn'
 659
 660   def name(self):
 661     # Cannot work, as just creating a new symbol does not check out
 662     # the revision again.
 663     return 'not supported by cvs2svn'
 664
 665   def rcsfile(self):
 666     return self.cvs_rev.cvs_file.basename + ",v"
 667
 668   def revision(self):
 669     return self.cvs_rev.rev
 670
 671   def source(self):
 672     project = self.cvs_rev.cvs_file.project
 673     return project.cvs_repository_root + '/' + project.cvs_module + \
 674         self.cvs_rev.cvs_file.cvs_path + ",v"
 675
 676   def state(self):
 677     # We check out only live revisions.
 678     return 'Exp'
 679
 680
 681 class InternalRevisionReader(RevisionReader):
 682   """A RevisionReader that reads the contents from an own delta store."""
 683
 684   _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
 685   _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
 686   _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
 687
 688   def __init__(self, compress):
 689     self._compress = compress
 690
 691   def register_artifacts(self, which_pass):
 692     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 693     artifact_manager.register_temp_file_needed(
 694         config.RCS_DELTAS_STORE, which_pass
 695         )
 696     artifact_manager.register_temp_file_needed(
 697         config.RCS_DELTAS_INDEX_TABLE, which_pass
 698         )
 699     artifact_manager.register_temp_file_needed(
 700         config.RCS_TREES_FILTERED_STORE, which_pass
 701         )
 702     artifact_manager.register_temp_file_needed(
 703         config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
 704         )
 705
 706   def start(self):
 707     self._delta_db = IndexedDatabase(
 708         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 709         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 710         DB_OPEN_READ)
 711     self._delta_db.__delitem__ = lambda id: None
 712     self._tree_db = IndexedDatabase(
 713         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
 714         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
 715         DB_OPEN_READ)
 716     ser = MarshalSerializer()
 717     if self._compress:
 718       ser = CompressingSerializer(ser)
 719     self._co_db = Database(
 720         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
 721         ser)
 722
 723     # The set of CVSFile instances whose TextRecords have already been
 724     # read:
 725     self._loaded_files = set()
 726
 727     # A map { CVSFILE : _FileTree } for files that currently have live
 728     # revisions:
 729     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 730
 731   def _get_text_record(self, cvs_rev):
 732     """Return the TextRecord instance for CVS_REV.
 733
 734     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 735     do so now."""
 736
 737     if cvs_rev.cvs_file not in self._loaded_files:
 738       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 739         self._text_record_db.add(text_record)
 740       self._loaded_files.add(cvs_rev.cvs_file)
 741
 742     return self._text_record_db[cvs_rev.id]
 743
 744   def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
 745     """Check out the text for revision C_REV from the repository.
 746
 747     Return the text wrapped in a readable file object.  If
 748     SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
 749     _un_expanded prior to returning the file content.  Note that $Log$
 750     never actually generates a log (which makes test 'requires_cvs()'
 751     fail).
 752
 753     Revisions may be requested in any order, but if they are not
 754     requested in dependency order the checkout database will become
 755     very large.  Revisions may be skipped.  Each revision may be
 756     requested only once."""
 757
 758     try:
 759       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 760     except MalformedDeltaException, (msg):
 761       raise FatalError('Malformed RCS delta in %s, revision %s: %s'
 762                        % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
 763     if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
 764       if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
 765         text = self._kw_re.sub(r'$\1$', text)
 766       else:
 767         text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
 768
 769     return cStringIO.StringIO(text)
 770
 771   def finish(self):
 772     self._text_record_db.log_leftovers()
 773
 774     del self._text_record_db
 775     self._delta_db.close()
 776     self._tree_db.close()
 777     self._co_db.close()
 778