cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionRecorder saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionExcluder copies the revision trees to a new database,
  28 omitting excluded branches.
  29
  30 InternalRevisionReader produces the revisions' contents on demand.  To
  31 generate the text for a typical revision, we need the revision's delta
  32 text plus the fulltext of the previous revision.  Therefore, we
  33 maintain a checkout database containing a copy of the fulltext of any
  34 revision for which subsequent revisions still need to be retrieved.
  35 It is crucial to remove text from this database as soon as it is no
  36 longer needed, to prevent it from growing enormous.
  37
  38 There are two reasons that the text from a revision can be needed: (1)
  39 because the revision itself still needs to be output to a dumpfile;
  40 (2) because another revision needs it as the base of its delta.  We
  41 maintain a reference count for each revision, which includes *both*
  42 possibilities.  The first time a revision's text is needed, it is
  43 generated by applying the revision's deltatext to the previous
  44 revision's fulltext, and the resulting fulltext is stored in the
  45 checkout database.  Each time a revision's fulltext is retrieved, its
  46 reference count is decremented.  When the reference count goes to
  47 zero, then the fulltext is deleted from the checkout database.
  48
  49 The administrative data for managing this consists of one TextRecord
  50 entry for each revision.  Each TextRecord has an id, which is the same
  51 id as used for the corresponding CVSRevision instance.  It also
  52 maintains a count of the times it is expected to be retrieved.
  53 TextRecords come in several varieties:
  54
  55 FullTextRecord -- Used for revisions whose fulltext is contained
  56     directly in the RCS file, and therefore available during
  57     CollectRevsPass (i.e., typically revision 1.1 of each file).
  58
  59 DeltaTextRecord -- Used for revisions that are defined via a delta
  60     relative to some other TextRecord.  These records record the id of
  61     the TextRecord that holds the base text against which the delta is
  62     defined.  When the text for a DeltaTextRecord is retrieved, the
  63     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  64     instance is created to take its place.
  65
  66 CheckedOutTextRecord -- Used during OutputPass for a revision that
  67     started out as a DeltaTextRecord, but has already been retrieved
  68     (and therefore its fulltext is stored in the checkout database).
  69
  70 While a file is being processed during CollectRevsPass, the fulltext
  71 and deltas are stored to the delta database, and TextRecord instances
  72 are created to keep track of things.  The reference counts are all
  73 initialized to zero.
  74
  75 After CollectRevsPass has done any preliminary tree mangling, its
   76 _FileDataCollector.parse_completed() method calls
  77 RevisionRecorder.finish_file(), passing it the CVSFileItems instance
  78 that describes the revisions in the file.  At this point the reference
  79 counts for the file's TextRecords are updated: each record referred to
  80 by a delta has its refcount incremented, and each record that
  81 corresponds to a non-delete CVSRevision is incremented.  After that,
  82 any records with refcount==0 are removed.  When one record is removed,
  83 that can cause another record's reference count to go to zero and be
  84 removed too, recursively.  When a TextRecord is deleted at this stage,
  85 its deltatext is also deleted from the delta database.
  86
  87 In FilterSymbolsPass, the exact same procedure (described in the
  88 previous paragraph) is repeated, but this time using the CVSFileItems
  89 after it has been updated for excluded symbols, symbol
  90 preferred-parent grafting, etc."""
  91
  92
  93 from cStringIO import StringIO
  94 import re
  95 import time
  96
  97 from cvs2svn_lib import config
  98 from cvs2svn_lib.common import DB_OPEN_NEW
  99 from cvs2svn_lib.common import DB_OPEN_READ
 100 from cvs2svn_lib.common import warning_prefix
 101 from cvs2svn_lib.common import FatalError
 102 from cvs2svn_lib.common import InternalError
 103 from cvs2svn_lib.context import Ctx
 104 from cvs2svn_lib.log import Log
 105 from cvs2svn_lib.artifact_manager import artifact_manager
 106 from cvs2svn_lib.symbol import Trunk
 107 from cvs2svn_lib.cvs_item import CVSRevisionModification
 108 from cvs2svn_lib.database import Database
 109 from cvs2svn_lib.database import IndexedDatabase
 110 from cvs2svn_lib.rcs_stream import RCSStream
 111 from cvs2svn_lib.rcs_stream import MalformedDeltaException
 112 from cvs2svn_lib.revision_manager import RevisionRecorder
 113 from cvs2svn_lib.revision_manager import RevisionExcluder
 114 from cvs2svn_lib.revision_manager import RevisionReader
 115 from cvs2svn_lib.serializer import MarshalSerializer
 116 from cvs2svn_lib.serializer import CompressingSerializer
 117 from cvs2svn_lib.serializer import PrimedPickleSerializer
 118
 119
 120 class TextRecord(object):
 121   """Bookkeeping data for the text of a single CVSRevision."""
 122
 123   __slots__ = ['id', 'refcount']
 124
 125   def __init__(self, id):
 126     # The cvs_rev_id of the revision whose text this is.
 127     self.id = id
 128
 129     # The number of times that the text of this revision will be
 130     # retrieved.
 131     self.refcount = 0
 132
 133   def __getstate__(self):
 134     return (self.id, self.refcount,)
 135
 136   def __setstate__(self, state):
 137     (self.id, self.refcount,) = state
 138
 139   def increment_dependency_refcounts(self, text_record_db):
 140     """Increment the refcounts of any records that this one depends on."""
 141
 142     pass
 143
 144   def decrement_refcount(self, text_record_db):
 145     """Decrement the number of times our text still has to be checked out.
 146
 147     If the reference count goes to zero, call discard()."""
 148
 149     self.refcount -= 1
 150     if self.refcount == 0:
 151       text_record_db.discard(self.id)
 152
 153   def checkout(self, text_record_db):
 154     """Workhorse of the checkout process.
 155
 156     Return the text for this revision, decrement our reference count,
 157     and update the databases depending on whether there will be future
 158     checkouts."""
 159
 160     raise NotImplementedError()
 161
 162   def free(self, text_record_db):
 163     """This instance will never again be checked out; free it.
 164
 165     Also free any associated resources and decrement the refcounts of
 166     any other TextRecords that this one depends on."""
 167
 168     raise NotImplementedError()
 169
 170
 171 class FullTextRecord(TextRecord):
 172   __slots__ = []
 173
 174   def __getstate__(self):
 175     return (self.id, self.refcount,)
 176
 177   def __setstate__(self, state):
 178     (self.id, self.refcount,) = state
 179
 180   def checkout(self, text_record_db):
 181     text = text_record_db.delta_db[self.id]
 182     self.decrement_refcount(text_record_db)
 183     return text
 184
 185   def free(self, text_record_db):
 186     del text_record_db.delta_db[self.id]
 187
 188   def __str__(self):
 189     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 190
 191
 192 class DeltaTextRecord(TextRecord):
 193   __slots__ = ['pred_id']
 194
 195   def __init__(self, id, pred_id):
 196     TextRecord.__init__(self, id)
 197
 198     # The cvs_rev_id of the revision relative to which this delta is
 199     # defined.
 200     self.pred_id = pred_id
 201
 202   def __getstate__(self):
 203     return (self.id, self.refcount, self.pred_id,)
 204
 205   def __setstate__(self, state):
 206     (self.id, self.refcount, self.pred_id,) = state
 207
 208   def increment_dependency_refcounts(self, text_record_db):
 209     text_record_db[self.pred_id].refcount += 1
 210
 211   def checkout(self, text_record_db):
 212     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 213     co = RCSStream(base_text)
 214     delta_text = text_record_db.delta_db[self.id]
 215     co.apply_diff(delta_text)
 216     text = co.get_text()
 217     del co
 218     self.refcount -= 1
 219     if self.refcount == 0:
 220       # This text will never be needed again; just delete ourselves
 221       # without ever having stored the fulltext to the checkout
 222       # database:
 223       del text_record_db[self.id]
 224     else:
 225       # Store a new CheckedOutTextRecord in place of ourselves:
 226       text_record_db.checkout_db['%x' % self.id] = text
 227       new_text_record = CheckedOutTextRecord(self.id)
 228       new_text_record.refcount = self.refcount
 229       text_record_db.replace(new_text_record)
 230     return text
 231
 232   def free(self, text_record_db):
 233     del text_record_db.delta_db[self.id]
 234     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 235
 236   def __str__(self):
 237     return 'DeltaTextRecord(%x -> %x, %d)' \
 238            % (self.pred_id, self.id, self.refcount,)
 239
 240
 241 class CheckedOutTextRecord(TextRecord):
 242   __slots__ = []
 243
 244   def __getstate__(self):
 245     return (self.id, self.refcount,)
 246
 247   def __setstate__(self, state):
 248     (self.id, self.refcount,) = state
 249
 250   def checkout(self, text_record_db):
 251     text = text_record_db.checkout_db['%x' % self.id]
 252     self.decrement_refcount(text_record_db)
 253     return text
 254
 255   def free(self, text_record_db):
 256     del text_record_db.checkout_db['%x' % self.id]
 257
 258   def __str__(self):
 259     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 260
 261
 262 class NullDatabase(object):
 263   """A do-nothing database that can be used with TextRecordDatabase.
 264
 265   Use this when you don't actually want to allow anything to be
 266   deleted."""
 267
 268   def __delitem__(self, id):
 269     pass
 270
 271
 272 class TextRecordDatabase:
 273   """Holds the TextRecord instances that are currently live.
 274
 275   During CollectRevsPass and FilterSymbolsPass, files are processed
 276   one by one and a new TextRecordDatabase instance is used for each
 277   file.  During OutputPass, a single TextRecordDatabase instance is
 278   used for the duration of OutputPass; individual records are added
 279   and removed when they are active."""
 280
 281   def __init__(self, delta_db, checkout_db):
 282     # A map { cvs_rev_id -> TextRecord }.
 283     self.text_records = {}
 284
 285     # A database-like object using cvs_rev_ids as keys and containing
 286     # fulltext/deltatext strings as values.  Its __getitem__() method
 287     # is used to retrieve deltas when they are needed, and its
 288     # __delitem__() method is used to delete deltas when they can be
 289     # freed.  The modifiability of the delta database varies from pass
 290     # to pass, so the object stored here varies as well:
 291     #
 292     # CollectRevsPass: a fully-functional IndexedDatabase.  This
 293     #     allows deltas that will not be needed to be deleted.
 294     #
 295     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 296     #     modified during this pass, and we have no need to retrieve
 297     #     deltas, so we just use a dummy object here.
 298     #
 299     # OutputPass: a disabled IndexedDatabase.  During this pass we
 300     #     need to retrieve deltas, but we are not allowed to modify
 301     #     the delta database.  So we use an IndexedDatabase whose
 302     #     __del__() method has been disabled to do nothing.
 303     self.delta_db = delta_db
 304
 305     # A database-like object using cvs_rev_ids as keys and containing
 306     # fulltext strings as values.  This database is only set during
 307     # OutputPass.
 308     self.checkout_db = checkout_db
 309
 310     # If this is set to a list, then the list holds the ids of
 311     # text_records that have to be deleted; when discard() is called,
 312     # it adds the requested id to the list but does not delete it.  If
 313     # this member is set to None, then text_records are deleted
 314     # immediately when discard() is called.
 315     self.deferred_deletes = None
 316
 317   def __getstate__(self):
 318     return (self.text_records.values(),)
 319
 320   def __setstate__(self, state):
 321     (text_records,) = state
 322     self.text_records = {}
 323     for text_record in text_records:
 324       self.add(text_record)
 325     self.delta_db = NullDatabase()
 326     self.checkout_db = NullDatabase()
 327     self.deferred_deletes = None
 328
 329   def add(self, text_record):
 330     """Add TEXT_RECORD to our database.
 331
 332     There must not already be a record with the same id."""
 333
 334     assert not self.text_records.has_key(text_record.id)
 335
 336     self.text_records[text_record.id] = text_record
 337
 338   def __getitem__(self, id):
 339     return self.text_records[id]
 340
 341   def __delitem__(self, id):
 342     """Free the record with the specified ID."""
 343
 344     del self.text_records[id]
 345
 346   def replace(self, text_record):
 347     """Store TEXT_RECORD in place of the existing record with the same id.
 348
 349     Do not do anything with the old record."""
 350
 351     assert self.text_records.has_key(text_record.id)
 352     self.text_records[text_record.id] = text_record
 353
 354   def discard(self, *ids):
 355     """The text records with IDS are no longer needed; discard them.
 356
 357     This involves calling their free() methods and also removing them
 358     from SELF.
 359
 360     If SELF.deferred_deletes is not None, then the ids to be deleted
 361     are added to the list instead of deleted immediately.  This
 362     mechanism is to prevent a stack overflow from the avalanche of
 363     deletes that can result from deleting a long chain of revisions."""
 364
 365     if self.deferred_deletes is None:
 366       # This is an outer-level delete.
 367       self.deferred_deletes = list(ids)
 368       while self.deferred_deletes:
 369         id = self.deferred_deletes.pop()
 370         text_record = self[id]
 371         if text_record.refcount != 0:
 372           raise InternalError(
 373               'TextRecordDatabase.discard(%s) called with refcount = %d'
 374               % (text_record, text_record.refcount,)
 375               )
 376         # This call might cause other text_record ids to be added to
 377         # self.deferred_deletes:
 378         text_record.free(self)
 379         del self[id]
 380       self.deferred_deletes = None
 381     else:
 382       self.deferred_deletes.extend(ids)
 383
 384   def itervalues(self):
 385     return self.text_records.itervalues()
 386
 387   def recompute_refcounts(self, cvs_file_items):
 388     """Recompute the refcounts of the contained TextRecords.
 389
 390     Use CVS_FILE_ITEMS to determine which records will be needed by
 391     cvs2svn."""
 392
 393     # First clear all of the refcounts:
 394     for text_record in self.itervalues():
 395       text_record.refcount = 0
 396
 397     # Now increment the reference count of records that are needed as
 398     # the source of another record's deltas:
 399     for text_record in self.itervalues():
 400       text_record.increment_dependency_refcounts(self.text_records)
 401
 402     # Now increment the reference count of records that will be needed
 403     # by cvs2svn:
 404     for lod_items in cvs_file_items.iter_lods():
 405       for cvs_rev in lod_items.cvs_revisions:
 406         if isinstance(cvs_rev, CVSRevisionModification):
 407           self[cvs_rev.id].refcount += 1
 408
 409   def free_unused(self):
 410     """Free any TextRecords whose reference counts are zero."""
 411
 412     # The deletion of some of these text records might cause others to
 413     # be unused, in which case they will be deleted automatically.
 414     # But since the initially-unused records are not referred to by
 415     # any others, we don't have to be afraid that they will be deleted
 416     # before we get to them.  But it *is* crucial that we create the
 417     # whole unused list before starting the loop.
 418
 419     unused = [
 420         text_record.id
 421         for text_record in self.itervalues()
 422         if text_record.refcount == 0
 423         ]
 424
 425     self.discard(*unused)
 426
 427   def log_leftovers(self):
 428     """If any TextRecords still exist, log them."""
 429
 430     if self.text_records:
 431       Log().warn(
 432           "%s: internal problem: leftover revisions in the checkout cache:"
 433           % warning_prefix)
 434       for text_record in self.itervalues():
 435         Log().warn('    %s' % (text_record,))
 436
 437   def __repr__(self):
 438     """Debugging output of the current contents of the TextRecordDatabase."""
 439
 440     retval = ['TextRecordDatabase:']
 441     for text_record in self.itervalues():
 442       retval.append('    %s' % (text_record,))
 443     return '\n'.join(retval)
 444
 445
 446 class InternalRevisionRecorder(RevisionRecorder):
 447   """A RevisionRecorder that reconstructs the fulltext internally."""
 448
 449   def __init__(self, compress):
 450     RevisionRecorder.__init__(self)
 451     self._compress = compress
 452
 453   def register_artifacts(self, which_pass):
 454     artifact_manager.register_temp_file(
 455         config.RCS_DELTAS_INDEX_TABLE, which_pass
 456         )
 457     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 458     artifact_manager.register_temp_file(
 459         config.RCS_TREES_INDEX_TABLE, which_pass
 460         )
 461     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 462
 463   def start(self):
 464     ser = MarshalSerializer()
 465     if self._compress:
 466       ser = CompressingSerializer(ser)
 467     self._rcs_deltas = IndexedDatabase(
 468         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 469         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 470         DB_OPEN_NEW, ser)
 471     primer = (FullTextRecord, DeltaTextRecord)
 472     self._rcs_trees = IndexedDatabase(
 473         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 474         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 475         DB_OPEN_NEW, PrimedPickleSerializer(primer))
 476
 477   def start_file(self, cvs_file_items):
 478     # A map from cvs_rev_id to TextRecord instance:
 479     self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
 480
 481   def record_text(self, cvs_rev, log, text):
 482     if isinstance(cvs_rev.lod, Trunk):
 483       # On trunk, revisions are encountered in reverse order (1.<N>
 484       # ... 1.1) and deltas are inverted.  The first text that we see
 485       # is the fulltext for the HEAD revision.  After that, the text
 486       # corresponding to revision 1.N is the delta (1.<N+1> ->
 487       # 1.<N>)).  We have to invert the deltas here so that we can
 488       # read the revisions out in dependency order; that is, for
 489       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 490       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 491       # compute the delta for a revision until we see its logical
 492       # parent.  When we finally see revision 1.1 (which is recognized
 493       # because it doesn't have a parent), we can record the diff (1.1
 494       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 495
 496       if cvs_rev.next_id is None:
 497         # This is HEAD, as fulltext.  Initialize the RCSStream so
 498         # that we can compute deltas backwards in time.
 499         self._stream = RCSStream(text)
 500       else:
 501         # Any other trunk revision is a backward delta.  Apply the
 502         # delta to the RCSStream to mutate it to the contents of this
 503         # revision, and also to get the reverse delta, which we store
 504         # as the forward delta of our child revision.
 505         try:
 506           text = self._stream.invert_diff(text)
 507         except MalformedDeltaException, (msg):
 508           Log().error('Malformed RCS delta in %s, revision %s: %s'
 509                       % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
 510                          msg))
 511           raise RuntimeError
 512         text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
 513         self._writeout(text_record, text)
 514
 515       if cvs_rev.prev_id is None:
 516         # This is revision 1.1.  Write its fulltext:
 517         text_record = FullTextRecord(cvs_rev.id)
 518         self._writeout(text_record, self._stream.get_text())
 519
 520         # There will be no more trunk revisions delivered, so free the
 521         # RCSStream.
 522         del self._stream
 523
 524     else:
 525       # On branches, revisions are encountered in logical order
 526       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 527       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 528       # <BRANCH>.<N>).  That's what we need, so just store it.
 529
 530       # FIXME: It would be nice to avoid writing out branch deltas
 531       # when --trunk-only.  (They will be deleted when finish_file()
 532       # is called, but if the delta db is in an IndexedDatabase the
 533       # deletions won't actually recover any disk space.)
 534       text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
 535       self._writeout(text_record, text)
 536
 537     return None
 538
 539   def _writeout(self, text_record, text):
 540     self.text_record_db.add(text_record)
 541     self._rcs_deltas[text_record.id] = text
 542
 543   def finish_file(self, cvs_file_items):
 544     """Finish processing of the current file.
 545
 546     Compute the initial text record refcounts, discard any records
 547     that are unneeded, and store the text records for the file to the
 548     _rcs_trees database."""
 549
 550     self.text_record_db.recompute_refcounts(cvs_file_items)
 551     self.text_record_db.free_unused()
 552     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 553     del self.text_record_db
 554
 555   def finish(self):
 556     self._rcs_deltas.close()
 557     self._rcs_trees.close()
 558
 559
 560 class InternalRevisionExcluder(RevisionExcluder):
 561   """The RevisionExcluder used by InternalRevisionReader."""
 562
 563   def register_artifacts(self, which_pass):
 564     artifact_manager.register_temp_file_needed(
 565         config.RCS_TREES_STORE, which_pass
 566         )
 567     artifact_manager.register_temp_file_needed(
 568         config.RCS_TREES_INDEX_TABLE, which_pass
 569         )
 570     artifact_manager.register_temp_file(
 571         config.RCS_TREES_FILTERED_STORE, which_pass
 572         )
 573     artifact_manager.register_temp_file(
 574         config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
 575         )
 576
 577   def start(self):
 578     self._tree_db = IndexedDatabase(
 579         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 580         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 581         DB_OPEN_READ)
 582     primer = (FullTextRecord, DeltaTextRecord)
 583     self._new_tree_db = IndexedDatabase(
 584         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
 585         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
 586         DB_OPEN_NEW, PrimedPickleSerializer(primer))
 587
 588   def process_file(self, cvs_file_items):
 589     text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
 590     text_record_db.recompute_refcounts(cvs_file_items)
 591     text_record_db.free_unused()
 592     self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db
 593
 594   def finish(self):
 595     self._tree_db.close()
 596     self._new_tree_db.close()
 597
 598
 599 class _KeywordExpander:
 600   """A class whose instances provide substitutions for CVS keywords.
 601
 602   This class is used via its __call__() method, which should be called
 603   with a match object representing a match for a CVS keyword string.
 604   The method returns the replacement for the matched text.
 605
 606   The __call__() method works by calling the method with the same name
 607   as that of the CVS keyword (converted to lower case).
 608
 609   Instances of this class can be passed as the REPL argument to
 610   re.sub()."""
 611
 612   date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
 613   date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12
 614
 615   date_fmt = date_fmt_new
 616
 617   @classmethod
 618   def use_old_date_format(klass):
 619       """Class method to ensure exact compatibility with CVS 1.11
 620       output.  Use this if you want to verify your conversion and you're
 621       using CVS 1.11."""
 622       klass.date_fmt = klass.date_fmt_old
 623
 624   def __init__(self, cvs_rev):
 625     self.cvs_rev = cvs_rev
 626
 627   def __call__(self, match):
 628     return '$%s: %s $' % \
 629            (match.group(1), getattr(self, match.group(1).lower())(),)
 630
 631   def author(self):
 632     return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
 633
 634   def date(self):
 635     return time.strftime(self.date_fmt,
 636                          time.gmtime(self.cvs_rev.timestamp))
 637
 638   def header(self):
 639     return '%s %s %s %s Exp' % \
 640            (self.source(), self.cvs_rev.rev, self.date(), self.author())
 641
 642   def id(self):
 643     return '%s %s %s %s Exp' % \
 644            (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
 645
 646   def locker(self):
 647     # Handle kvl like kv, as a converted repo is supposed to have no
 648     # locks.
 649     return ''
 650
 651   def log(self):
 652     # Would need some special handling.
 653     return 'not supported by cvs2svn'
 654
 655   def name(self):
 656     # Cannot work, as just creating a new symbol does not check out
 657     # the revision again.
 658     return 'not supported by cvs2svn'
 659
 660   def rcsfile(self):
 661     return self.cvs_rev.cvs_file.basename + ",v"
 662
 663   def revision(self):
 664     return self.cvs_rev.rev
 665
 666   def source(self):
 667     project = self.cvs_rev.cvs_file.project
 668     return project.cvs_repository_root + '/' + project.cvs_module + \
 669         self.cvs_rev.cvs_file.cvs_path + ",v"
 670
 671   def state(self):
 672     # We check out only live revisions.
 673     return 'Exp'
 674
 675
 676 class InternalRevisionReader(RevisionReader):
 677   """A RevisionReader that reads the contents from an own delta store."""
 678
 679   _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
 680   _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
 681   _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
 682
 683   def __init__(self, compress):
 684     self._compress = compress
 685
 686   def register_artifacts(self, which_pass):
 687     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 688     artifact_manager.register_temp_file_needed(
 689         config.RCS_DELTAS_STORE, which_pass
 690         )
 691     artifact_manager.register_temp_file_needed(
 692         config.RCS_DELTAS_INDEX_TABLE, which_pass
 693         )
 694     artifact_manager.register_temp_file_needed(
 695         config.RCS_TREES_FILTERED_STORE, which_pass
 696         )
 697     artifact_manager.register_temp_file_needed(
 698         config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
 699         )
 700
 701   def start(self):
 702     self._delta_db = IndexedDatabase(
 703         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 704         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 705         DB_OPEN_READ)
 706     self._delta_db.__delitem__ = lambda id: None
 707     self._tree_db = IndexedDatabase(
 708         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
 709         artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
 710         DB_OPEN_READ)
 711     ser = MarshalSerializer()
 712     if self._compress:
 713       ser = CompressingSerializer(ser)
 714     self._co_db = Database(
 715         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
 716         ser)
 717
 718     # The set of CVSFile instances whose TextRecords have already been
 719     # read:
 720     self._loaded_files = set()
 721
 722     # A map { CVSFILE : _FileTree } for files that currently have live
 723     # revisions:
 724     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 725
 726   def _get_text_record(self, cvs_rev):
 727     """Return the TextRecord instance for CVS_REV.
 728
 729     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 730     do so now."""
 731
 732     if cvs_rev.cvs_file not in self._loaded_files:
 733       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 734         self._text_record_db.add(text_record)
 735       self._loaded_files.add(cvs_rev.cvs_file)
 736
 737     return self._text_record_db[cvs_rev.id]
 738
 739   def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
 740     """Check out the text for revision C_REV from the repository.
 741
 742     Return the text wrapped in a readable file object.  If
 743     SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
 744     _un_expanded prior to returning the file content.  Note that $Log$
 745     never actually generates a log (which makes test 'requires_cvs()'
 746     fail).
 747
 748     Revisions may be requested in any order, but if they are not
 749     requested in dependency order the checkout database will become
 750     very large.  Revisions may be skipped.  Each revision may be
 751     requested only once."""
 752
 753     try:
 754       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 755     except MalformedDeltaException, (msg):
 756       raise FatalError('Malformed RCS delta in %s, revision %s: %s'
 757                        % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
 758     if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
 759       if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
 760         text = self._kw_re.sub(r'$\1$', text)
 761       else:
 762         text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
 763
 764     return StringIO(text)
 765
 766   def finish(self):
 767     self._text_record_db.log_leftovers()
 768
 769     del self._text_record_db
 770     self._delta_db.close()
 771     self._tree_db.close()
 772     self._co_db.close()
 773