cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionReader produces the revisions' contents on demand.  To
  28 generate the text for a typical revision, we need the revision's delta
  29 text plus the fulltext of the previous revision.  Therefore, we
  30 maintain a checkout database containing a copy of the fulltext of any
  31 revision for which subsequent revisions still need to be retrieved.
  32 It is crucial to remove text from this database as soon as it is no
  33 longer needed, to prevent it from growing enormous.
  34
  35 There are two reasons that the text from a revision can be needed: (1)
  36 because the revision itself still needs to be output to a dumpfile;
  37 (2) because another revision needs it as the base of its delta.  We
  38 maintain a reference count for each revision, which includes *both*
  39 possibilities.  The first time a revision's text is needed, it is
  40 generated by applying the revision's deltatext to the previous
  41 revision's fulltext, and the resulting fulltext is stored in the
  42 checkout database.  Each time a revision's fulltext is retrieved, its
  43 reference count is decremented.  When the reference count goes to
  44 zero, then the fulltext is deleted from the checkout database.
  45
  46 The administrative data for managing this consists of one TextRecord
  47 entry for each revision.  Each TextRecord has an id, which is the same
  48 id as used for the corresponding CVSRevision instance.  It also
  49 maintains a count of the times it is expected to be retrieved.
  50 TextRecords come in several varieties:
  51
  52 FullTextRecord -- Used for revisions whose fulltext is contained
  53     directly in the RCS file, and therefore available during
  54     CollectRevsPass (i.e., typically revision 1.1 of each file).
  55
  56 DeltaTextRecord -- Used for revisions that are defined via a delta
  57     relative to some other TextRecord.  These records record the id of
  58     the TextRecord that holds the base text against which the delta is
  59     defined.  When the text for a DeltaTextRecord is retrieved, the
  60     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  61     instance is created to take its place.
  62
  63 CheckedOutTextRecord -- Used during OutputPass for a revision that
  64     started out as a DeltaTextRecord, but has already been retrieved
  65     (and therefore its fulltext is stored in the checkout database).
  66
  67 While a file is being processed during FilterSymbolsPass, the fulltext
  68 and deltas are stored to the delta database, and TextRecord instances
  69 are created to keep track of things.  The reference counts are all
  70 initialized: each record referred to by a delta has its refcount
  71 incremented, and each record that corresponds to a non-delete
  72 CVSRevision is incremented.  After that, any records with refcount==0
  73 are removed.  When one record is removed, that can cause another
  74 record's reference count to go to zero and be removed too,
  75 recursively.  When a TextRecord is deleted at this stage, its
  76 deltatext is also deleted from the delta database."""
  77
  78
  79 from cStringIO import StringIO
  80 import re
  81 import time
  82
  83 from cvs2svn_lib import config
  84 from cvs2svn_lib.common import DB_OPEN_NEW
  85 from cvs2svn_lib.common import DB_OPEN_READ
  86 from cvs2svn_lib.common import warning_prefix
  87 from cvs2svn_lib.common import FatalError
  88 from cvs2svn_lib.common import InternalError
  89 from cvs2svn_lib.common import canonicalize_eol
  90 from cvs2svn_lib.common import is_trunk_revision
  91 from cvs2svn_lib.context import Ctx
  92 from cvs2svn_lib.log import Log
  93 from cvs2svn_lib.artifact_manager import artifact_manager
  94 from cvs2svn_lib.symbol import Trunk
  95 from cvs2svn_lib.cvs_item import CVSRevisionModification
  96 from cvs2svn_lib.database import Database
  97 from cvs2svn_lib.database import IndexedDatabase
  98 from cvs2svn_lib.rcs_stream import RCSStream
  99 from cvs2svn_lib.rcs_stream import MalformedDeltaException
 100 from cvs2svn_lib.revision_manager import RevisionCollector
 101 from cvs2svn_lib.revision_manager import RevisionReader
 102 from cvs2svn_lib.serializer import MarshalSerializer
 103 from cvs2svn_lib.serializer import CompressingSerializer
 104 from cvs2svn_lib.serializer import PrimedPickleSerializer
 105 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
 106
 107 import cvs2svn_rcsparse
 108
 109
 110 class TextRecord(object):
 111   """Bookkeeping data for the text of a single CVSRevision."""
 112
 113   __slots__ = ['id', 'refcount']
 114
 115   def __init__(self, id):
 116     # The cvs_rev_id of the revision whose text this is.
 117     self.id = id
 118
 119     # The number of times that the text of this revision will be
 120     # retrieved.
 121     self.refcount = 0
 122
 123   def __getstate__(self):
 124     return (self.id, self.refcount,)
 125
 126   def __setstate__(self, state):
 127     (self.id, self.refcount,) = state
 128
 129   def increment_dependency_refcounts(self, text_record_db):
 130     """Increment the refcounts of any records that this one depends on."""
 131
 132     pass
 133
 134   def decrement_refcount(self, text_record_db):
 135     """Decrement the number of times our text still has to be checked out.
 136
 137     If the reference count goes to zero, call discard()."""
 138
 139     self.refcount -= 1
 140     if self.refcount == 0:
 141       text_record_db.discard(self.id)
 142
 143   def checkout(self, text_record_db):
 144     """Workhorse of the checkout process.
 145
 146     Return the text for this revision, decrement our reference count,
 147     and update the databases depending on whether there will be future
 148     checkouts."""
 149
 150     raise NotImplementedError()
 151
 152   def free(self, text_record_db):
 153     """This instance will never again be checked out; free it.
 154
 155     Also free any associated resources and decrement the refcounts of
 156     any other TextRecords that this one depends on."""
 157
 158     raise NotImplementedError()
 159
 160
 161 class FullTextRecord(TextRecord):
 162   __slots__ = []
 163
 164   def __getstate__(self):
 165     return (self.id, self.refcount,)
 166
 167   def __setstate__(self, state):
 168     (self.id, self.refcount,) = state
 169
 170   def checkout(self, text_record_db):
 171     text = text_record_db.delta_db[self.id]
 172     self.decrement_refcount(text_record_db)
 173     return text
 174
 175   def free(self, text_record_db):
 176     del text_record_db.delta_db[self.id]
 177
 178   def __str__(self):
 179     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 180
 181
 182 class DeltaTextRecord(TextRecord):
 183   __slots__ = ['pred_id']
 184
 185   def __init__(self, id, pred_id):
 186     TextRecord.__init__(self, id)
 187
 188     # The cvs_rev_id of the revision relative to which this delta is
 189     # defined.
 190     self.pred_id = pred_id
 191
 192   def __getstate__(self):
 193     return (self.id, self.refcount, self.pred_id,)
 194
 195   def __setstate__(self, state):
 196     (self.id, self.refcount, self.pred_id,) = state
 197
 198   def increment_dependency_refcounts(self, text_record_db):
 199     text_record_db[self.pred_id].refcount += 1
 200
 201   def checkout(self, text_record_db):
 202     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 203     co = RCSStream(base_text)
 204     delta_text = text_record_db.delta_db[self.id]
 205     co.apply_diff(delta_text)
 206     text = co.get_text()
 207     del co
 208     self.refcount -= 1
 209     if self.refcount == 0:
 210       # This text will never be needed again; just delete ourselves
 211       # without ever having stored the fulltext to the checkout
 212       # database:
 213       del text_record_db[self.id]
 214     else:
 215       # Store a new CheckedOutTextRecord in place of ourselves:
 216       text_record_db.checkout_db['%x' % self.id] = text
 217       new_text_record = CheckedOutTextRecord(self.id)
 218       new_text_record.refcount = self.refcount
 219       text_record_db.replace(new_text_record)
 220     return text
 221
 222   def free(self, text_record_db):
 223     del text_record_db.delta_db[self.id]
 224     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 225
 226   def __str__(self):
 227     return 'DeltaTextRecord(%x -> %x, %d)' % (
 228         self.pred_id, self.id, self.refcount,
 229         )
 230
 231
 232 class CheckedOutTextRecord(TextRecord):
 233   __slots__ = []
 234
 235   def __getstate__(self):
 236     return (self.id, self.refcount,)
 237
 238   def __setstate__(self, state):
 239     (self.id, self.refcount,) = state
 240
 241   def checkout(self, text_record_db):
 242     text = text_record_db.checkout_db['%x' % self.id]
 243     self.decrement_refcount(text_record_db)
 244     return text
 245
 246   def free(self, text_record_db):
 247     del text_record_db.checkout_db['%x' % self.id]
 248
 249   def __str__(self):
 250     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 251
 252
 253 class NullDatabase(object):
 254   """A do-nothing database that can be used with TextRecordDatabase.
 255
 256   Use this when you don't actually want to allow anything to be
 257   deleted."""
 258
 259   def __delitem__(self, id):
 260     pass
 261
 262
 263 class TextRecordDatabase:
 264   """Holds the TextRecord instances that are currently live.
 265
 266   During CollectRevsPass and FilterSymbolsPass, files are processed
 267   one by one and a new TextRecordDatabase instance is used for each
 268   file.  During OutputPass, a single TextRecordDatabase instance is
 269   used for the duration of OutputPass; individual records are added
 270   and removed when they are active."""
 271
 272   def __init__(self, delta_db, checkout_db):
 273     # A map { cvs_rev_id -> TextRecord }.
 274     self.text_records = {}
 275
 276     # A database-like object using cvs_rev_ids as keys and containing
 277     # fulltext/deltatext strings as values.  Its __getitem__() method
 278     # is used to retrieve deltas when they are needed, and its
 279     # __delitem__() method is used to delete deltas when they can be
 280     # freed.  The modifiability of the delta database varies from pass
 281     # to pass, so the object stored here varies as well:
 282     #
 283     # CollectRevsPass: a fully-functional IndexedDatabase.  This
 284     #     allows deltas that will not be needed to be deleted.
 285     #
 286     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 287     #     modified during this pass, and we have no need to retrieve
 288     #     deltas, so we just use a dummy object here.
 289     #
 290     # OutputPass: a disabled IndexedDatabase.  During this pass we
 291     #     need to retrieve deltas, but we are not allowed to modify
 292     #     the delta database.  So we use an IndexedDatabase whose
 293     #     __del__() method has been disabled to do nothing.
 294     self.delta_db = delta_db
 295
 296     # A database-like object using cvs_rev_ids as keys and containing
 297     # fulltext strings as values.  This database is only set during
 298     # OutputPass.
 299     self.checkout_db = checkout_db
 300
 301     # If this is set to a list, then the list holds the ids of
 302     # text_records that have to be deleted; when discard() is called,
 303     # it adds the requested id to the list but does not delete it.  If
 304     # this member is set to None, then text_records are deleted
 305     # immediately when discard() is called.
 306     self.deferred_deletes = None
 307
 308   def __getstate__(self):
 309     return (self.text_records.values(),)
 310
 311   def __setstate__(self, state):
 312     (text_records,) = state
 313     self.text_records = {}
 314     for text_record in text_records:
 315       self.add(text_record)
 316     self.delta_db = NullDatabase()
 317     self.checkout_db = NullDatabase()
 318     self.deferred_deletes = None
 319
 320   def add(self, text_record):
 321     """Add TEXT_RECORD to our database.
 322
 323     There must not already be a record with the same id."""
 324
 325     assert not self.text_records.has_key(text_record.id)
 326
 327     self.text_records[text_record.id] = text_record
 328
 329   def __getitem__(self, id):
 330     return self.text_records[id]
 331
 332   def __delitem__(self, id):
 333     """Free the record with the specified ID."""
 334
 335     del self.text_records[id]
 336
 337   def replace(self, text_record):
 338     """Store TEXT_RECORD in place of the existing record with the same id.
 339
 340     Do not do anything with the old record."""
 341
 342     assert self.text_records.has_key(text_record.id)
 343     self.text_records[text_record.id] = text_record
 344
 345   def discard(self, *ids):
 346     """The text records with IDS are no longer needed; discard them.
 347
 348     This involves calling their free() methods and also removing them
 349     from SELF.
 350
 351     If SELF.deferred_deletes is not None, then the ids to be deleted
 352     are added to the list instead of deleted immediately.  This
 353     mechanism is to prevent a stack overflow from the avalanche of
 354     deletes that can result from deleting a long chain of revisions."""
 355
 356     if self.deferred_deletes is None:
 357       # This is an outer-level delete.
 358       self.deferred_deletes = list(ids)
 359       while self.deferred_deletes:
 360         id = self.deferred_deletes.pop()
 361         text_record = self[id]
 362         if text_record.refcount != 0:
 363           raise InternalError(
 364               'TextRecordDatabase.discard(%s) called with refcount = %d'
 365               % (text_record, text_record.refcount,)
 366               )
 367         # This call might cause other text_record ids to be added to
 368         # self.deferred_deletes:
 369         text_record.free(self)
 370         del self[id]
 371       self.deferred_deletes = None
 372     else:
 373       self.deferred_deletes.extend(ids)
 374
 375   def itervalues(self):
 376     return self.text_records.itervalues()
 377
 378   def recompute_refcounts(self, cvs_file_items):
 379     """Recompute the refcounts of the contained TextRecords.
 380
 381     Use CVS_FILE_ITEMS to determine which records will be needed by
 382     cvs2svn."""
 383
 384     # First clear all of the refcounts:
 385     for text_record in self.itervalues():
 386       text_record.refcount = 0
 387
 388     # Now increment the reference count of records that are needed as
 389     # the source of another record's deltas:
 390     for text_record in self.itervalues():
 391       text_record.increment_dependency_refcounts(self.text_records)
 392
 393     # Now increment the reference count of records that will be needed
 394     # by cvs2svn:
 395     for lod_items in cvs_file_items.iter_lods():
 396       for cvs_rev in lod_items.cvs_revisions:
 397         if isinstance(cvs_rev, CVSRevisionModification):
 398           self[cvs_rev.id].refcount += 1
 399
 400   def free_unused(self):
 401     """Free any TextRecords whose reference counts are zero."""
 402
 403     # The deletion of some of these text records might cause others to
 404     # be unused, in which case they will be deleted automatically.
 405     # But since the initially-unused records are not referred to by
 406     # any others, we don't have to be afraid that they will be deleted
 407     # before we get to them.  But it *is* crucial that we create the
 408     # whole unused list before starting the loop.
 409
 410     unused = [
 411         text_record.id
 412         for text_record in self.itervalues()
 413         if text_record.refcount == 0
 414         ]
 415
 416     self.discard(*unused)
 417
 418   def log_leftovers(self):
 419     """If any TextRecords still exist, log them."""
 420
 421     if self.text_records:
 422       Log().warn(
 423           "%s: internal problem: leftover revisions in the checkout cache:"
 424           % warning_prefix)
 425       for text_record in self.itervalues():
 426         Log().warn('    %s' % (text_record,))
 427
 428   def __repr__(self):
 429     """Debugging output of the current contents of the TextRecordDatabase."""
 430
 431     retval = ['TextRecordDatabase:']
 432     for text_record in self.itervalues():
 433       retval.append('    %s' % (text_record,))
 434     return '\n'.join(retval)
 435
 436
 437 class _Sink(cvs2svn_rcsparse.Sink):
 438   def __init__(self, revision_collector, cvs_file_items):
 439     self.revision_collector = revision_collector
 440     self.cvs_file_items = cvs_file_items
 441
 442     # A map {rev : base_rev} indicating that the text for rev is
 443     # stored in CVS as a delta relative to base_rev.
 444     self.base_revisions = {}
 445
 446     # The revision that is stored with its fulltext in CVS (usually
 447     # the oldest revision on trunk):
 448     self.head_revision = None
 449
 450     # The first logical revision on trunk (usually '1.1'):
 451     self.revision_1_1 = None
 452
 453     # Keep track of the revisions whose revision info has been seen so
 454     # far (to avoid repeated revision info blocks):
 455     self.revisions_seen = set()
 456
 457   def set_head_revision(self, revision):
 458     self.head_revision = revision
 459
 460   def define_revision(
 461         self, revision, timestamp, author, state, branches, next
 462         ):
 463     if next:
 464       self.base_revisions[next] = revision
 465     else:
 466       if is_trunk_revision(revision):
 467         self.revision_1_1 = revision
 468
 469     for branch in branches:
 470       self.base_revisions[branch] = revision
 471
 472   def set_revision_info(self, revision, log, text):
 473     if revision in self.revisions_seen:
 474       # One common form of CVS repository corruption is that the
 475       # Deltatext block for revision 1.1 appears twice.  CollectData
 476       # has already warned about this problem; here we can just ignore
 477       # it.
 478       return
 479     else:
 480       self.revisions_seen.add(revision)
 481
 482     cvs_rev_id = self.cvs_file_items.original_ids[revision]
 483     if is_trunk_revision(revision):
 484       # On trunk, revisions are encountered in reverse order (1.<N>
 485       # ... 1.1) and deltas are inverted.  The first text that we see
 486       # is the fulltext for the HEAD revision.  After that, the text
 487       # corresponding to revision 1.N is the delta (1.<N+1> ->
 488       # 1.<N>)).  We have to invert the deltas here so that we can
 489       # read the revisions out in dependency order; that is, for
 490       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 491       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 492       # compute the delta for a revision until we see its logical
 493       # parent.  When we finally see revision 1.1 (which is recognized
 494       # because it doesn't have a parent), we can record the diff (1.1
 495       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 496
 497       if revision == self.head_revision:
 498         # This is HEAD, as fulltext.  Initialize the RCSStream so
 499         # that we can compute deltas backwards in time.
 500         self._stream = RCSStream(text)
 501         self._stream_revision = revision
 502       else:
 503         # Any other trunk revision is a backward delta.  Apply the
 504         # delta to the RCSStream to mutate it to the contents of this
 505         # revision, and also to get the reverse delta, which we store
 506         # as the forward delta of our child revision.
 507         try:
 508           text = self._stream.invert_diff(text)
 509         except MalformedDeltaException, e:
 510           Log().error(
 511               'Malformed RCS delta in %s, revision %s: %s'
 512               % (self.cvs_file_items.cvs_file.filename, revision, e)
 513               )
 514           raise RuntimeError()
 515         text_record = DeltaTextRecord(
 516             self.cvs_file_items.original_ids[self._stream_revision],
 517             cvs_rev_id
 518             )
 519         self.revision_collector._writeout(text_record, text)
 520         self._stream_revision = revision
 521
 522       if revision == self.revision_1_1:
 523         # This is revision 1.1.  Write its fulltext:
 524         text_record = FullTextRecord(cvs_rev_id)
 525         self.revision_collector._writeout(
 526             text_record, self._stream.get_text()
 527             )
 528
 529         # There will be no more trunk revisions delivered, so free the
 530         # RCSStream.
 531         del self._stream
 532         del self._stream_revision
 533
 534     else:
 535       # On branches, revisions are encountered in logical order
 536       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 537       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 538       # <BRANCH>.<N>).  That's what we need, so just store it.
 539
 540       # FIXME: It would be nice to avoid writing out branch deltas
 541       # when --trunk-only.  (They will be deleted when finish_file()
 542       # is called, but if the delta db is in an IndexedDatabase the
 543       # deletions won't actually recover any disk space.)
 544       text_record = DeltaTextRecord(
 545           cvs_rev_id,
 546           self.cvs_file_items.original_ids[self.base_revisions[revision]]
 547           )
 548       self.revision_collector._writeout(text_record, text)
 549
 550     return None
 551
 552
 553 class InternalRevisionCollector(RevisionCollector):
 554   """The RevisionCollector used by InternalRevisionReader."""
 555
 556   def __init__(self, compress):
 557     RevisionCollector.__init__(self)
 558     self._compress = compress
 559
 560   def register_artifacts(self, which_pass):
 561     artifact_manager.register_temp_file(
 562         config.RCS_DELTAS_INDEX_TABLE, which_pass
 563         )
 564     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 565     artifact_manager.register_temp_file(
 566         config.RCS_TREES_INDEX_TABLE, which_pass
 567         )
 568     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 569
 570   def start(self):
 571     ser = MarshalSerializer()
 572     if self._compress:
 573       ser = CompressingSerializer(ser)
 574     self._rcs_deltas = IndexedDatabase(
 575         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 576         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 577         DB_OPEN_NEW, ser
 578         )
 579     primer = (FullTextRecord, DeltaTextRecord)
 580     self._rcs_trees = IndexedDatabase(
 581         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 582         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 583         DB_OPEN_NEW, PrimedPickleSerializer(primer)
 584         )
 585
 586   def _writeout(self, text_record, text):
 587     self.text_record_db.add(text_record)
 588     self._rcs_deltas[text_record.id] = text
 589
 590   def process_file(self, cvs_file_items):
 591     """Read revision information for the file described by CVS_FILE_ITEMS.
 592
 593     Compute the text record refcounts, discard any records that are
 594     unneeded, and store the text records for the file to the
 595     _rcs_trees database."""
 596
 597     # A map from cvs_rev_id to TextRecord instance:
 598     self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
 599
 600     cvs2svn_rcsparse.parse(
 601         open(cvs_file_items.cvs_file.filename, 'rb'),
 602         _Sink(self, cvs_file_items),
 603         )
 604
 605     self.text_record_db.recompute_refcounts(cvs_file_items)
 606     self.text_record_db.free_unused()
 607     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 608     del self.text_record_db
 609
 610   def finish(self):
 611     self._rcs_deltas.close()
 612     self._rcs_trees.close()
 613
 614
 615 class _KeywordExpander:
 616   """A class whose instances provide substitutions for CVS keywords.
 617
 618   This class is used via its __call__() method, which should be called
 619   with a match object representing a match for a CVS keyword string.
 620   The method returns the replacement for the matched text.
 621
 622   The __call__() method works by calling the method with the same name
 623   as that of the CVS keyword (converted to lower case).
 624
 625   Instances of this class can be passed as the REPL argument to
 626   re.sub()."""
 627
 628   date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
 629   date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12
 630
 631   date_fmt = date_fmt_new
 632
 633   @classmethod
 634   def use_old_date_format(klass):
 635       """Class method to ensure exact compatibility with CVS 1.11
 636       output.  Use this if you want to verify your conversion and you're
 637       using CVS 1.11."""
 638       klass.date_fmt = klass.date_fmt_old
 639
 640   def __init__(self, cvs_rev):
 641     self.cvs_rev = cvs_rev
 642
 643   def __call__(self, match):
 644     return '$%s: %s $' % (
 645         match.group(1), getattr(self, match.group(1).lower())(),
 646         )
 647
 648   def author(self):
 649     return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
 650
 651   def date(self):
 652     return time.strftime(self.date_fmt, time.gmtime(self.cvs_rev.timestamp))
 653
 654   def header(self):
 655     return '%s %s %s %s Exp' % (
 656         self.source(), self.cvs_rev.rev, self.date(), self.author(),
 657         )
 658
 659   def id(self):
 660     return '%s %s %s %s Exp' % (
 661         self.rcsfile(), self.cvs_rev.rev, self.date(), self.author(),
 662         )
 663
 664   def locker(self):
 665     # Handle kvl like kv, as a converted repo is supposed to have no
 666     # locks.
 667     return ''
 668
 669   def log(self):
 670     # Would need some special handling.
 671     return 'not supported by cvs2svn'
 672
 673   def name(self):
 674     # Cannot work, as just creating a new symbol does not check out
 675     # the revision again.
 676     return 'not supported by cvs2svn'
 677
 678   def rcsfile(self):
 679     return self.cvs_rev.cvs_file.basename + ",v"
 680
 681   def revision(self):
 682     return self.cvs_rev.rev
 683
 684   def source(self):
 685     project = self.cvs_rev.cvs_file.project
 686     return project.cvs_repository_root + '/' + project.cvs_module + \
 687         self.cvs_rev.cvs_file.cvs_path + ",v"
 688
 689   def state(self):
 690     # We check out only live revisions.
 691     return 'Exp'
 692
 693
 694 class InternalRevisionReader(RevisionReader):
 695   """A RevisionReader that reads the contents from an own delta store."""
 696
 697   _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
 698   _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
 699   _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
 700
 701   def __init__(self, compress):
 702     self._compress = compress
 703
 704   def register_artifacts(self, which_pass):
 705     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 706     artifact_manager.register_temp_file_needed(
 707         config.RCS_DELTAS_STORE, which_pass
 708         )
 709     artifact_manager.register_temp_file_needed(
 710         config.RCS_DELTAS_INDEX_TABLE, which_pass
 711         )
 712     artifact_manager.register_temp_file_needed(
 713         config.RCS_TREES_STORE, which_pass
 714         )
 715     artifact_manager.register_temp_file_needed(
 716         config.RCS_TREES_INDEX_TABLE, which_pass
 717         )
 718
 719   def start(self):
 720     self._delta_db = IndexedDatabase(
 721         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 722         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 723         DB_OPEN_READ)
 724     self._delta_db.__delitem__ = lambda id: None
 725     self._tree_db = IndexedDatabase(
 726         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 727         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 728         DB_OPEN_READ)
 729     ser = MarshalSerializer()
 730     if self._compress:
 731       ser = CompressingSerializer(ser)
 732     self._co_db = Database(
 733         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
 734         ser)
 735
 736     # The set of CVSFile instances whose TextRecords have already been
 737     # read:
 738     self._loaded_files = set()
 739
 740     # A map { CVSFILE : _FileTree } for files that currently have live
 741     # revisions:
 742     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 743
 744   def _get_text_record(self, cvs_rev):
 745     """Return the TextRecord instance for CVS_REV.
 746
 747     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 748     do so now."""
 749
 750     if cvs_rev.cvs_file not in self._loaded_files:
 751       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 752         self._text_record_db.add(text_record)
 753       self._loaded_files.add(cvs_rev.cvs_file)
 754
 755     return self._text_record_db[cvs_rev.id]
 756
 757   def get_content(self, cvs_rev):
 758     """Check out the text for revision C_REV from the repository.
 759
 760     Return the text.  If CVS_REV has a property _keyword_handling, use
 761     it to determine how to handle RCS keywords in the output:
 762
 763         'collapsed' -- collapse keywords
 764
 765         'expanded' -- expand keywords
 766
 767         'untouched' -- output keywords in the form they are found in
 768             the RCS file
 769
 770     Note that $Log$ never actually generates a log (which makes test
 771     'requires_cvs()' fail).
 772
 773     Revisions may be requested in any order, but if they are not
 774     requested in dependency order the checkout database will become
 775     very large.  Revisions may be skipped.  Each revision may be
 776     requested only once."""
 777
 778     try:
 779       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 780     except MalformedDeltaException, (msg):
 781       raise FatalError('Malformed RCS delta in %s, revision %s: %s'
 782                        % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
 783
 784     keyword_handling = cvs_rev.get_property('_keyword_handling')
 785
 786     if keyword_handling == 'untouched':
 787       # Leave keywords in the form that they were checked in.
 788       pass
 789     elif keyword_handling == 'collapsed':
 790       text = self._kw_re.sub(r'$\1$', text)
 791     elif keyword_handling == 'expanded':
 792       text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
 793     else:
 794       raise FatalError(
 795           'Undefined _keyword_handling property (%r) for %s'
 796           % (keyword_handling, cvs_rev,)
 797           )
 798
 799     if Ctx().decode_apple_single:
 800       # Insert a filter to decode any files that are in AppleSingle
 801       # format:
 802       text = get_maybe_apple_single(text)
 803
 804     eol_fix = cvs_rev.get_property('_eol_fix')
 805     if eol_fix:
 806       text = canonicalize_eol(text, eol_fix)
 807
 808     return text
 809
 810   def finish(self):
 811     self._text_record_db.log_leftovers()
 812
 813     del self._text_record_db
 814     self._delta_db.close()
 815     self._tree_db.close()
 816     self._co_db.close()
 817