cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionReader produces the revisions' contents on demand.  To
  28 generate the text for a typical revision, we need the revision's delta
  29 text plus the fulltext of the previous revision.  Therefore, we
  30 maintain a checkout database containing a copy of the fulltext of any
  31 revision for which subsequent revisions still need to be retrieved.
  32 It is crucial to remove text from this database as soon as it is no
  33 longer needed, to prevent it from growing enormous.
  34
  35 There are two reasons that the text from a revision can be needed: (1)
  36 because the revision itself still needs to be output to a dumpfile;
  37 (2) because another revision needs it as the base of its delta.  We
  38 maintain a reference count for each revision, which includes *both*
  39 possibilities.  The first time a revision's text is needed, it is
  40 generated by applying the revision's deltatext to the previous
  41 revision's fulltext, and the resulting fulltext is stored in the
  42 checkout database.  Each time a revision's fulltext is retrieved, its
  43 reference count is decremented.  When the reference count goes to
  44 zero, then the fulltext is deleted from the checkout database.
  45
  46 The administrative data for managing this consists of one TextRecord
  47 entry for each revision.  Each TextRecord has an id, which is the same
  48 id as used for the corresponding CVSRevision instance.  It also
  49 maintains a count of the times it is expected to be retrieved.
  50 TextRecords come in several varieties:
  51
  52 FullTextRecord -- Used for revisions whose fulltext is contained
  53     directly in the RCS file, and therefore available during
  54     CollectRevsPass (i.e., typically revision 1.1 of each file).
  55
  56 DeltaTextRecord -- Used for revisions that are defined via a delta
  57     relative to some other TextRecord.  These records record the id of
  58     the TextRecord that holds the base text against which the delta is
  59     defined.  When the text for a DeltaTextRecord is retrieved, the
  60     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  61     instance is created to take its place.
  62
  63 CheckedOutTextRecord -- Used during OutputPass for a revision that
  64     started out as a DeltaTextRecord, but has already been retrieved
  65     (and therefore its fulltext is stored in the checkout database).
  66
  67 While a file is being processed during CollectRevsPass, the fulltext
  68 and deltas are stored to the delta database, and TextRecord instances
  69 are created to keep track of things.  The reference counts are all
  70 initialized to zero.
  71
  72 After CollectRevsPass has done any preliminary tree mangling, its
_FileDataCollector.parse_completed() method calls
  74 RevisionRecorder.finish_file(), passing it the CVSFileItems instance
  75 that describes the revisions in the file.  At this point the reference
  76 counts for the file's TextRecords are updated: each record referred to
  77 by a delta has its refcount incremented, and each record that
  78 corresponds to a non-delete CVSRevision is incremented.  After that,
  79 any records with refcount==0 are removed.  When one record is removed,
  80 that can cause another record's reference count to go to zero and be
  81 removed too, recursively.  When a TextRecord is deleted at this stage,
  82 its deltatext is also deleted from the delta database.
  83
  84 In FilterSymbolsPass, the exact same procedure (described in the
  85 previous paragraph) is repeated, but this time using the CVSFileItems
  86 after it has been updated for excluded symbols, symbol
  87 preferred-parent grafting, etc."""
  88
  89
  90 from cStringIO import StringIO
  91 import re
  92 import time
  93
  94 from cvs2svn_lib import config
  95 from cvs2svn_lib.common import DB_OPEN_NEW
  96 from cvs2svn_lib.common import DB_OPEN_READ
  97 from cvs2svn_lib.common import warning_prefix
  98 from cvs2svn_lib.common import FatalError
  99 from cvs2svn_lib.common import InternalError
 100 from cvs2svn_lib.common import is_trunk_revision
 101 from cvs2svn_lib.context import Ctx
 102 from cvs2svn_lib.log import Log
 103 from cvs2svn_lib.artifact_manager import artifact_manager
 104 from cvs2svn_lib.symbol import Trunk
 105 from cvs2svn_lib.cvs_item import CVSRevisionModification
 106 from cvs2svn_lib.database import Database
 107 from cvs2svn_lib.database import IndexedDatabase
 108 from cvs2svn_lib.rcs_stream import RCSStream
 109 from cvs2svn_lib.rcs_stream import MalformedDeltaException
 110 from cvs2svn_lib.revision_manager import RevisionCollector
 111 from cvs2svn_lib.revision_manager import RevisionReader
 112 from cvs2svn_lib.serializer import MarshalSerializer
 113 from cvs2svn_lib.serializer import CompressingSerializer
 114 from cvs2svn_lib.serializer import PrimedPickleSerializer
 115
 116 import cvs2svn_rcsparse
 117
 118
 119 class TextRecord(object):
 120   """Bookkeeping data for the text of a single CVSRevision."""
 121
 122   __slots__ = ['id', 'refcount']
 123
 124   def __init__(self, id):
 125     # The cvs_rev_id of the revision whose text this is.
 126     self.id = id
 127
 128     # The number of times that the text of this revision will be
 129     # retrieved.
 130     self.refcount = 0
 131
 132   def __getstate__(self):
 133     return (self.id, self.refcount,)
 134
 135   def __setstate__(self, state):
 136     (self.id, self.refcount,) = state
 137
 138   def increment_dependency_refcounts(self, text_record_db):
 139     """Increment the refcounts of any records that this one depends on."""
 140
 141     pass
 142
 143   def decrement_refcount(self, text_record_db):
 144     """Decrement the number of times our text still has to be checked out.
 145
 146     If the reference count goes to zero, call discard()."""
 147
 148     self.refcount -= 1
 149     if self.refcount == 0:
 150       text_record_db.discard(self.id)
 151
 152   def checkout(self, text_record_db):
 153     """Workhorse of the checkout process.
 154
 155     Return the text for this revision, decrement our reference count,
 156     and update the databases depending on whether there will be future
 157     checkouts."""
 158
 159     raise NotImplementedError()
 160
 161   def free(self, text_record_db):
 162     """This instance will never again be checked out; free it.
 163
 164     Also free any associated resources and decrement the refcounts of
 165     any other TextRecords that this one depends on."""
 166
 167     raise NotImplementedError()
 168
 169
 170 class FullTextRecord(TextRecord):
 171   __slots__ = []
 172
 173   def __getstate__(self):
 174     return (self.id, self.refcount,)
 175
 176   def __setstate__(self, state):
 177     (self.id, self.refcount,) = state
 178
 179   def checkout(self, text_record_db):
 180     text = text_record_db.delta_db[self.id]
 181     self.decrement_refcount(text_record_db)
 182     return text
 183
 184   def free(self, text_record_db):
 185     del text_record_db.delta_db[self.id]
 186
 187   def __str__(self):
 188     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 189
 190
 191 class DeltaTextRecord(TextRecord):
 192   __slots__ = ['pred_id']
 193
 194   def __init__(self, id, pred_id):
 195     TextRecord.__init__(self, id)
 196
 197     # The cvs_rev_id of the revision relative to which this delta is
 198     # defined.
 199     self.pred_id = pred_id
 200
 201   def __getstate__(self):
 202     return (self.id, self.refcount, self.pred_id,)
 203
 204   def __setstate__(self, state):
 205     (self.id, self.refcount, self.pred_id,) = state
 206
 207   def increment_dependency_refcounts(self, text_record_db):
 208     text_record_db[self.pred_id].refcount += 1
 209
 210   def checkout(self, text_record_db):
 211     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 212     co = RCSStream(base_text)
 213     delta_text = text_record_db.delta_db[self.id]
 214     co.apply_diff(delta_text)
 215     text = co.get_text()
 216     del co
 217     self.refcount -= 1
 218     if self.refcount == 0:
 219       # This text will never be needed again; just delete ourselves
 220       # without ever having stored the fulltext to the checkout
 221       # database:
 222       del text_record_db[self.id]
 223     else:
 224       # Store a new CheckedOutTextRecord in place of ourselves:
 225       text_record_db.checkout_db['%x' % self.id] = text
 226       new_text_record = CheckedOutTextRecord(self.id)
 227       new_text_record.refcount = self.refcount
 228       text_record_db.replace(new_text_record)
 229     return text
 230
 231   def free(self, text_record_db):
 232     del text_record_db.delta_db[self.id]
 233     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 234
 235   def __str__(self):
 236     return 'DeltaTextRecord(%x -> %x, %d)' \
 237            % (self.pred_id, self.id, self.refcount,)
 238
 239
 240 class CheckedOutTextRecord(TextRecord):
 241   __slots__ = []
 242
 243   def __getstate__(self):
 244     return (self.id, self.refcount,)
 245
 246   def __setstate__(self, state):
 247     (self.id, self.refcount,) = state
 248
 249   def checkout(self, text_record_db):
 250     text = text_record_db.checkout_db['%x' % self.id]
 251     self.decrement_refcount(text_record_db)
 252     return text
 253
 254   def free(self, text_record_db):
 255     del text_record_db.checkout_db['%x' % self.id]
 256
 257   def __str__(self):
 258     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 259
 260
 261 class NullDatabase(object):
 262   """A do-nothing database that can be used with TextRecordDatabase.
 263
 264   Use this when you don't actually want to allow anything to be
 265   deleted."""
 266
 267   def __delitem__(self, id):
 268     pass
 269
 270
 271 class TextRecordDatabase:
 272   """Holds the TextRecord instances that are currently live.
 273
 274   During CollectRevsPass and FilterSymbolsPass, files are processed
 275   one by one and a new TextRecordDatabase instance is used for each
 276   file.  During OutputPass, a single TextRecordDatabase instance is
 277   used for the duration of OutputPass; individual records are added
 278   and removed when they are active."""
 279
 280   def __init__(self, delta_db, checkout_db):
 281     # A map { cvs_rev_id -> TextRecord }.
 282     self.text_records = {}
 283
 284     # A database-like object using cvs_rev_ids as keys and containing
 285     # fulltext/deltatext strings as values.  Its __getitem__() method
 286     # is used to retrieve deltas when they are needed, and its
 287     # __delitem__() method is used to delete deltas when they can be
 288     # freed.  The modifiability of the delta database varies from pass
 289     # to pass, so the object stored here varies as well:
 290     #
 291     # CollectRevsPass: a fully-functional IndexedDatabase.  This
 292     #     allows deltas that will not be needed to be deleted.
 293     #
 294     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 295     #     modified during this pass, and we have no need to retrieve
 296     #     deltas, so we just use a dummy object here.
 297     #
 298     # OutputPass: a disabled IndexedDatabase.  During this pass we
 299     #     need to retrieve deltas, but we are not allowed to modify
 300     #     the delta database.  So we use an IndexedDatabase whose
 301     #     __del__() method has been disabled to do nothing.
 302     self.delta_db = delta_db
 303
 304     # A database-like object using cvs_rev_ids as keys and containing
 305     # fulltext strings as values.  This database is only set during
 306     # OutputPass.
 307     self.checkout_db = checkout_db
 308
 309     # If this is set to a list, then the list holds the ids of
 310     # text_records that have to be deleted; when discard() is called,
 311     # it adds the requested id to the list but does not delete it.  If
 312     # this member is set to None, then text_records are deleted
 313     # immediately when discard() is called.
 314     self.deferred_deletes = None
 315
 316   def __getstate__(self):
 317     return (self.text_records.values(),)
 318
 319   def __setstate__(self, state):
 320     (text_records,) = state
 321     self.text_records = {}
 322     for text_record in text_records:
 323       self.add(text_record)
 324     self.delta_db = NullDatabase()
 325     self.checkout_db = NullDatabase()
 326     self.deferred_deletes = None
 327
 328   def add(self, text_record):
 329     """Add TEXT_RECORD to our database.
 330
 331     There must not already be a record with the same id."""
 332
 333     assert not self.text_records.has_key(text_record.id)
 334
 335     self.text_records[text_record.id] = text_record
 336
 337   def __getitem__(self, id):
 338     return self.text_records[id]
 339
 340   def __delitem__(self, id):
 341     """Free the record with the specified ID."""
 342
 343     del self.text_records[id]
 344
 345   def replace(self, text_record):
 346     """Store TEXT_RECORD in place of the existing record with the same id.
 347
 348     Do not do anything with the old record."""
 349
 350     assert self.text_records.has_key(text_record.id)
 351     self.text_records[text_record.id] = text_record
 352
 353   def discard(self, *ids):
 354     """The text records with IDS are no longer needed; discard them.
 355
 356     This involves calling their free() methods and also removing them
 357     from SELF.
 358
 359     If SELF.deferred_deletes is not None, then the ids to be deleted
 360     are added to the list instead of deleted immediately.  This
 361     mechanism is to prevent a stack overflow from the avalanche of
 362     deletes that can result from deleting a long chain of revisions."""
 363
 364     if self.deferred_deletes is None:
 365       # This is an outer-level delete.
 366       self.deferred_deletes = list(ids)
 367       while self.deferred_deletes:
 368         id = self.deferred_deletes.pop()
 369         text_record = self[id]
 370         if text_record.refcount != 0:
 371           raise InternalError(
 372               'TextRecordDatabase.discard(%s) called with refcount = %d'
 373               % (text_record, text_record.refcount,)
 374               )
 375         # This call might cause other text_record ids to be added to
 376         # self.deferred_deletes:
 377         text_record.free(self)
 378         del self[id]
 379       self.deferred_deletes = None
 380     else:
 381       self.deferred_deletes.extend(ids)
 382
 383   def itervalues(self):
 384     return self.text_records.itervalues()
 385
 386   def recompute_refcounts(self, cvs_file_items):
 387     """Recompute the refcounts of the contained TextRecords.
 388
 389     Use CVS_FILE_ITEMS to determine which records will be needed by
 390     cvs2svn."""
 391
 392     # First clear all of the refcounts:
 393     for text_record in self.itervalues():
 394       text_record.refcount = 0
 395
 396     # Now increment the reference count of records that are needed as
 397     # the source of another record's deltas:
 398     for text_record in self.itervalues():
 399       text_record.increment_dependency_refcounts(self.text_records)
 400
 401     # Now increment the reference count of records that will be needed
 402     # by cvs2svn:
 403     for lod_items in cvs_file_items.iter_lods():
 404       for cvs_rev in lod_items.cvs_revisions:
 405         if isinstance(cvs_rev, CVSRevisionModification):
 406           self[cvs_rev.id].refcount += 1
 407
 408   def free_unused(self):
 409     """Free any TextRecords whose reference counts are zero."""
 410
 411     # The deletion of some of these text records might cause others to
 412     # be unused, in which case they will be deleted automatically.
 413     # But since the initially-unused records are not referred to by
 414     # any others, we don't have to be afraid that they will be deleted
 415     # before we get to them.  But it *is* crucial that we create the
 416     # whole unused list before starting the loop.
 417
 418     unused = [
 419         text_record.id
 420         for text_record in self.itervalues()
 421         if text_record.refcount == 0
 422         ]
 423
 424     self.discard(*unused)
 425
 426   def log_leftovers(self):
 427     """If any TextRecords still exist, log them."""
 428
 429     if self.text_records:
 430       Log().warn(
 431           "%s: internal problem: leftover revisions in the checkout cache:"
 432           % warning_prefix)
 433       for text_record in self.itervalues():
 434         Log().warn('    %s' % (text_record,))
 435
 436   def __repr__(self):
 437     """Debugging output of the current contents of the TextRecordDatabase."""
 438
 439     retval = ['TextRecordDatabase:']
 440     for text_record in self.itervalues():
 441       retval.append('    %s' % (text_record,))
 442     return '\n'.join(retval)
 443
 444
 445 class _Sink(cvs2svn_rcsparse.Sink):
 446   def __init__(self, revision_recorder, cvs_file_items):
 447     self.revision_recorder = revision_recorder
 448     self.cvs_file_items = cvs_file_items
 449
 450     # A map {rev : base_rev} indicating that the text for rev is
 451     # stored in CVS as a delta relative to base_rev.
 452     self.base_revisions = {}
 453
 454     # The revision that is stored with its fulltext in CVS (usually
 455     # the oldest revision on trunk):
 456     self.head_revision = None
 457
 458     # The first logical revision on trunk (usually '1.1'):
 459     self.revision_1_1 = None
 460
 461     # Keep track of the revisions whose revision info has been seen so
 462     # far (to avoid repeated revision info blocks):
 463     self.revisions_seen = set()
 464
 465   def set_head_revision(self, revision):
 466     self.head_revision = revision
 467
 468   def define_revision(
 469         self, revision, timestamp, author, state, branches, next
 470         ):
 471     if next:
 472       self.base_revisions[next] = revision
 473     else:
 474       if is_trunk_revision(revision):
 475         self.revision_1_1 = revision
 476
 477     for branch in branches:
 478       self.base_revisions[branch] = revision
 479
 480   def set_revision_info(self, revision, log, text):
 481     if revision in self.revisions_seen:
 482       # One common form of CVS repository corruption is that the
 483       # Deltatext block for revision 1.1 appears twice.  CollectData
 484       # has already warned about this problem; here we can just ignore
 485       # it.
 486       return
 487     else:
 488       self.revisions_seen.add(revision)
 489
 490     cvs_rev_id = self.cvs_file_items.original_ids[revision]
 491     if is_trunk_revision(revision):
 492       # On trunk, revisions are encountered in reverse order (1.<N>
 493       # ... 1.1) and deltas are inverted.  The first text that we see
 494       # is the fulltext for the HEAD revision.  After that, the text
 495       # corresponding to revision 1.N is the delta (1.<N+1> ->
 496       # 1.<N>)).  We have to invert the deltas here so that we can
 497       # read the revisions out in dependency order; that is, for
 498       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 499       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 500       # compute the delta for a revision until we see its logical
 501       # parent.  When we finally see revision 1.1 (which is recognized
 502       # because it doesn't have a parent), we can record the diff (1.1
 503       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 504
 505       if revision == self.head_revision:
 506         # This is HEAD, as fulltext.  Initialize the RCSStream so
 507         # that we can compute deltas backwards in time.
 508         self._stream = RCSStream(text)
 509         self._stream_revision = revision
 510       else:
 511         # Any other trunk revision is a backward delta.  Apply the
 512         # delta to the RCSStream to mutate it to the contents of this
 513         # revision, and also to get the reverse delta, which we store
 514         # as the forward delta of our child revision.
 515         try:
 516           text = self._stream.invert_diff(text)
 517         except MalformedDeltaException, e:
 518           Log().error(
 519               'Malformed RCS delta in %s, revision %s: %s'
 520               % (self.cvs_file_items.cvs_file.filename, revision, e)
 521               )
 522           raise RuntimeError()
 523         text_record = DeltaTextRecord(
 524             self.cvs_file_items.original_ids[self._stream_revision],
 525             cvs_rev_id
 526             )
 527         self.revision_recorder._writeout(text_record, text)
 528         self._stream_revision = revision
 529
 530       if revision == self.revision_1_1:
 531         # This is revision 1.1.  Write its fulltext:
 532         text_record = FullTextRecord(cvs_rev_id)
 533         self.revision_recorder._writeout(text_record, self._stream.get_text())
 534
 535         # There will be no more trunk revisions delivered, so free the
 536         # RCSStream.
 537         del self._stream
 538         del self._stream_revision
 539
 540     else:
 541       # On branches, revisions are encountered in logical order
 542       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 543       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 544       # <BRANCH>.<N>).  That's what we need, so just store it.
 545
 546       # FIXME: It would be nice to avoid writing out branch deltas
 547       # when --trunk-only.  (They will be deleted when finish_file()
 548       # is called, but if the delta db is in an IndexedDatabase the
 549       # deletions won't actually recover any disk space.)
 550       text_record = DeltaTextRecord(
 551           cvs_rev_id,
 552           self.cvs_file_items.original_ids[self.base_revisions[revision]]
 553           )
 554       self.revision_recorder._writeout(text_record, text)
 555
 556     return None
 557
 558
 559 class InternalRevisionCollector(RevisionCollector):
 560   """The RevisionCollector used by InternalRevisionReader."""
 561
 562   def __init__(self, compress):
 563     RevisionCollector.__init__(self)
 564     self._compress = compress
 565
 566   def register_artifacts(self, which_pass):
 567     artifact_manager.register_temp_file(
 568         config.RCS_DELTAS_INDEX_TABLE, which_pass
 569         )
 570     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 571     artifact_manager.register_temp_file(
 572         config.RCS_TREES_INDEX_TABLE, which_pass
 573         )
 574     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 575
 576   def start(self):
 577     ser = MarshalSerializer()
 578     if self._compress:
 579       ser = CompressingSerializer(ser)
 580     self._rcs_deltas = IndexedDatabase(
 581         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 582         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 583         DB_OPEN_NEW, ser
 584         )
 585     primer = (FullTextRecord, DeltaTextRecord)
 586     self._rcs_trees = IndexedDatabase(
 587         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 588         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 589         DB_OPEN_NEW, PrimedPickleSerializer(primer)
 590         )
 591
 592   def _writeout(self, text_record, text):
 593     self.text_record_db.add(text_record)
 594     self._rcs_deltas[text_record.id] = text
 595
 596   def process_file(self, cvs_file_items):
 597     """Read revision information for the file described by CVS_FILE_ITEMS.
 598
 599     Compute the text record refcounts, discard any records that are
 600     unneeded, and store the text records for the file to the
 601     _rcs_trees database."""
 602
 603     # A map from cvs_rev_id to TextRecord instance:
 604     self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())
 605
 606     cvs2svn_rcsparse.parse(
 607         open(cvs_file_items.cvs_file.filename, 'rb'),
 608         _Sink(self, cvs_file_items),
 609         )
 610
 611     self.text_record_db.recompute_refcounts(cvs_file_items)
 612     self.text_record_db.free_unused()
 613     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 614     del self.text_record_db
 615
 616   def finish(self):
 617     self._rcs_deltas.close()
 618     self._rcs_trees.close()
 619
 620
 621 class _KeywordExpander:
 622   """A class whose instances provide substitutions for CVS keywords.
 623
 624   This class is used via its __call__() method, which should be called
 625   with a match object representing a match for a CVS keyword string.
 626   The method returns the replacement for the matched text.
 627
 628   The __call__() method works by calling the method with the same name
 629   as that of the CVS keyword (converted to lower case).
 630
 631   Instances of this class can be passed as the REPL argument to
 632   re.sub()."""
 633
 634   date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
 635   date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12
 636
 637   date_fmt = date_fmt_new
 638
 639   @classmethod
 640   def use_old_date_format(klass):
 641       """Class method to ensure exact compatibility with CVS 1.11
 642       output.  Use this if you want to verify your conversion and you're
 643       using CVS 1.11."""
 644       klass.date_fmt = klass.date_fmt_old
 645
 646   def __init__(self, cvs_rev):
 647     self.cvs_rev = cvs_rev
 648
 649   def __call__(self, match):
 650     return '$%s: %s $' % \
 651            (match.group(1), getattr(self, match.group(1).lower())(),)
 652
 653   def author(self):
 654     return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author
 655
 656   def date(self):
 657     return time.strftime(self.date_fmt,
 658                          time.gmtime(self.cvs_rev.timestamp))
 659
 660   def header(self):
 661     return '%s %s %s %s Exp' % \
 662            (self.source(), self.cvs_rev.rev, self.date(), self.author())
 663
 664   def id(self):
 665     return '%s %s %s %s Exp' % \
 666            (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
 667
 668   def locker(self):
 669     # Handle kvl like kv, as a converted repo is supposed to have no
 670     # locks.
 671     return ''
 672
 673   def log(self):
 674     # Would need some special handling.
 675     return 'not supported by cvs2svn'
 676
 677   def name(self):
 678     # Cannot work, as just creating a new symbol does not check out
 679     # the revision again.
 680     return 'not supported by cvs2svn'
 681
 682   def rcsfile(self):
 683     return self.cvs_rev.cvs_file.basename + ",v"
 684
 685   def revision(self):
 686     return self.cvs_rev.rev
 687
 688   def source(self):
 689     project = self.cvs_rev.cvs_file.project
 690     return project.cvs_repository_root + '/' + project.cvs_module + \
 691         self.cvs_rev.cvs_file.cvs_path + ",v"
 692
 693   def state(self):
 694     # We check out only live revisions.
 695     return 'Exp'
 696
 697
 698 class InternalRevisionReader(RevisionReader):
 699   """A RevisionReader that reads the contents from an own delta store."""
 700
 701   _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
 702   _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
 703   _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
 704
 705   def __init__(self, compress):
 706     self._compress = compress
 707
 708   def register_artifacts(self, which_pass):
 709     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 710     artifact_manager.register_temp_file_needed(
 711         config.RCS_DELTAS_STORE, which_pass
 712         )
 713     artifact_manager.register_temp_file_needed(
 714         config.RCS_DELTAS_INDEX_TABLE, which_pass
 715         )
 716     artifact_manager.register_temp_file_needed(
 717         config.RCS_TREES_STORE, which_pass
 718         )
 719     artifact_manager.register_temp_file_needed(
 720         config.RCS_TREES_INDEX_TABLE, which_pass
 721         )
 722
 723   def start(self):
 724     self._delta_db = IndexedDatabase(
 725         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 726         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 727         DB_OPEN_READ)
 728     self._delta_db.__delitem__ = lambda id: None
 729     self._tree_db = IndexedDatabase(
 730         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 731         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 732         DB_OPEN_READ)
 733     ser = MarshalSerializer()
 734     if self._compress:
 735       ser = CompressingSerializer(ser)
 736     self._co_db = Database(
 737         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
 738         ser)
 739
 740     # The set of CVSFile instances whose TextRecords have already been
 741     # read:
 742     self._loaded_files = set()
 743
 744     # A map { CVSFILE : _FileTree } for files that currently have live
 745     # revisions:
 746     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 747
 748   def _get_text_record(self, cvs_rev):
 749     """Return the TextRecord instance for CVS_REV.
 750
 751     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 752     do so now."""
 753
 754     if cvs_rev.cvs_file not in self._loaded_files:
 755       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 756         self._text_record_db.add(text_record)
 757       self._loaded_files.add(cvs_rev.cvs_file)
 758
 759     return self._text_record_db[cvs_rev.id]
 760
 761   def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
 762     """Check out the text for revision C_REV from the repository.
 763
 764     Return the text wrapped in a readable file object.  If
 765     SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
 766     _un_expanded prior to returning the file content.  Note that $Log$
 767     never actually generates a log (which makes test 'requires_cvs()'
 768     fail).
 769
 770     Revisions may be requested in any order, but if they are not
 771     requested in dependency order the checkout database will become
 772     very large.  Revisions may be skipped.  Each revision may be
 773     requested only once."""
 774
 775     try:
 776       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 777     except MalformedDeltaException, (msg):
 778       raise FatalError('Malformed RCS delta in %s, revision %s: %s'
 779                        % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
 780     if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
 781       if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
 782         text = self._kw_re.sub(r'$\1$', text)
 783       else:
 784         text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
 785
 786     return StringIO(text)
 787
 788   def finish(self):
 789     self._text_record_db.log_leftovers()
 790
 791     del self._text_record_db
 792     self._delta_db.close()
 793     self._tree_db.close()
 794     self._co_db.close()
 795