cvs2svn_lib/checkout_internal.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2007-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes that implement the --use-internal-co option.
  18
  19 The idea is to patch up the revisions' contents incrementally, thus
  20 avoiding the huge number of process spawns and the O(n^2) overhead of
  21 using 'co' and 'cvs'.
  22
  23 InternalRevisionCollector saves the RCS deltas and RCS revision trees
  24 to databases.  Notably, deltas from the trunk need to be reversed, as
  25 CVS stores them so they apply from HEAD backwards.
  26
  27 InternalRevisionReader produces the revisions' contents on demand.  To
  28 generate the text for a typical revision, we need the revision's delta
  29 text plus the fulltext of the previous revision.  Therefore, we
  30 maintain a checkout database containing a copy of the fulltext of any
  31 revision for which subsequent revisions still need to be retrieved.
  32 It is crucial to remove text from this database as soon as it is no
  33 longer needed, to prevent it from growing enormous.
  34
  35 There are two reasons that the text from a revision can be needed: (1)
  36 because the revision itself still needs to be output to a dumpfile;
  37 (2) because another revision needs it as the base of its delta.  We
  38 maintain a reference count for each revision, which includes *both*
  39 possibilities.  The first time a revision's text is needed, it is
  40 generated by applying the revision's deltatext to the previous
  41 revision's fulltext, and the resulting fulltext is stored in the
  42 checkout database.  Each time a revision's fulltext is retrieved, its
  43 reference count is decremented.  When the reference count goes to
  44 zero, then the fulltext is deleted from the checkout database.
  45
  46 The administrative data for managing this consists of one TextRecord
  47 entry for each revision.  Each TextRecord has an id, which is the same
  48 id as used for the corresponding CVSRevision instance.  It also
  49 maintains a count of the times it is expected to be retrieved.
  50 TextRecords come in several varieties:
  51
  52 FullTextRecord -- Used for revisions whose fulltext is derived
  53     directly from the RCS file by the InternalRevisionCollector (i.e.,
  54     typically revision 1.1 of each file).
  55
  56 DeltaTextRecord -- Used for revisions that are defined via a delta
  57     relative to some other TextRecord.  These records record the id of
  58     the TextRecord that holds the base text against which the delta is
  59     defined.  When the text for a DeltaTextRecord is retrieved, the
  60     DeltaTextRecord instance is deleted and a CheckedOutTextRecord
  61     instance is created to take its place.
  62
  63 CheckedOutTextRecord -- Used during OutputPass for a revision that
  64     started out as a DeltaTextRecord, but has already been retrieved
  65     (and therefore its fulltext is stored in the checkout database).
  66
  67 While a file is being processed during FilterSymbolsPass, the fulltext
  68 and deltas are stored to the delta database, and TextRecord instances
  69 are created to keep track of things.  The reference counts are all
  70 initialized: each record referred to by a delta has its refcount
  71 incremented, and each record that corresponds to a non-delete
  72 CVSRevision is incremented.  After that, any records with refcount==0
  73 are removed.  When one record is removed, that can cause another
  74 record's reference count to go to zero and be removed too,
  75 recursively.  When a TextRecord is deleted at this stage, its
  76 deltatext is also deleted from the delta database."""
  77
  78
  79 from cvs2svn_lib import config
  80 from cvs2svn_lib.common import DB_OPEN_NEW
  81 from cvs2svn_lib.common import DB_OPEN_READ
  82 from cvs2svn_lib.common import warning_prefix
  83 from cvs2svn_lib.common import FatalError
  84 from cvs2svn_lib.common import InternalError
  85 from cvs2svn_lib.common import canonicalize_eol
  86 from cvs2svn_lib.common import is_trunk_revision
  87 from cvs2svn_lib.context import Ctx
  88 from cvs2svn_lib.log import logger
  89 from cvs2svn_lib.artifact_manager import artifact_manager
  90 from cvs2svn_lib.cvs_item import CVSRevisionModification
  91 from cvs2svn_lib.indexed_database import IndexedDatabase
  92 from cvs2svn_lib.rcs_stream import RCSStream
  93 from cvs2svn_lib.rcs_stream import MalformedDeltaException
  94 from cvs2svn_lib.keyword_expander import expand_keywords
  95 from cvs2svn_lib.keyword_expander import collapse_keywords
  96 from cvs2svn_lib.revision_manager import RevisionCollector
  97 from cvs2svn_lib.revision_manager import RevisionReader
  98 from cvs2svn_lib.serializer import MarshalSerializer
  99 from cvs2svn_lib.serializer import CompressingSerializer
 100 from cvs2svn_lib.serializer import PrimedPickleSerializer
 101 from cvs2svn_lib.apple_single_filter import get_maybe_apple_single
 102
 103 import cvs2svn_rcsparse
 104
 105
 106 class TextRecord(object):
 107   """Bookkeeping data for the text of a single CVSRevision."""
 108
 109   __slots__ = ['id', 'refcount']
 110
 111   def __init__(self, id):
 112     # The cvs_rev_id of the revision whose text this is.
 113     self.id = id
 114
 115     # The number of times that the text of this revision will be
 116     # retrieved.
 117     self.refcount = 0
 118
 119   def __getstate__(self):
 120     return (self.id, self.refcount,)
 121
 122   def __setstate__(self, state):
 123     (self.id, self.refcount,) = state
 124
 125   def increment_dependency_refcounts(self, text_record_db):
 126     """Increment the refcounts of any records that this one depends on."""
 127
 128     pass
 129
 130   def decrement_refcount(self, text_record_db):
 131     """Decrement the number of times our text still has to be checked out.
 132
 133     If the reference count goes to zero, call discard()."""
 134
 135     self.refcount -= 1
 136     if self.refcount == 0:
 137       text_record_db.discard(self.id)
 138
 139   def checkout(self, text_record_db):
 140     """Workhorse of the checkout process.
 141
 142     Return the text for this revision, decrement our reference count,
 143     and update the databases depending on whether there will be future
 144     checkouts."""
 145
 146     raise NotImplementedError()
 147
 148   def free(self, text_record_db):
 149     """This instance will never again be checked out; free it.
 150
 151     Also free any associated resources and decrement the refcounts of
 152     any other TextRecords that this one depends on."""
 153
 154     raise NotImplementedError()
 155
 156
 157 class FullTextRecord(TextRecord):
 158   """A record whose revision's fulltext is stored in the delta_db.
 159
 160   These records are used for revisions whose fulltext was determined
 161   by the InternalRevisionCollector during FilterSymbolsPass.  The
 162   fulltext for such a revision is is stored in the delta_db as a
 163   single string."""
 164
 165   __slots__ = []
 166
 167   def __getstate__(self):
 168     return (self.id, self.refcount,)
 169
 170   def __setstate__(self, state):
 171     (self.id, self.refcount,) = state
 172
 173   def checkout(self, text_record_db):
 174     text = text_record_db.delta_db[self.id]
 175     self.decrement_refcount(text_record_db)
 176     return text
 177
 178   def free(self, text_record_db):
 179     del text_record_db.delta_db[self.id]
 180
 181   def __str__(self):
 182     return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
 183
 184
 185 class DeltaTextRecord(TextRecord):
 186   """A record whose revision's delta is stored as an RCS delta.
 187
 188   The text of this revision must be derived by applying an RCS delta
 189   to the text of the predecessor revision.  The RCS delta is stored
 190   in the delta_db."""
 191
 192   __slots__ = ['pred_id']
 193
 194   def __init__(self, id, pred_id):
 195     TextRecord.__init__(self, id)
 196
 197     # The cvs_rev_id of the revision relative to which this delta is
 198     # defined.
 199     self.pred_id = pred_id
 200
 201   def __getstate__(self):
 202     return (self.id, self.refcount, self.pred_id,)
 203
 204   def __setstate__(self, state):
 205     (self.id, self.refcount, self.pred_id,) = state
 206
 207   def increment_dependency_refcounts(self, text_record_db):
 208     text_record_db[self.pred_id].refcount += 1
 209
 210   def checkout(self, text_record_db):
 211     base_text = text_record_db[self.pred_id].checkout(text_record_db)
 212     rcs_stream = RCSStream(base_text)
 213     delta_text = text_record_db.delta_db[self.id]
 214     rcs_stream.apply_diff(delta_text)
 215     text = rcs_stream.get_text()
 216     del rcs_stream
 217     self.refcount -= 1
 218     if self.refcount == 0:
 219       # This text will never be needed again; just delete ourselves
 220       # without ever having stored the fulltext to the checkout
 221       # database:
 222       del text_record_db[self.id]
 223     else:
 224       # Store a new CheckedOutTextRecord in place of ourselves:
 225       text_record_db.checkout_db['%x' % self.id] = text
 226       new_text_record = CheckedOutTextRecord(self.id)
 227       new_text_record.refcount = self.refcount
 228       text_record_db.replace(new_text_record)
 229     return text
 230
 231   def free(self, text_record_db):
 232     del text_record_db.delta_db[self.id]
 233     text_record_db[self.pred_id].decrement_refcount(text_record_db)
 234
 235   def __str__(self):
 236     return 'DeltaTextRecord(%x -> %x, %d)' % (
 237         self.pred_id, self.id, self.refcount,
 238         )
 239
 240
 241 class CheckedOutTextRecord(TextRecord):
 242   """A record whose revision's fulltext is stored in the text_record_db.
 243
 244   These records are used for revisions whose fulltext has been
 245   computed already during OutputPass.  The fulltext for such a
 246   revision is stored in the text_record_db as a single string."""
 247
 248   __slots__ = []
 249
 250   def __getstate__(self):
 251     return (self.id, self.refcount,)
 252
 253   def __setstate__(self, state):
 254     (self.id, self.refcount,) = state
 255
 256   def checkout(self, text_record_db):
 257     text = text_record_db.checkout_db['%x' % self.id]
 258     self.decrement_refcount(text_record_db)
 259     return text
 260
 261   def free(self, text_record_db):
 262     del text_record_db.checkout_db['%x' % self.id]
 263
 264   def __str__(self):
 265     return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
 266
 267
 268 class NullDatabase(object):
 269   """A do-nothing database that can be used with TextRecordDatabase.
 270
 271   Use this when you don't actually want to allow anything to be
 272   deleted."""
 273
 274   def __delitem__(self, id):
 275     pass
 276
 277
 278 class TextRecordDatabase:
 279   """Holds the TextRecord instances that are currently live.
 280
 281   During FilterSymbolsPass, files are processed one by one and a new
 282   TextRecordDatabase instance is used for each file.  During
 283   OutputPass, a single TextRecordDatabase instance is used for the
 284   duration of OutputPass; individual records are added and removed
 285   when they are active."""
 286
 287   def __init__(self, delta_db, checkout_db):
 288     # A map { cvs_rev_id -> TextRecord }.
 289     self.text_records = {}
 290
 291     # A database-like object using cvs_rev_ids as keys and containing
 292     # fulltext/deltatext strings as values.  Its __getitem__() method
 293     # is used to retrieve deltas when they are needed, and its
 294     # __delitem__() method is used to delete deltas when they can be
 295     # freed.  The modifiability of the delta database varies from pass
 296     # to pass, so the object stored here varies as well:
 297     #
 298     # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
 299     #     modified during this pass, and we have no need to retrieve
 300     #     deltas, so we just use a dummy object here.
 301     #
 302     # OutputPass: a disabled IndexedDatabase.  During this pass we
 303     #     need to retrieve deltas, but we are not allowed to modify
 304     #     the delta database.  So we use an IndexedDatabase whose
 305     #     __del__() method has been disabled to do nothing.
 306     self.delta_db = delta_db
 307
 308     # A database-like object using cvs_rev_ids as keys and containing
 309     # fulltext strings as values.  This database is only set during
 310     # OutputPass.
 311     self.checkout_db = checkout_db
 312
 313     # If this is set to a list, then the list holds the ids of
 314     # text_records that have to be deleted; when discard() is called,
 315     # it adds the requested id to the list but does not delete it.  If
 316     # this member is set to None, then text_records are deleted
 317     # immediately when discard() is called.
 318     self.deferred_deletes = None
 319
 320   def __getstate__(self):
 321     return (self.text_records.values(),)
 322
 323   def __setstate__(self, state):
 324     (text_records,) = state
 325     self.text_records = {}
 326     for text_record in text_records:
 327       self.add(text_record)
 328     self.delta_db = NullDatabase()
 329     self.checkout_db = NullDatabase()
 330     self.deferred_deletes = None
 331
 332   def add(self, text_record):
 333     """Add TEXT_RECORD to our database.
 334
 335     There must not already be a record with the same id."""
 336
 337     assert not self.text_records.has_key(text_record.id)
 338
 339     self.text_records[text_record.id] = text_record
 340
 341   def __getitem__(self, id):
 342     return self.text_records[id]
 343
 344   def __delitem__(self, id):
 345     """Free the record with the specified ID."""
 346
 347     del self.text_records[id]
 348
 349   def replace(self, text_record):
 350     """Store TEXT_RECORD in place of the existing record with the same id.
 351
 352     Do not do anything with the old record."""
 353
 354     assert self.text_records.has_key(text_record.id)
 355     self.text_records[text_record.id] = text_record
 356
 357   def discard(self, *ids):
 358     """The text records with IDS are no longer needed; discard them.
 359
 360     This involves calling their free() methods and also removing them
 361     from SELF.
 362
 363     If SELF.deferred_deletes is not None, then the ids to be deleted
 364     are added to the list instead of deleted immediately.  This
 365     mechanism is to prevent a stack overflow from the avalanche of
 366     deletes that can result from deleting a long chain of revisions."""
 367
 368     if self.deferred_deletes is None:
 369       # This is an outer-level delete.
 370       self.deferred_deletes = list(ids)
 371       while self.deferred_deletes:
 372         id = self.deferred_deletes.pop()
 373         text_record = self[id]
 374         if text_record.refcount != 0:
 375           raise InternalError(
 376               'TextRecordDatabase.discard(%s) called with refcount = %d'
 377               % (text_record, text_record.refcount,)
 378               )
 379         # This call might cause other text_record ids to be added to
 380         # self.deferred_deletes:
 381         text_record.free(self)
 382         del self[id]
 383       self.deferred_deletes = None
 384     else:
 385       self.deferred_deletes.extend(ids)
 386
 387   def itervalues(self):
 388     return self.text_records.itervalues()
 389
 390   def recompute_refcounts(self, cvs_file_items):
 391     """Recompute the refcounts of the contained TextRecords.
 392
 393     Use CVS_FILE_ITEMS to determine which records will be needed by
 394     cvs2svn."""
 395
 396     # First clear all of the refcounts:
 397     for text_record in self.itervalues():
 398       text_record.refcount = 0
 399
 400     # Now increment the reference count of records that are needed as
 401     # the source of another record's deltas:
 402     for text_record in self.itervalues():
 403       text_record.increment_dependency_refcounts(self.text_records)
 404
 405     # Now increment the reference count of records that will be needed
 406     # by cvs2svn:
 407     for lod_items in cvs_file_items.iter_lods():
 408       for cvs_rev in lod_items.cvs_revisions:
 409         if isinstance(cvs_rev, CVSRevisionModification):
 410           self[cvs_rev.id].refcount += 1
 411
 412   def free_unused(self):
 413     """Free any TextRecords whose reference counts are zero."""
 414
 415     # The deletion of some of these text records might cause others to
 416     # be unused, in which case they will be deleted automatically.
 417     # But since the initially-unused records are not referred to by
 418     # any others, we don't have to be afraid that they will be deleted
 419     # before we get to them.  But it *is* crucial that we create the
 420     # whole unused list before starting the loop.
 421
 422     unused = [
 423         text_record.id
 424         for text_record in self.itervalues()
 425         if text_record.refcount == 0
 426         ]
 427
 428     self.discard(*unused)
 429
 430   def log_leftovers(self):
 431     """If any TextRecords still exist, log them."""
 432
 433     if self.text_records:
 434       logger.warn(
 435           "%s: internal problem: leftover revisions in the checkout cache:"
 436           % warning_prefix)
 437       for text_record in self.itervalues():
 438         logger.warn('    %s' % (text_record,))
 439
 440   def __repr__(self):
 441     """Debugging output of the current contents of the TextRecordDatabase."""
 442
 443     retval = ['TextRecordDatabase:']
 444     for text_record in self.itervalues():
 445       retval.append('    %s' % (text_record,))
 446     return '\n'.join(retval)
 447
 448
 449 class _Sink(cvs2svn_rcsparse.Sink):
 450   def __init__(self, revision_collector, cvs_file_items):
 451     self.revision_collector = revision_collector
 452     self.cvs_file_items = cvs_file_items
 453
 454     # A map {rev : base_rev} indicating that the text for rev is
 455     # stored in CVS as a delta relative to base_rev.
 456     self.base_revisions = {}
 457
 458     # The revision that is stored with its fulltext in CVS (usually
 459     # the oldest revision on trunk):
 460     self.head_revision = None
 461
 462     # The first logical revision on trunk (usually '1.1'):
 463     self.revision_1_1 = None
 464
 465     # Keep track of the revisions whose revision info has been seen so
 466     # far (to avoid repeated revision info blocks):
 467     self.revisions_seen = set()
 468
 469   def set_head_revision(self, revision):
 470     self.head_revision = revision
 471
 472   def define_revision(
 473         self, revision, timestamp, author, state, branches, next
 474         ):
 475     if next:
 476       self.base_revisions[next] = revision
 477     else:
 478       if is_trunk_revision(revision):
 479         self.revision_1_1 = revision
 480
 481     for branch in branches:
 482       self.base_revisions[branch] = revision
 483
 484   def set_revision_info(self, revision, log, text):
 485     if revision in self.revisions_seen:
 486       # One common form of CVS repository corruption is that the
 487       # Deltatext block for revision 1.1 appears twice.  CollectData
 488       # has already warned about this problem; here we can just ignore
 489       # it.
 490       return
 491     else:
 492       self.revisions_seen.add(revision)
 493
 494     cvs_rev_id = self.cvs_file_items.original_ids[revision]
 495     if is_trunk_revision(revision):
 496       # On trunk, revisions are encountered in reverse order (1.<N>
 497       # ... 1.1) and deltas are inverted.  The first text that we see
 498       # is the fulltext for the HEAD revision.  After that, the text
 499       # corresponding to revision 1.N is the delta (1.<N+1> ->
 500       # 1.<N>)).  We have to invert the deltas here so that we can
 501       # read the revisions out in dependency order; that is, for
 502       # revision 1.1 we want the fulltext, and for revision 1.<N> we
 503       # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
 504       # compute the delta for a revision until we see its logical
 505       # parent.  When we finally see revision 1.1 (which is recognized
 506       # because it doesn't have a parent), we can record the diff (1.1
 507       # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
 508
 509       if revision == self.head_revision:
 510         # This is HEAD, as fulltext.  Initialize the RCSStream so
 511         # that we can compute deltas backwards in time.
 512         self._rcs_stream = RCSStream(text)
 513         self._rcs_stream_revision = revision
 514       else:
 515         # Any other trunk revision is a backward delta.  Apply the
 516         # delta to the RCSStream to mutate it to the contents of this
 517         # revision, and also to get the reverse delta, which we store
 518         # as the forward delta of our child revision.
 519         try:
 520           text = self._rcs_stream.invert_diff(text)
 521         except MalformedDeltaException, e:
 522           logger.error(
 523               'Malformed RCS delta in %s, revision %s: %s'
 524               % (self.cvs_file_items.cvs_file.rcs_path, revision, e)
 525               )
 526           raise RuntimeError()
 527         text_record = DeltaTextRecord(
 528             self.cvs_file_items.original_ids[self._rcs_stream_revision],
 529             cvs_rev_id
 530             )
 531         self.revision_collector._writeout(text_record, text)
 532         self._rcs_stream_revision = revision
 533
 534       if revision == self.revision_1_1:
 535         # This is revision 1.1.  Write its fulltext:
 536         text_record = FullTextRecord(cvs_rev_id)
 537         self.revision_collector._writeout(
 538             text_record, self._rcs_stream.get_text()
 539             )
 540
 541         # There will be no more trunk revisions delivered, so free the
 542         # RCSStream.
 543         del self._rcs_stream
 544         del self._rcs_stream_revision
 545
 546     else:
 547       # On branches, revisions are encountered in logical order
 548       # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
 549       # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
 550       # <BRANCH>.<N>).  That's what we need, so just store it.
 551
 552       # FIXME: It would be nice to avoid writing out branch deltas
 553       # when --trunk-only.  (They will be deleted when finish_file()
 554       # is called, but if the delta db is in an IndexedDatabase the
 555       # deletions won't actually recover any disk space.)
 556       text_record = DeltaTextRecord(
 557           cvs_rev_id,
 558           self.cvs_file_items.original_ids[self.base_revisions[revision]]
 559           )
 560       self.revision_collector._writeout(text_record, text)
 561
 562     return None
 563
 564
 565 class InternalRevisionCollector(RevisionCollector):
 566   """The RevisionCollector used by InternalRevisionReader."""
 567
 568   def __init__(self, compress):
 569     RevisionCollector.__init__(self)
 570     self._compress = compress
 571
 572   def register_artifacts(self, which_pass):
 573     artifact_manager.register_temp_file(
 574         config.RCS_DELTAS_INDEX_TABLE, which_pass
 575         )
 576     artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
 577     artifact_manager.register_temp_file(
 578         config.RCS_TREES_INDEX_TABLE, which_pass
 579         )
 580     artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)
 581
 582   def start(self):
 583     serializer = MarshalSerializer()
 584     if self._compress:
 585       serializer = CompressingSerializer(serializer)
 586     self._delta_db = IndexedDatabase(
 587         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 588         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 589         DB_OPEN_NEW, serializer,
 590         )
 591     primer = (FullTextRecord, DeltaTextRecord)
 592     self._rcs_trees = IndexedDatabase(
 593         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 594         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 595         DB_OPEN_NEW, PrimedPickleSerializer(primer),
 596         )
 597
 598   def _writeout(self, text_record, text):
 599     self.text_record_db.add(text_record)
 600     self._delta_db[text_record.id] = text
 601
 602   def process_file(self, cvs_file_items):
 603     """Read revision information for the file described by CVS_FILE_ITEMS.
 604
 605     Compute the text record refcounts, discard any records that are
 606     unneeded, and store the text records for the file to the
 607     _rcs_trees database."""
 608
 609     # A map from cvs_rev_id to TextRecord instance:
 610     self.text_record_db = TextRecordDatabase(self._delta_db, NullDatabase())
 611
 612     cvs2svn_rcsparse.parse(
 613         open(cvs_file_items.cvs_file.rcs_path, 'rb'),
 614         _Sink(self, cvs_file_items),
 615         )
 616
 617     self.text_record_db.recompute_refcounts(cvs_file_items)
 618     self.text_record_db.free_unused()
 619     self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
 620     del self.text_record_db
 621
 622   def finish(self):
 623     self._delta_db.close()
 624     self._rcs_trees.close()
 625
 626
 627 class InternalRevisionReader(RevisionReader):
 628   """A RevisionReader that reads the contents from an own delta store."""
 629
 630   def __init__(self, compress):
 631     # Only import Database if an InternalRevisionReader is really
 632     # instantiated, because the import fails if a decent dbm is not
 633     # installed.
 634     from cvs2svn_lib.database import Database
 635     self._Database = Database
 636
 637     self._compress = compress
 638
 639   def register_artifacts(self, which_pass):
 640     artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
 641     artifact_manager.register_temp_file_needed(
 642         config.RCS_DELTAS_STORE, which_pass
 643         )
 644     artifact_manager.register_temp_file_needed(
 645         config.RCS_DELTAS_INDEX_TABLE, which_pass
 646         )
 647     artifact_manager.register_temp_file_needed(
 648         config.RCS_TREES_STORE, which_pass
 649         )
 650     artifact_manager.register_temp_file_needed(
 651         config.RCS_TREES_INDEX_TABLE, which_pass
 652         )
 653
 654   def start(self):
 655     self._delta_db = IndexedDatabase(
 656         artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
 657         artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
 658         DB_OPEN_READ,
 659         )
 660     self._delta_db.__delitem__ = lambda id: None
 661     self._tree_db = IndexedDatabase(
 662         artifact_manager.get_temp_file(config.RCS_TREES_STORE),
 663         artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
 664         DB_OPEN_READ,
 665         )
 666     serializer = MarshalSerializer()
 667     if self._compress:
 668       serializer = CompressingSerializer(serializer)
 669     self._co_db = self._Database(
 670         artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB),
 671         DB_OPEN_NEW, serializer,
 672         )
 673
 674     # The set of CVSFile instances whose TextRecords have already been
 675     # read:
 676     self._loaded_files = set()
 677
 678     # A map { CVSFILE : _FileTree } for files that currently have live
 679     # revisions:
 680     self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)
 681
 682   def _get_text_record(self, cvs_rev):
 683     """Return the TextRecord instance for CVS_REV.
 684
 685     If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
 686     do so now."""
 687
 688     if cvs_rev.cvs_file not in self._loaded_files:
 689       for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
 690         self._text_record_db.add(text_record)
 691       self._loaded_files.add(cvs_rev.cvs_file)
 692
 693     return self._text_record_db[cvs_rev.id]
 694
 695   def get_content(self, cvs_rev):
 696     """Check out the text for revision C_REV from the repository.
 697
 698     Return the text.  If CVS_REV has a property _keyword_handling, use
 699     it to determine how to handle RCS keywords in the output:
 700
 701         'collapsed' -- collapse keywords
 702
 703         'expanded' -- expand keywords
 704
 705         'untouched' -- output keywords in the form they are found in
 706             the RCS file
 707
 708     Note that $Log$ never actually generates a log (which makes test
 709     'requires_cvs()' fail).
 710
 711     Revisions may be requested in any order, but if they are not
 712     requested in dependency order the checkout database will become
 713     very large.  Revisions may be skipped.  Each revision may be
 714     requested only once."""
 715
 716     try:
 717       text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
 718     except MalformedDeltaException, (msg):
 719       raise FatalError(
 720           'Malformed RCS delta in %s, revision %s: %s'
 721           % (cvs_rev.cvs_file.rcs_path, cvs_rev.rev, msg)
 722           )
 723
 724     keyword_handling = cvs_rev.get_property('_keyword_handling')
 725
 726     if keyword_handling == 'untouched':
 727       # Leave keywords in the form that they were checked in.
 728       pass
 729     elif keyword_handling == 'collapsed':
 730       text = collapse_keywords(text)
 731     elif keyword_handling == 'expanded':
 732       text = expand_keywords(text, cvs_rev)
 733     else:
 734       raise FatalError(
 735           'Undefined _keyword_handling property (%r) for %s'
 736           % (keyword_handling, cvs_rev,)
 737           )
 738
 739     if Ctx().decode_apple_single:
 740       # Insert a filter to decode any files that are in AppleSingle
 741       # format:
 742       text = get_maybe_apple_single(text)
 743
 744     eol_fix = cvs_rev.get_property('_eol_fix')
 745     if eol_fix:
 746       text = canonicalize_eol(text, eol_fix)
 747
 748     return text
 749
 750   def finish(self):
 751     self._text_record_db.log_leftovers()
 752
 753     del self._text_record_db
 754     self._delta_db.close()
 755     self._tree_db.close()
 756     self._co_db.close()
 757