# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================
"""This module contains classes that implement the --use-internal-co option.

The idea is to patch up the revisions' contents incrementally, thus
avoiding the huge number of process spawns and the O(n^2) overhead of
using 'co' to retrieve them.

InternalRevisionCollector saves the RCS deltas and RCS revision trees
to databases.  Notably, deltas from the trunk need to be reversed, as
CVS stores them so they apply from HEAD backwards.

InternalRevisionReader produces the revisions' contents on demand.  To
generate the text for a typical revision, we need the revision's delta
text plus the fulltext of the previous revision.  Therefore, we
maintain a checkout database containing a copy of the fulltext of any
revision for which subsequent revisions still need to be retrieved.
It is crucial to remove text from this database as soon as it is no
longer needed, to prevent it from growing enormous.

There are two reasons that the text from a revision can be needed: (1)
because the revision itself still needs to be output to a dumpfile;
(2) because another revision needs it as the base of its delta.  We
maintain a reference count for each revision, which includes *both*
possibilities.  The first time a revision's text is needed, it is
generated by applying the revision's deltatext to the previous
revision's fulltext, and the resulting fulltext is stored in the
checkout database.  Each time a revision's fulltext is retrieved, its
reference count is decremented.  When the reference count goes to
zero, then the fulltext is deleted from the checkout database.

The administrative data for managing this consists of one TextRecord
entry for each revision.  Each TextRecord has an id, which is the same
id as used for the corresponding CVSRevision instance.  It also
maintains a count of the times it is expected to be retrieved.
TextRecords come in several varieties:

FullTextRecord -- Used for revisions whose fulltext is derived
    directly from the RCS file by the InternalRevisionCollector (i.e.,
    typically revision 1.1 of each file).

DeltaTextRecord -- Used for revisions that are defined via a delta
    relative to some other TextRecord.  These records record the id of
    the TextRecord that holds the base text against which the delta is
    defined.  When the text for a DeltaTextRecord is retrieved, the
    DeltaTextRecord instance is deleted and a CheckedOutTextRecord
    instance is created to take its place.

CheckedOutTextRecord -- Used during OutputPass for a revision that
    started out as a DeltaTextRecord, but has already been retrieved
    (and therefore its fulltext is stored in the checkout database).

While a file is being processed during FilterSymbolsPass, the fulltext
and deltas are stored to the delta database, and TextRecord instances
are created to keep track of things.  The reference counts are all
initialized: each record referred to by a delta has its refcount
incremented, and each record that corresponds to a non-delete
CVSRevision is incremented.  After that, any records with refcount==0
are removed.  When one record is removed, that can cause another
record's reference count to go to zero and be removed too,
recursively.  When a TextRecord is deleted at this stage, its
deltatext is also deleted from the delta database."""
import re
import time

from cvs2svn_lib import config
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import canonicalize_eol
from cvs2svn_lib.common import is_trunk_revision
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.log import logger
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.cvs_item import CVSRevisionModification
from cvs2svn_lib.database import Database
from cvs2svn_lib.database import IndexedDatabase
from cvs2svn_lib.rcs_stream import RCSStream
from cvs2svn_lib.rcs_stream import MalformedDeltaException
from cvs2svn_lib.revision_manager import RevisionCollector
from cvs2svn_lib.revision_manager import RevisionReader
from cvs2svn_lib.serializer import MarshalSerializer
from cvs2svn_lib.serializer import CompressingSerializer
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.apple_single_filter import get_maybe_apple_single

import cvs2svn_rcsparse
class TextRecord(object):
  """Bookkeeping data for the text of a single CVSRevision.

  This is the common base of the concrete record types.  It stores the
  revision id and a reference count; subclasses must implement
  checkout() and free()."""

  __slots__ = ['id', 'refcount']

  def __init__(self, id):
    # The cvs_rev_id of the revision whose text this is.
    self.id = id

    # The number of times that the text of this revision will be
    # retrieved.
    self.refcount = 0

  def __getstate__(self):
    """Return the picklable state as the tuple (id, refcount)."""

    return (self.id, self.refcount,)

  def __setstate__(self, state):
    """Restore the state produced by __getstate__()."""

    (self.id, self.refcount,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Increment the refcounts of any records that this one depends on.

    The base implementation depends on no other record, so it does
    nothing."""

    pass

  def decrement_refcount(self, text_record_db):
    """Decrement the number of times our text still has to be checked out.

    If the reference count goes to zero, call discard() on
    TEXT_RECORD_DB with our id."""

    self.refcount -= 1
    if self.refcount == 0:
      text_record_db.discard(self.id)

  def checkout(self, text_record_db):
    """Workhorse of the checkout process.

    Return the text for this revision, decrement our reference count,
    and update the databases depending on whether there will be future
    checkouts.

    Must be implemented by subclasses; the base implementation raises
    NotImplementedError."""

    raise NotImplementedError()

  def free(self, text_record_db):
    """This instance will never again be checked out; free it.

    Also free any associated resources and decrement the refcounts of
    any other TextRecords that this one depends on.

    Must be implemented by subclasses; the base implementation raises
    NotImplementedError."""

    raise NotImplementedError()
class FullTextRecord(TextRecord):
  """A record whose revision's fulltext is stored in the delta_db.

  These records are used for revisions whose fulltext was determined
  by the InternalRevisionCollector during FilterSymbolsPass.  The
  fulltext for such a revision is stored in the delta_db as a
  single string."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    """Return our fulltext, read from TEXT_RECORD_DB.delta_db.

    The reference count is decremented afterwards, which may cause
    this record to be discarded."""

    text = text_record_db.delta_db[self.id]
    self.decrement_refcount(text_record_db)
    return text

  def free(self, text_record_db):
    """Delete our fulltext from TEXT_RECORD_DB.delta_db."""

    del text_record_db.delta_db[self.id]

  def __str__(self):
    return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
class DeltaTextRecord(TextRecord):
  """A record whose revision's delta is stored as an RCS delta.

  The text of this revision must be derived by applying an RCS delta
  to the text of the predecessor revision.  The RCS delta is stored
  in the delta_db."""

  __slots__ = ['pred_id']

  def __init__(self, id, pred_id):
    TextRecord.__init__(self, id)

    # The cvs_rev_id of the revision relative to which this delta is
    # defined.
    self.pred_id = pred_id

  def __getstate__(self):
    return (self.id, self.refcount, self.pred_id,)

  def __setstate__(self, state):
    (self.id, self.refcount, self.pred_id,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Count our dependency on the predecessor record."""

    text_record_db[self.pred_id].refcount += 1

  def checkout(self, text_record_db):
    """Return our fulltext, computed from the predecessor's text.

    The predecessor is checked out (which consumes one of its
    references) and our delta from TEXT_RECORD_DB.delta_db is applied
    to it.  If further checkouts of this revision are expected, the
    fulltext is stored in the checkout_db and this record is replaced
    by a CheckedOutTextRecord; otherwise this record is deleted."""

    base_text = text_record_db[self.pred_id].checkout(text_record_db)
    rcs_stream = RCSStream(base_text)
    delta_text = text_record_db.delta_db[self.id]
    rcs_stream.apply_diff(delta_text)
    text = rcs_stream.get_text()
    # The stream is no longer needed; drop our reference to it early.
    del rcs_stream
    self.refcount -= 1
    if self.refcount == 0:
      # This text will never be needed again; just delete ourselves
      # without ever having stored the fulltext to the checkout
      # database:
      del text_record_db[self.id]
    else:
      # Store a new CheckedOutTextRecord in place of ourselves:
      text_record_db.checkout_db['%x' % self.id] = text
      new_text_record = CheckedOutTextRecord(self.id)
      new_text_record.refcount = self.refcount
      text_record_db.replace(new_text_record)
    return text

  def free(self, text_record_db):
    """Delete our delta and release our reference to the predecessor."""

    del text_record_db.delta_db[self.id]
    text_record_db[self.pred_id].decrement_refcount(text_record_db)

  def __str__(self):
    return 'DeltaTextRecord(%x -> %x, %d)' % (
        self.pred_id, self.id, self.refcount,
        )
class CheckedOutTextRecord(TextRecord):
  """A record whose revision's fulltext is stored in the checkout_db.

  These records are used for revisions whose fulltext has been
  computed already during OutputPass.  The fulltext for such a
  revision is stored in the checkout_db of the text_record_db as a
  single string, keyed by the hexadecimal form of the record id."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    """Return our fulltext, read from TEXT_RECORD_DB.checkout_db.

    The reference count is decremented afterwards, which may cause
    this record to be discarded."""

    text = text_record_db.checkout_db['%x' % self.id]
    self.decrement_refcount(text_record_db)
    return text

  def free(self, text_record_db):
    """Delete our fulltext from TEXT_RECORD_DB.checkout_db."""

    del text_record_db.checkout_db['%x' % self.id]

  def __str__(self):
    return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
class NullDatabase(object):
  """A do-nothing database that can be used with TextRecordDatabase.

  Use this when you don't actually want to allow anything to be
  deleted."""

  def __delitem__(self, id):
    """Silently ignore the request to delete ID."""

    pass
class TextRecordDatabase:
  """Holds the TextRecord instances that are currently live.

  During FilterSymbolsPass, files are processed one by one and a new
  TextRecordDatabase instance is used for each file.  During
  OutputPass, a single TextRecordDatabase instance is used for the
  duration of OutputPass; individual records are added and removed
  when they are active."""

  def __init__(self, delta_db, checkout_db):
    # A map { cvs_rev_id -> TextRecord }.
    self.text_records = {}

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext/deltatext strings as values.  Its __getitem__() method
    # is used to retrieve deltas when they are needed, and its
    # __delitem__() method is used to delete deltas when they can be
    # freed.  The modifiability of the delta database varies from pass
    # to pass, so the object stored here varies as well:
    #
    # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
    #     modified during this pass, and we have no need to retrieve
    #     deltas, so we just use a dummy object here.
    #
    # OutputPass: a disabled IndexedDatabase.  During this pass we
    #     need to retrieve deltas, but we are not allowed to modify
    #     the delta database.  So we use an IndexedDatabase whose
    #     __delitem__() method has been disabled to do nothing.
    self.delta_db = delta_db

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext strings as values.  This database is only set during
    # OutputPass.
    self.checkout_db = checkout_db

    # If this is set to a list, then the list holds the ids of
    # text_records that have to be deleted; when discard() is called,
    # it adds the requested id to the list but does not delete it.  If
    # this member is set to None, then text_records are deleted
    # immediately when discard() is called.
    self.deferred_deletes = None

  def __getstate__(self):
    """Return the picklable state: only the contained TextRecords."""

    return (self.text_records.values(),)

  def __setstate__(self, state):
    """Restore the records; the databases are replaced by NullDatabases."""

    (text_records,) = state
    self.text_records = {}
    for text_record in text_records:
      self.add(text_record)
    self.delta_db = NullDatabase()
    self.checkout_db = NullDatabase()
    self.deferred_deletes = None

  def add(self, text_record):
    """Add TEXT_RECORD to our database.

    There must not already be a record with the same id."""

    assert text_record.id not in self.text_records

    self.text_records[text_record.id] = text_record

  def __getitem__(self, id):
    return self.text_records[id]

  def __delitem__(self, id):
    """Free the record with the specified ID."""

    del self.text_records[id]

  def replace(self, text_record):
    """Store TEXT_RECORD in place of the existing record with the same id.

    Do not do anything with the old record."""

    assert text_record.id in self.text_records
    self.text_records[text_record.id] = text_record

  def discard(self, *ids):
    """The text records with IDS are no longer needed; discard them.

    This involves calling their free() methods and also removing them
    from the database.

    If SELF.deferred_deletes is not None, then the ids to be deleted
    are added to the list instead of deleted immediately.  This
    mechanism is to prevent a stack overflow from the avalanche of
    deletes that can result from deleting a long chain of revisions.

    Raise InternalError if a record to be discarded still has a
    nonzero reference count."""

    if self.deferred_deletes is None:
      # This is an outer-level delete.
      self.deferred_deletes = list(ids)
      while self.deferred_deletes:
        id = self.deferred_deletes.pop()
        text_record = self[id]
        if text_record.refcount != 0:
          raise InternalError(
              'TextRecordDatabase.discard(%s) called with refcount = %d'
              % (text_record, text_record.refcount,)
              )
        # This call might cause other text_record ids to be added to
        # self.deferred_deletes:
        text_record.free(self)
        del self[id]
      self.deferred_deletes = None
    else:
      # A discard() is already in progress further up the stack; let
      # its loop process these ids.
      self.deferred_deletes.extend(ids)

  def itervalues(self):
    """Iterate over the TextRecords that are currently stored."""

    return self.text_records.itervalues()

  def recompute_refcounts(self, cvs_file_items):
    """Recompute the refcounts of the contained TextRecords.

    Use CVS_FILE_ITEMS to determine which records will be needed by
    cvs2svn."""

    # First clear all of the refcounts:
    for text_record in self.itervalues():
      text_record.refcount = 0

    # Now increment the reference count of records that are needed as
    # the source of another record's deltas:
    for text_record in self.itervalues():
      text_record.increment_dependency_refcounts(self)

    # Now increment the reference count of records that will be needed
    # by cvs2svn:
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if isinstance(cvs_rev, CVSRevisionModification):
          self[cvs_rev.id].refcount += 1

  def free_unused(self):
    """Free any TextRecords whose reference counts are zero."""

    # The deletion of some of these text records might cause others to
    # be unused, in which case they will be deleted automatically.
    # But since the initially-unused records are not referred to by
    # any others, we don't have to be afraid that they will be deleted
    # before we get to them.  But it *is* crucial that we create the
    # whole unused list before starting the loop.

    unused = [
        text_record.id
        for text_record in self.itervalues()
        if text_record.refcount == 0
        ]

    self.discard(*unused)

  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      logger.warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix)
      for text_record in self.itervalues():
        logger.warn(' %s' % (text_record,))

  def __repr__(self):
    """Debugging output of the current contents of the TextRecordDatabase."""

    retval = ['TextRecordDatabase:']
    for text_record in self.itervalues():
      retval.append(' %s' % (text_record,))
    return '\n'.join(retval)
class _Sink(cvs2svn_rcsparse.Sink):
  """The rcsparse sink used by InternalRevisionCollector.

  It converts the texts found in one RCS file into TextRecords plus
  fulltext/delta strings and hands them to
  REVISION_COLLECTOR._writeout().  Trunk deltas are inverted so that
  every stored delta applies forwards in time."""

  def __init__(self, revision_collector, cvs_file_items):
    self.revision_collector = revision_collector
    self.cvs_file_items = cvs_file_items

    # A map {rev : base_rev} indicating that the text for rev is
    # stored in CVS as a delta relative to base_rev.
    self.base_revisions = {}

    # The revision that is stored with its fulltext in CVS (usually
    # the newest revision on trunk):
    self.head_revision = None

    # The first logical revision on trunk (usually '1.1'):
    self.revision_1_1 = None

    # Keep track of the revisions whose revision info has been seen so
    # far (to avoid repeated revision info blocks):
    self.revisions_seen = set()

  def set_head_revision(self, revision):
    self.head_revision = revision

  def define_revision(
        self, revision, timestamp, author, state, branches, next
        ):
    """Record which revisions are stored as deltas relative to REVISION."""

    if next:
      self.base_revisions[next] = revision
    else:
      # A trunk revision without a successor in the RCS chain is the
      # first logical revision on trunk.
      if is_trunk_revision(revision):
        self.revision_1_1 = revision

    for branch in branches:
      self.base_revisions[branch] = revision

  def set_revision_info(self, revision, log, text):
    """Write out the TextRecord and text that TEXT provides.

    For branch revisions TEXT is already the needed forward delta.
    For trunk revisions TEXT is the HEAD fulltext or a backward delta,
    which is inverted here."""

    if revision in self.revisions_seen:
      # One common form of CVS repository corruption is that the
      # Deltatext block for revision 1.1 appears twice.  CollectData
      # has already warned about this problem; here we can just ignore
      # it.
      return
    else:
      self.revisions_seen.add(revision)

    cvs_rev_id = self.cvs_file_items.original_ids[revision]
    if is_trunk_revision(revision):
      # On trunk, revisions are encountered in reverse order (1.<N>
      # ... 1.1) and deltas are inverted.  The first text that we see
      # is the fulltext for the HEAD revision.  After that, the text
      # corresponding to revision 1.N is the delta (1.<N+1> ->
      # 1.<N>)).  We have to invert the deltas here so that we can
      # read the revisions out in dependency order; that is, for
      # revision 1.1 we want the fulltext, and for revision 1.<N> we
      # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
      # compute the delta for a revision until we see its logical
      # parent.  When we finally see revision 1.1 (which is recognized
      # because it doesn't have a parent), we can record the diff (1.1
      # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

      if revision == self.head_revision:
        # This is HEAD, as fulltext.  Initialize the RCSStream so
        # that we can compute deltas backwards in time.
        self._rcs_stream = RCSStream(text)
        self._rcs_stream_revision = revision
      else:
        # Any other trunk revision is a backward delta.  Apply the
        # delta to the RCSStream to mutate it to the contents of this
        # revision, and also to get the reverse delta, which we store
        # as the forward delta of our child revision.
        try:
          text = self._rcs_stream.invert_diff(text)
        except MalformedDeltaException as e:
          logger.error(
              'Malformed RCS delta in %s, revision %s: %s'
              % (self.cvs_file_items.cvs_file.rcs_path, revision, e)
              )
          raise RuntimeError()
        # The record belongs to the child revision (the one seen just
        # before this one) and is based on the current revision.
        text_record = DeltaTextRecord(
            self.cvs_file_items.original_ids[self._rcs_stream_revision],
            cvs_rev_id
            )
        self.revision_collector._writeout(text_record, text)
        self._rcs_stream_revision = revision

      if revision == self.revision_1_1:
        # This is revision 1.1.  Write its fulltext:
        text_record = FullTextRecord(cvs_rev_id)
        self.revision_collector._writeout(
            text_record, self._rcs_stream.get_text()
            )

        # There will be no more trunk revisions delivered, so free the
        # RCSStream.
        del self._rcs_stream
        del self._rcs_stream_revision

    else:
      # On branches, revisions are encountered in logical order
      # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
      # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
      # <BRANCH>.<N>).  That's what we need, so just store it.

      # FIXME: It would be nice to avoid writing out branch deltas
      # when --trunk-only.  (They will be deleted when finish_file()
      # is called, but if the delta db is in an IndexedDatabase the
      # deletions won't actually recover any disk space.)
      text_record = DeltaTextRecord(
          cvs_rev_id,
          self.cvs_file_items.original_ids[self.base_revisions[revision]]
          )
      self.revision_collector._writeout(text_record, text)
class InternalRevisionCollector(RevisionCollector):
  """The RevisionCollector used by InternalRevisionReader.

  It stores the fulltexts/deltas of all revisions in the RCS deltas
  database and, per file, a TextRecordDatabase in the RCS trees
  database."""

  def __init__(self, compress):
    RevisionCollector.__init__(self)

    # Whether the stored texts should be wrapped in a
    # CompressingSerializer.
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the temporary files that this collector creates."""

    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    """Create the delta database and the RCS trees database."""

    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, serializer,
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer),
        )

  def _writeout(self, text_record, text):
    """Register TEXT_RECORD and store TEXT under its id in the delta db."""

    self.text_record_db.add(text_record)
    self._delta_db[text_record.id] = text

  def process_file(self, cvs_file_items):
    """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._delta_db, NullDatabase())

    # Close the RCS file explicitly instead of relying on garbage
    # collection to release the file handle.
    rcs_file = open(cvs_file_items.cvs_file.rcs_path, 'rb')
    try:
      cvs2svn_rcsparse.parse(
          rcs_file,
          _Sink(self, cvs_file_items),
          )
    finally:
      rcs_file.close()

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    """Close the databases opened by start()."""

    self._delta_db.close()
    self._rcs_trees.close()
class _KeywordExpander:
  """A class whose instances provide substitutions for CVS keywords.

  This class is used via its __call__() method, which should be called
  with a match object representing a match for a CVS keyword string.
  The method returns the replacement for the matched text.

  The __call__() method works by calling the method with the same name
  as that of the CVS keyword (converted to lower case).

  Instances of this class can be passed as the REPL argument to
  re.sub()."""

  date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
  date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12

  date_fmt = date_fmt_new

  @classmethod
  def use_old_date_format(klass):
    """Class method to ensure exact compatibility with CVS 1.11
    output.  Use this if you want to verify your conversion and you're
    using CVS 1.11."""

    klass.date_fmt = klass.date_fmt_old

  def __init__(self, cvs_rev):
    # The revision whose attributes are used for the expansions.
    self.cvs_rev = cvs_rev

  def __call__(self, match):
    """Return the expanded form of the keyword matched by MATCH.

    Group 1 of MATCH must be the keyword name."""

    return '$%s: %s $' % (
        match.group(1), getattr(self, match.group(1).lower())(),
        )

  def author(self):
    return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author

  def date(self):
    # The timestamp is formatted in UTC (time.gmtime()).
    return time.strftime(self.date_fmt, time.gmtime(self.cvs_rev.timestamp))

  def header(self):
    return '%s %s %s %s Exp' % (
        self.source(), self.cvs_rev.rev, self.date(), self.author(),
        )

  def id(self):
    return '%s %s %s %s Exp' % (
        self.rcsfile(), self.cvs_rev.rev, self.date(), self.author(),
        )

  def locker(self):
    # Handle kvl like kv, as a converted repo is supposed to have no
    # locks.
    return ''

  def log(self):
    # Would need some special handling.
    return 'not supported by cvs2svn'

  def name(self):
    # Cannot work, as just creating a new symbol does not check out
    # the revision again.
    return 'not supported by cvs2svn'

  def rcsfile(self):
    return self.cvs_rev.cvs_file.rcs_basename + ",v"

  def revision(self):
    return self.cvs_rev.rev

  def source(self):
    project = self.cvs_rev.cvs_file.project
    return '%s/%s%s,v' % (
        project.cvs_repository_root,
        project.cvs_module,
        self.cvs_rev.cvs_file.cvs_path,
        )

  def state(self):
    # We check out only live revisions.
    return 'Exp'
class InternalRevisionReader(RevisionReader):
  """A RevisionReader that reads the contents from an own delta store."""

  _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
  _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
  _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')

  def __init__(self, compress):
    # Whether the checkout database should be wrapped in a
    # CompressingSerializer.
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the checkout database and the files needed for reading."""

    artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
    artifact_manager.register_temp_file_needed(
        config.RCS_DELTAS_STORE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_STORE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )

  def start(self):
    """Open the delta and tree databases and create the checkout database."""

    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_READ,
        )
    # Deletions from the read-only delta database are turned into
    # no-ops.  NOTE(review): an instance-level __delitem__ is only
    # honored by "del db[key]" if IndexedDatabase is an old-style
    # class -- confirm.
    self._delta_db.__delitem__ = lambda id: None
    self._tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_READ,
        )
    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._co_db = Database(
        artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB),
        DB_OPEN_NEW, serializer,
        )

    # The set of CVSFile instances whose TextRecords have already been
    # read:
    self._loaded_files = set()

    # The TextRecords of the files that have been loaded so far and
    # still have live revisions:
    self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)

  def _get_text_record(self, cvs_rev):
    """Return the TextRecord instance for CVS_REV.

    If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
    do so now."""

    if cvs_rev.cvs_file not in self._loaded_files:
      for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
        self._text_record_db.add(text_record)
      self._loaded_files.add(cvs_rev.cvs_file)

    return self._text_record_db[cvs_rev.id]

  def get_content(self, cvs_rev):
    """Check out the text for revision CVS_REV from the repository.

    Return the text.  If CVS_REV has a property _keyword_handling, use
    it to determine how to handle RCS keywords in the output:

        'collapsed' -- collapse keywords

        'expanded' -- expand keywords

        'untouched' -- output keywords in the form they are found in
            the RCS file

    Note that $Log$ never actually generates a log (which makes test
    'requires_cvs()' fail).

    Revisions may be requested in any order, but if they are not
    requested in dependency order the checkout database will become
    very large.  Revisions may be skipped.  Each revision may be
    requested only once.

    Raise FatalError if an RCS delta is malformed or if the
    _keyword_handling property has an unknown value."""

    try:
      text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
    except MalformedDeltaException as msg:
      raise FatalError(
          'Malformed RCS delta in %s, revision %s: %s'
          % (cvs_rev.cvs_file.rcs_path, cvs_rev.rev, msg)
          )

    keyword_handling = cvs_rev.get_property('_keyword_handling')

    if keyword_handling == 'untouched':
      # Leave keywords in the form that they were checked in.
      pass
    elif keyword_handling == 'collapsed':
      text = self._kw_re.sub(r'$\1$', text)
    elif keyword_handling == 'expanded':
      text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
    else:
      raise FatalError(
          'Undefined _keyword_handling property (%r) for %s'
          % (keyword_handling, cvs_rev,)
          )

    if Ctx().decode_apple_single:
      # Insert a filter to decode any files that are in AppleSingle
      # format:
      text = get_maybe_apple_single(text)

    eol_fix = cvs_rev.get_property('_eol_fix')
    if eol_fix:
      text = canonicalize_eol(text, eol_fix)

    return text

  def finish(self):
    """Log any leftover records and close the databases opened by start()."""

    self._text_record_db.log_leftovers()

    del self._text_record_db
    self._delta_db.close()
    self._tree_db.close()
    self._co_db.close()