1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
"""This module contains classes that implement the --use-internal-co option.

The idea is to patch up the revisions' contents incrementally, thus
avoiding the huge number of process spawns and the O(n^2) overhead of
using 'co' and 'cvs'.

InternalRevisionRecorder saves the RCS deltas and RCS revision trees
to databases.  Notably, deltas from the trunk need to be reversed, as
CVS stores them so they apply from HEAD backwards.

InternalRevisionExcluder copies the revision trees to a new database,
omitting excluded branches.

InternalRevisionReader produces the revisions' contents on demand.  To
generate the text for a typical revision, we need the revision's delta
text plus the fulltext of the previous revision.  Therefore, we
maintain a checkout database containing a copy of the fulltext of any
revision for which subsequent revisions still need to be retrieved.
It is crucial to remove text from this database as soon as it is no
longer needed, to prevent it from growing enormous.

There are two reasons that the text from a revision can be needed: (1)
because the revision itself still needs to be output to a dumpfile;
(2) because another revision needs it as the base of its delta.  We
maintain a reference count for each revision, which includes *both*
possibilities.  The first time a revision's text is needed, it is
generated by applying the revision's deltatext to the previous
revision's fulltext, and the resulting fulltext is stored in the
checkout database.  Each time a revision's fulltext is retrieved, its
reference count is decremented.  When the reference count goes to
zero, then the fulltext is deleted from the checkout database.

The administrative data for managing this consists of one TextRecord
entry for each revision.  Each TextRecord has an id, which is the same
id as used for the corresponding CVSRevision instance.  It also
maintains a count of the times it is expected to be retrieved.
TextRecords come in several varieties:

  FullTextRecord -- Used for revisions whose fulltext is contained
      directly in the RCS file, and therefore available during
      CollectRevsPass (i.e., typically revision 1.1 of each file).

  DeltaTextRecord -- Used for revisions that are defined via a delta
      relative to some other TextRecord.  These records record the id of
      the TextRecord that holds the base text against which the delta is
      defined.  When the text for a DeltaTextRecord is retrieved, the
      DeltaTextRecord instance is deleted and a CheckedOutTextRecord
      instance is created to take its place.

  CheckedOutTextRecord -- Used during OutputPass for a revision that
      started out as a DeltaTextRecord, but has already been retrieved
      (and therefore its fulltext is stored in the checkout database).

While a file is being processed during CollectRevsPass, the fulltext
and deltas are stored to the delta database, and TextRecord instances
are created to keep track of things.  The reference counts are all
initialized to zero.

After CollectRevsPass has done any preliminary tree mangling, its
_FileDataCollector.parse_completed() method calls
RevisionRecorder.finish_file(), passing it the CVSFileItems instance
that describes the revisions in the file.  At this point the reference
counts for the file's TextRecords are updated: each record referred to
by a delta has its refcount incremented, and each record that
corresponds to a non-delete CVSRevision is incremented.  After that,
any records with refcount==0 are removed.  When one record is removed,
that can cause another record's reference count to go to zero and be
removed too, recursively.  When a TextRecord is deleted at this stage,
its deltatext is also deleted from the delta database.

In FilterSymbolsPass, the exact same procedure (described in the
previous paragraph) is repeated, but this time using the CVSFileItems
after it has been updated for excluded symbols, symbol
preferred-parent grafting, etc."""
import re
import sys
import time
from cStringIO import StringIO

from cvs2svn_lib import config
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.log import Log
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.cvs_item import CVSRevisionModification
from cvs2svn_lib.database import Database
from cvs2svn_lib.database import IndexedDatabase
from cvs2svn_lib.rcs_stream import RCSStream
from cvs2svn_lib.rcs_stream import MalformedDeltaException
from cvs2svn_lib.revision_manager import RevisionRecorder
from cvs2svn_lib.revision_manager import RevisionExcluder
from cvs2svn_lib.revision_manager import RevisionReader
from cvs2svn_lib.serializer import MarshalSerializer
from cvs2svn_lib.serializer import CompressingSerializer
from cvs2svn_lib.serializer import PrimedPickleSerializer
class TextRecord(object):
  """Bookkeeping data for the text of a single CVSRevision.

  This is the abstract base of the record varieties; subclasses must
  implement checkout() and free()."""

  __slots__ = ['id', 'refcount']

  def __init__(self, id):
    # The cvs_rev_id of the revision whose text this is.
    self.id = id

    # The number of times that the text of this revision will be
    # retrieved.
    self.refcount = 0

  def __getstate__(self):
    """Return the picklable state: the tuple (id, refcount,)."""

    return (self.id, self.refcount,)

  def __setstate__(self, state):
    """Restore the state produced by __getstate__()."""

    (self.id, self.refcount,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Increment the refcounts of any records that this one depends on.

    The base implementation depends on nothing, so it does nothing."""

    pass

  def decrement_refcount(self, text_record_db):
    """Decrement the number of times our text still has to be checked out.

    If the reference count goes to zero, call
    TEXT_RECORD_DB.discard() with our id."""

    self.refcount -= 1
    if self.refcount == 0:
      text_record_db.discard(self.id)

  def checkout(self, text_record_db):
    """Workhorse of the checkout process.

    Return the text for this revision, decrement our reference count,
    and update the databases depending on whether there will be future
    checkouts.  Must be implemented by subclasses."""

    raise NotImplementedError()

  def free(self, text_record_db):
    """This instance will never again be checked out; free it.

    Also free any associated resources and decrement the refcounts of
    any other TextRecords that this one depends on.  Must be
    implemented by subclasses."""

    raise NotImplementedError()
class FullTextRecord(TextRecord):
  """A TextRecord whose fulltext is stored directly in the delta database."""

  __slots__ = []

  def __getstate__(self):
    """Return the picklable state: the tuple (id, refcount,)."""

    return (self.id, self.refcount,)

  def __setstate__(self, state):
    """Restore the state produced by __getstate__()."""

    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    """Return our fulltext, read from TEXT_RECORD_DB.delta_db.

    The text is fetched *before* the refcount is decremented, because
    the decrement may discard this record and free its stored text."""

    text = text_record_db.delta_db[self.id]
    self.decrement_refcount(text_record_db)
    return text

  def free(self, text_record_db):
    """Delete our fulltext from TEXT_RECORD_DB.delta_db."""

    del text_record_db.delta_db[self.id]

  def __str__(self):
    return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
class DeltaTextRecord(TextRecord):
  """A TextRecord whose text is defined by a delta against another record."""

  __slots__ = ['pred_id']

  def __init__(self, id, pred_id):
    TextRecord.__init__(self, id)

    # The cvs_rev_id of the revision relative to which this delta is
    # defined.
    self.pred_id = pred_id

  def __getstate__(self):
    """Return the picklable state: the tuple (id, refcount, pred_id,)."""

    return (self.id, self.refcount, self.pred_id,)

  def __setstate__(self, state):
    """Restore the state produced by __getstate__()."""

    (self.id, self.refcount, self.pred_id,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Count our need for the base text: bump the predecessor's refcount.

    TEXT_RECORD_DB only needs to support lookup by id."""

    text_record_db[self.pred_id].refcount += 1

  def checkout(self, text_record_db):
    """Reconstruct and return our fulltext.

    The predecessor is checked out (consuming one of its references),
    and our delta from TEXT_RECORD_DB.delta_db is applied to it.  If
    our text will be needed again, it is stored in
    TEXT_RECORD_DB.checkout_db and we are replaced in TEXT_RECORD_DB
    by a CheckedOutTextRecord; otherwise we are simply removed."""

    base_text = text_record_db[self.pred_id].checkout(text_record_db)
    co = RCSStream(base_text)
    delta_text = text_record_db.delta_db[self.id]
    co.apply_diff(delta_text)
    text = co.get_text()
    del co
    self.refcount -= 1
    if self.refcount == 0:
      # This text will never be needed again; just delete ourselves
      # without ever having stored the fulltext to the checkout
      # database:
      del text_record_db[self.id]
    else:
      # Store a new CheckedOutTextRecord in place of ourselves:
      text_record_db.checkout_db['%x' % self.id] = text
      new_text_record = CheckedOutTextRecord(self.id)
      new_text_record.refcount = self.refcount
      text_record_db.replace(new_text_record)
    return text

  def free(self, text_record_db):
    """Delete our delta and release our reference to the predecessor."""

    del text_record_db.delta_db[self.id]
    text_record_db[self.pred_id].decrement_refcount(text_record_db)

  def __str__(self):
    return 'DeltaTextRecord(%x -> %x, %d)' \
           % (self.pred_id, self.id, self.refcount,)
class CheckedOutTextRecord(TextRecord):
  """A TextRecord whose fulltext is stored in the checkout database.

  The checkout database is keyed by the record id formatted as a
  hexadecimal string."""

  __slots__ = []

  def __getstate__(self):
    """Return the picklable state: the tuple (id, refcount,)."""

    return (self.id, self.refcount,)

  def __setstate__(self, state):
    """Restore the state produced by __getstate__()."""

    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    """Return our fulltext, read from TEXT_RECORD_DB.checkout_db.

    The text is fetched *before* the refcount is decremented, because
    the decrement may discard this record and free its stored text."""

    text = text_record_db.checkout_db['%x' % self.id]
    self.decrement_refcount(text_record_db)
    return text

  def free(self, text_record_db):
    """Delete our fulltext from TEXT_RECORD_DB.checkout_db."""

    del text_record_db.checkout_db['%x' % self.id]

  def __str__(self):
    return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
class NullDatabase(object):
  """A do-nothing database that can be used with TextRecordDatabase.

  Use this when you don't actually want to allow anything to be
  deleted."""

  def __delitem__(self, id):
    """Silently ignore the request to delete the item with key ID."""

    pass
class TextRecordDatabase:
  """Holds the TextRecord instances that are currently live.

  During CollectRevsPass and FilterSymbolsPass, files are processed
  one by one and a new TextRecordDatabase instance is used for each
  file.  During OutputPass, a single TextRecordDatabase instance is
  used for the duration of OutputPass; individual records are added
  and removed when they are active."""

  def __init__(self, delta_db, checkout_db):
    # A map { cvs_rev_id -> TextRecord }.
    self.text_records = {}

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext/deltatext strings as values.  Its __getitem__() method
    # is used to retrieve deltas when they are needed, and its
    # __delitem__() method is used to delete deltas when they can be
    # freed.  The modifiability of the delta database varies from pass
    # to pass, so the object stored here varies as well:
    #
    # CollectRevsPass: a fully-functional IndexedDatabase.  This
    #     allows deltas that will not be needed to be deleted.
    #
    # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
    #     modified during this pass, and we have no need to retrieve
    #     deltas, so we just use a dummy object here.
    #
    # OutputPass: a disabled IndexedDatabase.  During this pass we
    #     need to retrieve deltas, but we are not allowed to modify
    #     the delta database.  So we use an IndexedDatabase whose
    #     __delitem__() method has been disabled to do nothing.
    self.delta_db = delta_db

    # A database-like object containing fulltext strings as values.
    # The records in this module key it by the cvs_rev_id formatted
    # as a hexadecimal string.
    self.checkout_db = checkout_db

    # If this is set to a list, then the list holds the ids of
    # text_records that have to be deleted; when discard() is called,
    # it adds the requested id to the list but does not delete it.  If
    # this member is set to None, then text_records are deleted
    # immediately when discard() is called.
    self.deferred_deletes = None

  def __getstate__(self):
    """Return the picklable state: a 1-tuple holding the live records.

    The databases are deliberately not part of the state."""

    return (self.text_records.values(),)

  def __setstate__(self, state):
    """Restore the records; both databases become NullDatabases."""

    (text_records,) = state
    self.text_records = {}
    for text_record in text_records:
      self.add(text_record)
    self.delta_db = NullDatabase()
    self.checkout_db = NullDatabase()
    self.deferred_deletes = None

  def add(self, text_record):
    """Add TEXT_RECORD to our database.

    There must not already be a record with the same id."""

    assert text_record.id not in self.text_records

    self.text_records[text_record.id] = text_record

  def __getitem__(self, id):
    """Return the record with the specified ID (KeyError if absent)."""

    return self.text_records[id]

  def __delitem__(self, id):
    """Free the record with the specified ID."""

    del self.text_records[id]

  def replace(self, text_record):
    """Store TEXT_RECORD in place of the existing record with the same id.

    Do not do anything with the old record."""

    assert text_record.id in self.text_records
    self.text_records[text_record.id] = text_record

  def discard(self, *ids):
    """The text records with IDS are no longer needed; discard them.

    This involves calling their free() methods and also removing them
    from SELF.  Raise InternalError if a record to be discarded still
    has a nonzero refcount.

    If SELF.deferred_deletes is not None, then the ids to be deleted
    are added to the list instead of deleted immediately.  This
    mechanism is to prevent a stack overflow from the avalanche of
    deletes that can result from deleting a long chain of revisions."""

    if self.deferred_deletes is None:
      # This is an outer-level delete.
      self.deferred_deletes = list(ids)
      while self.deferred_deletes:
        doomed_id = self.deferred_deletes.pop()
        text_record = self[doomed_id]
        if text_record.refcount != 0:
          raise InternalError(
              'TextRecordDatabase.discard(%s) called with refcount = %d'
              % (text_record, text_record.refcount,)
              )
        # This call might cause other text_record ids to be added to
        # self.deferred_deletes:
        text_record.free(self)
        del self[doomed_id]
      self.deferred_deletes = None
    else:
      # A discard() is already running further up the stack; let it
      # process these ids from its loop instead of recursing.
      self.deferred_deletes.extend(ids)

  def itervalues(self):
    """Return an iterator over the live TextRecords."""

    return self.text_records.itervalues()

  def recompute_refcounts(self, cvs_file_items):
    """Recompute the refcounts of the contained TextRecords.

    Use CVS_FILE_ITEMS to determine which records will be needed:
    every CVSRevisionModification in it contributes one reference to
    its own record."""

    # First clear all of the refcounts:
    for text_record in self.itervalues():
      text_record.refcount = 0

    # Now increment the reference count of records that are needed as
    # the source of another record's deltas:
    for text_record in self.itervalues():
      text_record.increment_dependency_refcounts(self.text_records)

    # Now increment the reference count of records whose own text
    # will be requested:
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if isinstance(cvs_rev, CVSRevisionModification):
          self[cvs_rev.id].refcount += 1

  def free_unused(self):
    """Free any TextRecords whose reference counts are zero."""

    # The deletion of some of these text records might cause others to
    # be unused, in which case they will be deleted automatically.
    # But since the initially-unused records are not referred to by
    # any others, we don't have to be afraid that they will be deleted
    # before we get to them.  But it *is* crucial that we create the
    # whole unused list before starting the loop.

    unused = [
        text_record.id
        for text_record in self.itervalues()
        if text_record.refcount == 0
        ]

    self.discard(*unused)

  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      Log().warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix)
      for text_record in self.itervalues():
        Log().warn(' %s' % (text_record,))

  def __repr__(self):
    """Debugging output of the current contents of the TextRecordDatabase."""

    retval = ['TextRecordDatabase:']
    for text_record in self.itervalues():
      retval.append(' %s' % (text_record,))
    return '\n'.join(retval)
class InternalRevisionRecorder(RevisionRecorder):
  """A RevisionRecorder that reconstructs the fulltext internally."""

  def __init__(self, compress):
    """COMPRESS selects whether stored deltas are compressed."""

    RevisionRecorder.__init__(self)
    self._compress = compress

  def register_artifacts(self, which_pass):
    """Register the delta and tree database files created by this pass."""

    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    """Create the (new, empty) delta and tree databases."""

    ser = MarshalSerializer()
    if self._compress:
      ser = CompressingSerializer(ser)
    self._rcs_deltas = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, ser)
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer))

  def start_file(self, cvs_file_items):
    """Begin a new file with a fresh, empty TextRecordDatabase."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

  def record_text(self, cvs_rev, log, text):
    """Record TEXT, the RCS text for CVS_REV; always return None.

    LOG is not used.  Raise RuntimeError (after logging an error) if a
    trunk delta is malformed and cannot be inverted."""

    if isinstance(cvs_rev.lod, Trunk):
      # On trunk, revisions are encountered in reverse order (1.<N>
      # ... 1.1) and deltas are inverted.  The first text that we see
      # is the fulltext for the HEAD revision.  After that, the text
      # corresponding to revision 1.N is the delta (1.<N+1> ->
      # 1.<N>)).  We have to invert the deltas here so that we can
      # read the revisions out in dependency order; that is, for
      # revision 1.1 we want the fulltext, and for revision 1.<N> we
      # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
      # compute the delta for a revision until we see its logical
      # parent.  When we finally see revision 1.1 (which is recognized
      # because it doesn't have a parent), we can record the diff (1.1
      # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

      if cvs_rev.next_id is None:
        # This is HEAD, as fulltext.  Initialize the RCSStream so
        # that we can compute deltas backwards in time.
        self._stream = RCSStream(text)
      else:
        # Any other trunk revision is a backward delta.  Apply the
        # delta to the RCSStream to mutate it to the contents of this
        # revision, and also to get the reverse delta, which we store
        # as the forward delta of our child revision.
        try:
          text = self._stream.invert_diff(text)
        except MalformedDeltaException:
          # sys.exc_info() fetches the exception without relying on
          # the Python-2-only "except X, name" syntax.
          msg = sys.exc_info()[1]
          Log().error('Malformed RCS delta in %s, revision %s: %s'
                      % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev,
                         msg))
          raise RuntimeError
        text_record = DeltaTextRecord(cvs_rev.next_id, cvs_rev.id)
        self._writeout(text_record, text)

      if cvs_rev.prev_id is None:
        # This is revision 1.1.  Write its fulltext:
        text_record = FullTextRecord(cvs_rev.id)
        self._writeout(text_record, self._stream.get_text())

        # There will be no more trunk revisions delivered, so free the
        # RCSStream.
        del self._stream

    else:
      # On branches, revisions are encountered in logical order
      # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
      # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
      # <BRANCH>.<N>).  That's what we need, so just store it.

      # FIXME: It would be nice to avoid writing out branch deltas
      # when --trunk-only.  (They will be deleted when finish_file()
      # is called, but if the delta db is in an IndexedDatabase the
      # deletions won't actually recover any disk space.)
      text_record = DeltaTextRecord(cvs_rev.id, cvs_rev.prev_id)
      self._writeout(text_record, text)

    return None

  def _writeout(self, text_record, text):
    """Register TEXT_RECORD and store TEXT under its id in the delta db."""

    self.text_record_db.add(text_record)
    self._rcs_deltas[text_record.id] = text

  def finish_file(self, cvs_file_items):
    """Finish processing of the current file.

    Compute the initial text record refcounts, discard any records
    that are unneeded, and store the text records for the file to the
    _rcs_trees database."""

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    """Close both databases."""

    self._rcs_deltas.close()
    self._rcs_trees.close()
class InternalRevisionExcluder(RevisionExcluder):
  """The RevisionExcluder used by InternalRevisionReader."""

  def register_artifacts(self, which_pass):
    """Register the tree databases that this pass reads and writes."""

    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_STORE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.RCS_TREES_FILTERED_STORE, which_pass
        )
    artifact_manager.register_temp_file(
        config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
        )

  def start(self):
    """Open the existing tree database and create the filtered one."""

    self._tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_READ)
    primer = (FullTextRecord, DeltaTextRecord)
    self._new_tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer))

  def process_file(self, cvs_file_items):
    """Re-filter the text records of one file into the new tree db.

    The refcounts are recomputed from CVS_FILE_ITEMS and records that
    are no longer referenced are dropped before the result is stored."""

    text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
    text_record_db.recompute_refcounts(cvs_file_items)
    text_record_db.free_unused()
    self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db

  def finish(self):
    """Close both tree databases."""

    self._tree_db.close()
    self._new_tree_db.close()
599 class _KeywordExpander
:
600 """A class whose instances provide substitutions for CVS keywords.
602 This class is used via its __call__() method, which should be called
603 with a match object representing a match for a CVS keyword string.
604 The method returns the replacement for the matched text.
606 The __call__() method works by calling the method with the same name
607 as that of the CVS keyword (converted to lower case).
609 Instances of this class can be passed as the REPL argument to
612 date_fmt_old
= "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
613 date_fmt_new
= "%Y-%m-%d %H:%M:%S" # CVS 1.12
615 date_fmt
= date_fmt_new
618 def use_old_date_format(klass
):
619 """Class method to ensure exact compatibility with CVS 1.11
620 output. Use this if you want to verify your conversion and you're
622 klass
.date_fmt
= klass
.date_fmt_old
624 def __init__(self
, cvs_rev
):
625 self
.cvs_rev
= cvs_rev
627 def __call__(self
, match
):
628 return '$%s: %s $' % \
629 (match
.group(1), getattr(self
, match
.group(1).lower())(),)
632 return Ctx()._metadata
_db
[self
.cvs_rev
.metadata_id
].original_author
635 return time
.strftime(self
.date_fmt
,
636 time
.gmtime(self
.cvs_rev
.timestamp
))
639 return '%s %s %s %s Exp' % \
640 (self
.source(), self
.cvs_rev
.rev
, self
.date(), self
.author())
643 return '%s %s %s %s Exp' % \
644 (self
.rcsfile(), self
.cvs_rev
.rev
, self
.date(), self
.author())
647 # Handle kvl like kv, as a converted repo is supposed to have no
652 # Would need some special handling.
653 return 'not supported by cvs2svn'
656 # Cannot work, as just creating a new symbol does not check out
657 # the revision again.
658 return 'not supported by cvs2svn'
661 return self
.cvs_rev
.cvs_file
.basename
+ ",v"
664 return self
.cvs_rev
.rev
667 project
= self
.cvs_rev
.cvs_file
.project
668 return project
.cvs_repository_root
+ '/' + project
.cvs_module
+ \
669 self
.cvs_rev
.cvs_file
.cvs_path
+ ",v"
672 # We check out only live revisions.
class InternalRevisionReader(RevisionReader):
  """A RevisionReader that reads the contents from an own delta store."""

  # Alternation of the keyword names recognized in file contents:
  _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'

  # Matches only an *expanded* keyword ("$Keyword: ...$"); used to
  # collapse it back to "$Keyword$":
  _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')

  # Matches a keyword whether or not it is expanded; used to
  # (re-)expand it:
  _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')
683 def __init__(self
, compress
):
684 self
._compress
= compress
686 def register_artifacts(self
, which_pass
):
687 artifact_manager
.register_temp_file(config
.CVS_CHECKOUT_DB
, which_pass
)
688 artifact_manager
.register_temp_file_needed(
689 config
.RCS_DELTAS_STORE
, which_pass
691 artifact_manager
.register_temp_file_needed(
692 config
.RCS_DELTAS_INDEX_TABLE
, which_pass
694 artifact_manager
.register_temp_file_needed(
695 config
.RCS_TREES_FILTERED_STORE
, which_pass
697 artifact_manager
.register_temp_file_needed(
698 config
.RCS_TREES_FILTERED_INDEX_TABLE
, which_pass
702 self
._delta
_db
= IndexedDatabase(
703 artifact_manager
.get_temp_file(config
.RCS_DELTAS_STORE
),
704 artifact_manager
.get_temp_file(config
.RCS_DELTAS_INDEX_TABLE
),
706 self
._delta
_db
.__delitem
__ = lambda id: None
707 self
._tree
_db
= IndexedDatabase(
708 artifact_manager
.get_temp_file(config
.RCS_TREES_FILTERED_STORE
),
709 artifact_manager
.get_temp_file(config
.RCS_TREES_FILTERED_INDEX_TABLE
),
711 ser
= MarshalSerializer()
713 ser
= CompressingSerializer(ser
)
714 self
._co
_db
= Database(
715 artifact_manager
.get_temp_file(config
.CVS_CHECKOUT_DB
), DB_OPEN_NEW
,
718 # The set of CVSFile instances whose TextRecords have already been
720 self
._loaded
_files
= set()
722 # A map { CVSFILE : _FileTree } for files that currently have live
724 self
._text
_record
_db
= TextRecordDatabase(self
._delta
_db
, self
._co
_db
)
726 def _get_text_record(self
, cvs_rev
):
727 """Return the TextRecord instance for CVS_REV.
729 If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
732 if cvs_rev
.cvs_file
not in self
._loaded
_files
:
733 for text_record
in self
._tree
_db
[cvs_rev
.cvs_file
.id].itervalues():
734 self
._text
_record
_db
.add(text_record
)
735 self
._loaded
_files
.add(cvs_rev
.cvs_file
)
737 return self
._text
_record
_db
[cvs_rev
.id]
739 def get_content_stream(self
, cvs_rev
, suppress_keyword_substitution
=False):
740 """Check out the text for revision C_REV from the repository.
742 Return the text wrapped in a readable file object. If
743 SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
744 _un_expanded prior to returning the file content. Note that $Log$
745 never actually generates a log (which makes test 'requires_cvs()'
748 Revisions may be requested in any order, but if they are not
749 requested in dependency order the checkout database will become
750 very large. Revisions may be skipped. Each revision may be
751 requested only once."""
754 text
= self
._get
_text
_record
(cvs_rev
).checkout(self
._text
_record
_db
)
755 except MalformedDeltaException
, (msg
):
756 raise FatalError('Malformed RCS delta in %s, revision %s: %s'
757 % (cvs_rev
.cvs_file
.get_filename(), cvs_rev
.rev
, msg
))
758 if cvs_rev
.cvs_file
.mode
!= 'b' and cvs_rev
.cvs_file
.mode
!= 'o':
759 if suppress_keyword_substitution
or cvs_rev
.cvs_file
.mode
== 'k':
760 text
= self
._kw
_re
.sub(r
'$\1$', text
)
762 text
= self
._kwo
_re
.sub(_KeywordExpander(cvs_rev
), text
)
764 return StringIO(text
)
767 self
._text
_record
_db
.log_leftovers()
769 del self
._text
_record
_db
770 self
._delta
_db
.close()
771 self
._tree
_db
.close()