1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2007-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
"""This module contains classes that implement the --use-internal-co option.

The idea is to patch up the revisions' contents incrementally, thus
avoiding the huge number of process spawns and the O(n^2) overhead of
using co and cvs.

InternalRevisionCollector saves the RCS deltas and RCS revision trees
to databases.  Notably, deltas from the trunk need to be reversed, as
CVS stores them so they apply from HEAD backwards.

InternalRevisionReader produces the revisions' contents on demand.  To
generate the text for a typical revision, we need the revision's delta
text plus the fulltext of the previous revision.  Therefore, we
maintain a checkout database containing a copy of the fulltext of any
revision for which subsequent revisions still need to be retrieved.
It is crucial to remove text from this database as soon as it is no
longer needed, to prevent it from growing enormous.

There are two reasons that the text from a revision can be needed: (1)
because the revision itself still needs to be output to a dumpfile;
(2) because another revision needs it as the base of its delta.  We
maintain a reference count for each revision, which includes *both*
possibilities.  The first time a revision's text is needed, it is
generated by applying the revision's deltatext to the previous
revision's fulltext, and the resulting fulltext is stored in the
checkout database.  Each time a revision's fulltext is retrieved, its
reference count is decremented.  When the reference count goes to
zero, then the fulltext is deleted from the checkout database.

The administrative data for managing this consists of one TextRecord
entry for each revision.  Each TextRecord has an id, which is the same
id as used for the corresponding CVSRevision instance.  It also
maintains a count of the times it is expected to be retrieved.
TextRecords come in several varieties:

FullTextRecord -- Used for revisions whose fulltext is contained
    directly in the RCS file, and therefore available during
    CollectRevsPass (i.e., typically revision 1.1 of each file).

DeltaTextRecord -- Used for revisions that are defined via a delta
    relative to some other TextRecord.  These records record the id of
    the TextRecord that holds the base text against which the delta is
    defined.  When the text for a DeltaTextRecord is retrieved, the
    DeltaTextRecord instance is deleted and a CheckedOutTextRecord
    instance is created to take its place.

CheckedOutTextRecord -- Used during OutputPass for a revision that
    started out as a DeltaTextRecord, but has already been retrieved
    (and therefore its fulltext is stored in the checkout database).

While a file is being processed during CollectRevsPass, the fulltext
and deltas are stored to the delta database, and TextRecord instances
are created to keep track of things.  The reference counts are all
initialized to zero.

After CollectRevsPass has done any preliminary tree mangling, its
_FileDataCollector.parse_completed() method calls
RevisionRecorder.finish_file(), passing it the CVSFileItems instance
that describes the revisions in the file.  At this point the reference
counts for the file's TextRecords are updated: each record referred to
by a delta has its refcount incremented, and each record that
corresponds to a non-delete CVSRevision is incremented.  After that,
any records with refcount==0 are removed.  When one record is removed,
that can cause another record's reference count to go to zero and be
removed too, recursively.  When a TextRecord is deleted at this stage,
its deltatext is also deleted from the delta database.

In FilterSymbolsPass, the exact same procedure (described in the
previous paragraph) is repeated, but this time using the CVSFileItems
after it has been updated for excluded symbols, symbol
preferred-parent grafting, etc."""
90 from cStringIO
import StringIO
94 from cvs2svn_lib
import config
95 from cvs2svn_lib
.common
import DB_OPEN_NEW
96 from cvs2svn_lib
.common
import DB_OPEN_READ
97 from cvs2svn_lib
.common
import warning_prefix
98 from cvs2svn_lib
.common
import FatalError
99 from cvs2svn_lib
.common
import InternalError
100 from cvs2svn_lib
.common
import is_trunk_revision
101 from cvs2svn_lib
.context
import Ctx
102 from cvs2svn_lib
.log
import Log
103 from cvs2svn_lib
.artifact_manager
import artifact_manager
104 from cvs2svn_lib
.symbol
import Trunk
105 from cvs2svn_lib
.cvs_item
import CVSRevisionModification
106 from cvs2svn_lib
.database
import Database
107 from cvs2svn_lib
.database
import IndexedDatabase
108 from cvs2svn_lib
.rcs_stream
import RCSStream
109 from cvs2svn_lib
.rcs_stream
import MalformedDeltaException
110 from cvs2svn_lib
.revision_manager
import RevisionCollector
111 from cvs2svn_lib
.revision_manager
import RevisionReader
112 from cvs2svn_lib
.serializer
import MarshalSerializer
113 from cvs2svn_lib
.serializer
import CompressingSerializer
114 from cvs2svn_lib
.serializer
import PrimedPickleSerializer
116 import cvs2svn_rcsparse
class TextRecord(object):
    """Bookkeeping data for the text of a single CVSRevision.

    This is the abstract base of the record types; checkout() and
    free() must be supplied by subclasses."""

    __slots__ = ['id', 'refcount']

    def __init__(self, id):
        # The number of times that the text of this revision will still
        # be retrieved.
        self.refcount = 0

        # The cvs_rev_id of the revision whose text this is.
        self.id = id

    def __getstate__(self):
        state = (self.id, self.refcount,)
        return state

    def __setstate__(self, state):
        self.id, self.refcount = state

    def increment_dependency_refcounts(self, text_record_db):
        """Bump the refcount of every record this one depends on.

        The base record has no dependencies, so nothing happens."""

        return None

    def decrement_refcount(self, text_record_db):
        """Note that our text has to be checked out one time fewer.

        When the count reaches zero, ask TEXT_RECORD_DB to discard us."""

        remaining = self.refcount - 1
        self.refcount = remaining
        if remaining != 0:
            return
        text_record_db.discard(self.id)

    def checkout(self, text_record_db):
        """Workhorse of the checkout process.

        Return the text for this revision, decrement our reference
        count, and update the databases depending on whether there will
        be future checkouts.  Must be implemented by subclasses."""

        raise NotImplementedError()

    def free(self, text_record_db):
        """Release this record, which will never be checked out again.

        Implementations also free any associated resources and
        decrement the refcounts of any other TextRecords that this one
        depends on.  Must be implemented by subclasses."""

        raise NotImplementedError()
class FullTextRecord(TextRecord):
    """Record for a revision whose fulltext lives in the delta database."""

    # No per-instance state beyond what TextRecord already declares.
    __slots__ = []

    def __getstate__(self):
        state = (self.id, self.refcount,)
        return state

    def __setstate__(self, state):
        self.id, self.refcount = state

    def checkout(self, text_record_db):
        """Return our fulltext and count one checkout against us."""

        fulltext = text_record_db.delta_db[self.id]
        # This may discard (and thereby free) us if it was the last use.
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Drop our fulltext from the delta database."""

        del text_record_db.delta_db[self.id]

    def __str__(self):
        description = (self.id, self.refcount,)
        return 'FullTextRecord(%x, %d)' % description
class DeltaTextRecord(TextRecord):
    """Record for a revision stored as a delta against another record."""

    __slots__ = ['pred_id']

    def __init__(self, id, pred_id):
        TextRecord.__init__(self, id)

        # The cvs_rev_id of the revision relative to which this delta is
        # defined.
        self.pred_id = pred_id

    def __getstate__(self):
        state = (self.id, self.refcount, self.pred_id,)
        return state

    def __setstate__(self, state):
        self.id, self.refcount, self.pred_id = state

    def increment_dependency_refcounts(self, text_record_db):
        """Count our own need for the predecessor's text."""

        predecessor = text_record_db[self.pred_id]
        predecessor.refcount += 1

    def checkout(self, text_record_db):
        """Build and return our fulltext from the predecessor's text.

        The predecessor is checked out (which consumes one of its
        references), our delta is applied to it, and our own refcount
        is decremented.  If further checkouts are still expected, the
        fulltext is cached in the checkout database and we are replaced
        by a CheckedOutTextRecord; otherwise we are simply removed."""

        predecessor = text_record_db[self.pred_id]
        stream = RCSStream(predecessor.checkout(text_record_db))
        stream.apply_diff(text_record_db.delta_db[self.id])
        text = stream.get_text()
        del stream

        self.refcount -= 1
        if self.refcount != 0:
            # Store a new CheckedOutTextRecord in place of ourselves:
            text_record_db.checkout_db['%x' % self.id] = text
            replacement = CheckedOutTextRecord(self.id)
            replacement.refcount = self.refcount
            text_record_db.replace(replacement)
        else:
            # This text will never be needed again; just delete
            # ourselves without ever having stored the fulltext to the
            # checkout database:
            del text_record_db[self.id]

        return text

    def free(self, text_record_db):
        """Drop our delta and release our hold on the predecessor."""

        del text_record_db.delta_db[self.id]
        predecessor = text_record_db[self.pred_id]
        predecessor.decrement_refcount(text_record_db)

    def __str__(self):
        description = (self.pred_id, self.id, self.refcount,)
        return 'DeltaTextRecord(%x -> %x, %d)' % description
class CheckedOutTextRecord(TextRecord):
    """Record for a revision whose fulltext is in the checkout database."""

    # No per-instance state beyond what TextRecord already declares.
    __slots__ = []

    def __getstate__(self):
        state = (self.id, self.refcount,)
        return state

    def __setstate__(self, state):
        self.id, self.refcount = state

    def _key(self):
        """Return the checkout database key for this record."""

        return '%x' % self.id

    def checkout(self, text_record_db):
        """Return the cached fulltext and count one checkout against us."""

        fulltext = text_record_db.checkout_db[self._key()]
        # This may discard (and thereby free) us if it was the last use.
        self.decrement_refcount(text_record_db)
        return fulltext

    def free(self, text_record_db):
        """Drop the cached fulltext from the checkout database."""

        del text_record_db.checkout_db[self._key()]

    def __str__(self):
        description = (self.id, self.refcount,)
        return 'CheckedOutTextRecord(%x, %d)' % description
class NullDatabase(object):
    """A do-nothing database that can be used with TextRecordDatabase.

    Use this when you don't actually want to allow anything to be
    deleted."""

    def __delitem__(self, id):
        # Deletion requests are accepted and silently ignored.
        return None
class TextRecordDatabase:
    """Holds the TextRecord instances that are currently live.

    During CollectRevsPass and FilterSymbolsPass, files are processed
    one by one and a new TextRecordDatabase instance is used for each
    file.  During OutputPass, a single TextRecordDatabase instance is
    used for the duration of OutputPass; individual records are added
    and removed when they are active."""

    def __init__(self, delta_db, checkout_db):
        # A map { cvs_rev_id -> TextRecord }.
        self.text_records = {}

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext/deltatext strings as values.  Its __getitem__() method
        # is used to retrieve deltas when they are needed, and its
        # __delitem__() method is used to delete deltas when they can be
        # freed.  The modifiability of the delta database varies from pass
        # to pass, so the object stored here varies as well:
        #
        # CollectRevsPass: a fully-functional IndexedDatabase.  This
        #     allows deltas that will not be needed to be deleted.
        #
        # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
        #     modified during this pass, and we have no need to retrieve
        #     deltas, so we just use a dummy object here.
        #
        # OutputPass: a disabled IndexedDatabase.  During this pass we
        #     need to retrieve deltas, but we are not allowed to modify
        #     the delta database.  So we use an IndexedDatabase whose
        #     __delitem__() method has been disabled to do nothing.
        self.delta_db = delta_db

        # A database-like object containing fulltext strings as values,
        # keyed by the hexadecimal form of the cvs_rev_id ('%x' % id).
        # In this file only InternalRevisionReader supplies a real
        # database here; the other users pass a NullDatabase.
        self.checkout_db = checkout_db

        # If this is set to a list, then the list holds the ids of
        # text_records that have to be deleted; when discard() is called,
        # it adds the requested id to the list but does not delete it.  If
        # this member is set to None, then text_records are deleted
        # immediately when discard() is called.
        self.deferred_deletes = None

    def __getstate__(self):
        # Only the records themselves are pickled; the databases are not.
        # An explicit list keeps the state picklable even where
        # dict.values() returns a view rather than a list.
        return (list(self.text_records.values()),)

    def __setstate__(self, state):
        (text_records,) = state
        self.text_records = {}
        for text_record in text_records:
            self.add(text_record)
        # An unpickled instance starts out unable to retrieve or delete
        # any texts.
        self.delta_db = NullDatabase()
        self.checkout_db = NullDatabase()
        self.deferred_deletes = None

    def add(self, text_record):
        """Add TEXT_RECORD to our database.

        There must not already be a record with the same id."""

        assert text_record.id not in self.text_records

        self.text_records[text_record.id] = text_record

    def __getitem__(self, id):
        return self.text_records[id]

    def __delitem__(self, id):
        """Remove the record with the specified ID from the database.

        The record's free() method is not called here."""

        del self.text_records[id]

    def replace(self, text_record):
        """Store TEXT_RECORD in place of the existing record with the same id.

        Do not do anything with the old record."""

        assert text_record.id in self.text_records
        self.text_records[text_record.id] = text_record

    def discard(self, *ids):
        """The text records with IDS are no longer needed; discard them.

        This involves calling their free() methods and also removing
        them from SELF.

        If SELF.deferred_deletes is not None, then the ids to be deleted
        are added to the list instead of deleted immediately.  This
        mechanism is to prevent a stack overflow from the avalanche of
        deletes that can result from deleting a long chain of revisions.

        Raise InternalError if a record to be discarded still has a
        non-zero refcount."""

        if self.deferred_deletes is not None:
            # We are being called (indirectly) from the loop below; just
            # queue the ids for the outer-level call to process.
            self.deferred_deletes.extend(ids)
            return

        # This is an outer-level delete.
        self.deferred_deletes = list(ids)
        try:
            while self.deferred_deletes:
                record_id = self.deferred_deletes.pop()
                text_record = self[record_id]
                if text_record.refcount != 0:
                    raise InternalError(
                        'TextRecordDatabase.discard(%s) called with refcount = %d'
                        % (text_record, text_record.refcount,)
                        )
                # This call might cause other text_record ids to be added
                # to self.deferred_deletes:
                text_record.free(self)
                del self[record_id]
        finally:
            # Always leave deferred mode, even if freeing a record failed;
            # otherwise every later discard() would only queue its ids
            # and never process them.
            self.deferred_deletes = None

    def itervalues(self):
        """Return an iterator over the TextRecords in the database."""

        records = self.text_records
        try:
            return records.itervalues()
        except AttributeError:
            # Dicts without itervalues() (Python 3) iterate a view.
            return iter(records.values())

    def recompute_refcounts(self, cvs_file_items):
        """Recompute the refcounts of the contained TextRecords.

        Use CVS_FILE_ITEMS to determine which records will be needed by
        cvs2svn."""

        # First clear all of the refcounts:
        for text_record in self.itervalues():
            text_record.refcount = 0

        # Now increment the reference count of records that are needed as
        # the source of another record's deltas:
        for text_record in self.itervalues():
            text_record.increment_dependency_refcounts(self)

        # Now increment the reference count of records that will be needed
        # by cvs2svn:
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                if isinstance(cvs_rev, CVSRevisionModification):
                    self[cvs_rev.id].refcount += 1

    def free_unused(self):
        """Free any TextRecords whose reference counts are zero."""

        # The deletion of some of these text records might cause others to
        # be unused, in which case they will be deleted automatically.
        # But since the initially-unused records are not referred to by
        # any others, we don't have to be afraid that they will be deleted
        # before we get to them.  But it *is* crucial that we create the
        # whole unused list before starting the loop.

        unused = [
            text_record.id
            for text_record in self.itervalues()
            if text_record.refcount == 0
            ]

        self.discard(*unused)

    def log_leftovers(self):
        """If any TextRecords still exist, log them."""

        if self.text_records:
            Log().warn(
                "%s: internal problem: leftover revisions in the checkout cache:"
                % warning_prefix)
            for text_record in self.itervalues():
                Log().warn(' %s' % (text_record,))

    def __repr__(self):
        """Debugging output of the current contents of the TextRecordDatabase."""

        retval = ['TextRecordDatabase:']
        for text_record in self.itervalues():
            retval.append(' %s' % (text_record,))
        return '\n'.join(retval)
class _Sink(cvs2svn_rcsparse.Sink):
    """Parser sink that records the text of each revision of one RCS file.

    For every revision a FullTextRecord or DeltaTextRecord is created
    and handed, together with its text, to REVISION_RECORDER._writeout().
    Trunk deltas are inverted on the way so that every stored delta
    applies forwards from its logical predecessor."""

    def __init__(self, revision_recorder, cvs_file_items):
        self.revision_recorder = revision_recorder
        self.cvs_file_items = cvs_file_items

        # A map {rev : base_rev} indicating that the text for rev is
        # stored in CVS as a delta relative to base_rev.
        self.base_revisions = {}

        # The revision that is stored with its fulltext in CVS (usually
        # the oldest revision on trunk):
        self.head_revision = None

        # The first logical revision on trunk (usually '1.1'):
        self.revision_1_1 = None

        # Keep track of the revisions whose revision info has been seen so
        # far (to avoid repeated revision info blocks):
        self.revisions_seen = set()

    def set_head_revision(self, revision):
        self.head_revision = revision

    def define_revision(
          self, revision, timestamp, author, state, branches, next
          ):
        if next:
            self.base_revisions[next] = revision
        elif is_trunk_revision(revision):
            # A trunk revision without a successor in the RCS chain is
            # the first logical revision on trunk.
            self.revision_1_1 = revision

        for branch in branches:
            self.base_revisions[branch] = revision

    def set_revision_info(self, revision, log, text):
        # Function-scope import: needed only for the version-neutral
        # exception retrieval below.
        import sys

        if revision in self.revisions_seen:
            # One common form of CVS repository corruption is that the
            # Deltatext block for revision 1.1 appears twice.  CollectData
            # has already warned about this problem; here we can just
            # ignore it.
            return
        self.revisions_seen.add(revision)

        cvs_rev_id = self.cvs_file_items.original_ids[revision]
        if is_trunk_revision(revision):
            # On trunk, revisions are encountered in reverse order (1.<N>
            # ... 1.1) and deltas are inverted.  The first text that we see
            # is the fulltext for the HEAD revision.  After that, the text
            # corresponding to revision 1.N is the delta (1.<N+1> ->
            # 1.<N>)).  We have to invert the deltas here so that we can
            # read the revisions out in dependency order; that is, for
            # revision 1.1 we want the fulltext, and for revision 1.<N> we
            # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
            # compute the delta for a revision until we see its logical
            # parent.  When we finally see revision 1.1 (which is recognized
            # because it doesn't have a parent), we can record the diff (1.1
            # -> 1.2) for revision 1.2, and also the fulltext for 1.1.

            if revision == self.head_revision:
                # This is HEAD, as fulltext.  Initialize the RCSStream so
                # that we can compute deltas backwards in time.
                self._stream = RCSStream(text)
                self._stream_revision = revision
            else:
                # Any other trunk revision is a backward delta.  Apply the
                # delta to the RCSStream to mutate it to the contents of this
                # revision, and also to get the reverse delta, which we store
                # as the forward delta of our child revision.
                try:
                    text = self._stream.invert_diff(text)
                except MalformedDeltaException:
                    # sys.exc_info() instead of "except X, e" so that the
                    # handler parses on both Python 2 and Python 3.
                    e = sys.exc_info()[1]
                    Log().error(
                        'Malformed RCS delta in %s, revision %s: %s'
                        % (self.cvs_file_items.cvs_file.filename, revision, e)
                        )
                    raise RuntimeError()
                text_record = DeltaTextRecord(
                    self.cvs_file_items.original_ids[self._stream_revision],
                    cvs_rev_id
                    )
                self.revision_recorder._writeout(text_record, text)
                self._stream_revision = revision

            if revision == self.revision_1_1:
                # This is revision 1.1.  Write its fulltext:
                text_record = FullTextRecord(cvs_rev_id)
                self.revision_recorder._writeout(
                    text_record, self._stream.get_text()
                    )

                # There will be no more trunk revisions delivered, so free
                # the RCSStream.
                del self._stream
                del self._stream_revision

        else:
            # On branches, revisions are encountered in logical order
            # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
            # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
            # <BRANCH>.<N>).  That's what we need, so just store it.

            # FIXME: It would be nice to avoid writing out branch deltas
            # when --trunk-only.  (They will be deleted when finish_file()
            # is called, but if the delta db is in an IndexedDatabase the
            # deletions won't actually recover any disk space.)
            text_record = DeltaTextRecord(
                cvs_rev_id,
                self.cvs_file_items.original_ids[self.base_revisions[revision]]
                )
            self.revision_recorder._writeout(text_record, text)
class InternalRevisionCollector(RevisionCollector):
    """The RevisionCollector used by InternalRevisionReader."""

    def __init__(self, compress):
        RevisionCollector.__init__(self)
        # Whether the stored deltas are wrapped in a CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the delta and tree databases as temporary files."""

        artifact_manager.register_temp_file(
            config.RCS_DELTAS_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
        artifact_manager.register_temp_file(
            config.RCS_TREES_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

    def start(self):
        """Create the (new, empty) delta and tree databases."""

        ser = MarshalSerializer()
        if self._compress:
            ser = CompressingSerializer(ser)
        self._rcs_deltas = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_NEW, ser,
            )
        primer = (FullTextRecord, DeltaTextRecord)
        self._rcs_trees = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_NEW, PrimedPickleSerializer(primer),
            )

    def _writeout(self, text_record, text):
        """Register TEXT_RECORD for the current file and store its TEXT."""

        self.text_record_db.add(text_record)
        self._rcs_deltas[text_record.id] = text

    def process_file(self, cvs_file_items):
        """Read revision information for the file described by CVS_FILE_ITEMS.

        Compute the text record refcounts, discard any records that are
        unneeded, and store the text records for the file to the
        _rcs_trees database."""

        # A map from cvs_rev_id to TextRecord instance:
        self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

        # Close the RCS file deterministically instead of leaving the
        # open handle for the garbage collector to reclaim.
        rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
        try:
            cvs2svn_rcsparse.parse(
                rcs_file,
                _Sink(self, cvs_file_items),
                )
        finally:
            rcs_file.close()

        self.text_record_db.recompute_refcounts(cvs_file_items)
        self.text_record_db.free_unused()
        self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
        del self.text_record_db

    def finish(self):
        """Close the delta and tree databases."""

        self._rcs_deltas.close()
        self._rcs_trees.close()
class _KeywordExpander:
    """A class whose instances provide substitutions for CVS keywords.

    This class is used via its __call__() method, which should be called
    with a match object representing a match for a CVS keyword string.
    The method returns the replacement for the matched text.

    The __call__() method works by calling the method with the same name
    as that of the CVS keyword (converted to lower case).

    Instances of this class can be passed as the REPL argument to
    re.sub()."""

    date_fmt_old = "%Y/%m/%d %H:%M:%S" # CVS 1.11, rcs
    date_fmt_new = "%Y-%m-%d %H:%M:%S" # CVS 1.12

    date_fmt = date_fmt_new

    @classmethod
    def use_old_date_format(klass):
        """Class method to ensure exact compatibility with CVS 1.11
        output.  Use this if you want to verify your conversion and you're
        using CVS 1.11."""

        klass.date_fmt = klass.date_fmt_old

    def __init__(self, cvs_rev):
        self.cvs_rev = cvs_rev

    def __call__(self, match):
        # Dispatch to the method named after the lower-cased keyword.
        expand = getattr(self, match.group(1).lower())
        return '$%s: %s $' % (match.group(1), expand(),)

    def author(self):
        metadata = Ctx()._metadata_db[self.cvs_rev.metadata_id]
        return metadata.original_author

    def date(self):
        moment = time.gmtime(self.cvs_rev.timestamp)
        return time.strftime(self.date_fmt, moment)

    def header(self):
        fields = (self.source(), self.cvs_rev.rev, self.date(), self.author())
        return '%s %s %s %s Exp' % fields

    def id(self):
        fields = (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())
        return '%s %s %s %s Exp' % fields

    def locker(self):
        # Handle kvl like kv, as a converted repo is supposed to have no
        # locks.
        return ''

    def log(self):
        # Would need some special handling.
        return 'not supported by cvs2svn'

    def name(self):
        # Cannot work, as just creating a new symbol does not check out
        # the revision again.
        return 'not supported by cvs2svn'

    def rcsfile(self):
        cvs_file = self.cvs_rev.cvs_file
        return cvs_file.basename + ",v"

    def revision(self):
        return self.cvs_rev.rev

    def source(self):
        cvs_file = self.cvs_rev.cvs_file
        project = cvs_file.project
        prefix = project.cvs_repository_root + '/' + project.cvs_module
        return prefix + cvs_file.cvs_path + ",v"

    def state(self):
        # We check out only live revisions.
        return 'Exp'
class InternalRevisionReader(RevisionReader):
    """A RevisionReader that reads the contents from an own delta store."""

    _kws = 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
    # Matches only expanded keywords ("$Keyword: value $").
    _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
    # Matches keywords whether expanded or not ("$Keyword$" as well).
    _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')

    def __init__(self, compress):
        # Whether the checkout database is wrapped in a
        # CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the checkout database and the databases that we read."""

        artifact_manager.register_temp_file(config.CVS_CHECKOUT_DB, which_pass)
        artifact_manager.register_temp_file_needed(
            config.RCS_DELTAS_STORE, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_DELTAS_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_TREES_STORE, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_TREES_INDEX_TABLE, which_pass
            )

    def start(self):
        """Open the delta and tree databases and create the checkout database."""

        self._delta_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_READ,
            )
        # Deltas must not be deleted while reading, so turn deletion into
        # a no-op.
        # NOTE(review): an instance-level __delitem__ is only consulted by
        # "del db[key]" if IndexedDatabase is an old-style class -- confirm.
        self._delta_db.__delitem__ = lambda id: None
        self._tree_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_READ,
            )
        ser = MarshalSerializer()
        if self._compress:
            ser = CompressingSerializer(ser)
        self._co_db = Database(
            artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB), DB_OPEN_NEW,
            ser,
            )

        # The set of CVSFile instances whose TextRecords have already been
        # read:
        self._loaded_files = set()

        # The TextRecords of all files loaded so far that have not yet
        # been discarded:
        self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)

    def _get_text_record(self, cvs_rev):
        """Return the TextRecord instance for CVS_REV.

        If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
        do so now."""

        if cvs_rev.cvs_file not in self._loaded_files:
            for text_record in self._tree_db[cvs_rev.cvs_file.id].itervalues():
                self._text_record_db.add(text_record)
            self._loaded_files.add(cvs_rev.cvs_file)

        return self._text_record_db[cvs_rev.id]

    def get_content_stream(self, cvs_rev, suppress_keyword_substitution=False):
        """Check out the text for revision CVS_REV from the repository.

        Return the text wrapped in a readable file object.  If
        SUPPRESS_KEYWORD_SUBSTITUTION is True, any RCS keywords will be
        _un_expanded prior to returning the file content.  Note that $Log$
        never actually generates a log (which makes test 'requires_cvs()'
        fail).

        Revisions may be requested in any order, but if they are not
        requested in dependency order the checkout database will become
        very large.  Revisions may be skipped.  Each revision may be
        requested only once.

        Raise FatalError if a delta needed for CVS_REV is malformed."""

        # Function-scope import: needed only for the version-neutral
        # exception retrieval below.
        import sys

        try:
            text = self._get_text_record(cvs_rev).checkout(self._text_record_db)
        except MalformedDeltaException:
            # sys.exc_info() instead of "except X, (msg)" so that the
            # handler parses on both Python 2 and Python 3.
            msg = sys.exc_info()[1]
            raise FatalError('Malformed RCS delta in %s, revision %s: %s'
                             % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, msg))
        # Keywords are left completely alone for modes 'b' and 'o'.
        if cvs_rev.cvs_file.mode != 'b' and cvs_rev.cvs_file.mode != 'o':
            if suppress_keyword_substitution or cvs_rev.cvs_file.mode == 'k':
                # Collapse expanded keywords back to "$Keyword$".
                text = self._kw_re.sub(r'$\1$', text)
            else:
                text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)

        return StringIO(text)

    def finish(self):
        """Report any leftover records and close all of the databases."""

        self._text_record_db.log_leftovers()

        del self._text_record_db
        self._delta_db.close()
        self._tree_db.close()
        # The checkout database is opened in start() and must be closed
        # as well.
        self._co_db.close()