# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2009 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================
"""This module contains classes that implement the --use-internal-co option.

The idea is to patch up the revisions' contents incrementally, thus
avoiding the huge number of process spawns and the O(n^2) overhead of
using 'co' and 'cvs'.

InternalRevisionCollector saves the RCS deltas and RCS revision trees
to databases. Notably, deltas from the trunk need to be reversed, as
CVS stores them so they apply from HEAD backwards.

InternalRevisionReader produces the revisions' contents on demand. To
generate the text for a typical revision, we need the revision's delta
text plus the fulltext of the previous revision. Therefore, we
maintain a checkout database containing a copy of the fulltext of any
revision for which subsequent revisions still need to be retrieved.
It is crucial to remove text from this database as soon as it is no
longer needed, to prevent it from growing enormous.

There are two reasons that the text from a revision can be needed: (1)
because the revision itself still needs to be output to a dumpfile;
(2) because another revision needs it as the base of its delta. We
maintain a reference count for each revision, which includes *both*
possibilities. The first time a revision's text is needed, it is
generated by applying the revision's deltatext to the previous
revision's fulltext, and the resulting fulltext is stored in the
checkout database. Each time a revision's fulltext is retrieved, its
reference count is decremented. When the reference count goes to
zero, then the fulltext is deleted from the checkout database.

The administrative data for managing this consists of one TextRecord
entry for each revision. Each TextRecord has an id, which is the same
id as used for the corresponding CVSRevision instance. It also
maintains a count of the times it is expected to be retrieved.
TextRecords come in several varieties:

    FullTextRecord -- Used for revisions whose fulltext is contained
        directly in the RCS file, and therefore available during
        CollectRevsPass (i.e., typically revision 1.1 of each file).

    DeltaTextRecord -- Used for revisions that are defined via a delta
        relative to some other TextRecord. These records record the id of
        the TextRecord that holds the base text against which the delta is
        defined. When the text for a DeltaTextRecord is retrieved, the
        DeltaTextRecord instance is deleted and a CheckedOutTextRecord
        instance is created to take its place.

    CheckedOutTextRecord -- Used during OutputPass for a revision that
        started out as a DeltaTextRecord, but has already been retrieved
        (and therefore its fulltext is stored in the checkout database).

While a file is being processed during FilterSymbolsPass, the fulltext
and deltas are stored to the delta database, and TextRecord instances
are created to keep track of things. The reference counts are all
initialized: each record referred to by a delta has its refcount
incremented, and each record that corresponds to a non-delete
CVSRevision is incremented. After that, any records with refcount==0
are removed. When one record is removed, that can cause another
record's reference count to go to zero and be removed too,
recursively. When a TextRecord is deleted at this stage, its
deltatext is also deleted from the delta database."""
from cStringIO import StringIO
import re
import time

from cvs2svn_lib import config
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import is_trunk_revision
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.log import Log
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.cvs_item import CVSRevisionModification
from cvs2svn_lib.database import Database
from cvs2svn_lib.database import IndexedDatabase
from cvs2svn_lib.rcs_stream import RCSStream
from cvs2svn_lib.rcs_stream import MalformedDeltaException
from cvs2svn_lib.revision_manager import RevisionCollector
from cvs2svn_lib.revision_manager import RevisionReader
from cvs2svn_lib.serializer import MarshalSerializer
from cvs2svn_lib.serializer import CompressingSerializer
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.apple_single_filter import get_maybe_apple_single

import cvs2svn_rcsparse
class TextRecord(object):
  """Bookkeeping data for the text of a single CVSRevision.

  This is the abstract base of the record types: it owns the id and
  the reference count, while subclasses implement checkout() and
  free()."""

  __slots__ = ['id', 'refcount']

  def __init__(self, id):
    # The cvs_rev_id of the revision whose text this is.
    self.id = id

    # The number of times that the text of this revision will be
    # retrieved.
    self.refcount = 0

  def __getstate__(self):
    """Return the pickled form, the tuple (id, refcount)."""

    return (self.id, self.refcount,)

  def __setstate__(self, state):
    """Restore id and refcount from the tuple made by __getstate__()."""

    (self.id, self.refcount,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Increment the refcounts of any records that this one depends on.

    The base implementation does nothing (no dependencies)."""

    pass

  def decrement_refcount(self, text_record_db):
    """Decrement the number of times our text still has to be checked out.

    If the reference count goes to zero, call
    TEXT_RECORD_DB.discard() with our id."""

    self.refcount -= 1
    if self.refcount == 0:
      text_record_db.discard(self.id)

  def checkout(self, text_record_db):
    """Workhorse of the checkout process.

    Return the text for this revision, decrement our reference count,
    and update the databases depending on whether there will be future
    checkouts.

    Must be implemented by subclasses; the base version raises
    NotImplementedError."""

    raise NotImplementedError()

  def free(self, text_record_db):
    """This instance will never again be checked out; free it.

    Also free any associated resources and decrement the refcounts of
    any other TextRecords that this one depends on.

    Must be implemented by subclasses; the base version raises
    NotImplementedError."""

    raise NotImplementedError()
class FullTextRecord(TextRecord):
  """A TextRecord whose text is stored as fulltext in the delta database."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    """Return our fulltext, read from TEXT_RECORD_DB.delta_db.

    The text is fetched before the refcount is decremented, because
    the decrement may cause this record to be discarded."""

    text = text_record_db.delta_db[self.id]
    self.decrement_refcount(text_record_db)
    return text

  def free(self, text_record_db):
    """Delete our fulltext from TEXT_RECORD_DB.delta_db."""

    del text_record_db.delta_db[self.id]

  def __str__(self):
    return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)
class DeltaTextRecord(TextRecord):
  """A TextRecord whose text is stored as a delta against another record."""

  __slots__ = ['pred_id']

  def __init__(self, id, pred_id):
    TextRecord.__init__(self, id)

    # The cvs_rev_id of the revision relative to which this delta is
    # defined.
    self.pred_id = pred_id

  def __getstate__(self):
    return (self.id, self.refcount, self.pred_id,)

  def __setstate__(self, state):
    (self.id, self.refcount, self.pred_id,) = state

  def increment_dependency_refcounts(self, text_record_db):
    """Count our need for the base record's text."""

    text_record_db[self.pred_id].refcount += 1

  def checkout(self, text_record_db):
    """Return our fulltext, built by applying our delta to the base text.

    Checking out the base text consumes one of the base record's
    references. If our own text will be needed again, store it in
    TEXT_RECORD_DB.checkout_db and replace ourselves with a
    CheckedOutTextRecord; otherwise simply remove ourselves from
    TEXT_RECORD_DB."""

    base_text = text_record_db[self.pred_id].checkout(text_record_db)
    co = RCSStream(base_text)
    delta_text = text_record_db.delta_db[self.id]
    co.apply_diff(delta_text)
    text = co.get_text()
    del co
    self.refcount -= 1
    if self.refcount == 0:
      # This text will never be needed again; just delete ourselves
      # without ever having stored the fulltext to the checkout
      # database:
      del text_record_db[self.id]
    else:
      # Store a new CheckedOutTextRecord in place of ourselves:
      text_record_db.checkout_db['%x' % self.id] = text
      new_text_record = CheckedOutTextRecord(self.id)
      new_text_record.refcount = self.refcount
      text_record_db.replace(new_text_record)
    return text

  def free(self, text_record_db):
    """Delete our delta and release our reference to the base record."""

    del text_record_db.delta_db[self.id]
    text_record_db[self.pred_id].decrement_refcount(text_record_db)

  def __str__(self):
    return 'DeltaTextRecord(%x -> %x, %d)' \
           % (self.pred_id, self.id, self.refcount,)
class CheckedOutTextRecord(TextRecord):
  """A TextRecord whose fulltext is stored in the checkout database."""

  __slots__ = []

  def __getstate__(self):
    return (self.id, self.refcount,)

  def __setstate__(self, state):
    (self.id, self.refcount,) = state

  def checkout(self, text_record_db):
    """Return our fulltext, read from TEXT_RECORD_DB.checkout_db.

    The checkout database is keyed by the hexadecimal form of our id.
    The text is fetched before the refcount is decremented, because
    the decrement may cause this record to be discarded."""

    text = text_record_db.checkout_db['%x' % self.id]
    self.decrement_refcount(text_record_db)
    return text

  def free(self, text_record_db):
    """Delete our fulltext from TEXT_RECORD_DB.checkout_db."""

    del text_record_db.checkout_db['%x' % self.id]

  def __str__(self):
    return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)
class NullDatabase(object):
  """A do-nothing database that can be used with TextRecordDatabase.

  Use this when you don't actually want to allow anything to be
  deleted."""

  def __delitem__(self, id):
    """Ignore the request to delete the entry for ID."""

    pass
class TextRecordDatabase:
  """Holds the TextRecord instances that are currently live.

  During CollectRevsPass and FilterSymbolsPass, files are processed
  one by one and a new TextRecordDatabase instance is used for each
  file. During OutputPass, a single TextRecordDatabase instance is
  used for the duration of OutputPass; individual records are added
  and removed when they are active."""

  def __init__(self, delta_db, checkout_db):
    # A map { cvs_rev_id -> TextRecord }.
    self.text_records = {}

    # A database-like object using cvs_rev_ids as keys and containing
    # fulltext/deltatext strings as values. Its __getitem__() method
    # is used to retrieve deltas when they are needed, and its
    # __delitem__() method is used to delete deltas when they can be
    # freed. The modifiability of the delta database varies from pass
    # to pass, so the object stored here varies as well:
    #
    # CollectRevsPass: a fully-functional IndexedDatabase. This
    #     allows deltas that will not be needed to be deleted.
    #
    # FilterSymbolsPass: a NullDatabase. The delta database cannot be
    #     modified during this pass, and we have no need to retrieve
    #     deltas, so we just use a dummy object here.
    #
    # OutputPass: a disabled IndexedDatabase. During this pass we
    #     need to retrieve deltas, but we are not allowed to modify
    #     the delta database. So we use an IndexedDatabase whose
    #     __delitem__() method has been disabled to do nothing.
    self.delta_db = delta_db

    # A database-like object containing fulltext strings as values.
    # The TextRecord classes key it by the hexadecimal string
    # '%x' % cvs_rev_id. When nothing is to be checked out, a
    # NullDatabase is stored here instead.
    self.checkout_db = checkout_db

    # If this is set to a list, then the list holds the ids of
    # text_records that have to be deleted; when discard() is called,
    # it adds the requested id to the list but does not delete it. If
    # this member is set to None, then text_records are deleted
    # immediately when discard() is called.
    self.deferred_deletes = None

  def __getstate__(self):
    """Pickle only the TextRecords; the databases are not serialized."""

    return (self.text_records.values(),)

  def __setstate__(self, state):
    """Restore the TextRecords; both databases become NullDatabases."""

    (text_records,) = state
    self.text_records = {}
    for text_record in text_records:
      self.add(text_record)
    self.delta_db = NullDatabase()
    self.checkout_db = NullDatabase()
    self.deferred_deletes = None

  def add(self, text_record):
    """Add TEXT_RECORD to our database.

    There must not already be a record with the same id."""

    assert text_record.id not in self.text_records

    self.text_records[text_record.id] = text_record

  def __getitem__(self, id):
    return self.text_records[id]

  def __delitem__(self, id):
    """Free the record with the specified ID."""

    del self.text_records[id]

  def replace(self, text_record):
    """Store TEXT_RECORD in place of the existing record with the same id.

    Do not do anything with the old record."""

    assert text_record.id in self.text_records
    self.text_records[text_record.id] = text_record

  def discard(self, *ids):
    """The text records with IDS are no longer needed; discard them.

    This involves calling their free() methods and also removing them
    from the database.

    If SELF.deferred_deletes is not None, then the ids to be deleted
    are added to the list instead of deleted immediately. This
    mechanism is to prevent a stack overflow from the avalanche of
    deletes that can result from deleting a long chain of revisions.

    Raise InternalError if a record to be discarded still has a
    nonzero refcount."""

    if self.deferred_deletes is not None:
      # We were called (indirectly) from the loop below; just queue
      # the ids and let the outer-level call process them.
      self.deferred_deletes.extend(ids)
      return

    # This is an outer-level delete.
    self.deferred_deletes = list(ids)
    try:
      while self.deferred_deletes:
        id = self.deferred_deletes.pop()
        text_record = self[id]
        if text_record.refcount != 0:
          raise InternalError(
              'TextRecordDatabase.discard(%s) called with refcount = %d'
              % (text_record, text_record.refcount,)
              )
        # This call might cause other text_record ids to be added to
        # self.deferred_deletes:
        text_record.free(self)
        del self[id]
    finally:
      # Always leave deferred mode, even if an exception escapes.
      # Otherwise every later discard() would only queue its ids and
      # never actually free anything.
      self.deferred_deletes = None

  def itervalues(self):
    return self.text_records.itervalues()

  def recompute_refcounts(self, cvs_file_items):
    """Recompute the refcounts of the contained TextRecords.

    A record receives one reference for each record whose delta is
    based on it, plus one reference if CVS_FILE_ITEMS contains a
    CVSRevisionModification with the same id."""

    # First clear all of the refcounts:
    for text_record in self.itervalues():
      text_record.refcount = 0

    # Now increment the reference count of records that are needed as
    # the source of another record's deltas:
    for text_record in self.itervalues():
      text_record.increment_dependency_refcounts(self)

    # Now increment the reference count of records whose own text
    # will be requested:
    for lod_items in cvs_file_items.iter_lods():
      for cvs_rev in lod_items.cvs_revisions:
        if isinstance(cvs_rev, CVSRevisionModification):
          self[cvs_rev.id].refcount += 1

  def free_unused(self):
    """Free any TextRecords whose reference counts are zero."""

    # The deletion of some of these text records might cause others to
    # be unused, in which case they will be deleted automatically.
    # But since the initially-unused records are not referred to by
    # any others, we don't have to be afraid that they will be deleted
    # before we get to them. But it *is* crucial that we create the
    # whole unused list before starting the loop.

    unused = [
        text_record.id
        for text_record in self.itervalues()
        if text_record.refcount == 0
        ]

    self.discard(*unused)

  def log_leftovers(self):
    """If any TextRecords still exist, log them."""

    if self.text_records:
      Log().warn(
          "%s: internal problem: leftover revisions in the checkout cache:"
          % warning_prefix
          )
      for text_record in self.itervalues():
        Log().warn(' %s' % (text_record,))

  def __repr__(self):
    """Debugging output of the current contents of the TextRecordDatabase."""

    retval = ['TextRecordDatabase:']
    for text_record in self.itervalues():
      retval.append(' %s' % (text_record,))
    return '\n'.join(retval)
435 class _Sink(cvs2svn_rcsparse
.Sink
):
436 def __init__(self
, revision_collector
, cvs_file_items
):
437 self
.revision_collector
= revision_collector
438 self
.cvs_file_items
= cvs_file_items
440 # A map {rev : base_rev} indicating that the text for rev is
441 # stored in CVS as a delta relative to base_rev.
442 self
.base_revisions
= {}
444 # The revision that is stored with its fulltext in CVS (usually
445 # the oldest revision on trunk):
446 self
.head_revision
= None
448 # The first logical revision on trunk (usually '1.1'):
449 self
.revision_1_1
= None
451 # Keep track of the revisions whose revision info has been seen so
452 # far (to avoid repeated revision info blocks):
453 self
.revisions_seen
= set()
455 def set_head_revision(self
, revision
):
456 self
.head_revision
= revision
459 self
, revision
, timestamp
, author
, state
, branches
, next
462 self
.base_revisions
[next
] = revision
464 if is_trunk_revision(revision
):
465 self
.revision_1_1
= revision
467 for branch
in branches
:
468 self
.base_revisions
[branch
] = revision
470 def set_revision_info(self
, revision
, log
, text
):
471 if revision
in self
.revisions_seen
:
472 # One common form of CVS repository corruption is that the
473 # Deltatext block for revision 1.1 appears twice. CollectData
474 # has already warned about this problem; here we can just ignore
478 self
.revisions_seen
.add(revision
)
480 cvs_rev_id
= self
.cvs_file_items
.original_ids
[revision
]
481 if is_trunk_revision(revision
):
482 # On trunk, revisions are encountered in reverse order (1.<N>
483 # ... 1.1) and deltas are inverted. The first text that we see
484 # is the fulltext for the HEAD revision. After that, the text
485 # corresponding to revision 1.N is the delta (1.<N+1> ->
486 # 1.<N>)). We have to invert the deltas here so that we can
487 # read the revisions out in dependency order; that is, for
488 # revision 1.1 we want the fulltext, and for revision 1.<N> we
489 # want the delta (1.<N-1> -> 1.<N>). This means that we can't
490 # compute the delta for a revision until we see its logical
491 # parent. When we finally see revision 1.1 (which is recognized
492 # because it doesn't have a parent), we can record the diff (1.1
493 # -> 1.2) for revision 1.2, and also the fulltext for 1.1.
495 if revision
== self
.head_revision
:
496 # This is HEAD, as fulltext. Initialize the RCSStream so
497 # that we can compute deltas backwards in time.
498 self
._stream
= RCSStream(text
)
499 self
._stream
_revision
= revision
501 # Any other trunk revision is a backward delta. Apply the
502 # delta to the RCSStream to mutate it to the contents of this
503 # revision, and also to get the reverse delta, which we store
504 # as the forward delta of our child revision.
506 text
= self
._stream
.invert_diff(text
)
507 except MalformedDeltaException
, e
:
509 'Malformed RCS delta in %s, revision %s: %s'
510 % (self
.cvs_file_items
.cvs_file
.filename
, revision
, e
)
513 text_record
= DeltaTextRecord(
514 self
.cvs_file_items
.original_ids
[self
._stream
_revision
],
517 self
.revision_collector
._writeout
(text_record
, text
)
518 self
._stream
_revision
= revision
520 if revision
== self
.revision_1_1
:
521 # This is revision 1.1. Write its fulltext:
522 text_record
= FullTextRecord(cvs_rev_id
)
523 self
.revision_collector
._writeout
(
524 text_record
, self
._stream
.get_text()
527 # There will be no more trunk revisions delivered, so free the
530 del self
._stream
_revision
533 # On branches, revisions are encountered in logical order
534 # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
535 # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1> ->
536 # <BRANCH>.<N>). That's what we need, so just store it.
538 # FIXME: It would be nice to avoid writing out branch deltas
539 # when --trunk-only. (They will be deleted when finish_file()
540 # is called, but if the delta db is in an IndexedDatabase the
541 # deletions won't actually recover any disk space.)
542 text_record
= DeltaTextRecord(
544 self
.cvs_file_items
.original_ids
[self
.base_revisions
[revision
]]
546 self
.revision_collector
._writeout
(text_record
, text
)
class InternalRevisionCollector(RevisionCollector):
  """The RevisionCollector used by InternalRevisionReader."""

  def __init__(self, compress):
    RevisionCollector.__init__(self)

    # Whether to wrap the delta serializer in a CompressingSerializer.
    self._compress = compress

  def register_artifacts(self, which_pass):
    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    """Create the delta database and the revision tree database."""

    ser = MarshalSerializer()
    if self._compress:
      ser = CompressingSerializer(ser)
    self._rcs_deltas = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW, ser,
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW, PrimedPickleSerializer(primer),
        )

  def _writeout(self, text_record, text):
    """Register TEXT_RECORD and store TEXT under its id in the delta db."""

    self.text_record_db.add(text_record)
    self._rcs_deltas[text_record.id] = text

  def process_file(self, cvs_file_items):
    """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._rcs_deltas, NullDatabase())

    # Close the RCS file explicitly rather than leaving the handle to
    # the garbage collector, even if parsing fails.
    rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
    try:
      cvs2svn_rcsparse.parse(rcs_file, _Sink(self, cvs_file_items))
    finally:
      rcs_file.close()

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    """Close both databases."""

    self._rcs_deltas.close()
    self._rcs_trees.close()
class _KeywordExpander:
  """A class whose instances provide substitutions for CVS keywords.

  This class is used via its __call__() method, which should be called
  with a match object representing a match for a CVS keyword string.
  The method returns the replacement for the matched text.

  The __call__() method works by calling the method with the same name
  as that of the CVS keyword (converted to lower case).

  Instances of this class can be passed as the REPL argument to
  re.sub()."""

  date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
  date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12

  date_fmt = date_fmt_new

  @classmethod
  def use_old_date_format(klass):
    """Class method to ensure exact compatibility with CVS 1.11
    output. Use this if you want to verify your conversion and you're
    using CVS 1.11."""

    klass.date_fmt = klass.date_fmt_old

  def __init__(self, cvs_rev):
    self.cvs_rev = cvs_rev

  def __call__(self, match):
    """Return the expanded form '$Keyword: value $' for MATCH.

    Group 1 of MATCH must be the keyword name."""

    return '$%s: %s $' % \
           (match.group(1), getattr(self, match.group(1).lower())(),)

  def author(self):
    return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author

  def date(self):
    # The revision's timestamp, formatted in UTC.
    return time.strftime(self.date_fmt,
                         time.gmtime(self.cvs_rev.timestamp))

  def header(self):
    return '%s %s %s %s Exp' % \
           (self.source(), self.cvs_rev.rev, self.date(), self.author())

  def id(self):
    return '%s %s %s %s Exp' % \
           (self.rcsfile(), self.cvs_rev.rev, self.date(), self.author())

  def locker(self):
    # Handle kvl like kv, as a converted repo is supposed to have no
    # locks.
    return ''

  def log(self):
    # Would need some special handling.
    return 'not supported by cvs2svn'

  def name(self):
    # Cannot work, as just creating a new symbol does not check out
    # the revision again.
    return 'not supported by cvs2svn'

  def rcsfile(self):
    return self.cvs_rev.cvs_file.basename + ",v"

  def revision(self):
    return self.cvs_rev.rev

  def source(self):
    project = self.cvs_rev.cvs_file.project
    return project.cvs_repository_root + '/' + project.cvs_module + \
        self.cvs_rev.cvs_file.cvs_path + ",v"

  def state(self):
    # We check out only live revisions.
    return 'Exp'
690 class InternalRevisionReader(RevisionReader
):
691 """A RevisionReader that reads the contents from an own delta store."""
693 _kws
= 'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
694 _kw_re
= re
.compile(r
'\$(' + _kws
+ r
'):[^$\n]*\$')
695 _kwo_re
= re
.compile(r
'\$(' + _kws
+ r
')(:[^$\n]*)?\$')
697 def __init__(self
, compress
):
698 self
._compress
= compress
700 def register_artifacts(self
, which_pass
):
701 artifact_manager
.register_temp_file(config
.CVS_CHECKOUT_DB
, which_pass
)
702 artifact_manager
.register_temp_file_needed(
703 config
.RCS_DELTAS_STORE
, which_pass
705 artifact_manager
.register_temp_file_needed(
706 config
.RCS_DELTAS_INDEX_TABLE
, which_pass
708 artifact_manager
.register_temp_file_needed(
709 config
.RCS_TREES_STORE
, which_pass
711 artifact_manager
.register_temp_file_needed(
712 config
.RCS_TREES_INDEX_TABLE
, which_pass
716 self
._delta
_db
= IndexedDatabase(
717 artifact_manager
.get_temp_file(config
.RCS_DELTAS_STORE
),
718 artifact_manager
.get_temp_file(config
.RCS_DELTAS_INDEX_TABLE
),
720 self
._delta
_db
.__delitem
__ = lambda id: None
721 self
._tree
_db
= IndexedDatabase(
722 artifact_manager
.get_temp_file(config
.RCS_TREES_STORE
),
723 artifact_manager
.get_temp_file(config
.RCS_TREES_INDEX_TABLE
),
725 ser
= MarshalSerializer()
727 ser
= CompressingSerializer(ser
)
728 self
._co
_db
= Database(
729 artifact_manager
.get_temp_file(config
.CVS_CHECKOUT_DB
), DB_OPEN_NEW
,
732 # The set of CVSFile instances whose TextRecords have already been
734 self
._loaded
_files
= set()
736 # A map { CVSFILE : _FileTree } for files that currently have live
738 self
._text
_record
_db
= TextRecordDatabase(self
._delta
_db
, self
._co
_db
)
740 def _get_text_record(self
, cvs_rev
):
741 """Return the TextRecord instance for CVS_REV.
743 If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
746 if cvs_rev
.cvs_file
not in self
._loaded
_files
:
747 for text_record
in self
._tree
_db
[cvs_rev
.cvs_file
.id].itervalues():
748 self
._text
_record
_db
.add(text_record
)
749 self
._loaded
_files
.add(cvs_rev
.cvs_file
)
751 return self
._text
_record
_db
[cvs_rev
.id]
753 def get_content(self
, cvs_rev
):
754 """Check out the text for revision C_REV from the repository.
756 Return the text. If CVS_REV has a property _keyword_handling, use
757 it to determine how to handle RCS keywords in the output:
759 'collapsed' -- collapse keywords
761 'expanded' -- expand keywords
763 'untouched' -- output keywords in the form they are found in
766 Note that $Log$ never actually generates a log (which makes test
767 'requires_cvs()' fail).
769 Revisions may be requested in any order, but if they are not
770 requested in dependency order the checkout database will become
771 very large. Revisions may be skipped. Each revision may be
772 requested only once."""
775 text
= self
._get
_text
_record
(cvs_rev
).checkout(self
._text
_record
_db
)
776 except MalformedDeltaException
, (msg
):
777 raise FatalError('Malformed RCS delta in %s, revision %s: %s'
778 % (cvs_rev
.cvs_file
.get_filename(), cvs_rev
.rev
, msg
))
780 keyword_handling
= cvs_rev
.get_property('_keyword_handling')
782 if keyword_handling
== 'untouched':
783 # Leave keywords in the form that they were checked in.
785 elif keyword_handling
== 'collapsed':
786 text
= self
._kw
_re
.sub(r
'$\1$', text
)
787 elif keyword_handling
== 'expanded':
788 text
= self
._kwo
_re
.sub(_KeywordExpander(cvs_rev
), text
)
791 'Undefined _keyword_handling property (%r) for %s'
792 % (keyword_handling
, cvs_rev
,)
795 if Ctx().decode_apple_single
:
796 # Insert a filter to decode any files that are in AppleSingle
798 text
= get_maybe_apple_single(text
)
803 self
._text
_record
_db
.log_leftovers()
805 del self
._text
_record
_db
806 self
._delta
_db
.close()
807 self
._tree
_db
.close()