# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================

"""This module contains classes that implement the --use-internal-co option.

The idea is to patch up the revisions' contents incrementally, thus
avoiding the huge number of process spawns and the O(n^2) overhead of
using 'co' on non-trunk revisions.

InternalRevisionCollector saves the RCS deltas and RCS revision trees
to databases.  Notably, deltas from the trunk need to be reversed, as
CVS stores them so they apply from HEAD backwards.

InternalRevisionReader produces the revisions' contents on demand.  To
generate the text for a typical revision, we need the revision's delta
text plus the fulltext of the previous revision.  Therefore, we
maintain a checkout database containing a copy of the fulltext of any
revision for which subsequent revisions still need to be retrieved.
It is crucial to remove text from this database as soon as it is no
longer needed, to prevent it from growing enormous.

There are two reasons that the text from a revision can be needed: (1)
because the revision itself still needs to be output to a dumpfile;
(2) because another revision needs it as the base of its delta.  We
maintain a reference count for each revision, which includes *both*
possibilities.  The first time a revision's text is needed, it is
generated by applying the revision's deltatext to the previous
revision's fulltext, and the resulting fulltext is stored in the
checkout database.  Each time a revision's fulltext is retrieved, its
reference count is decremented.  When the reference count goes to
zero, then the fulltext is deleted from the checkout database.

The administrative data for managing this consists of one TextRecord
entry for each revision.  Each TextRecord has an id, which is the same
id as used for the corresponding CVSRevision instance.  It also
maintains a count of the times it is expected to be retrieved.
TextRecords come in several varieties:

    FullTextRecord -- Used for revisions whose fulltext is derived
        directly from the RCS file by the InternalRevisionCollector
        (i.e., typically revision 1.1 of each file).

    DeltaTextRecord -- Used for revisions that are defined via a delta
        relative to some other TextRecord.  These records record the
        id of the TextRecord that holds the base text against which
        the delta is defined.  When the text for a DeltaTextRecord is
        retrieved, the DeltaTextRecord instance is deleted and a
        CheckedOutTextRecord instance is created to take its place.

    CheckedOutTextRecord -- Used during OutputPass for a revision that
        started out as a DeltaTextRecord, but has already been
        retrieved (and therefore its fulltext is stored in the
        checkout database).

While a file is being processed during FilterSymbolsPass, the fulltext
and deltas are stored to the delta database, and TextRecord instances
are created to keep track of things.  The reference counts are all
initialized: each record referred to by a delta has its refcount
incremented, and each record that corresponds to a non-delete
CVSRevision is incremented.  After that, any records with refcount==0
are removed.  When one record is removed, that can cause another
record's reference count to go to zero and be removed too,
recursively.  When a TextRecord is deleted at this stage, its
deltatext is also deleted from the delta database."""

from cStringIO import StringIO
import re
import time

from cvs2svn_lib import config
from cvs2svn_lib.common import DB_OPEN_NEW
from cvs2svn_lib.common import DB_OPEN_READ
from cvs2svn_lib.common import warning_prefix
from cvs2svn_lib.common import FatalError
from cvs2svn_lib.common import InternalError
from cvs2svn_lib.common import canonicalize_eol
from cvs2svn_lib.common import is_trunk_revision
from cvs2svn_lib.context import Ctx
from cvs2svn_lib.log import Log
from cvs2svn_lib.artifact_manager import artifact_manager
from cvs2svn_lib.symbol import Trunk
from cvs2svn_lib.cvs_item import CVSRevisionModification
from cvs2svn_lib.database import Database
from cvs2svn_lib.database import IndexedDatabase
from cvs2svn_lib.rcs_stream import RCSStream
from cvs2svn_lib.rcs_stream import MalformedDeltaException
from cvs2svn_lib.revision_manager import RevisionCollector
from cvs2svn_lib.revision_manager import RevisionReader
from cvs2svn_lib.serializer import MarshalSerializer
from cvs2svn_lib.serializer import CompressingSerializer
from cvs2svn_lib.serializer import PrimedPickleSerializer
from cvs2svn_lib.apple_single_filter import get_maybe_apple_single

import cvs2svn_rcsparse


class TextRecord(object):
    """Bookkeeping data for the text of a single CVSRevision.

    This is the base class of the concrete record types.  Subclasses
    must implement checkout() and free(); both raise
    NotImplementedError here."""

    __slots__ = ['id', 'refcount']

    def __init__(self, id):
        # The cvs_rev_id of the revision whose text this is.
        self.id = id

        # The number of times that the text of this revision will be
        # retrieved.
        self.refcount = 0

    def __getstate__(self):
        """Return the picklable state: the tuple (id, refcount)."""

        return (self.id, self.refcount,)

    def __setstate__(self, state):
        """Restore the attributes from a tuple made by __getstate__()."""

        (self.id, self.refcount,) = state

    def increment_dependency_refcounts(self, text_record_db):
        """Increment the refcounts of any records that this one depends on.

        The base implementation has no dependencies and does nothing."""

        pass

    def decrement_refcount(self, text_record_db):
        """Decrement the number of times our text still has to be checked out.

        If the reference count goes to zero, call
        TEXT_RECORD_DB.discard() with our id."""

        self.refcount -= 1
        if self.refcount == 0:
            text_record_db.discard(self.id)

    def checkout(self, text_record_db):
        """Workhorse of the checkout process.

        Return the text for this revision, decrement our reference count,
        and update the databases depending on whether there will be future
        checkouts."""

        raise NotImplementedError()

    def free(self, text_record_db):
        """This instance will never again be checked out; free it.

        Also free any associated resources and decrement the refcounts of
        any other TextRecords that this one depends on."""

        raise NotImplementedError()


class FullTextRecord(TextRecord):
    """A record whose revision's fulltext is stored in the delta_db.

    These records are used for revisions whose fulltext was determined
    by the InternalRevisionCollector during FilterSymbolsPass.  The
    fulltext for such a revision is stored in the delta_db as a
    single string."""

    __slots__ = []

    def __getstate__(self):
        """Return the picklable state: the tuple (id, refcount)."""

        return (self.id, self.refcount,)

    def __setstate__(self, state):
        """Restore the attributes from a tuple made by __getstate__()."""

        (self.id, self.refcount,) = state

    def checkout(self, text_record_db):
        """Return our fulltext, read from TEXT_RECORD_DB.delta_db.

        The text is fetched before the refcount is decremented, because
        the decrement may cause this record to be discarded."""

        text = text_record_db.delta_db[self.id]
        self.decrement_refcount(text_record_db)
        return text

    def free(self, text_record_db):
        """Delete our fulltext from TEXT_RECORD_DB.delta_db."""

        del text_record_db.delta_db[self.id]

    def __str__(self):
        return 'FullTextRecord(%x, %d)' % (self.id, self.refcount,)


class DeltaTextRecord(TextRecord):
    """A record whose revision's delta is stored as an RCS delta.

    The text of this revision must be derived by applying an RCS delta
    to the text of the predecessor revision.  The RCS delta is stored
    in the delta_db."""

    __slots__ = ['pred_id']

    def __init__(self, id, pred_id):
        TextRecord.__init__(self, id)

        # The cvs_rev_id of the revision relative to which this delta is
        # defined.
        self.pred_id = pred_id

    def __getstate__(self):
        """Return the picklable state: the tuple (id, refcount, pred_id)."""

        return (self.id, self.refcount, self.pred_id,)

    def __setstate__(self, state):
        """Restore the attributes from a tuple made by __getstate__()."""

        (self.id, self.refcount, self.pred_id,) = state

    def increment_dependency_refcounts(self, text_record_db):
        """Count our need for the predecessor's text as one reference."""

        text_record_db[self.pred_id].refcount += 1

    def checkout(self, text_record_db):
        """Return our fulltext, built from the predecessor's text.

        The predecessor is checked out (consuming one of its references)
        and our delta from TEXT_RECORD_DB.delta_db is applied to it.  If
        further checkouts of this revision are still expected, the
        fulltext is stored in TEXT_RECORD_DB.checkout_db and this record
        is replaced by a CheckedOutTextRecord; otherwise this record is
        simply removed from TEXT_RECORD_DB."""

        base_text = text_record_db[self.pred_id].checkout(text_record_db)
        rcs_stream = RCSStream(base_text)
        delta_text = text_record_db.delta_db[self.id]
        rcs_stream.apply_diff(delta_text)
        text = rcs_stream.get_text()
        # Drop the stream now so that its copy of the text can be
        # reclaimed before we go on.
        del rcs_stream

        self.refcount -= 1
        if self.refcount == 0:
            # This text will never be needed again; just delete ourselves
            # without ever having stored the fulltext to the checkout
            # database:
            del text_record_db[self.id]
        else:
            # Store a new CheckedOutTextRecord in place of ourselves:
            text_record_db.checkout_db['%x' % self.id] = text
            new_text_record = CheckedOutTextRecord(self.id)
            new_text_record.refcount = self.refcount
            text_record_db.replace(new_text_record)

        return text

    def free(self, text_record_db):
        """Delete our delta and release our reference to the predecessor."""

        del text_record_db.delta_db[self.id]
        text_record_db[self.pred_id].decrement_refcount(text_record_db)

    def __str__(self):
        return 'DeltaTextRecord(%x -> %x, %d)' % (
            self.pred_id, self.id, self.refcount,
            )


class CheckedOutTextRecord(TextRecord):
    """A record whose revision's fulltext is stored in the checkout_db.

    These records are used for revisions whose fulltext has been
    computed already during OutputPass.  The fulltext for such a
    revision is stored in the checkout_db of the TextRecordDatabase as
    a single string, under the key '%x' % id."""

    __slots__ = []

    def __getstate__(self):
        """Return the picklable state: the tuple (id, refcount)."""

        return (self.id, self.refcount,)

    def __setstate__(self, state):
        """Restore the attributes from a tuple made by __getstate__()."""

        (self.id, self.refcount,) = state

    def checkout(self, text_record_db):
        """Return our fulltext, read from TEXT_RECORD_DB.checkout_db.

        The text is fetched before the refcount is decremented, because
        the decrement may cause this record (and its stored text) to be
        discarded."""

        text = text_record_db.checkout_db['%x' % self.id]
        self.decrement_refcount(text_record_db)
        return text

    def free(self, text_record_db):
        """Delete our fulltext from TEXT_RECORD_DB.checkout_db."""

        del text_record_db.checkout_db['%x' % self.id]

    def __str__(self):
        return 'CheckedOutTextRecord(%x, %d)' % (self.id, self.refcount,)


class NullDatabase(object):
    """A do-nothing database that can be used with TextRecordDatabase.

    Use this when you don't actually want to allow anything to be
    deleted."""

    def __delitem__(self, id):
        """Ignore the request to delete the entry for ID."""

        pass


class TextRecordDatabase:
    """Holds the TextRecord instances that are currently live.

    During FilterSymbolsPass, files are processed one by one and a new
    TextRecordDatabase instance is used for each file.  During
    OutputPass, a single TextRecordDatabase instance is used for the
    duration of OutputPass; individual records are added and removed
    when they are active."""

    def __init__(self, delta_db, checkout_db):
        # A map { cvs_rev_id -> TextRecord }.
        self.text_records = {}

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext/deltatext strings as values.  Its __getitem__() method
        # is used to retrieve deltas when they are needed, and its
        # __delitem__() method is used to delete deltas when they can be
        # freed.  The modifiability of the delta database varies from pass
        # to pass, so the object stored here varies as well:
        #
        # FilterSymbolsPass: a NullDatabase.  The delta database cannot be
        #     modified during this pass, and we have no need to retrieve
        #     deltas, so we just use a dummy object here.
        #
        # OutputPass: a disabled IndexedDatabase.  During this pass we
        #     need to retrieve deltas, but we are not allowed to modify
        #     the delta database.  So we use an IndexedDatabase whose
        #     __delitem__() method has been disabled to do nothing.
        self.delta_db = delta_db

        # A database-like object using cvs_rev_ids as keys and containing
        # fulltext strings as values.  This database is only set during
        # OutputPass.
        self.checkout_db = checkout_db

        # If this is set to a list, then the list holds the ids of
        # text_records that have to be deleted; when discard() is called,
        # it adds the requested id to the list but does not delete it.  If
        # this member is set to None, then text_records are deleted
        # immediately when discard() is called.
        self.deferred_deletes = None

    def __getstate__(self):
        """Return the picklable state: a 1-tuple holding the records.

        The databases are deliberately not part of the state."""

        # list() so that the state is a plain list whatever type
        # dict.values() happens to return.
        return (list(self.text_records.values()),)

    def __setstate__(self, state):
        """Rebuild the record map; both databases become NullDatabases."""

        (text_records,) = state
        self.text_records = {}
        for text_record in text_records:
            self.add(text_record)
        self.delta_db = NullDatabase()
        self.checkout_db = NullDatabase()
        self.deferred_deletes = None

    def add(self, text_record):
        """Add TEXT_RECORD to our database.

        There must not already be a record with the same id."""

        assert text_record.id not in self.text_records

        self.text_records[text_record.id] = text_record

    def __getitem__(self, id):
        """Return the record with the specified ID (KeyError if absent)."""

        return self.text_records[id]

    def __delitem__(self, id):
        """Free the record with the specified ID."""

        del self.text_records[id]

    def replace(self, text_record):
        """Store TEXT_RECORD in place of the existing record with the same id.

        Do not do anything with the old record."""

        assert text_record.id in self.text_records
        self.text_records[text_record.id] = text_record

    def discard(self, *ids):
        """The text records with IDS are no longer needed; discard them.

        This involves calling their free() methods and also removing them
        from SELF.

        If SELF.deferred_deletes is not None, then the ids to be deleted
        are added to the list instead of deleted immediately.  This
        mechanism is to prevent a stack overflow from the avalanche of
        deletes that can result from deleting a long chain of revisions.

        Raise InternalError if a record to be discarded still has a
        nonzero refcount."""

        if self.deferred_deletes is not None:
            # We are being called (indirectly) from the loop below; just
            # queue the ids and let the outer-level call process them.
            self.deferred_deletes.extend(ids)
            return

        # This is an outer-level delete.
        self.deferred_deletes = list(ids)
        try:
            while self.deferred_deletes:
                id = self.deferred_deletes.pop()
                text_record = self[id]
                if text_record.refcount != 0:
                    raise InternalError(
                        'TextRecordDatabase.discard(%s) called with '
                        'refcount = %d'
                        % (text_record, text_record.refcount,)
                        )
                # This call might cause other text_record ids to be added
                # to self.deferred_deletes:
                text_record.free(self)
                del self[id]
        finally:
            # Always leave deferred mode, even if something above raised;
            # otherwise every later discard() would only queue its ids
            # and never free anything.
            self.deferred_deletes = None

    def itervalues(self):
        """Return an iterator over the contained TextRecords."""

        return iter(self.text_records.values())

    def recompute_refcounts(self, cvs_file_items):
        """Recompute the refcounts of the contained TextRecords.

        Use CVS_FILE_ITEMS to determine which records will be needed by
        cvs2svn."""

        # First clear all of the refcounts:
        for text_record in self.itervalues():
            text_record.refcount = 0

        # Now increment the reference count of records that are needed as
        # the source of another record's deltas:
        for text_record in self.itervalues():
            text_record.increment_dependency_refcounts(self)

        # Now increment the reference count of records that will be needed
        # by cvs2svn:
        for lod_items in cvs_file_items.iter_lods():
            for cvs_rev in lod_items.cvs_revisions:
                if isinstance(cvs_rev, CVSRevisionModification):
                    self[cvs_rev.id].refcount += 1

    def free_unused(self):
        """Free any TextRecords whose reference counts are zero."""

        # The deletion of some of these text records might cause others to
        # be unused, in which case they will be deleted automatically.
        # But since the initially-unused records are not referred to by
        # any others, we don't have to be afraid that they will be deleted
        # before we get to them.  But it *is* crucial that we create the
        # whole unused list before starting the loop.

        unused = [
            text_record.id
            for text_record in self.itervalues()
            if text_record.refcount == 0
            ]

        self.discard(*unused)

    def log_leftovers(self):
        """If any TextRecords still exist, log them."""

        if self.text_records:
            Log().warn(
                "%s: internal problem: leftover revisions in the checkout "
                "cache:"
                % warning_prefix)
            for text_record in self.itervalues():
                Log().warn(' %s' % (text_record,))

    def __repr__(self):
        """Debugging output of the current contents of the TextRecordDatabase."""

        retval = ['TextRecordDatabase:']
        for text_record in self.itervalues():
            retval.append(' %s' % (text_record,))
        return '\n'.join(retval)


class _Sink(cvs2svn_rcsparse.Sink):
    """Parser sink that records the texts of one RCS file.

    The parser callbacks collect the revision tree of the file and hand
    one TextRecord plus its text (a fulltext or a forward delta) per
    revision to REVISION_COLLECTOR._writeout()."""

    def __init__(self, revision_collector, cvs_file_items):
        self.revision_collector = revision_collector
        self.cvs_file_items = cvs_file_items

        # A map {rev : base_rev} indicating that the text for rev is
        # stored in CVS as a delta relative to base_rev.
        self.base_revisions = {}

        # The revision that is stored with its fulltext in CVS (usually
        # the most recent revision on trunk):
        self.head_revision = None

        # The first logical revision on trunk (usually '1.1'):
        self.revision_1_1 = None

        # Keep track of the revisions whose revision info has been seen so
        # far (to avoid repeated revision info blocks):
        self.revisions_seen = set()

    def set_head_revision(self, revision):
        self.head_revision = revision

    def define_revision(
            self, revision, timestamp, author, state, branches, next
            ):
        """Record which revisions are stored as deltas against REVISION."""

        if next:
            self.base_revisions[next] = revision
        elif is_trunk_revision(revision):
            # A trunk revision without a NEXT is the first one on trunk.
            self.revision_1_1 = revision

        for branch in branches:
            self.base_revisions[branch] = revision

    def set_revision_info(self, revision, log, text):
        """Store the text of REVISION via the revision collector.

        Return TEXT (for trunk revisions other than HEAD, the inverted
        delta), or None if REVISION has already been seen.  Raise
        RuntimeError if a trunk delta is malformed."""

        if revision in self.revisions_seen:
            # One common form of CVS repository corruption is that the
            # Deltatext block for revision 1.1 appears twice.  CollectData
            # has already warned about this problem; here we can just
            # ignore it.
            return
        else:
            self.revisions_seen.add(revision)

        cvs_rev_id = self.cvs_file_items.original_ids[revision]
        if is_trunk_revision(revision):
            # On trunk, revisions are encountered in reverse order (1.<N>
            # ... 1.1) and deltas are inverted.  The first text that we
            # see is the fulltext for the HEAD revision.  After that, the
            # text corresponding to revision 1.N is the delta (1.<N+1> ->
            # 1.<N>)).  We have to invert the deltas here so that we can
            # read the revisions out in dependency order; that is, for
            # revision 1.1 we want the fulltext, and for revision 1.<N> we
            # want the delta (1.<N-1> -> 1.<N>).  This means that we can't
            # compute the delta for a revision until we see its logical
            # parent.  When we finally see revision 1.1 (which is
            # recognized because it doesn't have a parent), we can record
            # the diff (1.1 -> 1.2) for revision 1.2, and also the
            # fulltext for 1.1.

            if revision == self.head_revision:
                # This is HEAD, as fulltext.  Initialize the RCSStream so
                # that we can compute deltas backwards in time.
                self._rcs_stream = RCSStream(text)
                self._rcs_stream_revision = revision
            else:
                # Any other trunk revision is a backward delta.  Apply the
                # delta to the RCSStream to mutate it to the contents of
                # this revision, and also to get the reverse delta, which
                # we store as the forward delta of our child revision.
                try:
                    text = self._rcs_stream.invert_diff(text)
                except MalformedDeltaException as e:
                    Log().error(
                        'Malformed RCS delta in %s, revision %s: %s'
                        % (self.cvs_file_items.cvs_file.filename,
                           revision, e)
                        )
                    raise RuntimeError()
                # The record belongs to the child revision (the one the
                # stream held until now); this revision is its base.
                text_record = DeltaTextRecord(
                    self.cvs_file_items.original_ids[
                        self._rcs_stream_revision
                        ],
                    cvs_rev_id
                    )
                self.revision_collector._writeout(text_record, text)
                self._rcs_stream_revision = revision

            if revision == self.revision_1_1:
                # This is revision 1.1.  Write its fulltext:
                text_record = FullTextRecord(cvs_rev_id)
                self.revision_collector._writeout(
                    text_record, self._rcs_stream.get_text()
                    )

                # There will be no more trunk revisions delivered, so free
                # the RCSStream.
                del self._rcs_stream
                del self._rcs_stream_revision

        else:
            # On branches, revisions are encountered in logical order
            # (<BRANCH>.1 ... <BRANCH>.<N>) and the text corresponding to
            # revision <BRANCH>.<N> is the forward delta (<BRANCH>.<N-1>
            # -> <BRANCH>.<N>).  That's what we need, so just store it.

            # FIXME: It would be nice to avoid writing out branch deltas
            # when --trunk-only.  (They will be deleted when finish_file()
            # is called, but if the delta db is in an IndexedDatabase the
            # deletions won't actually recover any disk space.)
            text_record = DeltaTextRecord(
                cvs_rev_id,
                self.cvs_file_items.original_ids[
                    self.base_revisions[revision]
                    ]
                )
            self.revision_collector._writeout(text_record, text)

        return text


class InternalRevisionCollector(RevisionCollector):
    """The RevisionCollector used by InternalRevisionReader.

    It writes every revision's fulltext or delta to the RCS deltas
    database, and one TextRecordDatabase per file to the RCS trees
    database."""

    def __init__(self, compress):
        RevisionCollector.__init__(self)
        # If true, the texts written to the delta database are wrapped
        # in a CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the temporary files that this collector creates."""

        artifact_manager.register_temp_file(
            config.RCS_DELTAS_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file(
            config.RCS_DELTAS_STORE, which_pass
            )
        artifact_manager.register_temp_file(
            config.RCS_TREES_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file(
            config.RCS_TREES_STORE, which_pass
            )

    def start(self):
        """Create the delta database and the RCS trees database."""

        serializer = MarshalSerializer()
        if self._compress:
            serializer = CompressingSerializer(serializer)
        self._delta_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_NEW, serializer,
            )
        primer = (FullTextRecord, DeltaTextRecord)
        self._rcs_trees = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_NEW, PrimedPickleSerializer(primer),
            )

    def _writeout(self, text_record, text):
        """Register TEXT_RECORD for the current file and store TEXT.

        TEXT is written to the delta database under TEXT_RECORD.id."""

        self.text_record_db.add(text_record)
        self._delta_db[text_record.id] = text

    def process_file(self, cvs_file_items):
        """Read revision information for the file described by CVS_FILE_ITEMS.

        Compute the text record refcounts, discard any records that are
        unneeded, and store the text records for the file to the
        _rcs_trees database."""

        # A map from cvs_rev_id to TextRecord instance:
        self.text_record_db = TextRecordDatabase(
            self._delta_db, NullDatabase()
            )

        rcs_file = open(cvs_file_items.cvs_file.filename, 'rb')
        try:
            cvs2svn_rcsparse.parse(
                rcs_file,
                _Sink(self, cvs_file_items),
                )
        finally:
            # Close the RCS file promptly, also if parsing fails, rather
            # than leaving it to garbage collection.
            rcs_file.close()

        self.text_record_db.recompute_refcounts(cvs_file_items)
        self.text_record_db.free_unused()
        self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
        del self.text_record_db

    def finish(self):
        """Close both databases."""

        self._delta_db.close()
        self._rcs_trees.close()


class _KeywordExpander:
    """A class whose instances provide substitutions for CVS keywords.

    This class is used via its __call__() method, which should be called
    with a match object representing a match for a CVS keyword string.
    The method returns the replacement for the matched text.

    The __call__() method works by calling the method with the same name
    as that of the CVS keyword (converted to lower case).

    Instances of this class can be passed as the REPL argument to
    re.sub()."""

    date_fmt_old = "%Y/%m/%d %H:%M:%S"    # CVS 1.11, rcs
    date_fmt_new = "%Y-%m-%d %H:%M:%S"    # CVS 1.12

    date_fmt = date_fmt_new

    @classmethod
    def use_old_date_format(klass):
        """Class method to ensure exact compatibility with CVS 1.11
        output.  Use this if you want to verify your conversion and you're
        using CVS 1.11."""

        klass.date_fmt = klass.date_fmt_old

    def __init__(self, cvs_rev):
        # The revision whose metadata is used for the expansions.
        self.cvs_rev = cvs_rev

    def __call__(self, match):
        """Return the expanded form of the keyword matched by MATCH.

        Group 1 of MATCH must be the keyword name."""

        return '$%s: %s $' % (
            match.group(1), getattr(self, match.group(1).lower())(),
            )

    def author(self):
        return Ctx()._metadata_db[self.cvs_rev.metadata_id].original_author

    def date(self):
        """Return the revision's timestamp in UTC, formatted by date_fmt."""

        return time.strftime(
            self.date_fmt, time.gmtime(self.cvs_rev.timestamp)
            )

    def header(self):
        return '%s %s %s %s Exp' % (
            self.source(), self.cvs_rev.rev, self.date(), self.author(),
            )

    def id(self):
        return '%s %s %s %s Exp' % (
            self.rcsfile(), self.cvs_rev.rev, self.date(), self.author(),
            )

    def locker(self):
        # Handle kvl like kv, as a converted repo is supposed to have no
        # locks.
        return ''

    def log(self):
        # Would need some special handling.
        return 'not supported by cvs2svn'

    def name(self):
        # Cannot work, as just creating a new symbol does not check out
        # the revision again.
        return 'not supported by cvs2svn'

    def rcsfile(self):
        return self.cvs_rev.cvs_file.basename + ",v"

    def revision(self):
        return self.cvs_rev.rev

    def source(self):
        project = self.cvs_rev.cvs_file.project
        return '%s/%s%s,v' % (
            project.cvs_repository_root,
            project.cvs_module,
            self.cvs_rev.cvs_file.cvs_path,
            )

    def state(self):
        # We check out only live revisions.
        return 'Exp'


class InternalRevisionReader(RevisionReader):
    """A RevisionReader that reads the contents from its own delta store."""

    _kws = (
        'Author|Date|Header|Id|Locker|Log|Name|RCSfile|Revision|Source|State'
        )
    # Matches only keywords that carry a value ("$Keyword: ...$"):
    _kw_re = re.compile(r'\$(' + _kws + r'):[^$\n]*\$')
    # Matches keywords with or without a value ("$Keyword$" too):
    _kwo_re = re.compile(r'\$(' + _kws + r')(:[^$\n]*)?\$')

    def __init__(self, compress):
        # If true, the texts written to the checkout database are
        # wrapped in a CompressingSerializer.
        self._compress = compress

    def register_artifacts(self, which_pass):
        """Register the checkout database and the files that we read."""

        artifact_manager.register_temp_file(
            config.CVS_CHECKOUT_DB, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_DELTAS_STORE, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_DELTAS_INDEX_TABLE, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_TREES_STORE, which_pass
            )
        artifact_manager.register_temp_file_needed(
            config.RCS_TREES_INDEX_TABLE, which_pass
            )

    def start(self):
        """Open the delta and tree databases; create the checkout database."""

        self._delta_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
            artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
            DB_OPEN_READ,
            )
        # The delta database must not be modified during this pass, so
        # turn requests to delete deltas into no-ops.
        # NOTE(review): an instance-level __delitem__ is only honored by
        # the "del db[id]" statement if IndexedDatabase is an old-style
        # class -- confirm.
        self._delta_db.__delitem__ = lambda id: None
        self._tree_db = IndexedDatabase(
            artifact_manager.get_temp_file(config.RCS_TREES_STORE),
            artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
            DB_OPEN_READ,
            )
        serializer = MarshalSerializer()
        if self._compress:
            serializer = CompressingSerializer(serializer)
        self._co_db = Database(
            artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB),
            DB_OPEN_NEW, serializer,
            )

        # The set of CVSFile instances whose TextRecords have already been
        # read:
        self._loaded_files = set()

        # The TextRecords of all files loaded so far that have not yet
        # been discarded:
        self._text_record_db = TextRecordDatabase(
            self._delta_db, self._co_db
            )

    def _get_text_record(self, cvs_rev):
        """Return the TextRecord instance for CVS_REV.

        If the TextRecords for CVS_REV.cvs_file haven't been loaded yet,
        do so now."""

        if cvs_rev.cvs_file not in self._loaded_files:
            file_records = self._tree_db[cvs_rev.cvs_file.id]
            for text_record in file_records.itervalues():
                self._text_record_db.add(text_record)
            self._loaded_files.add(cvs_rev.cvs_file)

        return self._text_record_db[cvs_rev.id]

    def get_content(self, cvs_rev):
        """Check out the text for revision CVS_REV from the repository.

        Return the text.  If CVS_REV has a property _keyword_handling, use
        it to determine how to handle RCS keywords in the output:

            'collapsed' -- collapse keywords

            'expanded' -- expand keywords

            'untouched' -- output keywords in the form they are found in
                the RCS file

        Note that $Log$ never actually generates a log (which makes test
        'requires_cvs()' fail).

        Revisions may be requested in any order, but if they are not
        requested in dependency order the checkout database will become
        very large.  Revisions may be skipped.  Each revision may be
        requested only once.

        Raise FatalError if an RCS delta is malformed or if the
        _keyword_handling property has any other value."""

        try:
            text_record = self._get_text_record(cvs_rev)
            text = text_record.checkout(self._text_record_db)
        except MalformedDeltaException as e:
            raise FatalError(
                'Malformed RCS delta in %s, revision %s: %s'
                % (cvs_rev.cvs_file.get_filename(), cvs_rev.rev, e)
                )

        keyword_handling = cvs_rev.get_property('_keyword_handling')

        if keyword_handling == 'untouched':
            # Leave keywords in the form that they were checked in.
            pass
        elif keyword_handling == 'collapsed':
            text = self._kw_re.sub(r'$\1$', text)
        elif keyword_handling == 'expanded':
            text = self._kwo_re.sub(_KeywordExpander(cvs_rev), text)
        else:
            raise FatalError(
                'Undefined _keyword_handling property (%r) for %s'
                % (keyword_handling, cvs_rev,)
                )

        if Ctx().decode_apple_single:
            # Insert a filter to decode any files that are in AppleSingle
            # format:
            text = get_maybe_apple_single(text)

        eol_fix = cvs_rev.get_property('_eol_fix')
        if eol_fix:
            text = canonicalize_eol(text, eol_fix)

        return text

    def finish(self):
        """Log any leftover TextRecords and close all three databases."""

        self._text_record_db.log_leftovers()

        del self._text_record_db
        self._delta_db.close()
        self._tree_db.close()
        # The checkout database is opened in start() as well, so it has
        # to be closed here too.
        self._co_db.close()