Re-update to rcsparse r2495 to get all the goodness of the new update script.
[cvs2svn.git] / cvs2svn_lib / cvs_file_items.py
blobd339047b26c2d91f174bd21088083b2694c2f088
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a class to manage the CVSItems related to one file."""
20 import re
22 from cvs2svn_lib.common import InternalError
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.context import Ctx
25 from cvs2svn_lib.log import logger
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.cvs_item import CVSRevision
31 from cvs2svn_lib.cvs_item import CVSRevisionModification
32 from cvs2svn_lib.cvs_item import CVSRevisionAdd
33 from cvs2svn_lib.cvs_item import CVSRevisionChange
34 from cvs2svn_lib.cvs_item import CVSRevisionAbsent
35 from cvs2svn_lib.cvs_item import CVSRevisionNoop
36 from cvs2svn_lib.cvs_item import CVSSymbol
37 from cvs2svn_lib.cvs_item import CVSBranch
38 from cvs2svn_lib.cvs_item import CVSTag
39 from cvs2svn_lib.cvs_item import cvs_revision_type_map
40 from cvs2svn_lib.cvs_item import cvs_branch_type_map
41 from cvs2svn_lib.cvs_item import cvs_tag_type_map
class VendorBranchError(Exception):
    """Signal an error in the structure of a file's revision tree.

    Raised by the vendor-branch processing code when a file's default
    branch is inconsistent with the revisions that the file contains."""
class LODItems(object):
    """The CVSItems of one file that belong to one line of development."""

    def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
        """Record the items that make up one line of development.

        LOD is the LineOfDevelopment being described.  CVS_BRANCH is the
        CVSBranch that starts the LOD, or None if there is none.
        CVS_REVISIONS lists the CVSRevisions on the LOD in dependency
        order.  CVS_BRANCHES and CVS_TAGS list the CVSBranches and
        CVSTags that sprout from the LOD (either from CVS_BRANCH or from
        one of the CVSRevisions)."""

        # The LineOfDevelopment described by this instance.
        self.lod = lod

        # The CVSBranch starting this LOD, if any; otherwise, None.
        self.cvs_branch = cvs_branch

        # The list of CVSRevisions on this LOD, if any.  The CVSRevisions
        # are listed in dependency order.
        self.cvs_revisions = cvs_revisions

        # A list of CVSBranches that sprout from this LOD (either from
        # cvs_branch or from one of the CVSRevisions).
        self.cvs_branches = cvs_branches

        # A list of CVSTags that sprout from this LOD (either from
        # cvs_branch or from one of the CVSRevisions).
        self.cvs_tags = cvs_tags

    def is_trivial_import(self):
        """Return True iff this LOD is a trivial import branch in this file.

        A trivial import branch is a branch that was used for a single
        import and nothing else: it holds exactly one revision and that
        revision is flagged as an NTDB revision.  Such a branch is
        eligible for being grafted onto trunk, even if it has branch
        blockers.

        The result is always a real bool."""

        return bool(
            len(self.cvs_revisions) == 1
            and self.cvs_revisions[0].ntdbr
            )

    def is_pure_ntdb(self):
        """Return True iff this LOD is a pure NTDB in this file.

        A pure non-trunk default branch is a branch that contains only
        NTDB revisions (and at least one of them).  Only the last
        revision is inspected; this presumably relies on NTDB revisions
        always forming a prefix of the branch.  Such a branch is eligible
        for being grafted onto trunk, even if it has branch blockers.

        The result is always a real bool; in particular an LOD without
        revisions yields False rather than the empty revision list."""

        return bool(
            self.cvs_revisions
            and self.cvs_revisions[-1].ntdbr
            )

    def iter_blockers(self):
        """Generate the CVSTags and CVSBranches that block this LOD.

        A pure NTDB has no blockers, because its blockers can be grafted
        to trunk.  Any other LOD is blocked only by the symbols that
        sprout from its non-NTDB revisions.  Tags are generated before
        branches."""

        if self.is_pure_ntdb():
            return

        non_ntdbr_revision_ids = set(
            cvs_revision.id
            for cvs_revision in self.cvs_revisions
            if not cvs_revision.ntdbr
            )

        for cvs_tag in self.cvs_tags:
            if cvs_tag.source_id in non_ntdbr_revision_ids:
                yield cvs_tag

        for cvs_branch in self.cvs_branches:
            if cvs_branch.source_id in non_ntdbr_revision_ids:
                yield cvs_branch
117 class CVSFileItems(object):
def __init__(self, cvs_file, trunk, cvs_items, original_ids=None):
    """Collect the CVSItems of CVS_FILE.

    CVS_FILE is the file whose data this instance holds and TRUNK is
    the symbol that represents "Trunk" in this file.  CVS_ITEMS is an
    iterable of the CVSItems to store; it is traversed exactly once, so
    a one-shot iterator is acceptable.  ORIGINAL_IDS, if not None, is
    stored as self.original_ids; otherwise that map is derived from the
    CVSRevisions found in CVS_ITEMS."""

    # The file whose data this instance holds.
    self.cvs_file = cvs_file

    # The symbol that represents "Trunk" in this file.
    self.trunk = trunk

    # A map from CVSItem.id to CVSItem:
    self._cvs_items = {}

    # The cvs_item_id of each root in the CVSItem forest.  (A root is
    # defined to be any CVSRevision with no prev_id.)
    self.root_ids = set()

    # self.original_ids is a dict {cvs_rev.rev : cvs_rev.id} holding
    # the IDs originally allocated to each CVS revision number.  This
    # member is stored for the convenience of RevisionCollectors.
    derive_original_ids = original_ids is None
    if derive_original_ids:
        self.original_ids = {}
    else:
        self.original_ids = original_ids

    # Everything is gathered in a single pass so that CVS_ITEMS is
    # never iterated a second time (which would come up empty for a
    # generator).
    for cvs_item in cvs_items:
        self.add(cvs_item)
        if isinstance(cvs_item, CVSRevision):
            if cvs_item.prev_id is None:
                self.root_ids.add(cvs_item.id)
            if derive_original_ids:
                self.original_ids[cvs_item.rev] = cvs_item.id
def __getstate__(self):
    """Return the picklable state of this instance.

    The state is the tuple (cvs_file id, list of CVSItems,
    original_ids).  The items are copied into a list because
    self.values() can be a dictionary view, which cannot be pickled."""

    return (self.cvs_file.id, list(self.values()), self.original_ids,)
def __setstate__(self, state):
    """Rebuild this instance from STATE, as produced by __getstate__().

    The file is looked up by id in the path database of the current
    Ctx, trunk is taken from the file's project, and the instance is
    then initialized as if it had been constructed normally."""

    cvs_file_id, cvs_items, original_ids = state
    cvs_file = Ctx()._cvs_path_db.get_path(cvs_file_id)
    trunk = cvs_file.project.get_trunk()
    CVSFileItems.__init__(
        self, cvs_file, trunk, cvs_items, original_ids=original_ids
        )
def add(self, cvs_item):
    """Store CVS_ITEM under its id, replacing any item already there."""

    item_id = cvs_item.id
    self._cvs_items[item_id] = cvs_item
def __getitem__(self, id):
    """Return the CVSItem stored under ID.

    Raise KeyError if there is no such item."""

    stored_items = self._cvs_items
    return stored_items[id]
def get(self, id, default=None):
    """Return the CVSItem stored under ID, or DEFAULT if there is none."""

    try:
        return self._cvs_items[id]
    except KeyError:
        return default
def __delitem__(self, id):
    """Forget the CVSItem stored under ID.

    The item must not be a root; callers have to take its id out of
    self.root_ids first.  Raise KeyError if there is no such item."""

    assert id not in self.root_ids
    self._cvs_items.pop(id)
def values(self):
    """Return all of the CVSItems held by this instance."""

    all_items = self._cvs_items.values()
    return all_items
def check_link_consistency(self):
    """Verify that every CVSItem is linked correctly with the others.

    Each item's check_links() is called with this instance.  If one of
    them fails with an AssertionError, an error naming the offending
    item is logged and the exception is re-raised."""

    for cvs_item in self.values():
        try:
            cvs_item.check_links(self)
        except AssertionError:
            message = (
                'Link consistency error in %s\n'
                'This is probably a bug internal to cvs2svn. Please file a bug\n'
                'report including the following stack trace (see FAQ for more '
                'info).'
                ) % (cvs_item,)
            logger.error(message)
            raise
def _get_lod(self, lod, cvs_branch, start_id):
    """Build the LODItems for a single line of development.

    LOD is the LineOfDevelopment to describe.  CVS_BRANCH is the
    CVSBranch instance that starts the LOD, or None if there is none.
    START_ID is the id of the first CVSRevision on the LOD, or None if
    the LOD has no revisions.  Branches that sprout from the LOD are
    listed in the result but are not descended into."""

    revisions = []
    branches = []
    tags = []

    def gather_symbols(cvs_item):
        """Record the branches and tags that are rooted in CVS_ITEM.

        CVS_ITEM can be a CVSRevision or a CVSBranch."""

        branches.extend(
            [self[branch_id] for branch_id in cvs_item.branch_ids[:]]
            )
        tags.extend([self[tag_id] for tag_id in cvs_item.tag_ids])

    if cvs_branch is not None:
        # Symbols can sprout directly from the CVSBranch, too:
        gather_symbols(cvs_branch)

    # Follow the chain of revisions along the LOD:
    next_id = start_id
    while next_id is not None:
        cvs_rev = self[next_id]
        revisions.append(cvs_rev)
        gather_symbols(cvs_rev)
        next_id = cvs_rev.next_id

    return LODItems(lod, cvs_branch, revisions, branches, tags)
def get_lod_items(self, cvs_branch):
    """Return the LODItems for the branch that starts at CVS_BRANCH.

    CVS_BRANCH must be a CVSBranch instance that is contained in this
    CVSFileItems."""

    lod = cvs_branch.symbol
    first_revision_id = cvs_branch.next_id
    return self._get_lod(lod, cvs_branch, first_revision_id)
def iter_root_lods(self):
    """Generate the LODItems of every root LOD, without recursing.

    The root ids are copied before iterating, so self.root_ids may be
    modified while the generator is being consumed.  Raise
    InternalError if a root is neither a CVSRevision nor a CVSBranch."""

    for root_id in list(self.root_ids):
        root = self[root_id]
        if isinstance(root, CVSRevision):
            # A root revision has no CVSBranch of its own: its LOD is
            # either Trunk or a branch whose CVSBranch has been deleted.
            lod_args = (root.lod, None, root_id)
        elif isinstance(root, CVSBranch):
            # A branch that has been severed from the rest of the tree.
            lod_args = (root.symbol, root, root.next_id)
        else:
            raise InternalError('Unexpected root item: %s' % (root,))

        yield self._get_lod(*lod_args)
def _iter_tree(self, lod, cvs_branch, start_id):
    """Generate LODItems for the tree rooted at one line of development.

    LOD is the LineOfDevelopment at which the traversal starts.
    CVS_BRANCH is the CVSBranch instance that starts the LOD, or None
    if there is none.  START_ID is the id of the first CVSRevision on
    the LOD, or None if the LOD has no revisions.

    Two cases are handled: trunk (LOD is a Trunk instance, CVS_BRANCH
    is None and START_ID is the id of the 1.1 revision) and a branch
    (LOD is a Branch instance, CVS_BRANCH is a CVSBranch instance and
    START_ID is the id of the first CVSRevision on the branch, or None
    if the branch has no CVSRevisions).  CVS_BRANCH and START_ID cannot
    both be None.

    The LODItems of every branch sprouting from this LOD are generated
    (recursively) before the LODItems of the LOD itself, which comes
    last."""

    revisions = []
    branches = []
    tags = []

    def walk_symbols(cvs_item):
        """Descend into the branches of CVS_ITEM and record its symbols.

        CVS_ITEM can be a CVSRevision or a CVSBranch.  The LODItems of
        the sub-branches are generated."""

        for branch_id in cvs_item.branch_ids[:]:
            # Recurse into the branch first:
            sub_branch = self[branch_id]
            subtree = self._iter_tree(
                sub_branch.symbol, sub_branch, sub_branch.next_id
                )
            for sub_lod_items in subtree:
                yield sub_lod_items

            # Whoever consumed the items generated above may have
            # deleted the branch in the meantime; only branches that
            # are still present get recorded.
            try:
                surviving_branch = self[branch_id]
            except KeyError:
                continue
            branches.append(surviving_branch)

        for tag_id in cvs_item.tag_ids:
            tags.append(self[tag_id])

    if cvs_branch is not None:
        # Symbols can sprout directly from the CVSBranch, too:
        for sub_lod_items in walk_symbols(cvs_branch):
            yield sub_lod_items

    next_id = start_id
    while next_id is not None:
        cvs_rev = self[next_id]
        revisions.append(cvs_rev)

        for sub_lod_items in walk_symbols(cvs_rev):
            yield sub_lod_items

        next_id = cvs_rev.next_id

    yield LODItems(lod, cvs_branch, revisions, branches, tags)
def iter_lods(self):
    """Generate an LODItems for every LOD in this file, depth-first.

    The traversal starts at each root node, but the LODs are generated
    in depth-first order.

    The CVSFileItems instance may be modified while the traversal is in
    progress, but only in ways that do not affect the tree structure
    above (i.e., towards the trunk from) the current LOD.  Raise
    InternalError if a root is neither a CVSRevision nor a CVSBranch."""

    # Iterate over a copy of root_ids so that callers can change it:
    for root_id in list(self.root_ids):
        root = self[root_id]
        if isinstance(root, CVSRevision):
            # A root revision has no CVSBranch of its own: its LOD is
            # either Trunk or a branch whose CVSBranch has been deleted.
            lod, cvs_branch, start_id = root.lod, None, root_id
        elif isinstance(root, CVSBranch):
            # A branch that has been severed from the rest of the tree.
            lod, cvs_branch, start_id = root.symbol, root, root.next_id
        else:
            raise InternalError('Unexpected root item: %s' % (root,))

        for lod_items in self._iter_tree(lod, cvs_branch, start_id):
            yield lod_items
def iter_deltatext_ancestors(self, cvs_rev):
    """Generate the delta-dependency ancestors of CVS_REV.

    The ancestors are generated in deltatext order; i.e., back along
    branches towards trunk, then outwards along trunk towards HEAD.
    The generator finishes at the trunk revision that has no
    successor."""

    current = cvs_rev
    while True:
        # Work out the next candidate source revision:
        if not isinstance(current.lod, Trunk):
            current = self[current.prev_id]
        elif current.next_id is None:
            # HEAD has no ancestors, so we are done:
            return
        else:
            current = self[current.next_id]

        yield current
def _sever_branch(self, lod_items):
    """Cut a branch loose from its source and discard its CVSBranch.

    LOD_ITEMS describes the branch to sever.  Its CVSBranch is deleted,
    LOD_ITEMS.cvs_branch is set to None and, if the branch has any
    revisions, the first of them becomes a new root.

    If LOD_ITEMS has no CVSBranch (e.g., because it is the trunk branch
    or because it has already been severed), do nothing.

    This method can only be used before symbols have been grafted onto
    CVSBranches.  It does not adjust NTDBR, NTDBR_PREV_ID or
    NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""

    doomed_branch = lod_items.cvs_branch
    if doomed_branch is None:
        return

    assert not doomed_branch.tag_ids
    assert not doomed_branch.branch_ids
    source_rev = self[doomed_branch.source_id]

    # Only a CVSRevision source is supported, even though after
    # FilterSymbolsPass the source_id might refer to another CVSBranch.
    assert isinstance(source_rev, CVSRevision)

    # Get rid of the CVSBranch itself...
    lod_items.cvs_branch = None
    del self[doomed_branch.id]

    # ...and of the source revision's reference to it:
    source_rev.branch_ids.remove(doomed_branch.id)

    if not lod_items.cvs_revisions:
        # No revisions on the branch, so nothing else refers to it.
        return

    first_rev = lod_items.cvs_revisions[0]

    # The first revision no longer belongs to a CVSBranch:
    first_rev.first_on_branch_id = None

    # Cut the links between the source revision and the first revision
    # on the branch, in both directions:
    source_rev.branch_commit_ids.remove(first_rev.id)
    first_rev.prev_id = None

    # first_rev has lost its predecessor, so its type may have to
    # change (e.g., from Change to Add):
    was_modification = isinstance(first_rev, CVSRevisionModification)
    first_rev.__class__ = cvs_revision_type_map[(was_modification, False,)]

    # first_rev is now a root in its own right:
    self.root_ids.add(first_rev.id)
def adjust_ntdbrs(self, ntdbr_cvs_revs):
    """Mark and relink the given non-trunk default branch revisions.

    NTDBR_CVS_REVS is a non-empty list of CVSRevision instances in this
    file that have been determined to be non-trunk default branch
    revisions.

    Background: when a file is imported (as opposed to being added),
    CVS creates a 1.1 revision, a vendor branch 1.1.1 based on it, and
    a 1.1.1.1 revision whose deltatext is empty.  The log message typed
    by the user is stored with 1.1.1.1, whereas 1.1 always carries the
    generated message 'Initial revision\n'.  Later imports to the same
    vendor branch create 1.1.1.2, 1.1.1.3, etc., *without* counterparts
    on trunk, even though they effectively play the role of trunk
    revisions.  Such revisions are therefore added to the vendor branch
    and then copied back to trunk in post-commits.

    Set the ntdbr member of every revision in NTDBR_CVS_REVS to True.
    Also, if there is a 1.2 revision, make it depend on the last
    non-trunk default branch revision and adjust its type
    accordingly."""

    for ntdbr in ntdbr_cvs_revs:
        ntdbr.ntdbr = True

    # Revision 1.1 precedes the first NTDBR; 1.2, if it exists, is the
    # successor of 1.1:
    rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
    rev_1_2 = self.get(rev_1_1.next_id)
    if rev_1_2 is None:
        return

    # Revision 1.2 logically follows the imported revisions, not 1.1.
    # Accordingly, connect it to the last NTDBR...
    last_ntdbr = ntdbr_cvs_revs[-1]
    rev_1_2.ntdbr_prev_id = last_ntdbr.id
    last_ntdbr.ntdbr_next_id = rev_1_2.id

    # ...and pick the type that matches its new predecessor:
    type_key = (
        isinstance(rev_1_2, CVSRevisionModification),
        isinstance(last_ntdbr, CVSRevisionModification),
        )
    rev_1_2.__class__ = cvs_revision_type_map[type_key]
def process_live_ntdb(self, vendor_lod_items):
    """Process VENDOR_LOD_ITEMS, which is a live default branch.

    In this case every revision on the default branch is an NTDBR, and
    it is an error for the file to have a '1.2' revision as well.

    Return True iff this transformation really does something (i.e.,
    the branch has at least one revision).  Raise a VendorBranchError
    if there is a '1.2' revision."""

    vendor_branch = vendor_lod_items.cvs_branch
    rev_1_1 = self[vendor_branch.source_id]
    rev_1_2_id = rev_1_1.next_id
    if rev_1_2_id is not None:
        details = (
            self.cvs_file.rcs_path,
            vendor_branch.branch_number,
            self[rev_1_2_id].rev,
            )
        raise VendorBranchError(
            'File \'%s\' has default branch=%s but also a revision %s'
            % details
            )

    ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
    if not ntdbr_cvs_revs:
        return False

    self.adjust_ntdbrs(ntdbr_cvs_revs)
    return True
def process_historical_ntdb(self, vendor_lod_items):
    """Process a branch that seems to have been a default branch once.

    There is currently no default branch, but the file appears to have
    been imported.  The educated guess is therefore that all revisions
    on the '1.1.1' branch (described by VENDOR_LOD_ITEMS) with
    timestamps prior to the timestamp of '1.2' were non-trunk default
    branch revisions.  If there is no '1.2' revision, every revision on
    the branch is treated as one.

    Return True iff this transformation really does something.

    This really only handles standard '1.1.1.*'-style vendor revisions.
    A file could conceivably have (or have had) a default branch such
    as 1.1.3, with vendor revisions 1.1.3.1, 1.1.3.2, etc.  But with
    the default branch gone, there is no basis for assuming that such a
    non-standard vendor branch was ever the default branch.

    Note that the comparison relies on the revisions' own timestamps,
    which might be incorrect due to clock skew.  Changeset timestamps
    might be more accurate, but using them would require an extra pass
    or two."""

    rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
    rev_1_2_id = rev_1_1.next_id

    if rev_1_2_id is None:
        # Without a 1.2 revision nothing ends the once-default branch.
        ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
    else:
        cutoff = self[rev_1_2_id].timestamp
        ntdbr_cvs_revs = []
        for vendor_rev in vendor_lod_items.cvs_revisions:
            if vendor_rev.timestamp >= cutoff:
                # That's the end of the once-default branch.
                break
            ntdbr_cvs_revs.append(vendor_rev)

    if not ntdbr_cvs_revs:
        return False

    self.adjust_ntdbrs(ntdbr_cvs_revs)
    return True
def imported_remove_1_1(self, vendor_lod_items):
    """Remove the 1.1 revision of an imported file, if possible.

    VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch; it
    must contain at least one revision.  Revision 1.1 is only removed
    if the first vendor revision is a modification without deltatext.
    In that case the vendor branch is severed from trunk, 1.1 is
    deleted, and the tags, branches and branch commits that were rooted
    in 1.1 are moved to the first vendor revision.  See adjust_ntdbrs()
    for more information."""

    assert vendor_lod_items.cvs_revisions
    cvs_rev = vendor_lod_items.cvs_revisions[0]

    if not isinstance(cvs_rev, CVSRevisionModification) \
           or cvs_rev.deltatext_exists:
        return

    rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
    assert isinstance(rev_1_1, CVSRevision)
    logger.debug('Removing unnecessary revision %s' % (rev_1_1,))

    # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
    self._sever_branch(vendor_lod_items)

    # Delete rev_1_1; its successor (if any) becomes a root instead:
    self.root_ids.remove(rev_1_1.id)
    del self[rev_1_1.id]
    rev_1_2_id = rev_1_1.next_id
    if rev_1_2_id is not None:
        rev_1_2 = self[rev_1_2_id]
        rev_1_2.prev_id = None
        self.root_ids.add(rev_1_2.id)

    # Re-home the tags of rev_1_1 on cvs_rev:
    cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
    for tag_id in rev_1_1.tag_ids:
        moved_tag = self[tag_id]
        moved_tag.source_lod = cvs_rev.lod
        moved_tag.source_id = cvs_rev.id

    # The branches of rev_1_1 go in front of those of cvs_rev:
    cvs_rev.branch_ids[:0] = rev_1_1.branch_ids
    for branch_id in rev_1_1.branch_ids:
        moved_branch = self[branch_id]
        moved_branch.source_lod = cvs_rev.lod
        moved_branch.source_id = cvs_rev.id

    # Likewise for the branch commits of rev_1_1:
    cvs_rev.branch_commit_ids[:0] = rev_1_1.branch_commit_ids
    for commit_id in rev_1_1.branch_commit_ids:
        branch_commit = self[commit_id]
        branch_commit.prev_id = cvs_rev.id
def _is_unneeded_initial_trunk_delete(self, cvs_item, metadata_db):
    """Return True iff CVS_ITEM is a superfluous dead 1.1 trunk revision.

    CVS_ITEM qualifies only if it is a CVSRevisionNoop numbered '1.1'
    on trunk that closes no symbols and is not an NTDB revision, and
    if its log message (looked up in METADATA_DB) is one of the
    messages that record that the file was added on a branch."""

    disqualified = (
        # This rule can only be applied to dead revisions.
        not isinstance(cvs_item, CVSRevisionNoop)
        or cvs_item.rev != '1.1'
        or not isinstance(cvs_item.lod, Trunk)
        or cvs_item.closed_symbols
        or cvs_item.ntdbr
        )
    if disqualified:
        return False

    log_msg = metadata_db[cvs_item.metadata_id].log_msg
    accepted_patterns = (
        r'file .* was initially added on branch .*\.\n$',
        # This variant commit message was reported by one user:
        r'file .* was added on branch .*\n$',
        )
    for pattern in accepted_patterns:
        if re.match(pattern, log_msg):
            return True
    return False
def remove_unneeded_initial_trunk_delete(self, metadata_db):
    """Remove the unneeded initial trunk delete of this file, if any.

    If a file is added on a branch, then a trunk revision is added at
    the same time in the 'Dead' state.  This revision doesn't do
    anything useful, so delete it.  METADATA_DB is used to look up the
    log messages of the candidate revisions."""

    # Look for the first root that qualifies.  This can only happen
    # once per file, so the search stops there.
    for root_id in self.root_ids:
        dead_rev = self[root_id]
        if self._is_unneeded_initial_trunk_delete(dead_rev, metadata_db):
            break
    else:
        return

    logger.debug('Removing unnecessary delete %s' % (dead_rev,))

    # Sever any CVSBranches rooted at dead_rev.
    for cvs_branch_id in dead_rev.branch_ids[:]:
        rooted_branch = self[cvs_branch_id]
        self._sever_branch(self.get_lod_items(rooted_branch))

    # Tagging a dead revision doesn't do anything, so remove any
    # CVSTags that refer to dead_rev:
    while dead_rev.tag_ids:
        del self[dead_rev.tag_ids.pop()]

    # Now delete dead_rev itself; its successor (if any) becomes a
    # root in its place:
    self.root_ids.remove(dead_rev.id)
    del self[dead_rev.id]
    if dead_rev.next_id is not None:
        successor = self[dead_rev.next_id]
        successor.prev_id = None
        self.root_ids.add(successor.id)
def _is_unneeded_initial_branch_delete(self, lod_items, metadata_db):
    """Return True iff the initial revision in LOD_ITEMS can be deleted.

    That is the case if the LOD has a first revision that is not an
    NTDB revision, is a CVSRevisionAbsent, has no branches sprouting
    from it, and whose log message (looked up in METADATA_DB) is the
    message recording that the file was added on a branch on a given
    date."""

    if not lod_items.cvs_revisions:
        return False

    first_rev = lod_items.cvs_revisions[0]

    disqualified = (
        first_rev.ntdbr
        or not isinstance(first_rev, CVSRevisionAbsent)
        or first_rev.branch_ids
        )
    if disqualified:
        return False

    log_msg = metadata_db[first_rev.metadata_id].log_msg
    added_on_branch = re.match(
        r'file .* was added on branch .* on '
        r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
        '\n$',
        log_msg,
        )
    return bool(added_on_branch)
def remove_initial_branch_deletes(self, metadata_db):
    """Remove first branch revisions that are unnecessary deletes.

    If a file is added on a branch (whether or not it already existed
    on trunk), then new versions of CVS add a first branch revision in
    the 'dead' state (to indicate that the file did not exist on the
    branch when the branch was created) followed by the second branch
    revision, which is an add.  When this situation is encountered,
    sever the branch from its source and delete the first branch
    revision.  METADATA_DB is used to look up log messages."""

    for lod_items in self.iter_lods():
        if not self._is_unneeded_initial_branch_delete(
              lod_items, metadata_db
              ):
            continue

        dead_rev = lod_items.cvs_revisions[0]
        logger.debug(
            'Removing unnecessary initial branch delete %s' % (dead_rev,)
            )

        # Sever the branch from its source if necessary:
        self._sever_branch(lod_items)

        # Delete the first revision on the branch:
        self.root_ids.remove(dead_rev.id)
        del self[dead_rev.id]

        # A successor loses its backreference and becomes a root:
        if dead_rev.next_id is not None:
            successor = self[dead_rev.next_id]
            successor.prev_id = None
            self.root_ids.add(successor.id)

        # Tagging a dead revision doesn't do anything, so remove any
        # tags that were set on it:
        for tag_id in dead_rev.tag_ids:
            del self[tag_id]
def _exclude_tag(self, cvs_tag):
    """Delete CVS_TAG and the reference to it from its source item."""

    tag_id = cvs_tag.id
    del self[tag_id]

    # A CVSTag is the successor of the item that it sprouts from, so
    # that item's tag_ids has to forget about the tag as well:
    source = self[cvs_tag.source_id]
    source.tag_ids.remove(tag_id)
def _exclude_branch(self, lod_items):
    """Exclude the branch described by LOD_ITEMS, revisions included.

    (The LOD_ITEMS instance itself is not updated.)

    If the LOD starts with non-trunk default branch revisions, leave
    the branch and the NTDB revisions in place, but delete any
    subsequent revisions that are not NTDB revisions.  In this case,
    return True; otherwise return False."""

    revisions = lod_items.cvs_revisions

    if revisions and revisions[0].ntdbr:
        # Find the first revision that is not an NTDBR, if there is one:
        first_non_ntdbr = None
        for candidate in revisions:
            if not candidate.ntdbr:
                first_non_ntdbr = candidate
                break

        if first_non_ntdbr:
            # Cut the chain after the last NTDBR and delete the rest:
            last_ntdbr = self[first_non_ntdbr.prev_id]
            last_ntdbr.next_id = None
            doomed = first_non_ntdbr
            while True:
                del self[doomed.id]
                if doomed.next_id is None:
                    break
                doomed = self[doomed.next_id]

        return True

    cvs_branch = lod_items.cvs_branch
    if cvs_branch is not None:
        # Delete the CVSBranch itself:
        del self[cvs_branch.id]

        # A CVSBranch is the successor of the item that it sprouts
        # from, so that item's branch_ids has to forget about it:
        self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)

    if revisions:
        # The first CVSRevision on the branch has to be either detached
        # from the revision from which the branch sprang, or removed
        # from self.root_ids:
        first_rev = revisions[0]
        if first_rev.prev_id is None:
            self.root_ids.remove(first_rev.id)
        else:
            self[first_rev.prev_id].branch_commit_ids.remove(first_rev.id)

        for doomed in revisions:
            del self[doomed.id]

    return False
def graft_ntdbr_to_trunk(self):
    """Graft the non-trunk default branch revisions onto trunk.

    The NTDB revisions should already be alone on a branch that may or
    may not have a CVSBranch connecting it to trunk.  Only the first
    LOD that starts with an NTDB revision is processed."""

    for lod_items in self.iter_lods():
        ntdb_revs = lod_items.cvs_revisions
        if not (ntdb_revs and ntdb_revs[0].ntdbr):
            continue

        assert lod_items.is_pure_ntdb()

        first_rev = ntdb_revs[0]
        last_rev = ntdb_revs[-1]

        # Look up the neighbours before severing, because severing
        # resets first_rev.prev_id:
        rev_1_1 = self.get(first_rev.prev_id)
        rev_1_2 = self.get(last_rev.ntdbr_next_id)

        self._sever_branch(lod_items)

        if rev_1_1 is not None:
            # Splice the NTDB revisions in right after 1.1:
            rev_1_1.next_id = first_rev.id
            first_rev.prev_id = rev_1_1.id

            self.root_ids.remove(first_rev.id)

            type_key = (
                isinstance(first_rev, CVSRevisionModification),
                isinstance(rev_1_1, CVSRevisionModification),
                )
            first_rev.__class__ = cvs_revision_type_map[type_key]

        if rev_1_2 is not None:
            # Turn the NTDBR link to 1.2 into an ordinary one:
            rev_1_2.ntdbr_prev_id = None
            last_rev.ntdbr_next_id = None

            if rev_1_2.prev_id is None:
                self.root_ids.remove(rev_1_2.id)

            rev_1_2.prev_id = last_rev.id
            last_rev.next_id = rev_1_2.id

            # The effective_pred_id of rev_1_2 was not changed, so we
            # don't have to change rev_1_2's type.

        # Everything on the former branch now lives on trunk:
        for cvs_rev in lod_items.cvs_revisions:
            cvs_rev.ntdbr = False
            cvs_rev.lod = self.trunk

        for cvs_branch in lod_items.cvs_branches:
            cvs_branch.source_lod = self.trunk

        for cvs_tag in lod_items.cvs_tags:
            cvs_tag.source_lod = self.trunk

        return
def exclude_non_trunk(self):
    """Delete all tags and branches.

    Every tag is excluded, and so is every LOD other than trunk.  If
    any excluded branch started with NTDB revisions (which are left in
    place by _exclude_branch()), those are grafted onto trunk
    afterwards."""

    ntdbr_excluded = False
    for lod_items in self.iter_lods():
        for cvs_tag in lod_items.cvs_tags[:]:
            self._exclude_tag(cvs_tag)
            lod_items.cvs_tags.remove(cvs_tag)

        if isinstance(lod_items.lod, Trunk):
            continue

        assert not lod_items.cvs_branches

        if self._exclude_branch(lod_items):
            ntdbr_excluded = True

    if ntdbr_excluded:
        self.graft_ntdbr_to_trunk()
def filter_excluded_symbols(self):
    """Delete any excluded symbols and the references to them.

    Tags whose symbol is an ExcludedSymbol are removed, as are whole
    LODs that are ExcludedSymbols.  If any excluded branch started with
    NTDB revisions (which are left in place by _exclude_branch()),
    those are grafted onto trunk afterwards."""

    ntdbr_excluded = False
    for lod_items in self.iter_lods():
        # Delete any excluded tags:
        for cvs_tag in lod_items.cvs_tags[:]:
            if not isinstance(cvs_tag.symbol, ExcludedSymbol):
                continue
            self._exclude_tag(cvs_tag)
            lod_items.cvs_tags.remove(cvs_tag)

        # Delete the whole branch if it is to be excluded:
        if not isinstance(lod_items.lod, ExcludedSymbol):
            continue

        # A symbol can only be excluded if no other symbols spring
        # from it.  This was already checked in CollateSymbolsPass, so
        # these conditions should already be satisfied.
        assert not list(lod_items.iter_blockers())

        if self._exclude_branch(lod_items):
            ntdbr_excluded = True

    if ntdbr_excluded:
        self.graft_ntdbr_to_trunk()
def _mutate_branch_to_tag(self, cvs_branch):
    """Replace the branch CVS_BRANCH by a tag with the same id.

    The new CVSTag is stored in place of CVS_BRANCH, and the id is
    moved from the source item's branch_ids to its tag_ids.  Raise
    FatalError if the branch has any commits."""

    if cvs_branch.next_id is not None:
        # This shouldn't happen because it was checked in
        # CollateSymbolsPass:
        raise FatalError('Attempt to exclude a branch with commits.')

    cvs_tag = CVSTag(
        cvs_branch.id,
        cvs_branch.cvs_file,
        cvs_branch.symbol,
        cvs_branch.source_lod,
        cvs_branch.source_id,
        cvs_branch.revision_reader_token,
        )
    self.add(cvs_tag)

    source = self[cvs_tag.source_id]
    source.branch_ids.remove(cvs_tag.id)
    source.tag_ids.append(cvs_tag.id)
def _mutate_tag_to_branch(self, cvs_tag):
    """Replace the tag CVS_TAG by a branch with the same id.

    The new CVSBranch is stored in place of CVS_TAG, and the id is
    moved from the source item's tag_ids to its branch_ids."""

    cvs_branch = CVSBranch(
        cvs_tag.id,
        cvs_tag.cvs_file,
        cvs_tag.symbol,
        None,
        cvs_tag.source_lod,
        cvs_tag.source_id,
        None,
        cvs_tag.revision_reader_token,
        )
    self.add(cvs_branch)

    source = self[cvs_branch.source_id]
    source.tag_ids.remove(cvs_branch.id)
    source.branch_ids.append(cvs_branch.id)
def _mutate_symbol(self, cvs_symbol):
    """Mutate CVS_SYMBOL if its kind disagrees with that of its symbol.

    A CVSBranch whose symbol is a Tag is turned into a tag, and a
    CVSTag whose symbol is a Branch is turned into a branch.  Anything
    else is left alone."""

    wanted = cvs_symbol.symbol
    if isinstance(cvs_symbol, CVSBranch) and isinstance(wanted, Tag):
        self._mutate_branch_to_tag(cvs_symbol)
        return
    if isinstance(cvs_symbol, CVSTag) and isinstance(wanted, Branch):
        self._mutate_tag_to_branch(cvs_symbol)
def mutate_symbols(self):
    """Force every CVSSymbol to be a tag or a branch, as its symbol says.

    Raise RuntimeError if an item is neither a CVSRevision nor a
    CVSSymbol."""

    for cvs_item in self.values():
        if isinstance(cvs_item, CVSRevision):
            # A CVSRevision may be affected by the mutation of the
            # CVSSymbols that it references, but there is nothing to do
            # for the revision itself.
            continue

        if not isinstance(cvs_item, CVSSymbol):
            raise RuntimeError('Unknown cvs item type')

        self._mutate_symbol(cvs_item)
def _adjust_tag_parent(self, cvs_tag):
    """Re-parent CVS_TAG onto its preferred parent, if that is possible.

    CVS_TAG is an instance of CVSTag.  Its preferred parent is looked
    up in the symbol database of the current Ctx.  The tag is only
    moved if the preferred parent is a branch that sprouts from the
    same revision as the tag does.  This method must be called in
    leaf-to-trunk order."""

    # The Symbol that cvs_tag would like to have as a parent:
    preferred_parent = Ctx()._symbol_db.get_symbol(
        cvs_tag.symbol.preferred_parent_id)

    if cvs_tag.source_lod == preferred_parent:
        # Nothing to do: the tag already has the parent that it wants.
        return

    # The CVSRevision that is the tag's direct parent:
    source = self[cvs_tag.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(preferred_parent, Trunk):
        # It is not possible to graft *onto* Trunk:
        return

    # Search the branches sprouting from the same revision for the
    # preferred parent:
    parent = None
    for branch_id in source.branch_ids:
        candidate = self[branch_id]
        if candidate.symbol == preferred_parent:
            parent = candidate
            break
    else:
        # The preferred parent is not a possible parent in this file.
        return

    assert isinstance(parent, CVSBranch)

    logger.debug('Grafting %s from %s (on %s) onto %s' % (
        cvs_tag, source, source.lod, parent,))

    # Switch parent:
    source.tag_ids.remove(cvs_tag.id)
    parent.tag_ids.append(cvs_tag.id)
    cvs_tag.source_lod = parent.symbol
    cvs_tag.source_id = parent.id
def _adjust_branch_parents(self, cvs_branch):
    """Graft CVS_BRANCH onto its preferred parent when that is possible.

    CVS_BRANCH is an instance of CVSBranch.  Nothing is changed if the
    branch already sprouts from its preferred parent, if the preferred
    parent is Trunk, or if the preferred parent does not appear before
    CVS_BRANCH itself among the branches sprouting from the source
    revision.  Raise InternalError if the search through those
    branches finds neither the preferred parent nor CVS_BRANCH.  This
    method must be called in leaf-to-trunk order."""

    # The Symbol that the branch would like to sprout from:
    wanted = Ctx()._symbol_db.get_symbol(
        cvs_branch.symbol.preferred_parent_id)

    if cvs_branch.source_lod == wanted:
        # Already attached to the preferred parent.
        return

    # The branch's direct parent.  Since this branch has not been
    # adjusted yet, that is always a CVSRevision:
    origin = self[cvs_branch.source_id]
    assert isinstance(origin, CVSRevision)

    if isinstance(wanted, Trunk):
        # Nothing can be grafted *onto* Trunk.
        return

    # Walk the sibling branches in order, looking for the preferred
    # parent:
    graft_target = None
    for candidate_id in origin.branch_ids:
        candidate = self[candidate_id]
        if candidate.symbol == wanted:
            graft_target = candidate
            break
        if candidate.symbol == cvs_branch.symbol:
            # Only branches that precede the branch being adjusted
            # qualify as parents, so the parentage stays as it is.
            return

    if graft_target is None:
        # The walk should always end at the preferred parent or at
        # the branch itself, so this should never happen.
        raise InternalError(
            'Possible parent search did not terminate as expected')

    assert isinstance(graft_target, CVSBranch)

    logger.debug('Grafting %s from %s (on %s) onto %s' % (
        cvs_branch, origin, origin.lod, graft_target,))

    # Detach the branch from the revision and hang it on its new parent:
    origin.branch_ids.remove(cvs_branch.id)
    graft_target.branch_ids.append(cvs_branch.id)
    cvs_branch.source_lod = graft_target.symbol
    cvs_branch.source_id = graft_target.id
def adjust_parents(self):
    """Move symbols onto their preferred parents where allowed.

    For every line of development yielded by self.iter_lods(), each
    tag is offered to _adjust_tag_parent() and then each branch, last
    to first, is offered to _adjust_branch_parents().  Those helpers
    graft a symbol only if its preferred parent differs from its
    current parent and is an allowed parent in this file."""

    for lod in self.iter_lods():
        for tag in lod.cvs_tags:
            self._adjust_tag_parent(tag)

        # Branches have to be visited back to front.  A graft target
        # always sits earlier in the list than the branch grafted onto
        # it, so this ordering guarantees that a target has not been
        # moved by the time a later branch is grafted onto it.
        for branch in reversed(lod.cvs_branches):
            self._adjust_branch_parents(branch)
def _get_revision_source(self, cvs_symbol):
    """Return the CVSRevision from which CVS_SYMBOL ultimately sprouts.

    The source_id links are followed, starting at CVS_SYMBOL, until
    the item that is reached is a CVSRevision."""

    item = self[cvs_symbol.source_id]
    while not isinstance(item, CVSRevision):
        # The source is itself a symbol; keep climbing.
        item = self[item.source_id]
    return item
def refine_symbols(self):
    """Refine the classes of the CVSSymbols in this file.

    Every tag gets the class stored in cvs_tag_type_map, and every
    branch the class stored in cvs_branch_type_map, under the key that
    tells whether the symbol's ultimate source revision is a
    CVSRevisionModification (i.e., whether the source exists)."""

    for lod in self.iter_lods():
        for tag in lod.cvs_tags:
            source_exists = isinstance(
                self._get_revision_source(tag), CVSRevisionModification)
            tag.__class__ = cvs_tag_type_map[source_exists]

        for branch in lod.cvs_branches:
            source_exists = isinstance(
                self._get_revision_source(branch), CVSRevisionModification)
            branch.__class__ = cvs_branch_type_map[source_exists]
def determine_revision_properties(self, revision_property_setters):
    """Fill in properties and properties_changed on every CVSRevision.

    REVISION_PROPERTY_SETTERS is iterated once per revision; each of
    its elements has its set_properties() method called with the
    revision, after the revision's properties have been reset to an
    empty dictionary.

    properties_changed is then set to True for a CVSRevisionAdd, to
    the result of comparing the revision's properties against those of
    its effective predecessor for a CVSRevisionChange, and to False
    for anything else."""

    # First pass: compute the properties of all revisions.  This has
    # to be finished before any comparison is made, because the second
    # pass reads the properties of other revisions.
    for lod in self.iter_lods():
        for revision in lod.cvs_revisions:
            revision.properties = {}
            for setter in revision_property_setters:
                setter.set_properties(revision)

    # Second pass: record whether the properties differ from those of
    # the predecessor.
    for lod in self.iter_lods():
        for revision in lod.cvs_revisions:
            if isinstance(revision, CVSRevisionAdd):
                changed = True
            elif isinstance(revision, CVSRevisionChange):
                predecessor = self[revision.get_effective_prev_id()]
                before = predecessor.get_properties()
                after = revision.get_properties()
                changed = after != before
            else:
                changed = False

            revision.properties_changed = changed
def record_opened_symbols(self):
    """Set opened_symbols on the surviving CVSRevisions and CVSBranches.

    For each such item, opened_symbols becomes a list holding one
    (symbol id, CVSSymbol id) tuple for every id reported by the
    item's get_cvs_symbol_ids_opened() method, in the order
    reported."""

    for item in self.values():
        if not isinstance(item, (CVSRevision, CVSBranch)):
            continue

        item.opened_symbols = []
        item.opened_symbols.extend(
            (self[opened_id].symbol.id, self[opened_id].id,)
            for opened_id in item.get_cvs_symbol_ids_opened()
            )
def record_closed_symbols(self):
    """Set closed_symbols on the surviving CVSRevisions.

    The symbols that a CVSRevision closes are the ones that were
    opened by the CVSItems that it closes: closed_symbols is the
    concatenation of the opened_symbols lists of the items whose ids
    are reported by the revision's get_ids_closed() method.

    Because those opened_symbols lists are read here, this method
    must be called after record_opened_symbols()."""

    for item in self.values():
        if not isinstance(item, CVSRevision):
            continue

        item.closed_symbols = []
        for closed_id in item.get_ids_closed():
            closed_item = self[closed_id]
            item.closed_symbols.extend(closed_item.opened_symbols)