Bring CHANGES up to date.
[cvs2svn.git] / cvs2svn_lib / cvs_file_items.py
blob130a003798b35a7a0504bcbe0e9f8d6f664a8d04
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a class to manage the CVSItems related to one file."""
20 import re
22 from cvs2svn_lib.common import InternalError
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.context import Ctx
25 from cvs2svn_lib.log import logger
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.cvs_item import CVSRevision
31 from cvs2svn_lib.cvs_item import CVSRevisionModification
32 from cvs2svn_lib.cvs_item import CVSRevisionAdd
33 from cvs2svn_lib.cvs_item import CVSRevisionChange
34 from cvs2svn_lib.cvs_item import CVSRevisionAbsent
35 from cvs2svn_lib.cvs_item import CVSRevisionNoop
36 from cvs2svn_lib.cvs_item import CVSSymbol
37 from cvs2svn_lib.cvs_item import CVSBranch
38 from cvs2svn_lib.cvs_item import CVSTag
39 from cvs2svn_lib.cvs_item import cvs_revision_type_map
40 from cvs2svn_lib.cvs_item import cvs_branch_type_map
41 from cvs2svn_lib.cvs_item import cvs_tag_type_map
class VendorBranchError(Exception):
    """Signal an error in the structure of a file's revision tree.

    Within this module it is raised by CVSFileItems.process_live_ntdb()
    when a file has a default branch but also a revision following the
    vendor branch's source revision."""
class LODItems(object):
    """The CVSItems of a single file that belong to one line of development.

    This is a plain record; the comments in __init__() describe each
    attribute.  The query methods only read the 'ntdbr', 'id' and
    'source_id' attributes of the stored items."""

    def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
        # The LineOfDevelopment described by this instance.
        self.lod = lod

        # The CVSBranch starting this LOD, if any; otherwise, None.
        self.cvs_branch = cvs_branch

        # The list of CVSRevisions on this LOD, if any.  The CVSRevisions
        # are listed in dependency order.
        self.cvs_revisions = cvs_revisions

        # A list of CVSBranches that sprout from this LOD (either from
        # cvs_branch or from one of the CVSRevisions).
        self.cvs_branches = cvs_branches

        # A list of CVSTags that sprout from this LOD (either from
        # cvs_branch or from one of the CVSRevisions).
        self.cvs_tags = cvs_tags

    def is_trivial_import(self):
        """Return True iff this LOD is a trivial import branch in this file.

        A trivial import branch is a branch that was used for a single
        import and nothing else.  Such a branch is eligible for being
        grafted onto trunk, even if it has branch blockers."""

        # bool() guarantees a real boolean even if 'ntdbr' holds some
        # other truthy/falsy value.
        return bool(
            len(self.cvs_revisions) == 1
            and self.cvs_revisions[0].ntdbr
            )

    def is_pure_ntdb(self):
        """Return True iff this LOD is a pure NTDB in this file.

        A pure non-trunk default branch is defined to be a branch that
        contains only NTDB revisions (and at least one of them).  Such a
        branch is eligible for being grafted onto trunk, even if it has
        branch blockers.

        Only the last revision is inspected."""

        # Without bool(), an LOD with no revisions would return the empty
        # list itself instead of False.
        return bool(
            self.cvs_revisions
            and self.cvs_revisions[-1].ntdbr
            )

    def iter_blockers(self):
        """Generate the CVSTags and CVSBranches that block this LOD.

        A pure NTDB yields nothing, because its blockers can be grafted
        to trunk.  For any other LOD, yield first the tags and then the
        branches whose source_id is the id of a non-NTDB revision on this
        LOD."""

        if self.is_pure_ntdb():
            # Such a branch has no blockers, because the blockers can be
            # grafted to trunk.
            return

        # Other branches are only blocked by symbols that sprout from
        # non-NTDB revisions:
        non_ntdbr_revision_ids = set(
            cvs_revision.id
            for cvs_revision in self.cvs_revisions
            if not cvs_revision.ntdbr
            )

        for cvs_tag in self.cvs_tags:
            if cvs_tag.source_id in non_ntdbr_revision_ids:
                yield cvs_tag

        for cvs_branch in self.cvs_branches:
            if cvs_branch.source_id in non_ntdbr_revision_ids:
                yield cvs_branch
117 class CVSFileItems(object):
118 def __init__(self, cvs_file, trunk, cvs_items, original_ids=None):
119 # The file whose data this instance holds.
120 self.cvs_file = cvs_file
122 # The symbol that represents "Trunk" in this file.
123 self.trunk = trunk
125 # A map from CVSItem.id to CVSItem:
126 self._cvs_items = {}
128 # The cvs_item_id of each root in the CVSItem forest. (A root is
129 # defined to be any CVSRevision with no prev_id.)
130 self.root_ids = set()
132 for cvs_item in cvs_items:
133 self.add(cvs_item)
134 if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None:
135 self.root_ids.add(cvs_item.id)
137 # self.original_ids is a dict {cvs_rev.rev : cvs_rev.id} holding
138 # the IDs originally allocated to each CVS revision number. This
139 # member is stored for the convenience of RevisionCollectors.
140 if original_ids is not None:
141 self.original_ids = original_ids
142 else:
143 self.original_ids = {}
144 for cvs_item in cvs_items:
145 if isinstance(cvs_item, CVSRevision):
146 self.original_ids[cvs_item.rev] = cvs_item.id
148 def __getstate__(self):
149 return (self.cvs_file.id, self.values(), self.original_ids,)
def __setstate__(self, state):
    """Restore the instance from a tuple produced by __getstate__().

    The CVSFile is looked up by id in the path database of the current
    Ctx, and the trunk symbol is obtained from the file's project; the
    instance is then initialized exactly as by __init__()."""

    cvs_file_id, cvs_items, original_ids = state
    cvs_file = Ctx()._cvs_path_db.get_path(cvs_file_id)
    trunk = cvs_file.project.get_trunk()
    CVSFileItems.__init__(
        self, cvs_file, trunk, cvs_items, original_ids=original_ids
        )
159 def add(self, cvs_item):
160 self._cvs_items[cvs_item.id] = cvs_item
162 def __getitem__(self, id):
163 """Return the CVSItem with the specified ID."""
165 return self._cvs_items[id]
167 def get(self, id, default=None):
168 return self._cvs_items.get(id, default)
170 def __delitem__(self, id):
171 assert id not in self.root_ids
172 del self._cvs_items[id]
174 def values(self):
175 return self._cvs_items.values()
177 def check_link_consistency(self):
178 """Check that the CVSItems are linked correctly with each other."""
180 for cvs_item in self.values():
181 try:
182 cvs_item.check_links(self)
183 except AssertionError:
184 logger.error(
185 'Link consistency error in %s\n'
186 'This is probably a bug internal to cvs2svn. Please file a bug\n'
187 'report including the following stack trace (see FAQ for more '
188 'info).'
189 % (cvs_item,))
190 raise
def _get_lod(self, lod, cvs_branch, start_id):
    """Assemble the LODItems for one line of development.

    LOD is the corresponding LineOfDevelopment.  CVS_BRANCH is the
    CVSBranch instance that starts the LOD, or None if there is none.
    START_ID is the id of the first CVSRevision on this LOD, or None if
    the LOD has no revisions.  The revision chain is followed through
    the next_id links."""

    revisions = []
    branches = []
    tags = []

    def collect_symbols(source):
        """Record the branches and tags rooted in SOURCE.

        SOURCE can be a CVSRevision or a CVSBranch."""

        branches.extend([self[branch_id] for branch_id in source.branch_ids])
        tags.extend([self[tag_id] for tag_id in source.tag_ids])

    if cvs_branch is not None:
        # Include the symbols sprouting directly from the CVSBranch:
        collect_symbols(cvs_branch)

    rev_id = start_id
    while rev_id is not None:
        revision = self[rev_id]
        revisions.append(revision)
        collect_symbols(revision)
        rev_id = revision.next_id

    return LODItems(lod, cvs_branch, revisions, branches, tags)
def get_lod_items(self, cvs_branch):
    """Return the LODItems for the branch that CVS_BRANCH starts.

    CVS_BRANCH must be a CVSBranch instance that is contained in this
    CVSFileItems.  Its symbol is used as the line of development and
    its next_id as the first revision of the branch."""

    branch_lod = cvs_branch.symbol
    first_rev_id = cvs_branch.next_id
    return self._get_lod(branch_lod, cvs_branch, first_rev_id)
def iter_root_lods(self):
    """Generate an LODItems for each root LOD, without recursing.

    The root ids are copied up front, so self.root_ids may change while
    the generator is being consumed.  An InternalError is raised for a
    root that is neither a CVSRevision nor a CVSBranch."""

    for root_id in list(self.root_ids):
        root = self[root_id]
        if isinstance(root, CVSRevision):
            # This LOD doesn't have a CVSBranch associated with it.
            # Either it is Trunk, or it is a branch whose CVSBranch has
            # been deleted.
            lod_args = (root.lod, None, root_id)
        elif isinstance(root, CVSBranch):
            # This is a Branch that has been severed from the rest of
            # the tree.
            lod_args = (root.symbol, root, root.next_id)
        else:
            raise InternalError('Unexpected root item: %s' % (root,))

        yield self._get_lod(*lod_args)
def _iter_tree(self, lod, cvs_branch, start_id):
    """Generate LODItems for the tree rooted at one line of development.

    LOD is the LineOfDevelopment where the traversal starts.
    CVS_BRANCH is the CVSBranch instance that starts the LOD, or None.
    START_ID is the id of the first CVSRevision on this LOD, or None if
    it has no revisions.

    Two situations are handled: trunk (LOD is a Trunk instance,
    CVS_BRANCH is None and START_ID is the id of the 1.1 revision) and
    a branch (LOD is a Branch instance, CVS_BRANCH is a CVSBranch and
    START_ID is the id of the first CVSRevision on the branch, or None
    if the branch has no CVSRevisions).  CVS_BRANCH and START_ID cannot
    both be None.

    The LODItems of every branch sprouting from this LOD are yielded
    before the LODItems of this LOD itself, which comes last."""

    revisions = []
    branches = []
    tags = []

    def walk_symbols(source):
        """Recurse into the branches of SOURCE, then record its tags.

        SOURCE can be a CVSRevision or a CVSBranch."""

        # Iterate over a copy, because the consumer may change
        # source.branch_ids between yields.
        for branch_id in list(source.branch_ids):
            # Recurse into the branch:
            child = self[branch_id]
            for child_lod_items in self._iter_tree(
                    child.symbol, child, child.next_id
                    ):
                yield child_lod_items

            # The caller might have deleted the branch that was just
            # yielded.  Only record it if it is still present.
            try:
                surviving_branch = self[branch_id]
            except KeyError:
                pass
            else:
                branches.append(surviving_branch)

        for tag_id in source.tag_ids:
            tags.append(self[tag_id])

    if cvs_branch is not None:
        # Include the symbols sprouting directly from the CVSBranch:
        for nested in walk_symbols(cvs_branch):
            yield nested

    rev_id = start_id
    while rev_id is not None:
        revision = self[rev_id]
        revisions.append(revision)

        for nested in walk_symbols(revision):
            yield nested

        rev_id = revision.next_id

    yield LODItems(lod, cvs_branch, revisions, branches, tags)
def iter_lods(self):
    """Generate an LODItems for every LOD in this file, depth-first.

    The traversal starts at each root node, but the LODs are returned
    in depth-first order.

    The CVSFileItems instance may be modified while the traversal is
    in progress, but only in ways that don't affect the tree structure
    above (i.e., towards the trunk from) the current LOD."""

    # Work on a copy of root_ids so that callers can change the set:
    for root_id in list(self.root_ids):
        root = self[root_id]
        if isinstance(root, CVSRevision):
            # This LOD doesn't have a CVSBranch associated with it.
            # Either it is Trunk, or it is a branch whose CVSBranch has
            # been deleted.
            lod = root.lod
            cvs_branch = None
            start_id = root_id
        elif isinstance(root, CVSBranch):
            # This is a Branch that has been severed from the rest of
            # the tree.
            lod = root.symbol
            cvs_branch = root
            start_id = root.next_id
        else:
            raise InternalError('Unexpected root item: %s' % (root,))

        for lod_items in self._iter_tree(lod, cvs_branch, start_id):
            yield lod_items
def iter_deltatext_ancestors(self, cvs_rev):
    """Generate the delta-dependency ancestors of CVS_REV.

    The ancestors are generated in deltatext order: a revision that is
    not on trunk is followed by its prev_id, whereas a trunk revision
    is followed by its next_id.  In other words, the walk goes back
    along branches towards trunk and then outwards along trunk towards
    HEAD, where it ends."""

    ancestor = cvs_rev
    while True:
        # Determine the next candidate source revision:
        if not isinstance(ancestor.lod, Trunk):
            ancestor = self[ancestor.prev_id]
        elif ancestor.next_id is not None:
            ancestor = self[ancestor.next_id]
        else:
            # HEAD has no ancestors, so we are done:
            return

        yield ancestor
367 def _sever_branch(self, lod_items):
368 """Sever the branch from its source and discard the CVSBranch.
370 LOD_ITEMS describes a branch that should be severed from its
371 source, deleting the CVSBranch and creating a new root. Also set
372 LOD_ITEMS.cvs_branch to None.
374 If LOD_ITEMS has no source (e.g., because it is the trunk branch
375 or because it has already been severed), do nothing.
377 This method can only be used before symbols have been grafted onto
378 CVSBranches. It does not adjust NTDBR, NTDBR_PREV_ID or
379 NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""
381 cvs_branch = lod_items.cvs_branch
382 if cvs_branch is None:
383 return
385 assert not cvs_branch.tag_ids
386 assert not cvs_branch.branch_ids
387 source_rev = self[cvs_branch.source_id]
389 # We only cover the following case, even though after
390 # FilterSymbolsPass cvs_branch.source_id might refer to another
391 # CVSBranch.
392 assert isinstance(source_rev, CVSRevision)
394 # Delete the CVSBranch itself:
395 lod_items.cvs_branch = None
396 del self[cvs_branch.id]
398 # Delete the reference from the source revision to the CVSBranch:
399 source_rev.branch_ids.remove(cvs_branch.id)
401 # Delete the reference from the first revision on the branch to
402 # the CVSBranch:
403 if lod_items.cvs_revisions:
404 first_rev = lod_items.cvs_revisions[0]
406 # Delete the reference from first_rev to the CVSBranch:
407 first_rev.first_on_branch_id = None
409 # Delete the reference from the source revision to the first
410 # revision on the branch:
411 source_rev.branch_commit_ids.remove(first_rev.id)
413 # ...and vice versa:
414 first_rev.prev_id = None
416 # Change the type of first_rev (e.g., from Change to Add):
417 first_rev.__class__ = cvs_revision_type_map[
418 (isinstance(first_rev, CVSRevisionModification), False,)
421 # Now first_rev is a new root:
422 self.root_ids.add(first_rev.id)
424 def adjust_ntdbrs(self, ntdbr_cvs_revs):
425 """Adjust the specified non-trunk default branch revisions.
427 NTDBR_CVS_REVS is a list of CVSRevision instances in this file
428 that have been determined to be non-trunk default branch
429 revisions.
431 The first revision on the default branch is handled strangely by
432 CVS. If a file is imported (as opposed to being added), CVS
433 creates a 1.1 revision, then creates a vendor branch 1.1.1 based
434 on 1.1, then creates a 1.1.1.1 revision that is identical to the
435 1.1 revision (i.e., its deltatext is empty). The log message that
436 the user typed when importing is stored with the 1.1.1.1 revision.
437 The 1.1 revision always contains a standard, generated log
438 message, 'Initial revision\n'.
440 When we detect a straightforward import like this, we want to
441 handle it by deleting the 1.1 revision (which doesn't contain any
442 useful information) and making 1.1.1.1 into an independent root in
443 the file's dependency tree. In SVN, 1.1.1.1 will be added
444 directly to the vendor branch with its initial content. Then in a
445 special 'post-commit', the 1.1.1.1 revision is copied back to
446 trunk.
448 If the user imports again to the same vendor branch, then CVS
449 creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch,
450 *without* counterparts in trunk (even though these revisions
451 effectively play the role of trunk revisions). So after we add
452 such revisions to the vendor branch, we also copy them back to
453 trunk in post-commits.
455 Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to
456 True. Also, if there is a 1.2 revision, then set that revision to
457 depend on the last non-trunk default branch revision and possibly
458 adjust its type accordingly."""
460 for cvs_rev in ntdbr_cvs_revs:
461 cvs_rev.ntdbr = True
463 # Look for a 1.2 revision:
464 rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
466 rev_1_2 = self.get(rev_1_1.next_id)
467 if rev_1_2 is not None:
468 # Revision 1.2 logically follows the imported revisions, not
469 # 1.1. Accordingly, connect it to the last NTDBR and possibly
470 # change its type.
471 last_ntdbr = ntdbr_cvs_revs[-1]
472 rev_1_2.ntdbr_prev_id = last_ntdbr.id
473 last_ntdbr.ntdbr_next_id = rev_1_2.id
474 rev_1_2.__class__ = cvs_revision_type_map[(
475 isinstance(rev_1_2, CVSRevisionModification),
476 isinstance(last_ntdbr, CVSRevisionModification),
479 def process_live_ntdb(self, vendor_lod_items):
480 """VENDOR_LOD_ITEMS is a live default branch; process it.
482 In this case, all revisions on the default branch are NTDBRs and
483 it is an error if there is also a '1.2' revision.
485 Return True iff this transformation really does something. Raise
486 a VendorBranchError if there is a '1.2' revision."""
488 rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
489 rev_1_2_id = rev_1_1.next_id
490 if rev_1_2_id is not None:
491 raise VendorBranchError(
492 'File \'%s\' has default branch=%s but also a revision %s'
493 % (self.cvs_file.rcs_path,
494 vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,)
497 ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
499 if ntdbr_cvs_revs:
500 self.adjust_ntdbrs(ntdbr_cvs_revs)
501 return True
502 else:
503 return False
505 def process_historical_ntdb(self, vendor_lod_items):
506 """There appears to have been a non-trunk default branch in the past.
508 There is currently no default branch, but the branch described by
509 file appears to have been imported. So our educated guess is that
510 all revisions on the '1.1.1' branch (described by
511 VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2'
512 were non-trunk default branch revisions.
514 Return True iff this transformation really does something.
516 This really only handles standard '1.1.1.*'-style vendor
517 revisions. One could conceivably have a file whose default branch
518 is 1.1.3 or whatever, or was that at some point in time, with
519 vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default
520 branch gone now, we'd have no basis for assuming that the
521 non-standard vendor branch had ever been the default branch
522 anyway.
524 Note that we rely on comparisons between the timestamps of the
525 revisions on the vendor branch and that of revision 1.2, even
526 though the timestamps might be incorrect due to clock skew. We
527 could do a slightly better job if we used the changeset
528 timestamps, as it is possible that the dependencies that went into
529 determining those timestamps are more accurate. But that would
530 require an extra pass or two."""
532 rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
533 rev_1_2_id = rev_1_1.next_id
535 if rev_1_2_id is None:
536 rev_1_2_timestamp = None
537 else:
538 rev_1_2_timestamp = self[rev_1_2_id].timestamp
540 ntdbr_cvs_revs = []
541 for cvs_rev in vendor_lod_items.cvs_revisions:
542 if rev_1_2_timestamp is not None \
543 and cvs_rev.timestamp >= rev_1_2_timestamp:
544 # That's the end of the once-default branch.
545 break
546 ntdbr_cvs_revs.append(cvs_rev)
548 if ntdbr_cvs_revs:
549 self.adjust_ntdbrs(ntdbr_cvs_revs)
550 return True
551 else:
552 return False
def imported_remove_1_1(self, vendor_lod_items):
    """Remove the 1.1 revision of an imported file, if possible.

    VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch;
    it must contain at least one revision.  See adjust_ntdbrs() for
    more information.

    Nothing is done unless the first vendor revision is a
    CVSRevisionModification without deltatext and the source revision
    of the vendor branch has no predecessor.  Otherwise the vendor
    branch is severed, the source revision is deleted, its successor
    (if any) becomes a root, and its tags, branches and branch commits
    are moved to the first vendor revision."""

    assert vendor_lod_items.cvs_revisions
    first_vendor_rev = vendor_lod_items.cvs_revisions[0]

    if not isinstance(first_vendor_rev, CVSRevisionModification):
        return
    if first_vendor_rev.deltatext_exists:
        return

    vendor_branch = vendor_lod_items.cvs_branch
    rev_1_1 = self[vendor_branch.source_id]
    assert isinstance(rev_1_1, CVSRevision)

    if rev_1_1.prev_id:
        # That's not a revision 1.1 after all, since it has a
        # predecessor.
        return

    logger.debug('Removing unnecessary revision %s' % (rev_1_1,))

    # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
    self._sever_branch(vendor_lod_items)

    # Delete rev_1_1 (it has to leave root_ids before it can be
    # deleted):
    self.root_ids.remove(rev_1_1.id)
    del self[rev_1_1.id]
    rev_1_2_id = rev_1_1.next_id
    if rev_1_2_id is not None:
        rev_1_2 = self[rev_1_2_id]
        rev_1_2.prev_id = None
        self.root_ids.add(rev_1_2.id)

    new_source_id = first_vendor_rev.id

    # Move any tags from rev_1_1 to the first vendor revision:
    first_vendor_rev.tag_ids.extend(rev_1_1.tag_ids)
    for tag_id in rev_1_1.tag_ids:
        moved_tag = self[tag_id]
        moved_tag.source_lod = first_vendor_rev.lod
        moved_tag.source_id = new_source_id

    # Likewise the branches, which are inserted in front of the
    # existing ones:
    first_vendor_rev.branch_ids[0:0] = rev_1_1.branch_ids
    for branch_id in rev_1_1.branch_ids:
        moved_branch = self[branch_id]
        moved_branch.source_lod = first_vendor_rev.lod
        moved_branch.source_id = new_source_id

    # ...and the branch commits:
    first_vendor_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
    for commit_id in rev_1_1.branch_commit_ids:
        branch_commit = self[commit_id]
        branch_commit.prev_id = new_source_id
def _is_unneeded_initial_trunk_delete(self, cvs_item, metadata_db):
    """Return True iff CVS_ITEM is an initial trunk delete to be dropped.

    CVS_ITEM qualifies if it is a CVSRevisionNoop with revision number
    '1.1' on trunk that closes no symbols, is not an NTDBR, and whose
    log message (looked up in METADATA_DB via its metadata_id) matches
    one of the messages generated when a file is added on a branch."""

    disqualified = (
        # This rule can only be applied to dead revisions.
        not isinstance(cvs_item, CVSRevisionNoop)
        or cvs_item.rev != '1.1'
        or not isinstance(cvs_item.lod, Trunk)
        or cvs_item.closed_symbols
        or cvs_item.ntdbr
        )
    if disqualified:
        return False

    log_msg = metadata_db[cvs_item.metadata_id].log_msg
    generated_message_patterns = (
        r'file .* was initially added on branch .*\.\n$',
        # This variant commit message was reported by one user:
        r'file .* was added on branch .*\n$',
        )
    return any(
        re.match(pattern, log_msg)
        for pattern in generated_message_patterns
        )
def remove_unneeded_initial_trunk_delete(self, metadata_db):
    """Remove the unneeded initial trunk delete of this file, if any.

    If a file is added on a branch, then a trunk revision is added at
    the same time in the 'Dead' state.  This revision doesn't do
    anything useful, so delete it.  METADATA_DB is used to look up the
    log messages of the candidate revisions."""

    for root_id in self.root_ids:
        dead_rev = self[root_id]
        if self._is_unneeded_initial_trunk_delete(dead_rev, metadata_db):
            break
    else:
        # No root qualifies, so there is nothing to remove.
        return

    # This can only happen once per file, so only the first match is
    # processed.
    logger.debug('Removing unnecessary delete %s' % (dead_rev,))

    # Sever any CVSBranches rooted at dead_rev.  Iterate over a copy,
    # because severing removes entries from dead_rev.branch_ids.
    for branch_id in list(dead_rev.branch_ids):
        branch = self[branch_id]
        self._sever_branch(self.get_lod_items(branch))

    # Tagging a dead revision doesn't do anything, so remove any
    # CVSTags that refer to dead_rev:
    while dead_rev.tag_ids:
        del self[dead_rev.tag_ids.pop()]

    # Now delete dead_rev itself:
    self.root_ids.remove(dead_rev.id)
    del self[dead_rev.id]
    if dead_rev.next_id is not None:
        successor = self[dead_rev.next_id]
        successor.prev_id = None
        self.root_ids.add(successor.id)
668 def _is_unneeded_initial_branch_delete(self, lod_items, metadata_db):
669 """Return True iff the initial revision in LOD_ITEMS can be deleted."""
671 if not lod_items.cvs_revisions:
672 return False
674 cvs_revision = lod_items.cvs_revisions[0]
676 if cvs_revision.ntdbr:
677 return False
679 if not isinstance(cvs_revision, CVSRevisionAbsent):
680 return False
682 if cvs_revision.branch_ids:
683 return False
685 log_msg = metadata_db[cvs_revision.metadata_id].log_msg
686 return bool(re.match(
687 r'file .* was added on branch .* on '
688 r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
689 '\n$',
690 log_msg,
def remove_initial_branch_deletes(self, metadata_db):
    """Remove first branch revisions that are unnecessary deletes.

    If a file is added on a branch (whether or not it already existed
    on trunk), then new versions of CVS add a first branch revision in
    the 'dead' state (to indicate that the file did not exist on the
    branch when the branch was created) followed by the second branch
    revision, which is an add.  When this situation is encountered,
    the branch is severed from its source and the first branch
    revision is deleted.  METADATA_DB is used to look up log
    messages."""

    for lod_items in self.iter_lods():
        if not self._is_unneeded_initial_branch_delete(
                lod_items, metadata_db
                ):
            continue

        dead_rev = lod_items.cvs_revisions[0]
        logger.debug(
            'Removing unnecessary initial branch delete %s' % (dead_rev,)
            )

        # Sever the branch from its source if necessary:
        self._sever_branch(lod_items)

        # Delete the first revision on the branch:
        self.root_ids.remove(dead_rev.id)
        del self[dead_rev.id]

        # If it had a successor, adjust its backreference and add it to
        # the root_ids:
        if dead_rev.next_id is not None:
            successor = self[dead_rev.next_id]
            successor.prev_id = None
            self.root_ids.add(successor.id)

        # Tagging a dead revision doesn't do anything, so remove any
        # tags that were set on it:
        for tag_id in dead_rev.tag_ids:
            del self[tag_id]
730 def _exclude_tag(self, cvs_tag):
731 """Exclude the specified CVS_TAG."""
733 del self[cvs_tag.id]
735 # A CVSTag is the successor of the CVSRevision that it
736 # sprouts from. Delete this tag from that revision's
737 # tag_ids:
738 self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id)
740 def _exclude_branch(self, lod_items):
741 """Exclude the branch described by LOD_ITEMS, including its revisions.
743 (Do not update the LOD_ITEMS instance itself.)
745 If the LOD starts with non-trunk default branch revisions, leave
746 the branch and the NTDB revisions in place, but delete any
747 subsequent revisions that are not NTDB revisions. In this case,
748 return True; otherwise return False"""
750 if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
751 for cvs_rev in lod_items.cvs_revisions:
752 if not cvs_rev.ntdbr:
753 # We've found the first non-NTDBR, and it's stored in cvs_rev:
754 break
755 else:
756 # There was no revision following the NTDBRs:
757 cvs_rev = None
759 if cvs_rev:
760 last_ntdbr = self[cvs_rev.prev_id]
761 last_ntdbr.next_id = None
762 while True:
763 del self[cvs_rev.id]
764 if cvs_rev.next_id is None:
765 break
766 cvs_rev = self[cvs_rev.next_id]
768 return True
770 else:
771 if lod_items.cvs_branch is not None:
772 # Delete the CVSBranch itself:
773 cvs_branch = lod_items.cvs_branch
775 del self[cvs_branch.id]
777 # A CVSBranch is the successor of the CVSRevision that it
778 # sprouts from. Delete this branch from that revision's
779 # branch_ids:
780 self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)
782 if lod_items.cvs_revisions:
783 # The first CVSRevision on the branch has to be either detached
784 # from the revision from which the branch sprang, or removed
785 # from self.root_ids:
786 cvs_rev = lod_items.cvs_revisions[0]
787 if cvs_rev.prev_id is None:
788 self.root_ids.remove(cvs_rev.id)
789 else:
790 self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id)
792 for cvs_rev in lod_items.cvs_revisions:
793 del self[cvs_rev.id]
795 return False
def graft_ntdbr_to_trunk(self):
    """Graft the non-trunk default branch revisions onto trunk.

    The NTDB revisions should already be alone on a branch that may or
    may not have a CVSBranch connecting it to trunk.  The first LOD
    found whose first revision is an NTDBR is severed from its source
    and spliced into trunk between the predecessor of its first
    revision and the ntdbr_next_id successor of its last revision (as
    far as those exist).  Its revisions, branches and tags are then
    reassigned to trunk, and the method returns."""

    for lod_items in self.iter_lods():
        revisions = lod_items.cvs_revisions
        if not (revisions and revisions[0].ntdbr):
            continue

        assert lod_items.is_pure_ntdb()

        first_rev = revisions[0]
        last_rev = revisions[-1]
        # Look up the neighbours before the links are changed:
        rev_1_1 = self.get(first_rev.prev_id)
        rev_1_2 = self.get(last_rev.ntdbr_next_id)

        self._sever_branch(lod_items)

        if rev_1_1 is not None:
            rev_1_1.next_id = first_rev.id
            first_rev.prev_id = rev_1_1.id

            # first_rev has a predecessor again, so it is no longer a
            # root:
            self.root_ids.remove(first_rev.id)

            type_key = (
                isinstance(first_rev, CVSRevisionModification),
                isinstance(rev_1_1, CVSRevisionModification),
                )
            first_rev.__class__ = cvs_revision_type_map[type_key]

        if rev_1_2 is not None:
            rev_1_2.ntdbr_prev_id = None
            last_rev.ntdbr_next_id = None

            if rev_1_2.prev_id is None:
                self.root_ids.remove(rev_1_2.id)

            rev_1_2.prev_id = last_rev.id
            last_rev.next_id = rev_1_2.id

            # The effective_pred_id of rev_1_2 was not changed, so we
            # don't have to change rev_1_2's type.

        for grafted_rev in revisions:
            grafted_rev.ntdbr = False
            grafted_rev.lod = self.trunk

        for branch in lod_items.cvs_branches:
            branch.source_lod = self.trunk

        for tag in lod_items.cvs_tags:
            tag.source_lod = self.trunk

        return
def exclude_non_trunk(self):
    """Delete all tags and branches.

    Every tag is excluded, and so is every LOD that is not Trunk.  If
    NTDB revisions had to be left in place while excluding a branch,
    they are finally grafted onto trunk."""

    ntdbr_excluded = False
    for lod_items in self.iter_lods():
        # Iterate over a copy, because the list shrinks as we go:
        for tag in list(lod_items.cvs_tags):
            self._exclude_tag(tag)
            lod_items.cvs_tags.remove(tag)

        if isinstance(lod_items.lod, Trunk):
            continue

        assert not lod_items.cvs_branches

        ntdbr_excluded |= self._exclude_branch(lod_items)

    if ntdbr_excluded:
        self.graft_ntdbr_to_trunk()
def filter_excluded_symbols(self):
    """Delete any excluded symbols and references to them.

    Tags whose symbol is an ExcludedSymbol are excluded, as are whole
    LODs that are ExcludedSymbols.  If NTDB revisions had to be left in
    place while excluding a branch, they are finally grafted onto
    trunk."""

    ntdbr_excluded = False
    for lod_items in self.iter_lods():
        # Delete any excluded tags (iterating over a copy, because the
        # list shrinks as we go):
        for tag in list(lod_items.cvs_tags):
            if not isinstance(tag.symbol, ExcludedSymbol):
                continue
            self._exclude_tag(tag)
            lod_items.cvs_tags.remove(tag)

        # Delete the whole branch if it is to be excluded:
        if not isinstance(lod_items.lod, ExcludedSymbol):
            continue

        # A symbol can only be excluded if no other symbols spring from
        # it.  This was already checked in CollateSymbolsPass, so these
        # conditions should already be satisfied.
        assert not list(lod_items.iter_blockers())

        ntdbr_excluded |= self._exclude_branch(lod_items)

    if ntdbr_excluded:
        self.graft_ntdbr_to_trunk()
def _mutate_branch_to_tag(self, cvs_branch):
    """Replace the branch CVS_BRANCH by a tag with the same id.

    The new CVSTag takes over the id, file, symbol, source and
    revision_reader_token of CVS_BRANCH, and the id is moved from the
    source's branch_ids to its tag_ids.  A FatalError is raised if the
    branch has commits (i.e., its next_id is not None)."""

    if cvs_branch.next_id is not None:
        # This shouldn't happen because it was checked in
        # CollateSymbolsPass:
        raise FatalError('Attempt to exclude a branch with commits.')

    replacement = CVSTag(
        cvs_branch.id,
        cvs_branch.cvs_file,
        cvs_branch.symbol,
        cvs_branch.source_lod,
        cvs_branch.source_id,
        cvs_branch.revision_reader_token,
        )
    # The tag is stored under the same id and so replaces the branch:
    self.add(replacement)

    source = self[replacement.source_id]
    source.branch_ids.remove(replacement.id)
    source.tag_ids.append(replacement.id)
def _mutate_tag_to_branch(self, cvs_tag):
    """Replace the tag CVS_TAG by a branch with the same id.

    The new CVSBranch takes over the id, file, symbol, source and
    revision_reader_token of CVS_TAG; its fourth and seventh
    constructor arguments are None.  The id is moved from the source's
    tag_ids to its branch_ids."""

    replacement = CVSBranch(
        cvs_tag.id,
        cvs_tag.cvs_file,
        cvs_tag.symbol,
        None,
        cvs_tag.source_lod,
        cvs_tag.source_id,
        None,
        cvs_tag.revision_reader_token,
        )
    # The branch is stored under the same id and so replaces the tag:
    self.add(replacement)

    source = self[replacement.source_id]
    source.tag_ids.remove(replacement.id)
    source.branch_ids.append(replacement.id)
def _mutate_symbol(self, cvs_symbol):
    """Mutate CVS_SYMBOL if its kind disagrees with that of its symbol.

    A CVSBranch whose symbol is a Tag is turned into a tag, and a
    CVSTag whose symbol is a Branch is turned into a branch.  Anything
    else is left alone."""

    target = cvs_symbol.symbol
    is_branch_for_tag = (
        isinstance(cvs_symbol, CVSBranch) and isinstance(target, Tag)
        )
    if is_branch_for_tag:
        self._mutate_branch_to_tag(cvs_symbol)
        return

    if isinstance(cvs_symbol, CVSTag) and isinstance(target, Branch):
        self._mutate_tag_to_branch(cvs_symbol)
def mutate_symbols(self):
    """Force every CVSSymbol to be a tag or a branch, as its symbol says.

    Each stored CVSSymbol is passed to _mutate_symbol().  A
    RuntimeError is raised for an item that is neither a CVSRevision
    nor a CVSSymbol."""

    for item in self.values():
        if isinstance(item, CVSRevision):
            # This CVSRevision may be affected by the mutation of any
            # CVSSymbols that it references, but there is nothing to do
            # here directly.
            continue

        if not isinstance(item, CVSSymbol):
            raise RuntimeError('Unknown cvs item type')

        self._mutate_symbol(item)
def _adjust_tag_parent(self, cvs_tag):
    """Graft CVS_TAG onto its preferred parent, if possible.

    CVS_TAG is an instance of CVSTag.  This method must be called in
    leaf-to-trunk order.

    The preferred parent is the symbol whose id is
    cvs_tag.symbol.preferred_parent_id.  Nothing is changed if that
    symbol already is the parent, if it is Trunk, or if no branch with
    that symbol sprouts from the revision that the tag sprouts from.
    Otherwise the tag is moved from the revision to that branch."""

    # The Symbol that cvs_tag would like to have as a parent:
    preferred_parent = Ctx()._symbol_db.get_symbol(
        cvs_tag.symbol.preferred_parent_id)

    if cvs_tag.source_lod == preferred_parent:
        # The preferred parent is already the parent.
        return

    # The CVSRevision that is its direct parent:
    source = self[cvs_tag.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(preferred_parent, Trunk):
        # It is not possible to graft *onto* Trunk:
        return

    # Try to find the preferred parent among the possible parents:
    parent = None
    for branch_id in source.branch_ids:
        candidate = self[branch_id]
        if candidate.symbol == preferred_parent:
            # We found it!
            parent = candidate
            break

    if parent is None:
        # The preferred parent is not a possible parent in this file.
        return

    assert isinstance(parent, CVSBranch)

    logger.debug('Grafting %s from %s (on %s) onto %s' % (
        cvs_tag, source, source.lod, parent,))

    # Switch parent:
    source.tag_ids.remove(cvs_tag.id)
    parent.tag_ids.append(cvs_tag.id)
    cvs_tag.source_lod = parent.symbol
    cvs_tag.source_id = parent.id
def _adjust_branch_parents(self, cvs_branch):
    """Graft CVS_BRANCH onto its preferred parent when this file allows it.

    CVS_BRANCH is an instance of CVSBranch.  Nothing is changed if the
    branch already has its preferred parent as source_lod, if the
    preferred parent is Trunk, or if CVS_BRANCH's own symbol is met in
    the source revision's branch_ids before the preferred parent is.
    InternalError is raised if the search runs off the end of
    branch_ids without meeting either.

    This method must be called in leaf-to-trunk order."""

    # The Symbol under which cvs_branch would rather live:
    wanted = Ctx()._symbol_db.get_symbol(
        cvs_branch.symbol.preferred_parent_id)

    if cvs_branch.source_lod == wanted:
        # Already where it wants to be.
        return

    # The item from which the branch currently sprouts.  It has not
    # been adjusted yet, so it must still be a CVSRevision:
    source = self[cvs_branch.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(wanted, Trunk):
        # Nothing can be grafted *onto* Trunk:
        return

    # Scan the sibling branches in order, looking for the preferred
    # parent:
    for sibling_id in source.branch_ids:
        candidate = self[sibling_id]
        if candidate.symbol == wanted:
            parent = candidate
            break
        if candidate.symbol == cvs_branch.symbol:
            # Only branches that come before the one being adjusted may
            # serve as its parent, so give up and keep the current
            # parentage:
            return
    else:
        # The scan should always stop at one of the two cases above.
        raise InternalError(
            'Possible parent search did not terminate as expected')

    assert isinstance(parent, CVSBranch)

    logger.debug('Grafting %s from %s (on %s) onto %s' % (
        cvs_branch, source, source.lod, parent,))

    # Detach the branch from the revision and hang it on its new parent:
    source.branch_ids.remove(cvs_branch.id)
    parent.branch_ids.append(cvs_branch.id)
    cvs_branch.source_lod = parent.symbol
    cvs_branch.source_id = parent.id
def adjust_parents(self):
    """Move symbols onto their preferred parents where permitted.

    Each line of development produced by self.iter_lods() is visited in
    turn.  Its tags are passed to _adjust_tag_parent() and then its
    branches, last to first, are passed to _adjust_branch_parents();
    those methods decide whether a graft is wanted and allowed."""

    graft_tag = self._adjust_tag_parent
    graft_branch = self._adjust_branch_parents

    for lod in self.iter_lods():
        for tag in lod.cvs_tags:
            graft_tag(tag)

        # Branches have to be handled back to front.  The target of a
        # branch graft always sits earlier in the list than the branch
        # being grafted, so going backwards guarantees that the target
        # has not itself been moved yet when the branch is processed.
        for branch in reversed(lod.cvs_branches):
            graft_branch(branch)
def _get_revision_source(self, cvs_symbol):
    """Return the CVSRevision from which CVS_SYMBOL ultimately sprouts.

    The chain of source_id references is followed, starting at
    CVS_SYMBOL, until an item that is a CVSRevision is reached."""

    ancestor = self[cvs_symbol.source_id]
    while not isinstance(ancestor, CVSRevision):
        # Still not a revision; climb one more step up the chain.
        ancestor = self[ancestor.source_id]
    return ancestor
def refine_symbols(self):
    """Refine the classes of the CVSSymbols in this file.

    For each line of development, every tag and then every branch has
    its __class__ replaced by an entry of cvs_tag_type_map or
    cvs_branch_type_map respectively.  The entry is selected by whether
    the symbol's ultimate source revision is a
    CVSRevisionModification."""

    for lod in self.iter_lods():
        for symbols, type_map in (
                (lod.cvs_tags, cvs_tag_type_map),
                (lod.cvs_branches, cvs_branch_type_map),
                ):
            for cvs_symbol in symbols:
                origin = self._get_revision_source(cvs_symbol)
                is_modification = isinstance(
                    origin, CVSRevisionModification)
                cvs_symbol.__class__ = type_map[is_modification]
def determine_revision_properties(self, revision_property_setters):
    """Fill in properties and properties_changed on every CVSRevision.

    REVISION_PROPERTY_SETTERS is an iterable of objects with a
    set_properties(cvs_rev) method; each is applied, in order, to each
    revision after its properties have been reset to an empty dict.

    A second pass then sets properties_changed: True for a
    CVSRevisionAdd, the result of comparing the revision's properties
    with those of its effective predecessor for a CVSRevisionChange,
    and False otherwise."""

    # First pass: compute the properties of every revision.
    for lod in self.iter_lods():
        for rev in lod.cvs_revisions:
            rev.properties = {}
            for setter in revision_property_setters:
                setter.set_properties(rev)

    # Second pass: all properties are now known, so each revision can
    # be compared with the one that precedes it.
    for lod in self.iter_lods():
        for rev in lod.cvs_revisions:
            if isinstance(rev, CVSRevisionAdd):
                changed = True
            elif isinstance(rev, CVSRevisionChange):
                predecessor = self[rev.get_effective_prev_id()]
                before = predecessor.get_properties()
                after = rev.get_properties()
                changed = after != before
            else:
                changed = False

            rev.properties_changed = changed
def record_opened_symbols(self):
    """Set opened_symbols on each CVSRevision and CVSBranch.

    For every such item, opened_symbols becomes a list with one
    (symbol id, CVSSymbol id) tuple for each id returned by the item's
    get_cvs_symbol_ids_opened().  Other items are left untouched."""

    for item in self.values():
        if not isinstance(item, (CVSRevision, CVSBranch)):
            continue

        item.opened_symbols = []
        for opened_id in item.get_cvs_symbol_ids_opened():
            opened = self[opened_id]
            entry = (opened.symbol.id, opened.id,)
            item.opened_symbols.append(entry)
def record_closed_symbols(self):
    """Set closed_symbols on each CVSRevision.

    closed_symbols is the concatenation of the opened_symbols lists of
    all of the items whose ids are returned by the revision's
    get_ids_closed().  In other words, a revision closes whatever
    symbols were opened by the items that it closes.

    Because opened_symbols is read here, this method must be called
    after record_opened_symbols()."""

    for item in self.values():
        if not isinstance(item, CVSRevision):
            continue

        item.closed_symbols = []
        for closed_id in item.get_ids_closed():
            closed = self[closed_id]
            item.closed_symbols.extend(closed.opened_symbols)