Allow initial branch deletes with tags to be omitted.
[cvs2svn.git] / cvs2svn_lib / cvs_file_items.py
blob584e4963065f0accb957a78d66e8a50457911ddd
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a class to manage the CVSItems related to one file."""
20 import re
22 from cvs2svn_lib.common import InternalError
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.context import Ctx
25 from cvs2svn_lib.log import Log
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.cvs_item import CVSRevision
31 from cvs2svn_lib.cvs_item import CVSRevisionModification
32 from cvs2svn_lib.cvs_item import CVSRevisionAbsent
33 from cvs2svn_lib.cvs_item import CVSRevisionNoop
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.cvs_item import CVSBranch
36 from cvs2svn_lib.cvs_item import CVSTag
37 from cvs2svn_lib.cvs_item import cvs_revision_type_map
38 from cvs2svn_lib.cvs_item import cvs_branch_type_map
39 from cvs2svn_lib.cvs_item import cvs_tag_type_map
42 class VendorBranchError(Exception):
43 """There is an error in the structure of the file revision tree."""
45 pass
48 class LODItems(object):
49 def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
50 # The LineOfDevelopment described by this instance.
51 self.lod = lod
53 # The CVSBranch starting this LOD, if any; otherwise, None.
54 self.cvs_branch = cvs_branch
56 # The list of CVSRevisions on this LOD, if any. The CVSRevisions
57 # are listed in dependency order.
58 self.cvs_revisions = cvs_revisions
60 # A list of CVSBranches that sprout from this LOD (either from
61 # cvs_branch or from one of the CVSRevisions).
62 self.cvs_branches = cvs_branches
64 # A list of CVSTags that sprout from this LOD (either from
65 # cvs_branch or from one of the CVSRevisions).
66 self.cvs_tags = cvs_tags
68 def is_trivial_import(self):
69 """Return True iff this LOD is a trivial import branch in this file.
71 A trivial import branch is a branch that was used for a single
72 import and nothing else. Such a branch is eligible for being
73 grafted onto trunk, even if it has branch blockers."""
75 return (
76 len(self.cvs_revisions) == 1
77 and self.cvs_revisions[0].ntdbr
80 def is_pure_ntdb(self):
81 """Return True iff this LOD is a pure NTDB in this file.
83 A pure non-trunk default branch is defined to be a branch that
84 contains only NTDB revisions (and at least one of them). Such a
85 branch is eligible for being grafted onto trunk, even if it has
86 branch blockers."""
88 return (
89 self.cvs_revisions
90 and self.cvs_revisions[-1].ntdbr
93 def iter_blockers(self):
94 if self.is_pure_ntdb():
95 # Such a branch has no blockers, because the blockers can be
96 # grafted to trunk.
97 pass
98 else:
99 # Other branches are only blocked by symbols that sprout from
100 # non-NTDB revisions:
101 non_ntdbr_revision_ids = set()
102 for cvs_revision in self.cvs_revisions:
103 if not cvs_revision.ntdbr:
104 non_ntdbr_revision_ids.add(cvs_revision.id)
106 for cvs_tag in self.cvs_tags:
107 if cvs_tag.source_id in non_ntdbr_revision_ids:
108 yield cvs_tag
110 for cvs_branch in self.cvs_branches:
111 if cvs_branch.source_id in non_ntdbr_revision_ids:
112 yield cvs_branch
115 class CVSFileItems(object):
116 def __init__(self, cvs_file, trunk, cvs_items):
117 # The file whose data this instance holds.
118 self.cvs_file = cvs_file
120 # The symbol that represents "Trunk" in this file.
121 self.trunk = trunk
123 # A map from CVSItem.id to CVSItem:
124 self._cvs_items = {}
126 # The cvs_item_id of each root in the CVSItem forest. (A root is
127 # defined to be any CVSRevision with no prev_id.)
128 self.root_ids = set()
130 for cvs_item in cvs_items:
131 self.add(cvs_item)
132 if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None:
133 self.root_ids.add(cvs_item.id)
135 def __getstate__(self):
136 return (self.cvs_file.id, self.values(),)
138 def __setstate__(self, state):
139 (cvs_file_id, cvs_items,) = state
140 cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
141 CVSFileItems.__init__(
142 self, cvs_file, cvs_file.project.get_trunk(), cvs_items,
145 def add(self, cvs_item):
146 self._cvs_items[cvs_item.id] = cvs_item
148 def __getitem__(self, id):
149 """Return the CVSItem with the specified ID."""
151 return self._cvs_items[id]
153 def get(self, id, default=None):
154 return self._cvs_items.get(id, default)
156 def __delitem__(self, id):
157 assert id not in self.root_ids
158 del self._cvs_items[id]
160 def values(self):
161 return self._cvs_items.values()
163 def check_link_consistency(self):
164 """Check that the CVSItems are linked correctly with each other."""
166 for cvs_item in self.values():
167 try:
168 cvs_item.check_links(self)
169 except AssertionError:
170 Log().error(
171 'Link consistency error in %s\n'
172 'This is probably a bug internal to cvs2svn. Please file a bug\n'
173 'report including the following stack trace (see FAQ for more '
174 'info).'
175 % (cvs_item,))
176 raise
178 def _get_lod(self, lod, cvs_branch, start_id):
179 """Return the indicated LODItems.
181 LOD is the corresponding LineOfDevelopment. CVS_BRANCH is the
182 CVSBranch instance that starts the LOD if any; otherwise it is
183 None. START_ID is the id of the first CVSRevision on this LOD, or
184 None if there are none."""
186 cvs_revisions = []
187 cvs_branches = []
188 cvs_tags = []
190 def process_subitems(cvs_item):
191 """Process the branches and tags that are rooted in CVS_ITEM.
193 CVS_ITEM can be a CVSRevision or a CVSBranch."""
195 for branch_id in cvs_item.branch_ids[:]:
196 cvs_branches.append(self[branch_id])
198 for tag_id in cvs_item.tag_ids:
199 cvs_tags.append(self[tag_id])
201 if cvs_branch is not None:
202 # Include the symbols sprouting directly from the CVSBranch:
203 process_subitems(cvs_branch)
205 id = start_id
206 while id is not None:
207 cvs_rev = self[id]
208 cvs_revisions.append(cvs_rev)
209 process_subitems(cvs_rev)
210 id = cvs_rev.next_id
212 return LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
214 def get_lod_items(self, cvs_branch):
215 """Return an LODItems describing the branch that starts at CVS_BRANCH.
217 CVS_BRANCH must be an instance of CVSBranch contained in this
218 CVSFileItems."""
220 return self._get_lod(cvs_branch.symbol, cvs_branch, cvs_branch.next_id)
222 def iter_root_lods(self):
223 """Iterate over the LODItems for all root LODs (non-recursively)."""
225 for id in list(self.root_ids):
226 cvs_item = self[id]
227 if isinstance(cvs_item, CVSRevision):
228 # This LOD doesn't have a CVSBranch associated with it.
229 # Either it is Trunk, or it is a branch whose CVSBranch has
230 # been deleted.
231 yield self._get_lod(cvs_item.lod, None, id)
232 elif isinstance(cvs_item, CVSBranch):
233 # This is a Branch that has been severed from the rest of the
234 # tree.
235 yield self._get_lod(cvs_item.symbol, cvs_item, cvs_item.next_id)
236 else:
237 raise InternalError('Unexpected root item: %s' % (cvs_item,))
239 def _iter_tree(self, lod, cvs_branch, start_id):
240 """Iterate over the tree that starts at the specified line of development.
242 LOD is the LineOfDevelopment where the iteration should start.
243 CVS_BRANCH is the CVSBranch instance that starts the LOD if any;
244 otherwise it is None. ID is the id of the first CVSRevision on
245 this LOD, or None if there are none.
247 There are two cases handled by this routine: trunk (where LOD is a
248 Trunk instance, CVS_BRANCH is None, and ID is the id of the 1.1
249 revision) and a branch (where LOD is a Branch instance, CVS_BRANCH
250 is a CVSBranch instance, and ID is either the id of the first
251 CVSRevision on the branch or None if there are no CVSRevisions on
252 the branch). Note that CVS_BRANCH and ID cannot simultaneously be
253 None.
255 Yield an LODItems instance for each line of development."""
257 cvs_revisions = []
258 cvs_branches = []
259 cvs_tags = []
261 def process_subitems(cvs_item):
262 """Process the branches and tags that are rooted in CVS_ITEM.
264 CVS_ITEM can be a CVSRevision or a CVSBranch."""
266 for branch_id in cvs_item.branch_ids[:]:
267 # Recurse into the branch:
268 branch = self[branch_id]
269 for lod_items in self._iter_tree(
270 branch.symbol, branch, branch.next_id
272 yield lod_items
273 # The caller might have deleted the branch that we just
274 # yielded. If it is no longer present, then do not add it to
275 # the list of cvs_branches.
276 try:
277 cvs_branches.append(self[branch_id])
278 except KeyError:
279 pass
281 for tag_id in cvs_item.tag_ids:
282 cvs_tags.append(self[tag_id])
284 if cvs_branch is not None:
285 # Include the symbols sprouting directly from the CVSBranch:
286 for lod_items in process_subitems(cvs_branch):
287 yield lod_items
289 id = start_id
290 while id is not None:
291 cvs_rev = self[id]
292 cvs_revisions.append(cvs_rev)
294 for lod_items in process_subitems(cvs_rev):
295 yield lod_items
297 id = cvs_rev.next_id
299 yield LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
301 def iter_lods(self):
302 """Iterate over LinesOfDevelopment in this file, in depth-first order.
304 For each LOD, yield an LODItems instance. The traversal starts at
305 each root node but returns the LODs in depth-first order.
307 It is allowed to modify the CVSFileItems instance while the
308 traversal is occurring, but only in ways that don't affect the
309 tree structure above (i.e., towards the trunk from) the current
310 LOD."""
312 # Make a list out of root_ids so that callers can change it:
313 for id in list(self.root_ids):
314 cvs_item = self[id]
315 if isinstance(cvs_item, CVSRevision):
316 # This LOD doesn't have a CVSBranch associated with it.
317 # Either it is Trunk, or it is a branch whose CVSBranch has
318 # been deleted.
319 lod = cvs_item.lod
320 cvs_branch = None
321 elif isinstance(cvs_item, CVSBranch):
322 # This is a Branch that has been severed from the rest of the
323 # tree.
324 lod = cvs_item.symbol
325 id = cvs_item.next_id
326 cvs_branch = cvs_item
327 else:
328 raise InternalError('Unexpected root item: %s' % (cvs_item,))
330 for lod_items in self._iter_tree(lod, cvs_branch, id):
331 yield lod_items
333 def iter_deltatext_ancestors(self, cvs_rev):
334 """Generate the delta-dependency ancestors of CVS_REV.
336 Generate then ancestors of CVS_REV in deltatext order; i.e., back
337 along branches towards trunk, then outwards along trunk towards
338 HEAD."""
340 while True:
341 # Determine the next candidate source revision:
342 if isinstance(cvs_rev.lod, Trunk):
343 if cvs_rev.next_id is None:
344 # HEAD has no ancestors, so we are done:
345 return
346 else:
347 cvs_rev = self[cvs_rev.next_id]
348 else:
349 cvs_rev = self[cvs_rev.prev_id]
351 yield cvs_rev
353 def _sever_branch(self, lod_items):
354 """Sever the branch from its source and discard the CVSBranch.
356 LOD_ITEMS describes a branch that should be severed from its
357 source, deleting the CVSBranch and creating a new root. Also set
358 LOD_ITEMS.cvs_branch to None.
360 If LOD_ITEMS has no source (e.g., because it is the trunk branch
361 or because it has already been severed), do nothing.
363 This method can only be used before symbols have been grafted onto
364 CVSBranches. It does not adjust NTDBR, NTDBR_PREV_ID or
365 NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""
367 cvs_branch = lod_items.cvs_branch
368 if cvs_branch is None:
369 return
371 assert not cvs_branch.tag_ids
372 assert not cvs_branch.branch_ids
373 source_rev = self[cvs_branch.source_id]
375 # We only cover the following case, even though after
376 # FilterSymbolsPass cvs_branch.source_id might refer to another
377 # CVSBranch.
378 assert isinstance(source_rev, CVSRevision)
380 # Delete the CVSBranch itself:
381 lod_items.cvs_branch = None
382 del self[cvs_branch.id]
384 # Delete the reference from the source revision to the CVSBranch:
385 source_rev.branch_ids.remove(cvs_branch.id)
387 # Delete the reference from the first revision on the branch to
388 # the CVSBranch:
389 if lod_items.cvs_revisions:
390 first_rev = lod_items.cvs_revisions[0]
392 # Delete the reference from first_rev to the CVSBranch:
393 first_rev.first_on_branch_id = None
395 # Delete the reference from the source revision to the first
396 # revision on the branch:
397 source_rev.branch_commit_ids.remove(first_rev.id)
399 # ...and vice versa:
400 first_rev.prev_id = None
402 # Change the type of first_rev (e.g., from Change to Add):
403 first_rev.__class__ = cvs_revision_type_map[
404 (isinstance(first_rev, CVSRevisionModification), False,)
407 # Now first_rev is a new root:
408 self.root_ids.add(first_rev.id)
410 def adjust_ntdbrs(self, ntdbr_cvs_revs):
411 """Adjust the specified non-trunk default branch revisions.
413 NTDBR_CVS_REVS is a list of CVSRevision instances in this file
414 that have been determined to be non-trunk default branch
415 revisions.
417 The first revision on the default branch is handled strangely by
418 CVS. If a file is imported (as opposed to being added), CVS
419 creates a 1.1 revision, then creates a vendor branch 1.1.1 based
420 on 1.1, then creates a 1.1.1.1 revision that is identical to the
421 1.1 revision (i.e., its deltatext is empty). The log message that
422 the user typed when importing is stored with the 1.1.1.1 revision.
423 The 1.1 revision always contains a standard, generated log
424 message, 'Initial revision\n'.
426 When we detect a straightforward import like this, we want to
427 handle it by deleting the 1.1 revision (which doesn't contain any
428 useful information) and making 1.1.1.1 into an independent root in
429 the file's dependency tree. In SVN, 1.1.1.1 will be added
430 directly to the vendor branch with its initial content. Then in a
431 special 'post-commit', the 1.1.1.1 revision is copied back to
432 trunk.
434 If the user imports again to the same vendor branch, then CVS
435 creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch,
436 *without* counterparts in trunk (even though these revisions
437 effectively play the role of trunk revisions). So after we add
438 such revisions to the vendor branch, we also copy them back to
439 trunk in post-commits.
441 Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to
442 True. Also, if there is a 1.2 revision, then set that revision to
443 depend on the last non-trunk default branch revision and possibly
444 adjust its type accordingly."""
446 for cvs_rev in ntdbr_cvs_revs:
447 cvs_rev.ntdbr = True
449 # Look for a 1.2 revision:
450 rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
452 rev_1_2 = self.get(rev_1_1.next_id)
453 if rev_1_2 is not None:
454 # Revision 1.2 logically follows the imported revisions, not
455 # 1.1. Accordingly, connect it to the last NTDBR and possibly
456 # change its type.
457 last_ntdbr = ntdbr_cvs_revs[-1]
458 rev_1_2.ntdbr_prev_id = last_ntdbr.id
459 last_ntdbr.ntdbr_next_id = rev_1_2.id
460 rev_1_2.__class__ = cvs_revision_type_map[(
461 isinstance(rev_1_2, CVSRevisionModification),
462 isinstance(last_ntdbr, CVSRevisionModification),
465 def process_live_ntdb(self, vendor_lod_items):
466 """VENDOR_LOD_ITEMS is a live default branch; process it.
468 In this case, all revisions on the default branch are NTDBRs and
469 it is an error if there is also a '1.2' revision.
471 Return True iff this transformation really does something. Raise
472 a VendorBranchError if there is a '1.2' revision."""
474 rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
475 rev_1_2_id = rev_1_1.next_id
476 if rev_1_2_id is not None:
477 raise VendorBranchError(
478 'File \'%s\' has default branch=%s but also a revision %s'
479 % (self.cvs_file.filename,
480 vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,)
483 ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
485 if ntdbr_cvs_revs:
486 self.adjust_ntdbrs(ntdbr_cvs_revs)
487 return True
488 else:
489 return False
491 def process_historical_ntdb(self, vendor_lod_items):
492 """There appears to have been a non-trunk default branch in the past.
494 There is currently no default branch, but the branch described by
495 file appears to have been imported. So our educated guess is that
496 all revisions on the '1.1.1' branch (described by
497 VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2'
498 were non-trunk default branch revisions.
500 Return True iff this transformation really does something.
502 This really only handles standard '1.1.1.*'-style vendor
503 revisions. One could conceivably have a file whose default branch
504 is 1.1.3 or whatever, or was that at some point in time, with
505 vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default
506 branch gone now, we'd have no basis for assuming that the
507 non-standard vendor branch had ever been the default branch
508 anyway.
510 Note that we rely on comparisons between the timestamps of the
511 revisions on the vendor branch and that of revision 1.2, even
512 though the timestamps might be incorrect due to clock skew. We
513 could do a slightly better job if we used the changeset
514 timestamps, as it is possible that the dependencies that went into
515 determining those timestamps are more accurate. But that would
516 require an extra pass or two."""
518 rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
519 rev_1_2_id = rev_1_1.next_id
521 if rev_1_2_id is None:
522 rev_1_2_timestamp = None
523 else:
524 rev_1_2_timestamp = self[rev_1_2_id].timestamp
526 ntdbr_cvs_revs = []
527 for cvs_rev in vendor_lod_items.cvs_revisions:
528 if rev_1_2_timestamp is not None \
529 and cvs_rev.timestamp >= rev_1_2_timestamp:
530 # That's the end of the once-default branch.
531 break
532 ntdbr_cvs_revs.append(cvs_rev)
534 if ntdbr_cvs_revs:
535 self.adjust_ntdbrs(ntdbr_cvs_revs)
536 return True
537 else:
538 return False
540 def imported_remove_1_1(self, vendor_lod_items):
541 """This file was imported. Remove the 1.1 revision if possible.
543 VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch.
544 See adjust_ntdbrs() for more information."""
546 assert vendor_lod_items.cvs_revisions
547 cvs_rev = vendor_lod_items.cvs_revisions[0]
549 if isinstance(cvs_rev, CVSRevisionModification) \
550 and not cvs_rev.deltatext_exists:
551 cvs_branch = vendor_lod_items.cvs_branch
552 rev_1_1 = self[cvs_branch.source_id]
553 assert isinstance(rev_1_1, CVSRevision)
554 Log().debug('Removing unnecessary revision %s' % (rev_1_1,))
556 # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
557 self._sever_branch(vendor_lod_items)
559 # Delete rev_1_1:
560 self.root_ids.remove(rev_1_1.id)
561 del self[rev_1_1.id]
562 rev_1_2_id = rev_1_1.next_id
563 if rev_1_2_id is not None:
564 rev_1_2 = self[rev_1_2_id]
565 rev_1_2.prev_id = None
566 self.root_ids.add(rev_1_2.id)
568 # Move any tags and branches from rev_1_1 to cvs_rev:
569 cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
570 for id in rev_1_1.tag_ids:
571 cvs_tag = self[id]
572 cvs_tag.source_lod = cvs_rev.lod
573 cvs_tag.source_id = cvs_rev.id
574 cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids
575 for id in rev_1_1.branch_ids:
576 cvs_branch = self[id]
577 cvs_branch.source_lod = cvs_rev.lod
578 cvs_branch.source_id = cvs_rev.id
579 cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
580 for id in rev_1_1.branch_commit_ids:
581 cvs_rev2 = self[id]
582 cvs_rev2.prev_id = cvs_rev.id
584 def _is_unneeded_initial_trunk_delete(self, cvs_item, metadata_db):
585 if not isinstance(cvs_item, CVSRevisionNoop):
586 # This rule can only be applied to dead revisions.
587 return False
589 if cvs_item.rev != '1.1':
590 return False
592 if not isinstance(cvs_item.lod, Trunk):
593 return False
595 if cvs_item.closed_symbols:
596 return False
598 if cvs_item.ntdbr:
599 return False
601 log_msg = metadata_db[cvs_item.metadata_id].log_msg
602 return bool(re.match(
603 r'file .* was initially added on branch .*\.\n$',
604 log_msg,
607 def remove_unneeded_deletes(self, metadata_db):
608 """Remove unneeded deletes for this file.
610 If a file is added on a branch, then a trunk revision is added at
611 the same time in the 'Dead' state. This revision doesn't do
612 anything useful, so delete it."""
614 for id in self.root_ids:
615 cvs_item = self[id]
616 if self._is_unneeded_initial_trunk_delete(cvs_item, metadata_db):
617 Log().debug('Removing unnecessary delete %s' % (cvs_item,))
619 # Delete cvs_item:
620 self.root_ids.remove(cvs_item.id)
621 del self[id]
622 if cvs_item.next_id is not None:
623 cvs_rev_next = self[cvs_item.next_id]
624 cvs_rev_next.prev_id = None
625 self.root_ids.add(cvs_rev_next.id)
627 # Delete all CVSBranches rooted at this revision. If there is
628 # a CVSRevision on the branch, it should already be an add so
629 # it doesn't have to be changed.
630 for cvs_branch_id in cvs_item.branch_ids:
631 cvs_branch = self[cvs_branch_id]
632 del self[cvs_branch.id]
634 if cvs_branch.next_id is not None:
635 cvs_branch_next = self[cvs_branch.next_id]
636 cvs_branch_next.first_on_branch_id = None
637 cvs_branch_next.prev_id = None
638 self.root_ids.add(cvs_branch_next.id)
640 # Tagging a dead revision doesn't do anything, so remove any
641 # tags that were set on 1.1:
642 for cvs_tag_id in cvs_item.tag_ids:
643 del self[cvs_tag_id]
645 # This can only happen once per file, and we might have just
646 # changed self.root_ids, so break out of the loop:
647 break
649 def _is_unneeded_initial_branch_delete(self, lod_items, metadata_db):
650 """Return True iff the initial revision in LOD_ITEMS can be deleted."""
652 if not lod_items.cvs_revisions:
653 return False
655 cvs_revision = lod_items.cvs_revisions[0]
657 if cvs_revision.ntdbr:
658 return False
660 if not isinstance(cvs_revision, CVSRevisionAbsent):
661 return False
663 if cvs_revision.branch_ids:
664 return False
666 log_msg = metadata_db[cvs_revision.metadata_id].log_msg
667 return bool(re.match(
668 r'file .* was added on branch .* on '
669 r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
670 '\n$',
671 log_msg,
674 def remove_initial_branch_deletes(self, metadata_db):
675 """If the first revision on a branch is an unnecessary delete, remove it.
677 If a file is added on a branch (whether or not it already existed
678 on trunk), then new versions of CVS add a first branch revision in
679 the 'dead' state (to indicate that the file did not exist on the
680 branch when the branch was created) followed by the second branch
681 revision, which is an add. When we encounter this situation, we
682 sever the branch from trunk and delete the first branch
683 revision."""
685 for lod_items in self.iter_lods():
686 if self._is_unneeded_initial_branch_delete(lod_items, metadata_db):
687 cvs_revision = lod_items.cvs_revisions[0]
688 Log().debug(
689 'Removing unnecessary initial branch delete %s' % (cvs_revision,)
692 # Sever the branch from its source if necessary:
693 self._sever_branch(lod_items)
695 # Delete the first revision on the branch:
696 self.root_ids.remove(cvs_revision.id)
697 del self[cvs_revision.id]
699 # If it had a successor, adjust its backreference and add it
700 # to the root_ids:
701 if cvs_revision.next_id is not None:
702 cvs_rev_next = self[cvs_revision.next_id]
703 cvs_rev_next.prev_id = None
704 self.root_ids.add(cvs_rev_next.id)
706 # Tagging a dead revision doesn't do anything, so remove any
707 # tags that were set on it:
708 for tag_id in cvs_revision.tag_ids:
709 del self[tag_id]
711 def _exclude_tag(self, cvs_tag):
712 """Exclude the specified CVS_TAG."""
714 del self[cvs_tag.id]
716 # A CVSTag is the successor of the CVSRevision that it
717 # sprouts from. Delete this tag from that revision's
718 # tag_ids:
719 self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id)
721 def _exclude_branch(self, lod_items):
722 """Exclude the branch described by LOD_ITEMS, including its revisions.
724 (Do not update the LOD_ITEMS instance itself.)
726 If the LOD starts with non-trunk default branch revisions, leave
727 the branch and the NTDB revisions in place, but delete any
728 subsequent revisions that are not NTDB revisions. In this case,
729 return True; otherwise return False"""
731 if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
732 for cvs_rev in lod_items.cvs_revisions:
733 if not cvs_rev.ntdbr:
734 # We've found the first non-NTDBR, and it's stored in cvs_rev:
735 break
736 else:
737 # There was no revision following the NTDBRs:
738 cvs_rev = None
740 if cvs_rev:
741 last_ntdbr = self[cvs_rev.prev_id]
742 last_ntdbr.next_id = None
743 while True:
744 del self[cvs_rev.id]
745 if cvs_rev.next_id is None:
746 break
747 cvs_rev = self[cvs_rev.next_id]
749 return True
751 else:
752 if lod_items.cvs_branch is not None:
753 # Delete the CVSBranch itself:
754 cvs_branch = lod_items.cvs_branch
756 del self[cvs_branch.id]
758 # A CVSBranch is the successor of the CVSRevision that it
759 # sprouts from. Delete this branch from that revision's
760 # branch_ids:
761 self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)
763 if lod_items.cvs_revisions:
764 # The first CVSRevision on the branch has to be either detached
765 # from the revision from which the branch sprang, or removed
766 # from self.root_ids:
767 cvs_rev = lod_items.cvs_revisions[0]
768 if cvs_rev.prev_id is None:
769 self.root_ids.remove(cvs_rev.id)
770 else:
771 self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id)
773 for cvs_rev in lod_items.cvs_revisions:
774 del self[cvs_rev.id]
776 return False
778 def graft_ntdbr_to_trunk(self):
779 """Graft the non-trunk default branch revisions to trunk.
781 They should already be alone on a branch that may or may not have
782 a CVSBranch connecting it to trunk."""
784 for lod_items in self.iter_lods():
785 if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
786 assert lod_items.is_pure_ntdb()
788 first_rev = lod_items.cvs_revisions[0]
789 last_rev = lod_items.cvs_revisions[-1]
790 rev_1_1 = self.get(first_rev.prev_id)
791 rev_1_2 = self.get(last_rev.ntdbr_next_id)
793 self._sever_branch(lod_items)
795 if rev_1_1 is not None:
796 rev_1_1.next_id = first_rev.id
797 first_rev.prev_id = rev_1_1.id
799 self.root_ids.remove(first_rev.id)
801 first_rev.__class__ = cvs_revision_type_map[(
802 isinstance(first_rev, CVSRevisionModification),
803 isinstance(rev_1_1, CVSRevisionModification),
806 if rev_1_2 is not None:
807 rev_1_2.ntdbr_prev_id = None
808 last_rev.ntdbr_next_id = None
810 if rev_1_2.prev_id is None:
811 self.root_ids.remove(rev_1_2.id)
813 rev_1_2.prev_id = last_rev.id
814 last_rev.next_id = rev_1_2.id
816 # The effective_pred_id of rev_1_2 was not changed, so we
817 # don't have to change rev_1_2's type.
819 for cvs_rev in lod_items.cvs_revisions:
820 cvs_rev.ntdbr = False
821 cvs_rev.lod = self.trunk
823 for cvs_branch in lod_items.cvs_branches:
824 cvs_branch.source_lod = self.trunk
826 for cvs_tag in lod_items.cvs_tags:
827 cvs_tag.source_lod = self.trunk
829 return
831 def exclude_non_trunk(self):
832 """Delete all tags and branches."""
834 ntdbr_excluded = False
835 for lod_items in self.iter_lods():
836 for cvs_tag in lod_items.cvs_tags[:]:
837 self._exclude_tag(cvs_tag)
838 lod_items.cvs_tags.remove(cvs_tag)
840 if not isinstance(lod_items.lod, Trunk):
841 assert not lod_items.cvs_branches
843 ntdbr_excluded |= self._exclude_branch(lod_items)
845 if ntdbr_excluded:
846 self.graft_ntdbr_to_trunk()
848 def filter_excluded_symbols(self, revision_excluder):
849 """Delete any excluded symbols and references to them.
851 Call the revision_excluder's callback methods to let it know what
852 is being excluded."""
854 ntdbr_excluded = False
855 for lod_items in self.iter_lods():
856 # Delete any excluded tags:
857 for cvs_tag in lod_items.cvs_tags[:]:
858 if isinstance(cvs_tag.symbol, ExcludedSymbol):
859 self._exclude_tag(cvs_tag)
861 lod_items.cvs_tags.remove(cvs_tag)
863 # Delete the whole branch if it is to be excluded:
864 if isinstance(lod_items.lod, ExcludedSymbol):
865 # A symbol can only be excluded if no other symbols spring
866 # from it. This was already checked in CollateSymbolsPass, so
867 # these conditions should already be satisfied.
868 assert not list(lod_items.iter_blockers())
870 ntdbr_excluded |= self._exclude_branch(lod_items)
872 if ntdbr_excluded:
873 self.graft_ntdbr_to_trunk()
875 revision_excluder.process_file(self)
877 def _mutate_branch_to_tag(self, cvs_branch):
878 """Mutate the branch CVS_BRANCH into a tag."""
880 if cvs_branch.next_id is not None:
881 # This shouldn't happen because it was checked in
882 # CollateSymbolsPass:
883 raise FatalError('Attempt to exclude a branch with commits.')
884 cvs_tag = CVSTag(
885 cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol,
886 cvs_branch.source_lod, cvs_branch.source_id,
887 cvs_branch.revision_recorder_token,
889 self.add(cvs_tag)
890 cvs_revision = self[cvs_tag.source_id]
891 cvs_revision.branch_ids.remove(cvs_tag.id)
892 cvs_revision.tag_ids.append(cvs_tag.id)
894 def _mutate_tag_to_branch(self, cvs_tag):
895 """Mutate the tag into a branch."""
897 cvs_branch = CVSBranch(
898 cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol,
899 None, cvs_tag.source_lod, cvs_tag.source_id, None,
900 cvs_tag.revision_recorder_token,
902 self.add(cvs_branch)
903 cvs_revision = self[cvs_branch.source_id]
904 cvs_revision.tag_ids.remove(cvs_branch.id)
905 cvs_revision.branch_ids.append(cvs_branch.id)
907 def _mutate_symbol(self, cvs_symbol):
908 """Mutate CVS_SYMBOL if necessary."""
910 symbol = cvs_symbol.symbol
911 if isinstance(cvs_symbol, CVSBranch) and isinstance(symbol, Tag):
912 self._mutate_branch_to_tag(cvs_symbol)
913 elif isinstance(cvs_symbol, CVSTag) and isinstance(symbol, Branch):
914 self._mutate_tag_to_branch(cvs_symbol)
916 def mutate_symbols(self):
917 """Force symbols to be tags/branches based on self.symbol_db."""
919 for cvs_item in self.values():
920 if isinstance(cvs_item, CVSRevision):
921 # This CVSRevision may be affected by the mutation of any
922 # CVSSymbols that it references, but there is nothing to do
923 # here directly.
924 pass
925 elif isinstance(cvs_item, CVSSymbol):
926 self._mutate_symbol(cvs_item)
927 else:
928 raise RuntimeError('Unknown cvs item type')
930 def _adjust_tag_parent(self, cvs_tag):
931 """Adjust the parent of CVS_TAG if possible and preferred.
933 CVS_TAG is an instance of CVSTag. This method must be called in
934 leaf-to-trunk order."""
936 # The Symbol that cvs_tag would like to have as a parent:
937 preferred_parent = Ctx()._symbol_db.get_symbol(
938 cvs_tag.symbol.preferred_parent_id)
940 if cvs_tag.source_lod == preferred_parent:
941 # The preferred parent is already the parent.
942 return
944 # The CVSRevision that is its direct parent:
945 source = self[cvs_tag.source_id]
946 assert isinstance(source, CVSRevision)
948 if isinstance(preferred_parent, Trunk):
949 # It is not possible to graft *onto* Trunk:
950 return
952 # Try to find the preferred parent among the possible parents:
953 for branch_id in source.branch_ids:
954 if self[branch_id].symbol == preferred_parent:
955 # We found it!
956 break
957 else:
958 # The preferred parent is not a possible parent in this file.
959 return
961 parent = self[branch_id]
962 assert isinstance(parent, CVSBranch)
964 Log().debug('Grafting %s from %s (on %s) onto %s' % (
965 cvs_tag, source, source.lod, parent,))
966 # Switch parent:
967 source.tag_ids.remove(cvs_tag.id)
968 parent.tag_ids.append(cvs_tag.id)
969 cvs_tag.source_lod = parent.symbol
970 cvs_tag.source_id = parent.id
972 def _adjust_branch_parents(self, cvs_branch):
973 """Adjust the parent of CVS_BRANCH if possible and preferred.
975 CVS_BRANCH is an instance of CVSBranch. This method must be
976 called in leaf-to-trunk order."""
978 # The Symbol that cvs_branch would like to have as a parent:
979 preferred_parent = Ctx()._symbol_db.get_symbol(
980 cvs_branch.symbol.preferred_parent_id)
982 if cvs_branch.source_lod == preferred_parent:
983 # The preferred parent is already the parent.
984 return
986 # The CVSRevision that is its direct parent:
987 source = self[cvs_branch.source_id]
988 # This is always a CVSRevision because we haven't adjusted it yet:
989 assert isinstance(source, CVSRevision)
991 if isinstance(preferred_parent, Trunk):
992 # It is not possible to graft *onto* Trunk:
993 return
995 # Try to find the preferred parent among the possible parents:
996 for branch_id in source.branch_ids:
997 possible_parent = self[branch_id]
998 if possible_parent.symbol == preferred_parent:
999 # We found it!
1000 break
1001 elif possible_parent.symbol == cvs_branch.symbol:
1002 # Only branches that precede the branch to be adjusted are
1003 # considered possible parents. Leave parentage unchanged:
1004 return
1005 else:
1006 # This point should never be reached.
1007 raise InternalError(
1008 'Possible parent search did not terminate as expected')
1010 parent = possible_parent
1011 assert isinstance(parent, CVSBranch)
1013 Log().debug('Grafting %s from %s (on %s) onto %s' % (
1014 cvs_branch, source, source.lod, parent,))
1015 # Switch parent:
1016 source.branch_ids.remove(cvs_branch.id)
1017 parent.branch_ids.append(cvs_branch.id)
1018 cvs_branch.source_lod = parent.symbol
1019 cvs_branch.source_id = parent.id
1021 def adjust_parents(self):
1022 """Adjust the parents of symbols to their preferred parents.
1024 If a CVSSymbol has a preferred parent that is different than its
1025 current parent, and if the preferred parent is an allowed parent
1026 of the CVSSymbol in this file, then graft the CVSSymbol onto its
1027 preferred parent."""
1029 for lod_items in self.iter_lods():
1030 for cvs_tag in lod_items.cvs_tags:
1031 self._adjust_tag_parent(cvs_tag)
1033 for cvs_branch in lod_items.cvs_branches:
1034 self._adjust_branch_parents(cvs_branch)
1036 def _get_revision_source(self, cvs_symbol):
1037 """Return the CVSRevision that is the ultimate source of CVS_SYMBOL."""
1039 while True:
1040 cvs_item = self[cvs_symbol.source_id]
1041 if isinstance(cvs_item, CVSRevision):
1042 return cvs_item
1043 else:
1044 cvs_symbol = cvs_item
1046 def refine_symbols(self):
1047 """Refine the types of the CVSSymbols in this file.
1049 Adjust the symbol types based on whether the source exists:
1050 CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop."""
1052 for lod_items in self.iter_lods():
1053 for cvs_tag in lod_items.cvs_tags:
1054 source = self._get_revision_source(cvs_tag)
1055 cvs_tag.__class__ = cvs_tag_type_map[
1056 isinstance(source, CVSRevisionModification)
1059 for cvs_branch in lod_items.cvs_branches:
1060 source = self._get_revision_source(cvs_branch)
1061 cvs_branch.__class__ = cvs_branch_type_map[
1062 isinstance(source, CVSRevisionModification)
1065 def record_opened_symbols(self):
1066 """Set CVSRevision.opened_symbols for the surviving revisions."""
1068 for cvs_item in self.values():
1069 if isinstance(cvs_item, (CVSRevision, CVSBranch)):
1070 cvs_item.opened_symbols = []
1071 for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened():
1072 cvs_symbol_opened = self[cvs_symbol_opened_id]
1073 cvs_item.opened_symbols.append(
1074 (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id,)
1077 def record_closed_symbols(self):
1078 """Set CVSRevision.closed_symbols for the surviving revisions.
1080 A CVSRevision closes the symbols that were opened by the CVSItems
1081 that the CVSRevision closes. Got it?
1083 This method must be called after record_opened_symbols()."""
1085 for cvs_item in self.values():
1086 if isinstance(cvs_item, CVSRevision):
1087 cvs_item.closed_symbols = []
1088 for cvs_item_closed_id in cvs_item.get_ids_closed():
1089 cvs_item_closed = self[cvs_item_closed_id]
1090 cvs_item.closed_symbols.extend(cvs_item_closed.opened_symbols)