Iterate through CVSRevisions using CVSFileItems.iter_lods().
[cvs2svn.git] / cvs2svn_lib / cvs_file_items.py
blob9cf50b640596bfa9e4bf7c122f3f8d77c702c4b8
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a class to manage the CVSItems related to one file."""
20 import re
22 from cvs2svn_lib.common import InternalError
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.context import Ctx
25 from cvs2svn_lib.log import Log
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.cvs_item import CVSRevision
31 from cvs2svn_lib.cvs_item import CVSRevisionModification
32 from cvs2svn_lib.cvs_item import CVSRevisionAbsent
33 from cvs2svn_lib.cvs_item import CVSRevisionNoop
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.cvs_item import CVSBranch
36 from cvs2svn_lib.cvs_item import CVSTag
37 from cvs2svn_lib.cvs_item import cvs_revision_type_map
38 from cvs2svn_lib.cvs_item import cvs_branch_type_map
39 from cvs2svn_lib.cvs_item import cvs_tag_type_map
class VendorBranchError(Exception):
  """Signal an error in the structure of a file's revision tree.

  Raised by CVSFileItems.process_live_ntdb() when a file has a default
  branch but also a revision following 1.1 on trunk."""
class LODItems(object):
  """The CVSItems of one file that lie on a single line of development."""

  def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
    """Store the items that make up one LOD of one file.

    None of the arguments is copied; the instance keeps references to
    the objects that are passed in."""

    # The LineOfDevelopment described by this instance.
    self.lod = lod

    # The CVSBranch starting this LOD, if any; otherwise, None.
    self.cvs_branch = cvs_branch

    # The list of CVSRevisions on this LOD, if any.  The CVSRevisions
    # are listed in dependency order.
    self.cvs_revisions = cvs_revisions

    # A list of CVSBranches that sprout from this LOD (either from
    # cvs_branch or from one of the CVSRevisions).
    self.cvs_branches = cvs_branches

    # A list of CVSTags that sprout from this LOD (either from
    # cvs_branch or from one of the CVSRevisions).
    self.cvs_tags = cvs_tags

  def is_trivial_import(self):
    """Return True iff this LOD is a trivial import branch in this file.

    A trivial import branch is a branch that was used for a single
    import and nothing else.  Such a branch is eligible for being
    grafted onto trunk, even if it has branch blockers.

    The result is always a real bool (never the raw ntdbr value)."""

    return (
        len(self.cvs_revisions) == 1
        and bool(self.cvs_revisions[0].ntdbr)
        )

  def is_pure_ntdb(self):
    """Return True iff this LOD is a pure NTDB in this file.

    A pure non-trunk default branch is defined to be a branch that
    contains only NTDB revisions (and at least one of them).  Such a
    branch is eligible for being grafted onto trunk, even if it has
    branch blockers.

    Only the last revision is inspected; this presumably relies on
    NTDB revisions always coming first on a branch (TODO confirm).
    The result is always a real bool; in particular an LOD without
    revisions gives False rather than an empty list."""

    return (
        bool(self.cvs_revisions)
        and bool(self.cvs_revisions[-1].ntdbr)
        )

  def iter_blockers(self):
    """Generate the CVSTags and CVSBranches that block this LOD.

    A pure NTDB has no blockers, because the blockers can be grafted
    to trunk.  Other LODs are only blocked by symbols that sprout from
    non-NTDB revisions.  Tags are generated before branches."""

    if self.is_pure_ntdb():
      return

    non_ntdbr_revision_ids = set(
        cvs_revision.id
        for cvs_revision in self.cvs_revisions
        if not cvs_revision.ntdbr
        )

    for cvs_tag in self.cvs_tags:
      if cvs_tag.source_id in non_ntdbr_revision_ids:
        yield cvs_tag

    for cvs_branch in self.cvs_branches:
      if cvs_branch.source_id in non_ntdbr_revision_ids:
        yield cvs_branch
115 class CVSFileItems(object):
def __init__(self, cvs_file, trunk, cvs_items, original_ids=None):
  """Initialize the instance from the CVSItems of one file.

  CVS_FILE is the file whose data this instance holds.  TRUNK is the
  symbol that represents trunk in this file.  CVS_ITEMS is an
  iterable of the CVSItems to store; it is traversed exactly once, so
  a one-shot iterator is acceptable.  ORIGINAL_IDS, if it is not
  None, is stored as-is; otherwise a dict {cvs_rev.rev : cvs_rev.id}
  is built from the CVSRevisions found in CVS_ITEMS."""

  # The file whose data this instance holds.
  self.cvs_file = cvs_file

  # The symbol that represents "Trunk" in this file.
  self.trunk = trunk

  # A map from CVSItem.id to CVSItem:
  self._cvs_items = {}

  # The cvs_item_id of each root in the CVSItem forest.  (A root is
  # defined to be any CVSRevision with no prev_id.)
  self.root_ids = set()

  # self.original_ids is a dict {cvs_rev.rev : cvs_rev.id} holding
  # the IDs originally allocated to each CVS revision number.  This
  # member is stored for the convenience of RevisionManagers.
  collect_original_ids = original_ids is None
  if collect_original_ids:
    original_ids = {}
  self.original_ids = original_ids

  # Everything is done in one pass so that CVS_ITEMS is never
  # iterated a second time (which would silently produce nothing for
  # a generator).
  for cvs_item in cvs_items:
    self.add(cvs_item)
    if isinstance(cvs_item, CVSRevision):
      if cvs_item.prev_id is None:
        self.root_ids.add(cvs_item.id)
      if collect_original_ids:
        original_ids[cvs_item.rev] = cvs_item.id
def __getstate__(self):
  """Return the state to pickle for this instance.

  The state is the tuple (cvs_file id, list of CVSItems,
  original_ids).  The items are copied into a new list so that the
  state neither aliases the internal dictionary nor contains a
  dictionary view, which cannot be pickled."""

  return (self.cvs_file.id, list(self.values()), self.original_ids,)
def __setstate__(self, state):
  """Rebuild this instance from a tuple produced by __getstate__().

  The CVSFile is looked up by its id in the path database of the
  current Ctx, and the trunk symbol is obtained from the project of
  that file.  The instance is then initialized as by __init__()."""

  cvs_file_id, cvs_items, original_ids = state
  cvs_file = Ctx()._cvs_path_db.get_path(cvs_file_id)
  trunk = cvs_file.project.get_trunk()
  CVSFileItems.__init__(
      self, cvs_file, trunk, cvs_items, original_ids=original_ids,
      )
def add(self, cvs_item):
  """Store CVS_ITEM under its id, replacing any item with the same id."""

  item_id = cvs_item.id
  self._cvs_items[item_id] = cvs_item
def __getitem__(self, id):
  """Return the CVSItem stored under ID.

  Raise KeyError if there is no such item."""

  items = self._cvs_items
  return items[id]
def get(self, id, default=None):
  """Return the CVSItem stored under ID, or DEFAULT if there is none."""

  try:
    return self._cvs_items[id]
  except KeyError:
    return default
def __delitem__(self, id):
  """Remove the CVSItem stored under ID.

  A root must be removed from self.root_ids before it is deleted;
  this is checked with an assertion.  Raise KeyError if there is no
  item with the specified ID."""

  assert id not in self.root_ids
  # pop() without a default raises KeyError for a missing key, just
  # like the del statement.
  self._cvs_items.pop(id)
def values(self):
  """Return the CVSItems held by this instance.

  The result is whatever the values() method of the internal
  dictionary returns."""

  items = self._cvs_items
  return items.values()
def check_link_consistency(self):
  """Verify that the CVSItems are linked correctly with each other.

  Call check_links() on every item.  If one of them raises an
  AssertionError, log an error naming the item and re-raise the
  exception."""

  for cvs_item in self.values():
    try:
      cvs_item.check_links(self)
    except AssertionError:
      message = (
          'Link consistency error in %s\n'
          'This is probably a bug internal to cvs2svn. Please file a bug\n'
          'report including the following stack trace (see FAQ for more '
          'info).'
          ) % (cvs_item,)
      Log().error(message)
      raise
def _get_lod(self, lod, cvs_branch, start_id):
  """Return the LODItems for one line of development.

  LOD is the corresponding LineOfDevelopment.  CVS_BRANCH is the
  CVSBranch instance that starts the LOD if any; otherwise it is
  None.  START_ID is the id of the first CVSRevision on this LOD, or
  None if there are none."""

  # Collect the revisions by following the next_id links:
  cvs_revisions = []
  next_id = start_id
  while next_id is not None:
    cvs_rev = self[next_id]
    cvs_revisions.append(cvs_rev)
    next_id = cvs_rev.next_id

  # The items from which symbols can sprout: the CVSBranch itself (if
  # any) followed by the revisions in order.
  sources = list(cvs_revisions)
  if cvs_branch is not None:
    sources.insert(0, cvs_branch)

  cvs_branches = []
  cvs_tags = []
  for source in sources:
    cvs_branches.extend([self[branch_id] for branch_id in source.branch_ids])
    cvs_tags.extend([self[tag_id] for tag_id in source.tag_ids])

  return LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
def get_lod_items(self, cvs_branch):
  """Return an LODItems for the branch that starts at CVS_BRANCH.

  CVS_BRANCH must be an instance of CVSBranch contained in this
  CVSFileItems.  Its symbol is used as the line of development and
  its next_id as the id of the first revision."""

  lod = cvs_branch.symbol
  first_rev_id = cvs_branch.next_id
  return self._get_lod(lod, cvs_branch, first_rev_id)
def iter_root_lods(self):
  """Generate the LODItems of every root LOD (non-recursively).

  Raise InternalError if a root is neither a CVSRevision nor a
  CVSBranch."""

  # Iterate over a snapshot of the root ids:
  for root_id in list(self.root_ids):
    root = self[root_id]
    if isinstance(root, CVSRevision):
      # This LOD doesn't have a CVSBranch associated with it.  Either
      # it is Trunk, or it is a branch whose CVSBranch has been
      # deleted.
      lod, cvs_branch, start_id = root.lod, None, root_id
    elif isinstance(root, CVSBranch):
      # This is a Branch that has been severed from the rest of the
      # tree.
      lod, cvs_branch, start_id = root.symbol, root, root.next_id
    else:
      raise InternalError('Unexpected root item: %s' % (root,))

    yield self._get_lod(lod, cvs_branch, start_id)
def _iter_tree(self, lod, cvs_branch, start_id):
  """Generate the LODItems of the tree rooted at one line of development.

  LOD is the LineOfDevelopment where the iteration should start.
  CVS_BRANCH is the CVSBranch instance that starts the LOD if any;
  otherwise it is None.  START_ID is the id of the first CVSRevision
  on this LOD, or None if there are none.

  Two cases are handled: trunk (LOD is a Trunk instance, CVS_BRANCH
  is None, and START_ID is the id of the 1.1 revision) and a branch
  (LOD is a Branch instance, CVS_BRANCH is a CVSBranch instance, and
  START_ID is either the id of the first CVSRevision on the branch or
  None if the branch has no CVSRevisions).  CVS_BRANCH and START_ID
  cannot both be None.

  The LODItems of the branches sprouting from this LOD are generated
  (recursively) before the LODItems of this LOD itself."""

  cvs_revisions = []
  cvs_branches = []
  cvs_tags = []

  def iter_sources():
    """Generate CVS_BRANCH (if any), then each revision on the LOD.

    A revision is recorded in cvs_revisions before it is generated,
    and its next_id is only read when the following item is
    requested, i.e., after the consumer is done with the revision."""

    if cvs_branch is not None:
      yield cvs_branch

    next_id = start_id
    while next_id is not None:
      cvs_rev = self[next_id]
      cvs_revisions.append(cvs_rev)
      yield cvs_rev
      next_id = cvs_rev.next_id

  for source in iter_sources():
    # Iterate over a copy of the branch ids:
    for branch_id in source.branch_ids[:]:
      # Recurse into the branch:
      branch = self[branch_id]
      for lod_items in self._iter_tree(
            branch.symbol, branch, branch.next_id
            ):
        yield lod_items

      # The caller might have deleted the branch that we just
      # yielded.  If it is no longer present, then do not add it to
      # the list of cvs_branches.
      try:
        cvs_branches.append(self[branch_id])
      except KeyError:
        pass

    for tag_id in source.tag_ids:
      cvs_tags.append(self[tag_id])

  yield LODItems(lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags)
def iter_lods(self):
  """Generate an LODItems for each LOD in this file, depth-first.

  The traversal starts at each root node but returns the LODs in
  depth-first order.

  It is allowed to modify the CVSFileItems instance while the
  traversal is occurring, but only in ways that don't affect the
  tree structure above (i.e., towards the trunk from) the current
  LOD.

  Raise InternalError if a root is neither a CVSRevision nor a
  CVSBranch."""

  # Make a list out of root_ids so that callers can change it:
  for root_id in list(self.root_ids):
    root = self[root_id]
    if isinstance(root, CVSRevision):
      # This LOD doesn't have a CVSBranch associated with it.  Either
      # it is Trunk, or it is a branch whose CVSBranch has been
      # deleted.
      tree_args = (root.lod, None, root_id)
    elif isinstance(root, CVSBranch):
      # This is a Branch that has been severed from the rest of the
      # tree.
      tree_args = (root.symbol, root, root.next_id)
    else:
      raise InternalError('Unexpected root item: %s' % (root,))

    for lod_items in self._iter_tree(*tree_args):
      yield lod_items
def iter_deltatext_ancestors(self, cvs_rev):
  """Generate the delta-dependency ancestors of CVS_REV.

  Generate the ancestors of CVS_REV in deltatext order; i.e., back
  along branches towards trunk, then outwards along trunk towards
  HEAD.  CVS_REV itself is not generated."""

  while True:
    # Determine the id of the next candidate source revision:
    if not isinstance(cvs_rev.lod, Trunk):
      # Off trunk, follow the link to the predecessor:
      ancestor_id = cvs_rev.prev_id
    elif cvs_rev.next_id is None:
      # HEAD has no ancestors, so we are done:
      return
    else:
      # On trunk, follow the link to the successor:
      ancestor_id = cvs_rev.next_id

    cvs_rev = self[ancestor_id]
    yield cvs_rev
def _sever_branch(self, lod_items):
  """Sever the branch from its source and discard the CVSBranch.

  LOD_ITEMS describes a branch that should be severed from its
  source, deleting the CVSBranch and creating a new root.  Also set
  LOD_ITEMS.cvs_branch to None.

  If LOD_ITEMS has no source (e.g., because it is the trunk branch
  or because it has already been severed), do nothing.

  This method can only be used before symbols have been grafted onto
  CVSBranches.  It does not adjust NTDBR, NTDBR_PREV_ID or
  NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""

  cvs_branch = lod_items.cvs_branch
  if cvs_branch is None:
    return

  assert not cvs_branch.tag_ids
  assert not cvs_branch.branch_ids
  source_rev = self[cvs_branch.source_id]

  # We only cover the following case, even though after
  # FilterSymbolsPass cvs_branch.source_id might refer to another
  # CVSBranch.
  assert isinstance(source_rev, CVSRevision)

  # Delete the CVSBranch itself, and the reference to it from the
  # source revision:
  branch_id = cvs_branch.id
  lod_items.cvs_branch = None
  del self[branch_id]
  source_rev.branch_ids.remove(branch_id)

  if not lod_items.cvs_revisions:
    # There are no revisions on the branch, so nothing else refers to
    # the CVSBranch.
    return

  first_rev = lod_items.cvs_revisions[0]

  # Delete the reference from first_rev to the CVSBranch:
  first_rev.first_on_branch_id = None

  # Delete the reference from the source revision to the first
  # revision on the branch, and vice versa:
  source_rev.branch_commit_ids.remove(first_rev.id)
  first_rev.prev_id = None

  # Change the type of first_rev (e.g., from Change to Add):
  type_key = (isinstance(first_rev, CVSRevisionModification), False,)
  first_rev.__class__ = cvs_revision_type_map[type_key]

  # Now first_rev is a new root:
  self.root_ids.add(first_rev.id)
def adjust_ntdbrs(self, ntdbr_cvs_revs):
  """Adjust the specified non-trunk default branch revisions.

  NTDBR_CVS_REVS is a (non-empty) list of CVSRevision instances in
  this file that have been determined to be non-trunk default branch
  revisions.

  Background: CVS handles the first revision on the default branch
  strangely.  If a file is imported (as opposed to being added), CVS
  creates a 1.1 revision, then creates a vendor branch 1.1.1 based on
  1.1, then creates a 1.1.1.1 revision that is identical to the 1.1
  revision (i.e., its deltatext is empty).  The log message that the
  user typed when importing is stored with the 1.1.1.1 revision.  The
  1.1 revision always contains a standard, generated log message,
  'Initial revision\n'.

  When we detect a straightforward import like this, we want to
  handle it by deleting the 1.1 revision (which doesn't contain any
  useful information) and making 1.1.1.1 into an independent root in
  the file's dependency tree.  In SVN, 1.1.1.1 will be added directly
  to the vendor branch with its initial content.  Then in a special
  'post-commit', the 1.1.1.1 revision is copied back to trunk.

  If the user imports again to the same vendor branch, then CVS
  creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch,
  *without* counterparts in trunk (even though these revisions
  effectively play the role of trunk revisions).  So after we add
  such revisions to the vendor branch, we also copy them back to
  trunk in post-commits.

  What this method does: set the ntdbr members of the revisions
  listed in NTDBR_CVS_REVS to True.  Also, if there is a 1.2
  revision, then set that revision to depend on the last non-trunk
  default branch revision and possibly adjust its type
  accordingly."""

  for ntdbr_cvs_rev in ntdbr_cvs_revs:
    ntdbr_cvs_rev.ntdbr = True

  # Look for a 1.2 revision (the successor of the revision from which
  # the first NTDBR sprouts):
  rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
  rev_1_2 = self.get(rev_1_1.next_id)
  if rev_1_2 is None:
    return

  # Revision 1.2 logically follows the imported revisions, not 1.1.
  # Accordingly, connect it to the last NTDBR and possibly change its
  # type.
  last_ntdbr = ntdbr_cvs_revs[-1]
  rev_1_2.ntdbr_prev_id = last_ntdbr.id
  last_ntdbr.ntdbr_next_id = rev_1_2.id
  type_key = (
      isinstance(rev_1_2, CVSRevisionModification),
      isinstance(last_ntdbr, CVSRevisionModification),
      )
  rev_1_2.__class__ = cvs_revision_type_map[type_key]
def process_live_ntdb(self, vendor_lod_items):
  """Process VENDOR_LOD_ITEMS, which describes a live default branch.

  In this case, all revisions on the default branch are NTDBRs and it
  is an error if there is also a '1.2' revision.

  Return True iff this transformation really does something (i.e.,
  iff the branch has at least one revision).  Raise a
  VendorBranchError if there is a '1.2' revision."""

  vendor_branch = vendor_lod_items.cvs_branch
  rev_1_1 = self[vendor_branch.source_id]
  rev_1_2_id = rev_1_1.next_id
  if rev_1_2_id is not None:
    raise VendorBranchError(
        'File \'%s\' has default branch=%s but also a revision %s'
        % (self.cvs_file.filename,
           vendor_branch.branch_number, self[rev_1_2_id].rev,)
        )

  ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
  if not ntdbr_cvs_revs:
    return False

  self.adjust_ntdbrs(ntdbr_cvs_revs)
  return True
def process_historical_ntdb(self, vendor_lod_items):
  """Process a branch that appears to have been a default branch once.

  There is currently no default branch, but the file appears to have
  been imported.  So our educated guess is that all revisions on the
  '1.1.1' branch (described by VENDOR_LOD_ITEMS) with timestamps
  prior to the timestamp of '1.2' were non-trunk default branch
  revisions.  If there is no '1.2' revision, all revisions on the
  branch are treated that way.

  Return True iff this transformation really does something.

  This really only handles standard '1.1.1.*'-style vendor revisions.
  One could conceivably have a file whose default branch is 1.1.3 or
  whatever, or was that at some point in time, with vendor revisions
  1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now, we'd
  have no basis for assuming that the non-standard vendor branch had
  ever been the default branch anyway.

  Note that we rely on comparisons between the timestamps of the
  revisions on the vendor branch and that of revision 1.2, even
  though the timestamps might be incorrect due to clock skew.  We
  could do a slightly better job if we used the changeset timestamps,
  as it is possible that the dependencies that went into determining
  those timestamps are more accurate.  But that would require an
  extra pass or two."""

  rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
  rev_1_2_id = rev_1_1.next_id

  if rev_1_2_id is None:
    # Without a 1.2 revision there is no cutoff; every revision on
    # the branch qualifies.
    ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
  else:
    rev_1_2_timestamp = self[rev_1_2_id].timestamp
    ntdbr_cvs_revs = []
    for cvs_rev in vendor_lod_items.cvs_revisions:
      if cvs_rev.timestamp >= rev_1_2_timestamp:
        # That's the end of the once-default branch.
        break
      ntdbr_cvs_revs.append(cvs_rev)

  if not ntdbr_cvs_revs:
    return False

  self.adjust_ntdbrs(ntdbr_cvs_revs)
  return True
def imported_remove_1_1(self, vendor_lod_items):
  """This file was imported.  Remove the 1.1 revision if possible.

  VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch; it
  must contain at least one revision.  The 1.1 revision is only
  removed if the first vendor revision is a CVSRevisionModification
  whose deltatext does not exist.  See adjust_ntdbrs() for more
  information."""

  assert vendor_lod_items.cvs_revisions
  cvs_rev = vendor_lod_items.cvs_revisions[0]

  removable = (
      isinstance(cvs_rev, CVSRevisionModification)
      and not cvs_rev.deltatext_exists
      )
  if not removable:
    return

  rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
  assert isinstance(rev_1_1, CVSRevision)
  Log().debug('Removing unnecessary revision %s' % (rev_1_1,))

  # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
  self._sever_branch(vendor_lod_items)

  # Delete rev_1_1 (it has to stop being a root before it can be
  # deleted):
  self.root_ids.remove(rev_1_1.id)
  del self[rev_1_1.id]

  # If there is a 1.2 revision, it becomes a root:
  if rev_1_1.next_id is not None:
    rev_1_2 = self[rev_1_1.next_id]
    rev_1_2.prev_id = None
    self.root_ids.add(rev_1_2.id)

  # Move any tags from rev_1_1 to cvs_rev:
  cvs_rev.tag_ids.extend(rev_1_1.tag_ids)
  for tag_id in rev_1_1.tag_ids:
    moved_tag = self[tag_id]
    moved_tag.source_lod = cvs_rev.lod
    moved_tag.source_id = cvs_rev.id

  # Move any branches from rev_1_1 to the front of cvs_rev's list:
  cvs_rev.branch_ids[0:0] = rev_1_1.branch_ids
  for branch_id in rev_1_1.branch_ids:
    moved_branch = self[branch_id]
    moved_branch.source_lod = cvs_rev.lod
    moved_branch.source_id = cvs_rev.id

  # Likewise for the first revisions of branches sprouting from
  # rev_1_1:
  cvs_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
  for commit_id in rev_1_1.branch_commit_ids:
    self[commit_id].prev_id = cvs_rev.id
def _is_unneeded_initial_trunk_delete(self, cvs_item, metadata_db):
  """Return True iff CVS_ITEM is an unneeded initial delete on trunk.

  CVS_ITEM qualifies if it is a CVSRevisionNoop with revision number
  '1.1' on trunk that closes no symbols, is not a non-trunk default
  branch revision, and whose log message (looked up in METADATA_DB
  via its metadata_id) has one of the forms that are checked below."""

  disqualified = (
      # This rule can only be applied to dead revisions.
      not isinstance(cvs_item, CVSRevisionNoop)
      or cvs_item.rev != '1.1'
      or not isinstance(cvs_item.lod, Trunk)
      or cvs_item.closed_symbols
      or cvs_item.ntdbr
      )
  if disqualified:
    return False

  log_msg = metadata_db[cvs_item.metadata_id].log_msg
  patterns = (
      r'file .* was initially added on branch .*\.\n$',
      # This variant commit message was reported by one user:
      r'file .* was added on branch .*\n$',
      )
  return any(re.match(pattern, log_msg) for pattern in patterns)
def remove_unneeded_initial_trunk_delete(self, metadata_db):
  """Remove the unneeded initial delete on trunk, if there is one.

  If a file is added on a branch, then a trunk revision is added at
  the same time in the 'Dead' state.  This revision doesn't do
  anything useful, so delete it.  METADATA_DB is used to look up the
  log message of the candidate revisions."""

  for root_id in self.root_ids:
    cvs_item = self[root_id]
    if not self._is_unneeded_initial_trunk_delete(cvs_item, metadata_db):
      continue

    Log().debug('Removing unnecessary delete %s' % (cvs_item,))

    # Sever any CVSBranches rooted at cvs_item.
    for cvs_branch_id in cvs_item.branch_ids[:]:
      branch_lod_items = self.get_lod_items(self[cvs_branch_id])
      self._sever_branch(branch_lod_items)

    # Tagging a dead revision doesn't do anything, so remove any
    # CVSTags that refer to cvs_item:
    while cvs_item.tag_ids:
      del self[cvs_item.tag_ids.pop()]

    # Now delete cvs_item itself:
    self.root_ids.remove(cvs_item.id)
    del self[cvs_item.id]

    # Its successor, if any, becomes a root:
    if cvs_item.next_id is not None:
      cvs_rev_next = self[cvs_item.next_id]
      cvs_rev_next.prev_id = None
      self.root_ids.add(cvs_rev_next.id)

    # This can only happen once per file, so we're done.  (Leaving
    # the loop here also means that the iteration over root_ids,
    # which was modified above, is not continued.)
    return
def _is_unneeded_initial_branch_delete(self, lod_items, metadata_db):
  """Return True iff the initial revision in LOD_ITEMS can be deleted.

  That is the case if there is a first revision, it is not an NTDBR,
  it is a CVSRevisionAbsent, no branches sprout from it, and its log
  message (looked up in METADATA_DB via its metadata_id) has the form
  that is checked below."""

  revisions = lod_items.cvs_revisions
  if not revisions:
    return False

  first_rev = revisions[0]
  if (
        first_rev.ntdbr
        or not isinstance(first_rev, CVSRevisionAbsent)
        or first_rev.branch_ids
        ):
    return False

  log_msg = metadata_db[first_rev.metadata_id].log_msg
  pattern = (
      r'file .* was added on branch .* on '
      r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
      '\n$'
      )
  return re.match(pattern, log_msg) is not None
def remove_initial_branch_deletes(self, metadata_db):
  """If the first revision on a branch is an unnecessary delete, remove it.

  If a file is added on a branch (whether or not it already existed
  on trunk), then new versions of CVS add a first branch revision in
  the 'dead' state (to indicate that the file did not exist on the
  branch when the branch was created) followed by the second branch
  revision, which is an add.  When we encounter this situation, we
  sever the branch from trunk and delete the first branch revision.
  METADATA_DB is used to look up the log messages of the candidate
  revisions."""

  for lod_items in self.iter_lods():
    if not self._is_unneeded_initial_branch_delete(lod_items, metadata_db):
      continue

    dead_rev = lod_items.cvs_revisions[0]
    Log().debug(
        'Removing unnecessary initial branch delete %s' % (dead_rev,)
        )

    # Sever the branch from its source if necessary:
    self._sever_branch(lod_items)

    # Delete the first revision on the branch:
    self.root_ids.remove(dead_rev.id)
    del self[dead_rev.id]

    # If it had a successor, adjust its backreference and add it to
    # the root_ids:
    successor_id = dead_rev.next_id
    if successor_id is not None:
      successor = self[successor_id]
      successor.prev_id = None
      self.root_ids.add(successor.id)

    # Tagging a dead revision doesn't do anything, so remove any tags
    # that were set on it:
    for tag_id in dead_rev.tag_ids:
      del self[tag_id]
def _exclude_tag(self, cvs_tag):
  """Exclude the specified CVS_TAG.

  Delete the tag from this instance and remove its id from the
  tag_ids of the item that it sprouts from."""

  tag_id = cvs_tag.id
  del self[tag_id]

  # A CVSTag is the successor of the CVSRevision that it sprouts
  # from.  Delete this tag from that revision's tag_ids:
  source = self[cvs_tag.source_id]
  source.tag_ids.remove(tag_id)
def _exclude_branch(self, lod_items):
  """Exclude the branch described by LOD_ITEMS, including its revisions.

  (Do not update the LOD_ITEMS instance itself.)

  If the LOD starts with non-trunk default branch revisions, leave
  the branch and the NTDB revisions in place, but delete any
  subsequent revisions that are not NTDB revisions.  In this case,
  return True; otherwise return False"""

  revisions = lod_items.cvs_revisions

  if revisions and revisions[0].ntdbr:
    # Find the first revision that is not an NTDBR (None if every
    # revision on the branch is an NTDBR):
    cvs_rev = None
    for candidate in revisions:
      if not candidate.ntdbr:
        cvs_rev = candidate
        break

    if cvs_rev:
      # Cut the chain after the last NTDBR, then delete the rest of
      # the revisions by following their next_id links:
      last_ntdbr = self[cvs_rev.prev_id]
      last_ntdbr.next_id = None
      while True:
        del self[cvs_rev.id]
        if cvs_rev.next_id is None:
          break
        cvs_rev = self[cvs_rev.next_id]

    return True

  cvs_branch = lod_items.cvs_branch
  if cvs_branch is not None:
    # Delete the CVSBranch itself:
    del self[cvs_branch.id]

    # A CVSBranch is the successor of the CVSRevision that it sprouts
    # from.  Delete this branch from that revision's branch_ids:
    self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)

  if revisions:
    # The first CVSRevision on the branch has to be either detached
    # from the revision from which the branch sprang, or removed from
    # self.root_ids:
    first_rev = revisions[0]
    if first_rev.prev_id is None:
      self.root_ids.remove(first_rev.id)
    else:
      self[first_rev.prev_id].branch_commit_ids.remove(first_rev.id)

    for doomed_rev in revisions:
      del self[doomed_rev.id]

  return False
def graft_ntdbr_to_trunk(self):
  """Graft the non-trunk default branch revisions to trunk.

  They should already be alone on a branch that may or may not have
  a CVSBranch connecting it to trunk.  Only the first LOD that starts
  with an NTDBR is processed."""

  for lod_items in self.iter_lods():
    ntdb_revs = lod_items.cvs_revisions
    if not ntdb_revs or not ntdb_revs[0].ntdbr:
      continue

    assert lod_items.is_pure_ntdb()

    first_rev, last_rev = ntdb_revs[0], ntdb_revs[-1]
    # Look up the neighbors before the branch is severed, because
    # severing clears first_rev.prev_id:
    rev_1_1 = self.get(first_rev.prev_id)
    rev_1_2 = self.get(last_rev.ntdbr_next_id)

    self._sever_branch(lod_items)

    if rev_1_1 is not None:
      # Splice the first NTDBR in as the successor of 1.1; it is then
      # no longer a root:
      rev_1_1.next_id = first_rev.id
      first_rev.prev_id = rev_1_1.id

      self.root_ids.remove(first_rev.id)

      type_key = (
          isinstance(first_rev, CVSRevisionModification),
          isinstance(rev_1_1, CVSRevisionModification),
          )
      first_rev.__class__ = cvs_revision_type_map[type_key]

    if rev_1_2 is not None:
      # Replace the NTDBR links by ordinary prev/next links:
      rev_1_2.ntdbr_prev_id = None
      last_rev.ntdbr_next_id = None

      if rev_1_2.prev_id is None:
        self.root_ids.remove(rev_1_2.id)

      rev_1_2.prev_id = last_rev.id
      last_rev.next_id = rev_1_2.id

      # The effective_pred_id of rev_1_2 was not changed, so we
      # don't have to change rev_1_2's type.

    # The revisions, and the symbols sprouting from them, now belong
    # to trunk:
    for cvs_rev in ntdb_revs:
      cvs_rev.ntdbr = False
      cvs_rev.lod = self.trunk

    for cvs_branch in lod_items.cvs_branches:
      cvs_branch.source_lod = self.trunk

    for cvs_tag in lod_items.cvs_tags:
      cvs_tag.source_lod = self.trunk

    return
def exclude_non_trunk(self):
  """Delete all tags and branches.

  If any excluded branch started with NTDB revisions (which are left
  in place by _exclude_branch()), graft those revisions to trunk
  afterwards."""

  ntdbr_excluded = False
  for lod_items in self.iter_lods():
    # Iterate over a copy, because the list is modified in the loop:
    for cvs_tag in list(lod_items.cvs_tags):
      self._exclude_tag(cvs_tag)
      lod_items.cvs_tags.remove(cvs_tag)

    if isinstance(lod_items.lod, Trunk):
      continue

    assert not lod_items.cvs_branches

    ntdbr_excluded |= self._exclude_branch(lod_items)

  if ntdbr_excluded:
    self.graft_ntdbr_to_trunk()
def filter_excluded_symbols(self):
  """Delete any excluded symbols and references to them.

  If any excluded branch started with NTDB revisions (which are left
  in place by _exclude_branch()), graft those revisions to trunk
  afterwards."""

  ntdbr_excluded = False
  for lod_items in self.iter_lods():
    # Delete any excluded tags (iterating over a copy, because the
    # list is modified in the loop):
    for cvs_tag in list(lod_items.cvs_tags):
      if not isinstance(cvs_tag.symbol, ExcludedSymbol):
        continue
      self._exclude_tag(cvs_tag)
      lod_items.cvs_tags.remove(cvs_tag)

    # Delete the whole branch if it is to be excluded:
    if not isinstance(lod_items.lod, ExcludedSymbol):
      continue

    # A symbol can only be excluded if no other symbols spring from
    # it.  This was already checked in CollateSymbolsPass, so these
    # conditions should already be satisfied.
    assert not list(lod_items.iter_blockers())

    ntdbr_excluded |= self._exclude_branch(lod_items)

  if ntdbr_excluded:
    self.graft_ntdbr_to_trunk()
def _mutate_branch_to_tag(self, cvs_branch):
  """Mutate the branch CVS_BRANCH into a tag.

  A CVSTag with the same id replaces CVS_BRANCH in this instance, and
  the id is moved from the branch_ids to the tag_ids of the source
  item.  Raise FatalError if CVS_BRANCH has a revision on it."""

  if cvs_branch.next_id is not None:
    # This shouldn't happen because it was checked in
    # CollateSymbolsPass:
    raise FatalError('Attempt to exclude a branch with commits.')

  cvs_tag = CVSTag(
      cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol,
      cvs_branch.source_lod, cvs_branch.source_id,
      cvs_branch.revision_reader_token,
      )
  # The tag has the id of the branch, so this replaces the branch:
  self.add(cvs_tag)

  symbol_id = cvs_tag.id
  source = self[cvs_tag.source_id]
  source.branch_ids.remove(symbol_id)
  source.tag_ids.append(symbol_id)
def _mutate_tag_to_branch(self, cvs_tag):
  """Mutate the tag CVS_TAG into a branch.

  A CVSBranch with the same id replaces CVS_TAG in this instance, and
  the id is moved from the tag_ids to the branch_ids of the source
  item."""

  cvs_branch = CVSBranch(
      cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol,
      None, cvs_tag.source_lod, cvs_tag.source_id, None,
      cvs_tag.revision_reader_token,
      )
  # The branch has the id of the tag, so this replaces the tag:
  self.add(cvs_branch)

  symbol_id = cvs_branch.id
  source = self[cvs_branch.source_id]
  source.tag_ids.remove(symbol_id)
  source.branch_ids.append(symbol_id)
def _mutate_symbol(self, cvs_symbol):
  """Mutate CVS_SYMBOL if necessary.

  A CVSBranch whose symbol is a Tag is turned into a tag, and a
  CVSTag whose symbol is a Branch is turned into a branch.  Anything
  else is left alone."""

  symbol = cvs_symbol.symbol
  branch_to_tag = (
      isinstance(cvs_symbol, CVSBranch) and isinstance(symbol, Tag)
      )
  if branch_to_tag:
    self._mutate_branch_to_tag(cvs_symbol)
  elif isinstance(cvs_symbol, CVSTag) and isinstance(symbol, Branch):
    self._mutate_tag_to_branch(cvs_symbol)
def mutate_symbols(self):
  """Force each CVSSymbol to be a tag or branch as its symbol requires.

  Raise RuntimeError if an item is neither a CVSRevision nor a
  CVSSymbol."""

  for cvs_item in self.values():
    if isinstance(cvs_item, CVSRevision):
      # This CVSRevision may be affected by the mutation of any
      # CVSSymbols that it references, but there is nothing to do
      # here directly.
      continue

    if not isinstance(cvs_item, CVSSymbol):
      raise RuntimeError('Unknown cvs item type')

    self._mutate_symbol(cvs_item)
def _adjust_tag_parent(self, cvs_tag):
  """Adjust the parent of CVS_TAG if possible and preferred.

  CVS_TAG is an instance of CVSTag.  This method must be called in
  leaf-to-trunk order.

  The tag is only moved if its preferred parent is not trunk and is
  the symbol of one of the branches that sprout from the same
  revision as the tag."""

  # The Symbol that cvs_tag would like to have as a parent:
  preferred_parent = Ctx()._symbol_db.get_symbol(
      cvs_tag.symbol.preferred_parent_id)

  if cvs_tag.source_lod == preferred_parent:
    # The preferred parent is already the parent.
    return

  # The CVSRevision that is its direct parent:
  source = self[cvs_tag.source_id]
  assert isinstance(source, CVSRevision)

  if isinstance(preferred_parent, Trunk):
    # It is not possible to graft *onto* Trunk:
    return

  # Try to find the preferred parent among the possible parents:
  parent = None
  for branch_id in source.branch_ids:
    candidate = self[branch_id]
    if candidate.symbol == preferred_parent:
      # We found it!
      parent = candidate
      break

  if parent is None:
    # The preferred parent is not a possible parent in this file.
    return

  assert isinstance(parent, CVSBranch)

  Log().debug('Grafting %s from %s (on %s) onto %s' % (
      cvs_tag, source, source.lod, parent,))

  # Switch parent:
  source.tag_ids.remove(cvs_tag.id)
  parent.tag_ids.append(cvs_tag.id)
  cvs_tag.source_lod = parent.symbol
  cvs_tag.source_id = parent.id
def _adjust_branch_parents(self, cvs_branch):
    """Graft CVS_BRANCH onto its preferred parent when that is possible.

    CVS_BRANCH is a CVSBranch instance.  Nothing is changed if the
    branch already sprouts from its preferred parent, if the preferred
    parent is Trunk, or if the preferred parent is not found among the
    branches listed before CVS_BRANCH on the source revision.

    Raise InternalError if the search runs through all of the source
    revision's branches without meeting either the preferred parent
    or CVS_BRANCH itself.

    This method must be called in leaf-to-trunk order."""

    symbol_db = Ctx()._symbol_db
    wanted = symbol_db.get_symbol(cvs_branch.symbol.preferred_parent_id)

    if cvs_branch.source_lod == wanted:
        # Already attached to the preferred parent.
        return

    # The revision from which the branch currently sprouts.  It is
    # still a CVSRevision because this branch has not been adjusted:
    source = self[cvs_branch.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(wanted, Trunk):
        # Grafting *onto* Trunk is not possible.
        return

    # Walk the sibling branches in order, looking for the preferred
    # parent:
    for branch_id in source.branch_ids:
        parent = self[branch_id]
        if parent.symbol == wanted:
            break
        if parent.symbol == cvs_branch.symbol:
            # Reached the branch being adjusted; only branches that
            # precede it qualify as parents, so leave it where it is.
            return
    else:
        # The loop should always meet one of the two cases above.
        raise InternalError(
            'Possible parent search did not terminate as expected')

    assert isinstance(parent, CVSBranch)

    Log().debug('Grafting %s from %s (on %s) onto %s' % (
        cvs_branch, source, source.lod, parent,))

    # Detach the branch from the revision and hang it on the parent:
    source.branch_ids.remove(cvs_branch.id)
    parent.branch_ids.append(cvs_branch.id)
    cvs_branch.source_lod = parent.symbol
    cvs_branch.source_id = parent.id
def adjust_parents(self):
    """Move symbols onto their preferred parents where allowed.

    For every line of development yielded by iter_lods(), each of its
    tags is passed to _adjust_tag_parent() and then each of its
    branches is passed to _adjust_branch_parents().  Those helpers
    graft a symbol only when its preferred parent differs from its
    current parent and is an allowed parent in this file."""

    for lod in self.iter_lods():
        # Tags of this LOD are processed before its branches.
        for tag in lod.cvs_tags:
            self._adjust_tag_parent(tag)

        for branch in lod.cvs_branches:
            self._adjust_branch_parents(branch)
def _get_revision_source(self, cvs_symbol):
    """Return the CVSRevision from which CVS_SYMBOL ultimately sprouts.

    Follow the chain of source_id links, starting at CVS_SYMBOL, until
    an item that is a CVSRevision is reached."""

    ancestor = self[cvs_symbol.source_id]
    while not isinstance(ancestor, CVSRevision):
        # The source is itself a symbol; keep climbing.
        ancestor = self[ancestor.source_id]
    return ancestor
def refine_symbols(self):
    """Pick the precise class for each CVSTag and CVSBranch in this file.

    For every symbol, find the revision that is its ultimate source
    and reassign the symbol's class from cvs_tag_type_map or
    cvs_branch_type_map, keyed by whether that revision is a
    CVSRevisionModification."""

    for lod in self.iter_lods():
        for tag in lod.cvs_tags:
            origin = self._get_revision_source(tag)
            is_modification = isinstance(origin, CVSRevisionModification)
            tag.__class__ = cvs_tag_type_map[is_modification]

        for branch in lod.cvs_branches:
            origin = self._get_revision_source(branch)
            is_modification = isinstance(origin, CVSRevisionModification)
            branch.__class__ = cvs_branch_type_map[is_modification]
def determine_revision_properties(self, revision_property_setters):
    """Compute the properties of every CVSRevision in this file.

    Each revision of each line of development gets a fresh, empty
    properties dictionary, which is then offered to every item of
    REVISION_PROPERTY_SETTERS, in order, via its set_properties()
    method."""

    for lod in self.iter_lods():
        for revision in lod.cvs_revisions:
            # Start from scratch; any earlier value is discarded.
            revision.properties = {}
            for setter in revision_property_setters:
                setter.set_properties(revision)
def record_opened_symbols(self):
    """Fill in opened_symbols for each CVSRevision and CVSBranch.

    For every such item, opened_symbols is reset to a new list holding
    one (symbol id, CVSSymbol id) tuple for each id returned by the
    item's get_cvs_symbol_ids_opened()."""

    for item in self.values():
        if not isinstance(item, (CVSRevision, CVSBranch)):
            continue

        item.opened_symbols = []
        for opened_id in item.get_cvs_symbol_ids_opened():
            opened = self[opened_id]
            entry = (opened.symbol.id, opened.id,)
            item.opened_symbols.append(entry)
def record_closed_symbols(self):
    """Fill in closed_symbols for each CVSRevision.

    A revision closes whatever symbols were opened by the items that
    the revision closes: closed_symbols is reset to a new list and
    then extended with the opened_symbols of every item whose id is
    returned by the revision's get_ids_closed().

    This method must be called after record_opened_symbols()."""

    for item in self.values():
        if not isinstance(item, CVSRevision):
            continue

        item.closed_symbols = []
        for closed_id in item.get_ids_closed():
            closed_item = self[closed_id]
            item.closed_symbols.extend(closed_item.opened_symbols)