Add a way to specify the MimeMapper mappings to its constructor directly.
[cvs2svn.git] / cvs2svn_lib / cvs_file_items.py
blob6b80d21260e12ab4326f020ea0d213605b7e312e
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a class to manage the CVSItems related to one file."""
20 import re
22 from cvs2svn_lib.common import InternalError
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.context import Ctx
25 from cvs2svn_lib.log import Log
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.cvs_item import CVSRevision
31 from cvs2svn_lib.cvs_item import CVSRevisionModification
32 from cvs2svn_lib.cvs_item import CVSRevisionAbsent
33 from cvs2svn_lib.cvs_item import CVSRevisionNoop
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.cvs_item import CVSBranch
36 from cvs2svn_lib.cvs_item import CVSTag
37 from cvs2svn_lib.cvs_item import cvs_revision_type_map
38 from cvs2svn_lib.cvs_item import cvs_branch_type_map
39 from cvs2svn_lib.cvs_item import cvs_tag_type_map
class VendorBranchError(Exception):
  """Signal that the revision tree of a file is structurally invalid."""
class LODItems(object):
  """The CVSItems of one file that belong to one line of development.

  Instances are plain records; the attributes are described in
  __init__()."""

  def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
    # The LineOfDevelopment described by this instance.
    self.lod = lod

    # The CVSBranch starting this LOD, if any; otherwise, None.
    self.cvs_branch = cvs_branch

    # The list of CVSRevisions on this LOD, if any.  The CVSRevisions
    # are listed in dependency order.
    self.cvs_revisions = cvs_revisions

    # A list of CVSBranches that sprout from this LOD (either from
    # cvs_branch or from one of the CVSRevisions).
    self.cvs_branches = cvs_branches

    # A list of CVSTags that sprout from this LOD (either from
    # cvs_branch or from one of the CVSRevisions).
    self.cvs_tags = cvs_tags

  def is_trivial_import(self):
    """Return True iff this LOD is a trivial import branch in this file.

    A trivial import branch is a branch that was used for a single
    import and nothing else.  Such a branch is eligible for being
    grafted onto trunk, even if it has branch blockers.

    The result is always a real bool."""

    return bool(
        len(self.cvs_revisions) == 1
        and self.cvs_revisions[0].ntdbr
        )

  def is_pure_ntdb(self):
    """Return True iff this LOD is a pure NTDB in this file.

    A pure non-trunk default branch is defined to be a branch that
    contains only NTDB revisions (and at least one of them).  Such a
    branch is eligible for being grafted onto trunk, even if it has
    branch blockers.

    Only the last revision is inspected.  The result is always a real
    bool, also when the LOD has no revisions at all."""

    # Without bool(), an LOD without revisions would return the empty
    # list itself rather than False.
    return bool(
        self.cvs_revisions
        and self.cvs_revisions[-1].ntdbr
        )

  def iter_blockers(self):
    """Generate the CVSTags and CVSBranches that block this LOD.

    A pure NTDB has no blockers, because its blockers can be grafted
    to trunk.  Any other LOD is blocked only by the symbols that
    sprout from its non-NTDB revisions.  The blocking tags are
    generated first, then the blocking branches."""

    if self.is_pure_ntdb():
      return

    non_ntdbr_revision_ids = set(
        cvs_revision.id
        for cvs_revision in self.cvs_revisions
        if not cvs_revision.ntdbr
        )

    for cvs_tag in self.cvs_tags:
      if cvs_tag.source_id in non_ntdbr_revision_ids:
        yield cvs_tag

    for cvs_branch in self.cvs_branches:
      if cvs_branch.source_id in non_ntdbr_revision_ids:
        yield cvs_branch
115 class CVSFileItems(object):
def __init__(self, cvs_file, trunk, cvs_items, original_ids=None):
  """Initialize the instance from the CVSItems of one file.

  CVS_FILE is the file whose data this instance holds.  TRUNK is the
  symbol that represents trunk in this file.  CVS_ITEMS is an
  iterable of CVSItems; it is traversed exactly once, so a one-shot
  iterator is acceptable.  ORIGINAL_IDS, if not None, is a dict
  {cvs_rev.rev : cvs_rev.id} that is stored as is; otherwise such a
  dict is built from the CVSRevisions found in CVS_ITEMS."""

  # The file whose data this instance holds.
  self.cvs_file = cvs_file

  # The symbol that represents "Trunk" in this file.
  self.trunk = trunk

  # A map from CVSItem.id to CVSItem:
  self._cvs_items = {}

  # The cvs_item_id of each root in the CVSItem forest.  (A root is
  # defined to be any CVSRevision with no prev_id.)
  self.root_ids = set()

  # self.original_ids is a dict {cvs_rev.rev : cvs_rev.id} holding
  # the IDs originally allocated to each CVS revision number.  This
  # member is stored for the convenience of RevisionManagers.
  collect_original_ids = original_ids is None
  if collect_original_ids:
    original_ids = {}
  self.original_ids = original_ids

  # Everything is collected in a single pass so that CVS_ITEMS does
  # not have to be iterable more than once.
  for cvs_item in cvs_items:
    self.add(cvs_item)
    if isinstance(cvs_item, CVSRevision):
      if cvs_item.prev_id is None:
        self.root_ids.add(cvs_item.id)
      if collect_original_ids:
        self.original_ids[cvs_item.rev] = cvs_item.id
146 def __getstate__(self):
147 return (self.cvs_file.id, self.values(), self.original_ids,)
def __setstate__(self, state):
  """Restore this instance from STATE, as produced by __getstate__()."""

  cvs_file_id, cvs_items, original_ids = state

  # Only the id of the file was stored, so look the file up again:
  cvs_file = Ctx()._cvs_path_db.get_path(cvs_file_id)
  trunk = cvs_file.project.get_trunk()

  CVSFileItems.__init__(
      self, cvs_file, trunk, cvs_items, original_ids=original_ids,
      )
def add(self, cvs_item):
  """Store CVS_ITEM under its id, replacing any item with the same id."""

  item_id = cvs_item.id
  self._cvs_items[item_id] = cvs_item
160 def __getitem__(self, id):
161 """Return the CVSItem with the specified ID."""
163 return self._cvs_items[id]
def get(self, id, default=None):
  """Return the CVSItem whose id is ID, or DEFAULT if there is none."""

  try:
    return self._cvs_items[id]
  except KeyError:
    return default
168 def __delitem__(self, id):
169 assert id not in self.root_ids
170 del self._cvs_items[id]
def values(self):
  """Return all of the CVSItems that are stored in this instance."""

  cvs_items = self._cvs_items
  return cvs_items.values()
def check_link_consistency(self):
  """Verify that the CVSItems of this file are linked consistently.

  Call check_links() on every CVSItem.  If one of them raises an
  AssertionError, log an error that names the offending item and
  re-raise the exception."""

  message = (
      'Link consistency error in %s\n'
      'This is probably a bug internal to cvs2svn. Please file a bug\n'
      'report including the following stack trace (see FAQ for more '
      'info).'
      )

  for cvs_item in self.values():
    try:
      cvs_item.check_links(self)
    except AssertionError:
      Log().error(message % (cvs_item,))
      raise
def _get_lod(self, lod, cvs_branch, start_id):
  """Collect the items of one line of development into an LODItems.

  LOD is the LineOfDevelopment in question.  CVS_BRANCH is the
  CVSBranch instance that starts the LOD, or None if there is none.
  START_ID is the id of the first CVSRevision on the LOD, or None if
  the LOD has no CVSRevisions."""

  revisions = []
  branches = []
  tags = []

  def collect_symbols(source):
    """Record the branches and tags that are rooted in SOURCE.

    SOURCE can be a CVSRevision or a CVSBranch."""

    branches.extend([self[branch_id] for branch_id in source.branch_ids[:]])
    tags.extend([self[tag_id] for tag_id in source.tag_ids])

  if cvs_branch is not None:
    # Symbols can also sprout directly from the CVSBranch:
    collect_symbols(cvs_branch)

  # Follow the chain of next_ids along the LOD:
  next_id = start_id
  while next_id is not None:
    cvs_rev = self[next_id]
    revisions.append(cvs_rev)
    collect_symbols(cvs_rev)
    next_id = cvs_rev.next_id

  return LODItems(lod, cvs_branch, revisions, branches, tags)
def get_lod_items(self, cvs_branch):
  """Return an LODItems for the branch that is started by CVS_BRANCH.

  CVS_BRANCH must be a CVSBranch instance that is contained in this
  CVSFileItems."""

  symbol = cvs_branch.symbol
  first_rev_id = cvs_branch.next_id
  return self._get_lod(symbol, cvs_branch, first_rev_id)
def iter_root_lods(self):
  """Generate an LODItems for each root LOD, without recursing.

  Raise InternalError if a root is neither a CVSRevision nor a
  CVSBranch."""

  # Iterate over a snapshot of the root ids:
  for root_id in list(self.root_ids):
    root = self[root_id]
    if isinstance(root, CVSRevision):
      # This LOD has no CVSBranch associated with it.  Either it is
      # Trunk, or it is a branch whose CVSBranch has been deleted.
      lod, cvs_branch, start_id = root.lod, None, root_id
    elif isinstance(root, CVSBranch):
      # This Branch has been severed from the rest of the tree.
      lod, cvs_branch, start_id = root.symbol, root, root.next_id
    else:
      raise InternalError('Unexpected root item: %s' % (root,))

    yield self._get_lod(lod, cvs_branch, start_id)
def _iter_tree(self, lod, cvs_branch, start_id):
  """Generate LODItems for LOD and for every LOD that sprouts from it.

  LOD is the LineOfDevelopment at which the iteration starts.
  CVS_BRANCH is the CVSBranch instance that starts LOD, or None if
  there is none.  START_ID is the id of the first CVSRevision on LOD,
  or None if LOD has no CVSRevisions.  CVS_BRANCH and START_ID cannot
  both be None.

  Two situations are handled: trunk (LOD is a Trunk instance,
  CVS_BRANCH is None and START_ID is the id of revision 1.1), and a
  branch (LOD is a Branch instance, CVS_BRANCH is a CVSBranch
  instance and START_ID is either the id of the first CVSRevision on
  the branch or None).

  The LODItems of the sub-branches are generated before the LODItems
  of the LOD that they sprout from, so the LODItems for LOD itself
  comes last."""

  lod_revisions = []
  sprouted_branches = []
  sprouted_tags = []

  def visit_symbols(source):
    """Recurse into the branches and record the tags rooted in SOURCE.

    SOURCE can be a CVSRevision or a CVSBranch."""

    # Iterate over a copy, because the consumer is allowed to alter
    # branch_ids while the generator is suspended:
    for branch_id in source.branch_ids[:]:
      sub_branch = self[branch_id]
      for sub_lod_items in self._iter_tree(
            sub_branch.symbol, sub_branch, sub_branch.next_id
            ):
        yield sub_lod_items

      # The consumer might have deleted the branch that was just
      # generated; in that case it must not be recorded.
      try:
        surviving_branch = self[branch_id]
      except KeyError:
        continue
      sprouted_branches.append(surviving_branch)

    for tag_id in source.tag_ids:
      sprouted_tags.append(self[tag_id])

  if cvs_branch is not None:
    # Symbols can also sprout directly from the CVSBranch:
    for sub_lod_items in visit_symbols(cvs_branch):
      yield sub_lod_items

  next_id = start_id
  while next_id is not None:
    cvs_rev = self[next_id]
    lod_revisions.append(cvs_rev)

    for sub_lod_items in visit_symbols(cvs_rev):
      yield sub_lod_items

    # Read next_id only after the sub-branches have been processed:
    next_id = cvs_rev.next_id

  yield LODItems(
      lod, cvs_branch, lod_revisions, sprouted_branches, sprouted_tags
      )
def iter_lods(self):
  """Generate an LODItems for every LOD in this file, depth first.

  The traversal starts at each root in turn, but the LODItems of an
  LOD is only generated after those of the LODs that sprout from it.

  The CVSFileItems instance may be modified while the traversal is
  in progress, but only in ways that do not affect the tree structure
  above (i.e., towards the trunk from) the current LOD.

  Raise InternalError if a root is neither a CVSRevision nor a
  CVSBranch."""

  # Iterate over a snapshot so that callers can change root_ids:
  for root_id in list(self.root_ids):
    root = self[root_id]
    if isinstance(root, CVSRevision):
      # This LOD has no CVSBranch associated with it.  Either it is
      # Trunk, or it is a branch whose CVSBranch has been deleted.
      lod, cvs_branch, start_id = root.lod, None, root_id
    elif isinstance(root, CVSBranch):
      # This Branch has been severed from the rest of the tree.
      lod, cvs_branch, start_id = root.symbol, root, root.next_id
    else:
      raise InternalError('Unexpected root item: %s' % (root,))

    for lod_items in self._iter_tree(lod, cvs_branch, start_id):
      yield lod_items
def iter_deltatext_ancestors(self, cvs_rev):
  """Generate the delta-dependency ancestors of CVS_REV.

  The ancestors are generated in deltatext order: first back along
  branches towards trunk (following prev_id), then outwards along
  trunk towards HEAD (following next_id).  CVS_REV itself is not
  generated."""

  current = cvs_rev
  while True:
    if not isinstance(current.lod, Trunk):
      # On a branch, step back towards trunk:
      current = self[current.prev_id]
    elif current.next_id is not None:
      # On trunk, step outwards towards HEAD:
      current = self[current.next_id]
    else:
      # HEAD has no ancestors, so we are done:
      return

    yield current
def _sever_branch(self, lod_items):
  """Cut a branch loose from its source and discard its CVSBranch.

  LOD_ITEMS describes the branch.  Its CVSBranch is deleted, the
  first revision on the branch (if any) becomes a new root, and
  LOD_ITEMS.cvs_branch is set to None.

  If LOD_ITEMS has no CVSBranch (e.g., because it describes trunk or
  because the branch has already been severed), do nothing.

  This method can only be used before symbols have been grafted onto
  CVSBranches.  NTDBR, NTDBR_PREV_ID and NTDBR_NEXT_ID are not
  adjusted, even if LOD_ITEMS describes a NTDB."""

  severed_branch = lod_items.cvs_branch
  if severed_branch is None:
    return

  assert not severed_branch.tag_ids
  assert not severed_branch.branch_ids
  source_rev = self[severed_branch.source_id]

  # Only a CVSRevision source is supported, even though after
  # FilterSymbolsPass the source of a CVSBranch might be another
  # CVSBranch.
  assert isinstance(source_rev, CVSRevision)

  # Discard the CVSBranch and the reference to it in its source:
  lod_items.cvs_branch = None
  del self[severed_branch.id]
  source_rev.branch_ids.remove(severed_branch.id)

  if not lod_items.cvs_revisions:
    # No revisions on the branch, so nothing is left to unlink.
    return

  first_rev = lod_items.cvs_revisions[0]

  # Unlink first_rev from the CVSBranch...
  first_rev.first_on_branch_id = None

  # ...and unlink source_rev and first_rev from each other:
  source_rev.branch_commit_ids.remove(first_rev.id)
  first_rev.prev_id = None

  # first_rev has lost its predecessor, so its type might have to
  # change (e.g., from Change to Add):
  type_key = (isinstance(first_rev, CVSRevisionModification), False,)
  first_rev.__class__ = cvs_revision_type_map[type_key]

  # Now first_rev is a new root:
  self.root_ids.add(first_rev.id)
def adjust_ntdbrs(self, ntdbr_cvs_revs):
  """Mark and link the given non-trunk default branch revisions.

  NTDBR_CVS_REVS is a non-empty list of the CVSRevision instances of
  this file that were found to be non-trunk default branch revisions
  (NTDBRs).

  Background: CVS treats the first revision on the default branch in
  a peculiar way.  When a file is imported (rather than added), CVS
  creates revision 1.1, then the vendor branch 1.1.1 sprouting from
  1.1, then revision 1.1.1.1, whose content is identical to that of
  1.1 (its deltatext is empty).  The log message typed by the user
  is attached to 1.1.1.1, whereas 1.1 always carries the generated
  message 'Initial revision\n'.

  Such a plain import is converted by deleting revision 1.1 (which
  holds no useful information) and turning 1.1.1.1 into an
  independent root of the file's dependency tree.  In SVN, 1.1.1.1
  is added directly to the vendor branch with its initial content
  and is then copied back to trunk in a special 'post-commit'.

  Further imports to the same vendor branch create revisions
  1.1.1.2, 1.1.1.3, etc. *without* counterparts on trunk, although
  these revisions effectively act as trunk revisions.  They are
  therefore copied back to trunk in post-commits as well.

  This method sets the ntdbr member of every revision in
  NTDBR_CVS_REVS to True.  If there is a 1.2 revision, it is made to
  depend on the last NTDBR and its type is adjusted if necessary."""

  for cvs_rev in ntdbr_cvs_revs:
    cvs_rev.ntdbr = True

  # Look for a 1.2 revision, i.e. the successor of 1.1:
  first_ntdbr = ntdbr_cvs_revs[0]
  rev_1_1 = self[first_ntdbr.prev_id]
  rev_1_2 = self.get(rev_1_1.next_id)
  if rev_1_2 is None:
    return

  # Revision 1.2 logically follows the imported revisions, not 1.1.
  # Accordingly, connect it to the last NTDBR and possibly change its
  # type.
  last_ntdbr = ntdbr_cvs_revs[-1]
  rev_1_2.ntdbr_prev_id = last_ntdbr.id
  last_ntdbr.ntdbr_next_id = rev_1_2.id
  type_key = (
      isinstance(rev_1_2, CVSRevisionModification),
      isinstance(last_ntdbr, CVSRevisionModification),
      )
  rev_1_2.__class__ = cvs_revision_type_map[type_key]
def process_live_ntdb(self, vendor_lod_items):
  """Process VENDOR_LOD_ITEMS, which describes a live default branch.

  For a live default branch, every revision on the branch is an
  NTDBR, and the file must not have a '1.2' revision.

  Return True iff this transformation really does something, i.e.
  iff there are revisions on the branch.  Raise a VendorBranchError
  if there is a '1.2' revision."""

  vendor_branch = vendor_lod_items.cvs_branch
  rev_1_1 = self[vendor_branch.source_id]
  rev_1_2_id = rev_1_1.next_id
  if rev_1_2_id is not None:
    raise VendorBranchError(
        'File \'%s\' has default branch=%s but also a revision %s'
        % (self.cvs_file.filename,
           vendor_branch.branch_number, self[rev_1_2_id].rev,)
        )

  ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
  if not ntdbr_cvs_revs:
    return False

  self.adjust_ntdbrs(ntdbr_cvs_revs)
  return True
def process_historical_ntdb(self, vendor_lod_items):
  """Process a branch that seems to have been the default branch once.

  The file has no default branch now, but it appears to have been
  imported.  The educated guess is therefore that all revisions on
  the '1.1.1' branch (described by VENDOR_LOD_ITEMS) whose timestamps
  precede the timestamp of '1.2' were non-trunk default branch
  revisions.  If there is no '1.2' revision, all revisions on the
  branch are taken to be NTDBRs.

  Return True iff this transformation really does something.

  Only standard '1.1.1.*'-style vendor revisions are really handled.
  A file could conceivably have (or have had) a default branch such
  as 1.1.3, with vendor revisions 1.1.3.1, 1.1.3.2, etc.  But once
  the default branch is gone, there is no basis for assuming that
  such a non-standard vendor branch was ever the default branch.

  The comparison relies on the timestamps of the vendor branch
  revisions and of revision 1.2, which might be wrong because of
  clock skew.  Changeset timestamps could give slightly better
  results, as the dependencies that went into determining them might
  be more accurate, but using them would take an extra pass or two."""

  rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
  rev_1_2_id = rev_1_1.next_id

  # The timestamp at which the branch stopped being the default
  # branch, or None if it never did:
  cutoff = None if rev_1_2_id is None else self[rev_1_2_id].timestamp

  ntdbr_cvs_revs = []
  for cvs_rev in vendor_lod_items.cvs_revisions:
    at_or_after_1_2 = cutoff is not None and cvs_rev.timestamp >= cutoff
    if at_or_after_1_2:
      # That's the end of the once-default branch.
      break
    ntdbr_cvs_revs.append(cvs_rev)

  if not ntdbr_cvs_revs:
    return False

  self.adjust_ntdbrs(ntdbr_cvs_revs)
  return True
def imported_remove_1_1(self, vendor_lod_items):
  """Remove revision 1.1 of this imported file if that is possible.

  VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch; it
  must contain at least one revision.  Revision 1.1 is only removed
  if the first vendor branch revision is a modification without
  deltatext.  In that case the vendor branch is severed from trunk
  and everything that sprouted from 1.1 is moved to the first vendor
  branch revision.  See adjust_ntdbrs() for more information."""

  assert vendor_lod_items.cvs_revisions
  vendor_rev = vendor_lod_items.cvs_revisions[0]

  removable = (
      isinstance(vendor_rev, CVSRevisionModification)
      and not vendor_rev.deltatext_exists
      )
  if not removable:
    return

  rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
  assert isinstance(rev_1_1, CVSRevision)
  Log().debug('Removing unnecessary revision %s' % (rev_1_1,))

  # Delete the 1.1.1 CVSBranch and sever the vendor branch from trunk:
  self._sever_branch(vendor_lod_items)

  # Delete rev_1_1; its successor (if any) becomes a root instead:
  self.root_ids.remove(rev_1_1.id)
  del self[rev_1_1.id]
  rev_1_2_id = rev_1_1.next_id
  if rev_1_2_id is not None:
    rev_1_2 = self[rev_1_2_id]
    rev_1_2.prev_id = None
    self.root_ids.add(rev_1_2.id)

  def reparent(symbol_ids):
    """Make vendor_rev the source of the CVSSymbols in SYMBOL_IDS."""

    for symbol_id in symbol_ids:
      cvs_symbol = self[symbol_id]
      cvs_symbol.source_lod = vendor_rev.lod
      cvs_symbol.source_id = vendor_rev.id

  # Move any tags and branches from rev_1_1 to vendor_rev.  The tags
  # are appended, whereas the branches are inserted at the front:
  vendor_rev.tag_ids.extend(rev_1_1.tag_ids)
  reparent(rev_1_1.tag_ids)
  vendor_rev.branch_ids[0:0] = rev_1_1.branch_ids
  reparent(rev_1_1.branch_ids)

  # The first commits on those branches now follow vendor_rev:
  vendor_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
  for commit_id in rev_1_1.branch_commit_ids:
    self[commit_id].prev_id = vendor_rev.id
def _is_unneeded_initial_trunk_delete(self, cvs_item, metadata_db):
  """Return True iff CVS_ITEM is an unneeded initial delete on trunk.

  That is the case if CVS_ITEM is a dead revision 1.1 on trunk that
  closes no symbols, is not an NTDBR, and has a log message (looked
  up in METADATA_DB) stating that the file was initially added on a
  branch."""

  # This rule can only be applied to dead revisions, and only to an
  # unencumbered revision 1.1 on trunk:
  is_candidate = (
      isinstance(cvs_item, CVSRevisionNoop)
      and cvs_item.rev == '1.1'
      and isinstance(cvs_item.lod, Trunk)
      and not cvs_item.closed_symbols
      and not cvs_item.ntdbr
      )
  if not is_candidate:
    return False

  log_msg = metadata_db[cvs_item.metadata_id].log_msg
  pattern = r'file .* was initially added on branch .*\.\n$'
  return re.match(pattern, log_msg) is not None
def remove_unneeded_initial_trunk_delete(self, metadata_db):
  """Remove the unneeded initial trunk delete of this file, if any.

  If a file is added on a branch, then a trunk revision is added at
  the same time in the 'Dead' state.  This revision doesn't do
  anything useful, so delete it.  METADATA_DB is used to look up the
  log message of the candidate revisions.

  At most one revision is removed, because this can only happen once
  per file."""

  # Iterate over a snapshot, because root_ids is modified below (and
  # by _sever_branch()) when an unneeded delete is found.
  for root_id in list(self.root_ids):
    cvs_item = self[root_id]
    if not self._is_unneeded_initial_trunk_delete(cvs_item, metadata_db):
      continue

    Log().debug('Removing unnecessary delete %s' % (cvs_item,))

    # Sever any CVSBranches rooted at cvs_item.  Iterate over a copy,
    # because _sever_branch() removes entries from branch_ids.
    for cvs_branch_id in cvs_item.branch_ids[:]:
      cvs_branch = self[cvs_branch_id]
      self._sever_branch(self.get_lod_items(cvs_branch))

    # Tagging a dead revision doesn't do anything, so remove any
    # CVSTags that refer to cvs_item:
    while cvs_item.tag_ids:
      del self[cvs_item.tag_ids.pop()]

    # Now delete cvs_item itself; its successor (if any) becomes a
    # root instead:
    self.root_ids.remove(cvs_item.id)
    del self[cvs_item.id]
    if cvs_item.next_id is not None:
      cvs_rev_next = self[cvs_item.next_id]
      cvs_rev_next.prev_id = None
      self.root_ids.add(cvs_rev_next.id)

    # This can only happen once per file, so we're done:
    return
652 def _is_unneeded_initial_branch_delete(self, lod_items, metadata_db):
653 """Return True iff the initial revision in LOD_ITEMS can be deleted."""
655 if not lod_items.cvs_revisions:
656 return False
658 cvs_revision = lod_items.cvs_revisions[0]
660 if cvs_revision.ntdbr:
661 return False
663 if not isinstance(cvs_revision, CVSRevisionAbsent):
664 return False
666 if cvs_revision.branch_ids:
667 return False
669 log_msg = metadata_db[cvs_revision.metadata_id].log_msg
670 return bool(re.match(
671 r'file .* was added on branch .* on '
672 r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
673 '\n$',
674 log_msg,
def remove_initial_branch_deletes(self, metadata_db):
  """Remove first branch revisions that are unnecessary deletes.

  If a file is added on a branch (whether or not it already existed
  on trunk), then new versions of CVS add a first branch revision in
  the 'dead' state (to indicate that the file did not exist on the
  branch when the branch was created), followed by a second branch
  revision, which is an add.  For each branch where this is
  detected, the branch is severed from its source and the first
  branch revision is deleted.  METADATA_DB is used to look up log
  messages."""

  for lod_items in self.iter_lods():
    if not self._is_unneeded_initial_branch_delete(lod_items, metadata_db):
      continue

    dead_rev = lod_items.cvs_revisions[0]
    Log().debug(
        'Removing unnecessary initial branch delete %s' % (dead_rev,)
        )

    # Sever the branch from its source if necessary:
    self._sever_branch(lod_items)

    # Delete the first revision on the branch:
    self.root_ids.remove(dead_rev.id)
    del self[dead_rev.id]

    # If it had a successor, that successor loses its backreference
    # and becomes a root:
    successor_id = dead_rev.next_id
    if successor_id is not None:
      successor = self[successor_id]
      successor.prev_id = None
      self.root_ids.add(successor.id)

    # Tagging a dead revision doesn't do anything, so remove any
    # tags that were set on it:
    for tag_id in dead_rev.tag_ids:
      del self[tag_id]
714 def _exclude_tag(self, cvs_tag):
715 """Exclude the specified CVS_TAG."""
717 del self[cvs_tag.id]
719 # A CVSTag is the successor of the CVSRevision that it
720 # sprouts from. Delete this tag from that revision's
721 # tag_ids:
722 self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id)
724 def _exclude_branch(self, lod_items):
725 """Exclude the branch described by LOD_ITEMS, including its revisions.
727 (Do not update the LOD_ITEMS instance itself.)
729 If the LOD starts with non-trunk default branch revisions, leave
730 the branch and the NTDB revisions in place, but delete any
731 subsequent revisions that are not NTDB revisions. In this case,
732 return True; otherwise return False"""
734 if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
735 for cvs_rev in lod_items.cvs_revisions:
736 if not cvs_rev.ntdbr:
737 # We've found the first non-NTDBR, and it's stored in cvs_rev:
738 break
739 else:
740 # There was no revision following the NTDBRs:
741 cvs_rev = None
743 if cvs_rev:
744 last_ntdbr = self[cvs_rev.prev_id]
745 last_ntdbr.next_id = None
746 while True:
747 del self[cvs_rev.id]
748 if cvs_rev.next_id is None:
749 break
750 cvs_rev = self[cvs_rev.next_id]
752 return True
754 else:
755 if lod_items.cvs_branch is not None:
756 # Delete the CVSBranch itself:
757 cvs_branch = lod_items.cvs_branch
759 del self[cvs_branch.id]
761 # A CVSBranch is the successor of the CVSRevision that it
762 # sprouts from. Delete this branch from that revision's
763 # branch_ids:
764 self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)
766 if lod_items.cvs_revisions:
767 # The first CVSRevision on the branch has to be either detached
768 # from the revision from which the branch sprang, or removed
769 # from self.root_ids:
770 cvs_rev = lod_items.cvs_revisions[0]
771 if cvs_rev.prev_id is None:
772 self.root_ids.remove(cvs_rev.id)
773 else:
774 self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id)
776 for cvs_rev in lod_items.cvs_revisions:
777 del self[cvs_rev.id]
779 return False
def graft_ntdbr_to_trunk(self):
  """Graft the non-trunk default branch revisions onto trunk.

  The NTDBRs should already be alone on a branch, which may or may
  not have a CVSBranch connecting it to trunk.  Only the first LOD
  that starts with an NTDBR is processed."""

  for lod_items in self.iter_lods():
    ntdbrs = lod_items.cvs_revisions
    if not (ntdbrs and ntdbrs[0].ntdbr):
      continue

    assert lod_items.is_pure_ntdb()

    first_rev = ntdbrs[0]
    last_rev = ntdbrs[-1]
    rev_1_1 = self.get(first_rev.prev_id)
    rev_1_2 = self.get(last_rev.ntdbr_next_id)

    self._sever_branch(lod_items)

    if rev_1_1 is not None:
      # Splice the NTDBRs in directly after revision 1.1:
      rev_1_1.next_id = first_rev.id
      first_rev.prev_id = rev_1_1.id

      # first_rev has a predecessor again, so it is no longer a root:
      self.root_ids.remove(first_rev.id)

      type_key = (
          isinstance(first_rev, CVSRevisionModification),
          isinstance(rev_1_1, CVSRevisionModification),
          )
      first_rev.__class__ = cvs_revision_type_map[type_key]

    if rev_1_2 is not None:
      # Replace the NTDBR link between the last NTDBR and revision
      # 1.2 by a normal link:
      rev_1_2.ntdbr_prev_id = None
      last_rev.ntdbr_next_id = None

      if rev_1_2.prev_id is None:
        self.root_ids.remove(rev_1_2.id)

      rev_1_2.prev_id = last_rev.id
      last_rev.next_id = rev_1_2.id

      # The effective_pred_id of rev_1_2 was not changed, so we
      # don't have to change rev_1_2's type.

    # The grafted revisions are now ordinary trunk revisions, and
    # everything that sprouts from them sprouts from trunk:
    for cvs_rev in ntdbrs:
      cvs_rev.ntdbr = False
      cvs_rev.lod = self.trunk

    for cvs_branch in lod_items.cvs_branches:
      cvs_branch.source_lod = self.trunk

    for cvs_tag in lod_items.cvs_tags:
      cvs_tag.source_lod = self.trunk

    return
def exclude_non_trunk(self):
  """Delete all tags and branches.

  If the exclusion of a branch left NTDB revisions in place, they are
  grafted onto trunk afterwards."""

  ntdbr_excluded = False
  for lod_items in self.iter_lods():
    # Iterate over a copy, because the list is shortened on the way:
    for cvs_tag in list(lod_items.cvs_tags):
      self._exclude_tag(cvs_tag)
      lod_items.cvs_tags.remove(cvs_tag)

    if isinstance(lod_items.lod, Trunk):
      continue

    # iter_lods() is depth first, so the branches that sprouted from
    # this LOD are expected to be gone already:
    assert not lod_items.cvs_branches

    if self._exclude_branch(lod_items):
      ntdbr_excluded = True

  if ntdbr_excluded:
    self.graft_ntdbr_to_trunk()
def filter_excluded_symbols(self):
  """Delete any excluded symbols and the references to them.

  If the exclusion of a branch left NTDB revisions in place, they are
  grafted onto trunk afterwards."""

  ntdbr_excluded = False
  for lod_items in self.iter_lods():
    # Delete any excluded tags.  Iterate over a copy, because the list
    # is shortened on the way:
    for cvs_tag in list(lod_items.cvs_tags):
      if not isinstance(cvs_tag.symbol, ExcludedSymbol):
        continue
      self._exclude_tag(cvs_tag)
      lod_items.cvs_tags.remove(cvs_tag)

    if not isinstance(lod_items.lod, ExcludedSymbol):
      continue

    # The whole branch is to be excluded.  A symbol can only be
    # excluded if no other symbols spring from it.  This was already
    # checked in CollateSymbolsPass, so these conditions should
    # already be satisfied.
    assert not list(lod_items.iter_blockers())

    if self._exclude_branch(lod_items):
      ntdbr_excluded = True

  if ntdbr_excluded:
    self.graft_ntdbr_to_trunk()
def _mutate_branch_to_tag(self, cvs_branch):
  """Mutate the branch CVS_BRANCH into a tag.

  A CVSTag with the same id, file, symbol, source and revision reader
  token replaces CVS_BRANCH, and the id is moved from the branch_ids
  to the tag_ids of the source.  Raise FatalError if there are
  commits on CVS_BRANCH."""

  if cvs_branch.next_id is not None:
    # This shouldn't happen because it was checked in
    # CollateSymbolsPass:
    raise FatalError(
        'Attempt to convert branch %s, which has commits, into a tag.'
        % (cvs_branch,)
        )

  cvs_tag = CVSTag(
      cvs_branch.id, cvs_branch.cvs_file, cvs_branch.symbol,
      cvs_branch.source_lod, cvs_branch.source_id,
      cvs_branch.revision_reader_token,
      )

  # The CVSTag has the same id, so it replaces the CVSBranch:
  self.add(cvs_tag)

  cvs_revision = self[cvs_tag.source_id]
  cvs_revision.branch_ids.remove(cvs_tag.id)
  cvs_revision.tag_ids.append(cvs_tag.id)
def _mutate_tag_to_branch(self, cvs_tag):
  """Mutate the tag CVS_TAG into a branch.

  A CVSBranch with the same id, file, symbol, source and revision
  reader token replaces CVS_TAG, and the id is moved from the tag_ids
  to the branch_ids of the source."""

  # NOTE(review): the two None arguments are presumably the branch
  # number and the id of the first revision on the branch -- confirm
  # against CVSBranch.__init__().
  new_branch = CVSBranch(
      cvs_tag.id, cvs_tag.cvs_file, cvs_tag.symbol,
      None, cvs_tag.source_lod, cvs_tag.source_id, None,
      cvs_tag.revision_reader_token,
      )

  # The CVSBranch has the same id, so it replaces the CVSTag:
  self.add(new_branch)

  source = self[new_branch.source_id]
  source.tag_ids.remove(new_branch.id)
  source.branch_ids.append(new_branch.id)
def _mutate_symbol(self, cvs_symbol):
  """Mutate CVS_SYMBOL if its kind does not match that of its symbol.

  A CVSBranch whose symbol is a Tag is mutated into a tag, and a
  CVSTag whose symbol is a Branch is mutated into a branch.  Any
  other CVS_SYMBOL is left alone."""

  wanted = cvs_symbol.symbol
  if isinstance(cvs_symbol, CVSBranch) and isinstance(wanted, Tag):
    mutate = self._mutate_branch_to_tag
  elif isinstance(cvs_symbol, CVSTag) and isinstance(wanted, Branch):
    mutate = self._mutate_tag_to_branch
  else:
    return

  mutate(cvs_symbol)
def mutate_symbols(self):
  """Force each CVSSymbol to be a tag or branch, as its symbol demands.

  Raise RuntimeError if an item is neither a CVSRevision nor a
  CVSSymbol."""

  for cvs_item in self.values():
    if isinstance(cvs_item, CVSRevision):
      # This CVSRevision may be affected by the mutation of any
      # CVSSymbols that it references, but there is nothing to do
      # here directly.
      continue

    if not isinstance(cvs_item, CVSSymbol):
      raise RuntimeError('Unknown cvs item type')

    self._mutate_symbol(cvs_item)
def _adjust_tag_parent(self, cvs_tag):
  """Graft CVS_TAG onto its preferred parent if possible and needed.

  CVS_TAG is an instance of CVSTag.  Nothing is changed if CVS_TAG
  already sprouts from its preferred parent, if the preferred parent
  is Trunk, or if no branch with the preferred symbol sprouts from
  the source revision of CVS_TAG.  This method must be called in
  leaf-to-trunk order."""

  # The Symbol that cvs_tag would like to have as a parent:
  preferred_parent = Ctx()._symbol_db.get_symbol(
      cvs_tag.symbol.preferred_parent_id)

  if cvs_tag.source_lod == preferred_parent:
    # The preferred parent is already the parent.
    return

  # The CVSRevision that is its direct parent:
  source = self[cvs_tag.source_id]
  assert isinstance(source, CVSRevision)

  if isinstance(preferred_parent, Trunk):
    # It is not possible to graft *onto* Trunk:
    return

  # Try to find the preferred parent among the possible parents,
  # which are the branches that sprout from the same revision:
  parent = None
  for branch_id in source.branch_ids:
    candidate = self[branch_id]
    if candidate.symbol == preferred_parent:
      # We found it!
      parent = candidate
      break

  if parent is None:
    # The preferred parent is not a possible parent in this file.
    return

  assert isinstance(parent, CVSBranch)

  Log().debug('Grafting %s from %s (on %s) onto %s' % (
      cvs_tag, source, source.lod, parent,))

  # Switch parent:
  source.tag_ids.remove(cvs_tag.id)
  parent.tag_ids.append(cvs_tag.id)
  cvs_tag.source_lod = parent.symbol
  cvs_tag.source_id = parent.id
def _adjust_branch_parents(self, cvs_branch):
    """Graft CVS_BRANCH onto its preferred parent where this file allows it.

    CVS_BRANCH is an instance of CVSBranch.  Its preferred parent is
    the Symbol looked up from cvs_branch.symbol.preferred_parent_id.
    Nothing is changed if that symbol is already the branch's source
    LOD, if it is Trunk, or if CVS_BRANCH's own symbol is met in the
    source revision's branch_ids before the preferred parent is.
    Otherwise the branch is moved from the source revision's
    branch_ids to the matching CVSBranch's branch_ids, and its
    source_lod and source_id are pointed at that branch.

    Raise InternalError if the search runs through all of the source
    revision's branch_ids without meeting either symbol.

    This method must be called in leaf-to-trunk order."""

    # The Symbol that cvs_branch would rather hang off:
    wanted = Ctx()._symbol_db.get_symbol(
        cvs_branch.symbol.preferred_parent_id
    )

    if cvs_branch.source_lod == wanted:
        # Already attached to the preferred parent.
        return

    # The branch's current direct parent.  It is always a CVSRevision
    # because the branch has not been adjusted yet:
    source = self[cvs_branch.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(wanted, Trunk):
        # Nothing can be grafted *onto* Trunk.
        return

    # Walk the branches sprouting from the source revision, in order,
    # looking for the preferred parent:
    parent = None
    for branch_id in source.branch_ids:
        candidate = self[branch_id]
        if candidate.symbol == wanted:
            parent = candidate
            break
        if candidate.symbol == cvs_branch.symbol:
            # Only branches that precede the branch being adjusted are
            # considered possible parents, so leave parentage alone.
            return

    if parent is None:
        # The walk should always end at one of the two cases above.
        raise InternalError(
            'Possible parent search did not terminate as expected')

    assert isinstance(parent, CVSBranch)

    Log().debug(
        'Grafting %s from %s (on %s) onto %s'
        % (cvs_branch, source, source.lod, parent)
    )

    # Re-home the branch under its new parent:
    source.branch_ids.remove(cvs_branch.id)
    parent.branch_ids.append(cvs_branch.id)
    cvs_branch.source_lod = parent.symbol
    cvs_branch.source_id = parent.id
def adjust_parents(self):
    """Move symbols onto their preferred parents wherever possible.

    For each line of development yielded by iter_lods(), every tag is
    passed to _adjust_tag_parent() and then every branch is passed to
    _adjust_branch_parents().  Those helpers graft a CVSSymbol onto
    its preferred parent when that parent differs from the current
    one and is an allowed parent of the CVSSymbol in this file."""

    for lod in self.iter_lods():
        # Tags of this LOD first ...
        for tag in lod.cvs_tags:
            self._adjust_tag_parent(tag)

        # ... then the branches of the same LOD.
        for branch in lod.cvs_branches:
            self._adjust_branch_parents(branch)
def _get_revision_source(self, cvs_symbol):
    """Return the CVSRevision from which CVS_SYMBOL ultimately sprouts.

    Follow the chain of source_id links, starting at CVS_SYMBOL, until
    it reaches an item that is a CVSRevision, and return that item."""

    item = self[cvs_symbol.source_id]
    while not isinstance(item, CVSRevision):
        # The source is itself a symbol; step to that symbol's source.
        item = self[item.source_id]
    return item
def refine_symbols(self):
    """Refine the types of the CVSSymbols in this file.

    Adjust the symbol types based on whether the source exists:
    CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop.

    For each tag and branch of every line of development yielded by
    iter_lods(), find the symbol's ultimate source revision and
    reassign the symbol's __class__ to the entry of cvs_tag_type_map
    (or cvs_branch_type_map) keyed by whether that revision is a
    CVSRevisionModification."""

    for lod_items in self.iter_lods():
        for cvs_tag in lod_items.cvs_tags:
            source = self._get_revision_source(cvs_tag)
            # The map is keyed by a boolean: is the source a modification?
            cvs_tag.__class__ = cvs_tag_type_map[
                isinstance(source, CVSRevisionModification)
            ]

        for cvs_branch in lod_items.cvs_branches:
            source = self._get_revision_source(cvs_branch)
            cvs_branch.__class__ = cvs_branch_type_map[
                isinstance(source, CVSRevisionModification)
            ]
def record_opened_symbols(self):
    """Set opened_symbols on each CVSRevision and CVSBranch in this file.

    For every such item, opened_symbols is reset to an empty list and
    then filled with one (symbol_id, cvs_symbol_id) tuple for each id
    returned by the item's get_cvs_symbol_ids_opened().  Items of any
    other type are left alone."""

    for cvs_item in self.values():
        if not isinstance(cvs_item, (CVSRevision, CVSBranch)):
            continue

        cvs_item.opened_symbols = []
        for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened():
            cvs_symbol_opened = self[cvs_symbol_opened_id]
            cvs_item.opened_symbols.append(
                (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id)
            )
def record_closed_symbols(self):
    """Set closed_symbols on each CVSRevision in this file.

    A CVSRevision closes the symbols that were opened by the CVSItems
    that it closes: closed_symbols is reset to an empty list and then
    extended with the opened_symbols of every item whose id is
    returned by the revision's get_ids_closed().

    This method must be called after record_opened_symbols()."""

    for cvs_item in self.values():
        if not isinstance(cvs_item, CVSRevision):
            continue

        cvs_item.closed_symbols = []
        for closed_id in cvs_item.get_ids_closed():
            cvs_item.closed_symbols.extend(self[closed_id].opened_symbols)