Allow --encoding and --fallback-encoding to be used with --options.
[cvs2svn.git] / cvs2svn_lib / cvs_file_items.py
blob00b11e258ccc29cb80e34eab0aee138f39c7c3c1
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2007 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a class to manage the CVSItems related to one file."""
20 import re
22 from cvs2svn_lib.common import InternalError
23 from cvs2svn_lib.common import FatalError
24 from cvs2svn_lib.context import Ctx
25 from cvs2svn_lib.log import Log
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.cvs_item import CVSRevision
31 from cvs2svn_lib.cvs_item import CVSRevisionModification
32 from cvs2svn_lib.cvs_item import CVSRevisionAbsent
33 from cvs2svn_lib.cvs_item import CVSRevisionNoop
34 from cvs2svn_lib.cvs_item import CVSSymbol
35 from cvs2svn_lib.cvs_item import CVSBranch
36 from cvs2svn_lib.cvs_item import CVSTag
37 from cvs2svn_lib.cvs_item import cvs_revision_type_map
38 from cvs2svn_lib.cvs_item import cvs_branch_type_map
39 from cvs2svn_lib.cvs_item import cvs_tag_type_map
class VendorBranchError(Exception):
    """Signal an error in the structure of a file's revision tree.

    Within this module it is raised by
    CVSFileItems.process_live_ntdb() when a file has a default branch
    but its 1.1 revision also has a successor on trunk."""
class LODItems(object):
    """The CVSItems of one file that lie on one line of development.

    This is a plain record; see __init__() for the meaning of each
    attribute."""

    def __init__(self, lod, cvs_branch, cvs_revisions, cvs_branches, cvs_tags):
        """Record the items that make up the line of development LOD.

        The arguments are stored as-is (no copies are made) in
        attributes of the same names."""

        # The LineOfDevelopment described by this instance.
        self.lod = lod

        # The CVSBranch starting this LOD, if any; otherwise, None.
        self.cvs_branch = cvs_branch

        # The list of CVSRevisions on this LOD, if any.  The
        # CVSRevisions are listed in dependency order.
        self.cvs_revisions = cvs_revisions

        # A list of CVSBranches that sprout from this LOD (either from
        # cvs_branch or from one of the CVSRevisions).
        self.cvs_branches = cvs_branches

        # A list of CVSTags that sprout from this LOD (either from
        # cvs_branch or from one of the CVSRevisions).
        self.cvs_tags = cvs_tags

    def is_trivial_import(self):
        """Return True iff this LOD is a trivial import branch in this file.

        A trivial import branch is a branch that was used for a single
        import and nothing else.  Such a branch is eligible for being
        grafted onto trunk, even if it has branch blockers.

        The result is always a real boolean."""

        return bool(
            len(self.cvs_revisions) == 1
            and self.cvs_revisions[0].ntdbr
            )

    def is_pure_ntdb(self):
        """Return True iff this LOD is a pure NTDB in this file.

        A pure non-trunk default branch is defined to be a branch that
        contains only NTDB revisions (and at least one of them).  Only
        the last revision is inspected.  Such a branch is eligible for
        being grafted onto trunk, even if it has branch blockers.

        The result is always a real boolean; in particular an LOD
        without revisions yields False rather than the empty list."""

        return bool(
            self.cvs_revisions
            and self.cvs_revisions[-1].ntdbr
            )

    def iter_blockers(self):
        """Generate the CVSTags and CVSBranches that block this LOD.

        A pure NTDB has no blockers, because its blockers can be
        grafted to trunk.  Any other LOD is blocked only by the symbols
        that sprout from its non-NTDB revisions.  Tags are generated
        before branches."""

        if self.is_pure_ntdb():
            return

        # The ids of the revisions from which a blocker can sprout:
        non_ntdbr_revision_ids = set(
            cvs_revision.id
            for cvs_revision in self.cvs_revisions
            if not cvs_revision.ntdbr
            )

        for cvs_tag in self.cvs_tags:
            if cvs_tag.source_id in non_ntdbr_revision_ids:
                yield cvs_tag

        for cvs_branch in self.cvs_branches:
            if cvs_branch.source_id in non_ntdbr_revision_ids:
                yield cvs_branch
115 class CVSFileItems(object):
116 def __init__(self, cvs_file, trunk, cvs_items):
117 # The file whose data this instance holds.
118 self.cvs_file = cvs_file
120 # The symbol that represents "Trunk" in this file.
121 self.trunk = trunk
123 # A map from CVSItem.id to CVSItem:
124 self._cvs_items = {}
126 # The cvs_item_id of each root in the CVSItem forest. (A root is
127 # defined to be any CVSRevision with no prev_id.)
128 self.root_ids = set()
130 for cvs_item in cvs_items:
131 self.add(cvs_item)
132 if isinstance(cvs_item, CVSRevision) and cvs_item.prev_id is None:
133 self.root_ids.add(cvs_item.id)
135 def __getstate__(self):
136 return (self.cvs_file.id, self.values(),)
def __setstate__(self, state):
    """Restore the instance from STATE as produced by __getstate__().

    The CVSFile is looked up by id in Ctx()._cvs_file_db, the trunk is
    taken from the file's project, and the instance is then
    re-initialized via __init__()."""

    cvs_file_id, cvs_items = state
    cvs_file = Ctx()._cvs_file_db.get_file(cvs_file_id)
    trunk = cvs_file.project.get_trunk()
    CVSFileItems.__init__(self, cvs_file, trunk, cvs_items)
145 def add(self, cvs_item):
146 self._cvs_items[cvs_item.id] = cvs_item
148 def __getitem__(self, id):
149 """Return the CVSItem with the specified ID."""
151 return self._cvs_items[id]
153 def get(self, id, default=None):
154 return self._cvs_items.get(id, default)
156 def __delitem__(self, id):
157 assert id not in self.root_ids
158 del self._cvs_items[id]
160 def values(self):
161 return self._cvs_items.values()
163 def check_link_consistency(self):
164 """Check that the CVSItems are linked correctly with each other."""
166 for cvs_item in self.values():
167 try:
168 cvs_item.check_links(self)
169 except AssertionError:
170 Log().error(
171 'Link consistency error in %s\n'
172 'This is probably a bug internal to cvs2svn. Please file a bug\n'
173 'report including the following stack trace (see FAQ for more '
174 'info).'
175 % (cvs_item,))
176 raise
def _get_lod(self, lod, cvs_branch, start_id):
    """Return the LODItems for one line of development.

    LOD is the corresponding LineOfDevelopment.  CVS_BRANCH is the
    CVSBranch instance that starts the LOD if any; otherwise it is
    None.  START_ID is the id of the first CVSRevision on this LOD,
    or None if there are none."""

    revisions = []
    branches = []
    tags = []

    def collect_symbols(parent):
        """Record the branches and tags that sprout from PARENT.

        PARENT can be a CVSRevision or a CVSBranch."""

        branches.extend([self[branch_id] for branch_id in parent.branch_ids])
        tags.extend([self[tag_id] for tag_id in parent.tag_ids])

    if cvs_branch is not None:
        # Include the symbols sprouting directly from the CVSBranch:
        collect_symbols(cvs_branch)

    # Follow the next_id chain along the LOD:
    next_id = start_id
    while next_id is not None:
        revision = self[next_id]
        revisions.append(revision)
        collect_symbols(revision)
        next_id = revision.next_id

    return LODItems(lod, cvs_branch, revisions, branches, tags)
214 def get_lod_items(self, cvs_branch):
215 """Return an LODItems describing the branch that starts at CVS_BRANCH.
217 CVS_BRANCH must be an instance of CVSBranch contained in this
218 CVSFileItems."""
220 return self._get_lod(cvs_branch.symbol, cvs_branch, cvs_branch.next_id)
def iter_root_lods(self):
    """Generate the LODItems of every root LOD (non-recursively).

    Raise InternalError if a root is neither a CVSRevision nor a
    CVSBranch."""

    # Iterate over a snapshot of root_ids:
    for root_id in list(self.root_ids):
        root = self[root_id]
        if isinstance(root, CVSRevision):
            # This LOD doesn't have a CVSBranch associated with it.
            # Either it is Trunk, or it is a branch whose CVSBranch
            # has been deleted.
            lod, cvs_branch, start_id = root.lod, None, root_id
        elif isinstance(root, CVSBranch):
            # This is a Branch that has been severed from the rest of
            # the tree.
            lod, cvs_branch, start_id = root.symbol, root, root.next_id
        else:
            raise InternalError('Unexpected root item: %s' % (root,))

        yield self._get_lod(lod, cvs_branch, start_id)
def _iter_tree(self, lod, cvs_branch, start_id):
    """Generate the LODItems of the tree that starts at one LOD.

    LOD is the LineOfDevelopment where the iteration should start.
    CVS_BRANCH is the CVSBranch instance that starts the LOD if any;
    otherwise it is None.  START_ID is the id of the first
    CVSRevision on this LOD, or None if there are none.

    Two cases are handled: trunk (LOD is a Trunk instance, CVS_BRANCH
    is None, and START_ID is the id of the 1.1 revision) and a branch
    (LOD is a Branch instance, CVS_BRANCH is a CVSBranch instance, and
    START_ID is either the id of the first CVSRevision on the branch
    or None).  CVS_BRANCH and START_ID cannot both be None.

    The LODItems of the sub-branches are generated before the
    LODItems of the LOD from which they sprout."""

    revisions = []
    branches = []
    tags = []

    def walk_symbols(parent):
        """Visit the branches and tags that are rooted in PARENT.

        PARENT can be a CVSRevision or a CVSBranch.  Generate the
        LODItems of every tree rooted in one of its branches."""

        # Iterate over a copy, as the caller may alter branch_ids
        # while handling the LODItems that are yielded:
        for child_id in list(parent.branch_ids):
            child = self[child_id]
            for child_lod_items in self._iter_tree(
                    child.symbol, child, child.next_id
                    ):
                yield child_lod_items

            # The caller might have deleted the branch that was just
            # yielded.  If so, leave it out of the list of branches.
            try:
                surviving = self[child_id]
            except KeyError:
                continue
            branches.append(surviving)

        for tag_id in parent.tag_ids:
            tags.append(self[tag_id])

    if cvs_branch is not None:
        # Include the symbols sprouting directly from the CVSBranch:
        for lod_items in walk_symbols(cvs_branch):
            yield lod_items

    next_id = start_id
    while next_id is not None:
        revision = self[next_id]
        revisions.append(revision)

        for lod_items in walk_symbols(revision):
            yield lod_items

        next_id = revision.next_id

    yield LODItems(lod, cvs_branch, revisions, branches, tags)
def iter_lods(self):
    """Generate an LODItems for every LOD in this file, depth-first.

    The traversal starts at each root node but returns the LODs in
    depth-first order.

    The CVSFileItems instance may be modified while the traversal is
    in progress, but only in ways that don't affect the tree
    structure above (i.e., towards the trunk from) the current LOD.

    Raise InternalError if a root is neither a CVSRevision nor a
    CVSBranch."""

    # Iterate over a snapshot of root_ids so that callers can change
    # it:
    for root_id in list(self.root_ids):
        root = self[root_id]
        if isinstance(root, CVSRevision):
            # This LOD doesn't have a CVSBranch associated with it.
            # Either it is Trunk, or it is a branch whose CVSBranch
            # has been deleted.
            start = (root.lod, None, root_id)
        elif isinstance(root, CVSBranch):
            # This is a Branch that has been severed from the rest of
            # the tree.
            start = (root.symbol, root, root.next_id)
        else:
            raise InternalError('Unexpected root item: %s' % (root,))

        for lod_items in self._iter_tree(*start):
            yield lod_items
def iter_deltatext_ancestors(self, cvs_rev):
    """Generate the delta-dependency ancestors of CVS_REV.

    The ancestors are generated in deltatext order; i.e., back along
    branches (via prev_id) towards trunk, then outwards along trunk
    (via next_id) towards HEAD."""

    current = cvs_rev
    while True:
        # Determine the next candidate source revision:
        if not isinstance(current.lod, Trunk):
            current = self[current.prev_id]
        elif current.next_id is None:
            # HEAD has no ancestors, so we are done:
            return
        else:
            current = self[current.next_id]

        yield current
def _sever_branch(self, lod_items):
    """Cut a branch loose from its source and drop its CVSBranch.

    LOD_ITEMS describes the branch.  Its CVSBranch is deleted, the
    first revision on the branch (if any) becomes a new root, and
    LOD_ITEMS.cvs_branch is set to None.

    This method can only be used before symbols have been grafted
    onto CVSBranches.  It does not adjust NTDBR, NTDBR_PREV_ID or
    NTDBR_NEXT_ID even if LOD_ITEMS describes a NTDB."""

    severed = lod_items.cvs_branch
    assert severed is not None
    assert not severed.tag_ids
    assert not severed.branch_ids
    source_rev = self[severed.source_id]

    # Only the following case is covered, even though after
    # FilterSymbolsPass the source_id might refer to another
    # CVSBranch.
    assert isinstance(source_rev, CVSRevision)

    # Delete the CVSBranch itself:
    lod_items.cvs_branch = None
    del self[severed.id]

    # Delete the reference from the source revision to the CVSBranch:
    source_rev.branch_ids.remove(severed.id)

    if not lod_items.cvs_revisions:
        # There is no revision on the branch that needs detaching.
        return

    first_rev = lod_items.cvs_revisions[0]

    # Delete the reference from first_rev to the CVSBranch:
    first_rev.first_on_branch_id = None

    # Delete the reference from the source revision to first_rev...
    source_rev.branch_commit_ids.remove(first_rev.id)

    # ...and vice versa:
    first_rev.prev_id = None

    # Change the type of first_rev (e.g., from Change to Add), as it
    # no longer has a predecessor:
    is_modification = isinstance(first_rev, CVSRevisionModification)
    first_rev.__class__ = cvs_revision_type_map[(is_modification, False,)]

    # Now first_rev is a new root:
    self.root_ids.add(first_rev.id)
405 def adjust_ntdbrs(self, ntdbr_cvs_revs):
406 """Adjust the specified non-trunk default branch revisions.
408 NTDBR_CVS_REVS is a list of CVSRevision instances in this file
409 that have been determined to be non-trunk default branch
410 revisions.
412 The first revision on the default branch is handled strangely by
413 CVS. If a file is imported (as opposed to being added), CVS
414 creates a 1.1 revision, then creates a vendor branch 1.1.1 based
415 on 1.1, then creates a 1.1.1.1 revision that is identical to the
416 1.1 revision (i.e., its deltatext is empty). The log message that
417 the user typed when importing is stored with the 1.1.1.1 revision.
418 The 1.1 revision always contains a standard, generated log
419 message, 'Initial revision\n'.
421 When we detect a straightforward import like this, we want to
422 handle it by deleting the 1.1 revision (which doesn't contain any
423 useful information) and making 1.1.1.1 into an independent root in
424 the file's dependency tree. In SVN, 1.1.1.1 will be added
425 directly to the vendor branch with its initial content. Then in a
426 special 'post-commit', the 1.1.1.1 revision is copied back to
427 trunk.
429 If the user imports again to the same vendor branch, then CVS
430 creates revisions 1.1.1.2, 1.1.1.3, etc. on the vendor branch,
431 *without* counterparts in trunk (even though these revisions
432 effectively play the role of trunk revisions). So after we add
433 such revisions to the vendor branch, we also copy them back to
434 trunk in post-commits.
436 Set the ntdbr members of the revisions listed in NTDBR_CVS_REVS to
437 True. Also, if there is a 1.2 revision, then set that revision to
438 depend on the last non-trunk default branch revision and possibly
439 adjust its type accordingly."""
441 for cvs_rev in ntdbr_cvs_revs:
442 cvs_rev.ntdbr = True
444 # Look for a 1.2 revision:
445 rev_1_1 = self[ntdbr_cvs_revs[0].prev_id]
447 rev_1_2 = self.get(rev_1_1.next_id)
448 if rev_1_2 is not None:
449 # Revision 1.2 logically follows the imported revisions, not
450 # 1.1. Accordingly, connect it to the last NTDBR and possibly
451 # change its type.
452 last_ntdbr = ntdbr_cvs_revs[-1]
453 rev_1_2.ntdbr_prev_id = last_ntdbr.id
454 last_ntdbr.ntdbr_next_id = rev_1_2.id
455 rev_1_2.__class__ = cvs_revision_type_map[(
456 isinstance(rev_1_2, CVSRevisionModification),
457 isinstance(last_ntdbr, CVSRevisionModification),
460 def process_live_ntdb(self, vendor_lod_items):
461 """VENDOR_LOD_ITEMS is a live default branch; process it.
463 In this case, all revisions on the default branch are NTDBRs and
464 it is an error if there is also a '1.2' revision.
466 Return True iff this transformation really does something. Raise
467 a VendorBranchError if there is a '1.2' revision."""
469 rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
470 rev_1_2_id = rev_1_1.next_id
471 if rev_1_2_id is not None:
472 raise VendorBranchError(
473 'File \'%s\' has default branch=%s but also a revision %s'
474 % (self.cvs_file.filename,
475 vendor_lod_items.cvs_branch.branch_number, self[rev_1_2_id].rev,)
478 ntdbr_cvs_revs = list(vendor_lod_items.cvs_revisions)
480 if ntdbr_cvs_revs:
481 self.adjust_ntdbrs(ntdbr_cvs_revs)
482 return True
483 else:
484 return False
486 def process_historical_ntdb(self, vendor_lod_items):
487 """There appears to have been a non-trunk default branch in the past.
489 There is currently no default branch, but the branch described by
490 file appears to have been imported. So our educated guess is that
491 all revisions on the '1.1.1' branch (described by
492 VENDOR_LOD_ITEMS) with timestamps prior to the timestamp of '1.2'
493 were non-trunk default branch revisions.
495 Return True iff this transformation really does something.
497 This really only handles standard '1.1.1.*'-style vendor
498 revisions. One could conceivably have a file whose default branch
499 is 1.1.3 or whatever, or was that at some point in time, with
500 vendor revisions 1.1.3.1, 1.1.3.2, etc. But with the default
501 branch gone now, we'd have no basis for assuming that the
502 non-standard vendor branch had ever been the default branch
503 anyway.
505 Note that we rely on comparisons between the timestamps of the
506 revisions on the vendor branch and that of revision 1.2, even
507 though the timestamps might be incorrect due to clock skew. We
508 could do a slightly better job if we used the changeset
509 timestamps, as it is possible that the dependencies that went into
510 determining those timestamps are more accurate. But that would
511 require an extra pass or two."""
513 rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
514 rev_1_2_id = rev_1_1.next_id
516 if rev_1_2_id is None:
517 rev_1_2_timestamp = None
518 else:
519 rev_1_2_timestamp = self[rev_1_2_id].timestamp
521 ntdbr_cvs_revs = []
522 for cvs_rev in vendor_lod_items.cvs_revisions:
523 if rev_1_2_timestamp is not None \
524 and cvs_rev.timestamp >= rev_1_2_timestamp:
525 # That's the end of the once-default branch.
526 break
527 ntdbr_cvs_revs.append(cvs_rev)
529 if ntdbr_cvs_revs:
530 self.adjust_ntdbrs(ntdbr_cvs_revs)
531 return True
532 else:
533 return False
def imported_remove_1_1(self, vendor_lod_items):
    """Remove the 1.1 revision of an imported file, if possible.

    VENDOR_LOD_ITEMS is the LODItems instance for the vendor branch;
    it must contain at least one revision.  The 1.1 revision is only
    removed if the first vendor revision is a modification without
    deltatext.  In that case the vendor branch is severed from trunk
    and the symbols and branch commits of 1.1 are moved to the first
    vendor revision.  See adjust_ntdbrs() for more information."""

    assert vendor_lod_items.cvs_revisions
    first_vendor_rev = vendor_lod_items.cvs_revisions[0]

    if not isinstance(first_vendor_rev, CVSRevisionModification):
        return
    if first_vendor_rev.deltatext_exists:
        return

    rev_1_1 = self[vendor_lod_items.cvs_branch.source_id]
    assert isinstance(rev_1_1, CVSRevision)
    Log().debug('Removing unnecessary revision %s' % (rev_1_1,))

    # Delete the 1.1.1 CVSBranch and sever the vendor branch from
    # trunk:
    self._sever_branch(vendor_lod_items)

    # Delete rev_1_1 (it has to stop being a root before it can be
    # deleted):
    self.root_ids.remove(rev_1_1.id)
    del self[rev_1_1.id]
    rev_1_2_id = rev_1_1.next_id
    if rev_1_2_id is not None:
        # The successor of 1.1 becomes a root in its place:
        rev_1_2 = self[rev_1_2_id]
        rev_1_2.prev_id = None
        self.root_ids.add(rev_1_2.id)

    # Move any tags from rev_1_1 to first_vendor_rev:
    first_vendor_rev.tag_ids.extend(rev_1_1.tag_ids)
    for tag_id in rev_1_1.tag_ids:
        moved_tag = self[tag_id]
        moved_tag.source_lod = first_vendor_rev.lod
        moved_tag.source_id = first_vendor_rev.id

    # Move any branches from rev_1_1 to the front of the branches of
    # first_vendor_rev:
    first_vendor_rev.branch_ids[0:0] = rev_1_1.branch_ids
    for branch_id in rev_1_1.branch_ids:
        moved_branch = self[branch_id]
        moved_branch.source_lod = first_vendor_rev.lod
        moved_branch.source_id = first_vendor_rev.id

    # Likewise for the first commits on those branches:
    first_vendor_rev.branch_commit_ids[0:0] = rev_1_1.branch_commit_ids
    for commit_id in rev_1_1.branch_commit_ids:
        branch_commit = self[commit_id]
        branch_commit.prev_id = first_vendor_rev.id
def _delete_unneeded(self, cvs_item, metadata_db):
    """Return True iff CVS_ITEM is an unneeded 1.1 delete on trunk.

    CVS_ITEM qualifies if it is a CVSRevisionNoop with revision
    number '1.1' on trunk that is not an NTDBR, closes no symbols,
    has at least one branch whose first branch has a revision on it,
    and whose log message (looked up in METADATA_DB) equals the
    message that CVS generates when a file is added on a branch."""

    is_candidate = (
        isinstance(cvs_item, CVSRevisionNoop)
        and cvs_item.rev == '1.1'
        and isinstance(cvs_item.lod, Trunk)
        and len(cvs_item.branch_ids) >= 1
        and self[cvs_item.branch_ids[0]].next_id is not None
        and not cvs_item.closed_symbols
        and not cvs_item.ntdbr
        )
    if not is_candidate:
        return False

    # FIXME: This message will not match if the RCS file was renamed
    # manually after it was created.
    log_msg = metadata_db[cvs_item.metadata_id].log_msg
    first_branch = self[cvs_item.branch_ids[0]]
    cvs_generated_msg = 'file %s was initially added on branch %s.\n' % (
        self.cvs_file.basename,
        first_branch.symbol.name,
        )
    return log_msg == cvs_generated_msg
def remove_unneeded_deletes(self, metadata_db):
    """Remove the unneeded delete of this file, if there is one.

    If a file is added on a branch, then a trunk revision is added at
    the same time in the 'Dead' state.  This revision doesn't do
    anything useful, so delete it, together with the CVSBranches and
    CVSTags that are rooted at it.  The revisions that followed the
    deleted items become roots."""

    for root_id in self.root_ids:
        dead_rev = self[root_id]
        if not self._delete_unneeded(dead_rev, metadata_db):
            continue

        Log().debug('Removing unnecessary delete %s' % (dead_rev,))

        # Delete dead_rev (it has to stop being a root first):
        self.root_ids.remove(dead_rev.id)
        del self[root_id]
        if dead_rev.next_id is not None:
            successor = self[dead_rev.next_id]
            successor.prev_id = None
            self.root_ids.add(successor.id)

        # Delete all CVSBranches rooted at this revision.  If there is
        # a CVSRevision on the branch, it should already be an add so
        # it doesn't have to be changed.
        for branch_id in dead_rev.branch_ids:
            branch = self[branch_id]
            del self[branch.id]

            if branch.next_id is not None:
                first_on_branch = self[branch.next_id]
                first_on_branch.first_on_branch_id = None
                first_on_branch.prev_id = None
                self.root_ids.add(first_on_branch.id)

        # Tagging a dead revision doesn't do anything, so remove any
        # tags that were set on 1.1:
        for tag_id in dead_rev.tag_ids:
            del self[tag_id]

        # This can only happen once per file, and self.root_ids might
        # just have been changed, so stop iterating over it:
        break
639 def _initial_branch_delete_unneeded(self, lod_items, metadata_db):
640 """Return True iff the initial revision in LOD_ITEMS can be deleted."""
642 if lod_items.cvs_branch is not None \
643 and lod_items.cvs_branch.source_id is not None \
644 and len(lod_items.cvs_revisions) >= 2:
645 cvs_revision = lod_items.cvs_revisions[0]
646 cvs_rev_source = self[lod_items.cvs_branch.source_id]
647 if isinstance(cvs_revision, CVSRevisionAbsent) \
648 and not cvs_revision.tag_ids \
649 and not cvs_revision.branch_ids \
650 and abs(cvs_revision.timestamp - cvs_rev_source.timestamp) <= 2:
651 # FIXME: This message will not match if the RCS file was renamed
652 # manually after it was created.
653 log_msg = metadata_db[cvs_revision.metadata_id].log_msg
654 return bool(re.match(
655 r'file %s was added on branch .* on '
656 r'\d{4}\-\d{2}\-\d{2} \d{2}\:\d{2}\:\d{2}( [\+\-]\d{4})?'
657 '\n' % (re.escape(self.cvs_file.basename),),
658 log_msg,
660 return False
def remove_initial_branch_deletes(self, metadata_db):
    """Remove first branch revisions that are unnecessary deletes.

    If a file is added on a branch (whether or not it already existed
    on trunk), then new versions of CVS add a first branch revision
    in the 'dead' state (to indicate that the file did not exist on
    the branch when the branch was created) followed by the second
    branch revision, which is an add.  When this situation is
    encountered, sever the branch from its source and delete the
    first branch revision and the CVSBranch; the second revision
    becomes a root."""

    for lod_items in self.iter_lods():
        if not self._initial_branch_delete_unneeded(lod_items, metadata_db):
            continue

        dead_rev = lod_items.cvs_revisions[0]
        Log().debug(
            'Removing unnecessary initial branch delete %s' % (dead_rev,)
            )
        branch = lod_items.cvs_branch
        source_rev = self[branch.source_id]
        new_root = lod_items.cvs_revisions[1]

        # Delete dead_rev and make its successor a root:
        del self[dead_rev.id]
        new_root.prev_id = None
        self.root_ids.add(new_root.id)
        source_rev.branch_commit_ids.remove(dead_rev.id)

        # Delete the CVSBranch on which it is located:
        del self[branch.id]
        source_rev.branch_ids.remove(branch.id)
693 def _exclude_tag(self, cvs_tag):
694 """Exclude the specified CVS_TAG."""
696 del self[cvs_tag.id]
698 # A CVSTag is the successor of the CVSRevision that it
699 # sprouts from. Delete this tag from that revision's
700 # tag_ids:
701 self[cvs_tag.source_id].tag_ids.remove(cvs_tag.id)
703 def _exclude_branch(self, lod_items):
704 """Exclude the branch described by LOD_ITEMS, including its revisions.
706 (Do not update the LOD_ITEMS instance itself.)
708 If the LOD starts with non-trunk default branch revisions, leave
709 the branch and the NTDB revisions in place, but delete any
710 subsequent revisions that are not NTDB revisions. In this case,
711 return True; otherwise return False"""
713 if lod_items.cvs_revisions and lod_items.cvs_revisions[0].ntdbr:
714 for cvs_rev in lod_items.cvs_revisions:
715 if not cvs_rev.ntdbr:
716 # We've found the first non-NTDBR, and it's stored in cvs_rev:
717 break
718 else:
719 # There was no revision following the NTDBRs:
720 cvs_rev = None
722 if cvs_rev:
723 last_ntdbr = self[cvs_rev.prev_id]
724 last_ntdbr.next_id = None
725 while True:
726 del self[cvs_rev.id]
727 if cvs_rev.next_id is None:
728 break
729 cvs_rev = self[cvs_rev.next_id]
731 return True
733 else:
734 if lod_items.cvs_branch is not None:
735 # Delete the CVSBranch itself:
736 cvs_branch = lod_items.cvs_branch
738 del self[cvs_branch.id]
740 # A CVSBranch is the successor of the CVSRevision that it
741 # sprouts from. Delete this branch from that revision's
742 # branch_ids:
743 self[cvs_branch.source_id].branch_ids.remove(cvs_branch.id)
745 if lod_items.cvs_revisions:
746 # The first CVSRevision on the branch has to be either detached
747 # from the revision from which the branch sprang, or removed
748 # from self.root_ids:
749 cvs_rev = lod_items.cvs_revisions[0]
750 if cvs_rev.prev_id is None:
751 self.root_ids.remove(cvs_rev.id)
752 else:
753 self[cvs_rev.prev_id].branch_commit_ids.remove(cvs_rev.id)
755 for cvs_rev in lod_items.cvs_revisions:
756 del self[cvs_rev.id]
758 return False
def graft_ntdbr_to_trunk(self):
    """Graft the non-trunk default branch revisions onto trunk.

    The NTDBRs should already be alone on a branch that may or may
    not have a CVSBranch connecting it to trunk.  They are inserted
    into the trunk chain between the 1.1 revision and the 1.2
    revision (as far as those exist), and they and the symbols
    sprouting from their LOD are reassigned to trunk."""

    # NOTE(review): the indentation of this file was lost; the
    # nesting below (in particular the final 'return', taken to end
    # the method after the first NTDB has been grafted) was
    # reconstructed from the logic -- confirm against upstream.
    for lod_items in self.iter_lods():
        revisions = lod_items.cvs_revisions
        if not (revisions and revisions[0].ntdbr):
            continue

        assert lod_items.is_pure_ntdb()

        first_rev = revisions[0]
        last_rev = revisions[-1]

        # Look these up before the branch is severed, because
        # severing resets first_rev.prev_id:
        rev_1_1 = self.get(first_rev.prev_id)
        rev_1_2 = self.get(last_rev.ntdbr_next_id)

        if lod_items.cvs_branch is not None:
            self._sever_branch(lod_items)

        if rev_1_1 is not None:
            # Chain the first NTDBR after rev_1_1; it is then no
            # longer a root, and its type might change:
            rev_1_1.next_id = first_rev.id
            first_rev.prev_id = rev_1_1.id

            self.root_ids.remove(first_rev.id)

            type_key = (
                isinstance(first_rev, CVSRevisionModification),
                isinstance(rev_1_1, CVSRevisionModification),
                )
            first_rev.__class__ = cvs_revision_type_map[type_key]

        if rev_1_2 is not None:
            # Turn the NTDBR link between the last NTDBR and rev_1_2
            # into a regular link:
            rev_1_2.ntdbr_prev_id = None
            last_rev.ntdbr_next_id = None

            if rev_1_2.prev_id is None:
                self.root_ids.remove(rev_1_2.id)

            rev_1_2.prev_id = last_rev.id
            last_rev.next_id = rev_1_2.id

            # The effective_pred_id of rev_1_2 was not changed, so
            # the type of rev_1_2 doesn't have to be changed.

        for grafted_rev in revisions:
            grafted_rev.ntdbr = False
            grafted_rev.lod = self.trunk

        for cvs_branch in lod_items.cvs_branches:
            cvs_branch.source_lod = self.trunk

        for cvs_tag in lod_items.cvs_tags:
            cvs_tag.source_lod = self.trunk

        return
def exclude_non_trunk(self):
    """Delete all tags and branches.

    NTDBRs survive the exclusion of their branch; if there are any,
    they are grafted onto trunk afterwards."""

    ntdbr_excluded = False
    for lod_items in self.iter_lods():
        # Iterate over a copy, because tags are removed from the list
        # as they are excluded:
        for cvs_tag in list(lod_items.cvs_tags):
            self._exclude_tag(cvs_tag)
            lod_items.cvs_tags.remove(cvs_tag)

        if isinstance(lod_items.lod, Trunk):
            continue

        assert not lod_items.cvs_branches

        if self._exclude_branch(lod_items):
            ntdbr_excluded = True

    if ntdbr_excluded:
        self.graft_ntdbr_to_trunk()
def filter_excluded_symbols(self, revision_excluder):
    """Delete all excluded symbols and the references to them.

    NTDBRs survive the exclusion of their branch; if there are any,
    they are grafted onto trunk afterwards.  Finally,
    REVISION_EXCLUDER.process_file() is called with this instance to
    let it know what has been excluded."""

    ntdbr_excluded = False
    for lod_items in self.iter_lods():
        # Delete any excluded tags (iterating over a copy, because
        # tags are removed from the list as they are excluded):
        for cvs_tag in list(lod_items.cvs_tags):
            if not isinstance(cvs_tag.symbol, ExcludedSymbol):
                continue
            self._exclude_tag(cvs_tag)
            lod_items.cvs_tags.remove(cvs_tag)

        # Delete the whole branch if it is to be excluded:
        if not isinstance(lod_items.lod, ExcludedSymbol):
            continue

        # A symbol can only be excluded if no other symbols spring
        # from it.  This was already checked in CollateSymbolsPass, so
        # these conditions should already be satisfied.
        assert not list(lod_items.iter_blockers())

        if self._exclude_branch(lod_items):
            ntdbr_excluded = True

    if ntdbr_excluded:
        self.graft_ntdbr_to_trunk()

    revision_excluder.process_file(self)
def _mutate_branch_to_tag(self, cvs_branch):
    """Replace the branch CVS_BRANCH by a tag with the same id.

    Raise FatalError if there are commits on the branch."""

    if cvs_branch.next_id is not None:
        # This shouldn't happen because it was checked in
        # CollateSymbolsPass:
        raise FatalError('Attempt to exclude a branch with commits.')

    replacement = CVSTag(
        cvs_branch.id,
        cvs_branch.cvs_file,
        cvs_branch.symbol,
        cvs_branch.source_lod,
        cvs_branch.source_id,
        cvs_branch.revision_recorder_token,
        )
    # The tag takes the place of the branch, as it has the same id:
    self.add(replacement)

    # Move the id from the branches to the tags of the source:
    source = self[replacement.source_id]
    source.branch_ids.remove(replacement.id)
    source.tag_ids.append(replacement.id)
def _mutate_tag_to_branch(self, cvs_tag):
    """Replace the tag CVS_TAG by a branch with the same id."""

    replacement = CVSBranch(
        cvs_tag.id,
        cvs_tag.cvs_file,
        cvs_tag.symbol,
        None,
        cvs_tag.source_lod,
        cvs_tag.source_id,
        None,
        cvs_tag.revision_recorder_token,
        )
    # The branch takes the place of the tag, as it has the same id:
    self.add(replacement)

    # Move the id from the tags to the branches of the source:
    source = self[replacement.source_id]
    source.tag_ids.remove(replacement.id)
    source.branch_ids.append(replacement.id)
def _mutate_symbol(self, cvs_symbol):
    """Mutate CVS_SYMBOL if its kind disagrees with that of its symbol.

    A CVSBranch whose symbol is a Tag becomes a tag, and a CVSTag
    whose symbol is a Branch becomes a branch.  Anything else is left
    alone."""

    target = cvs_symbol.symbol
    if isinstance(cvs_symbol, CVSBranch) and isinstance(target, Tag):
        self._mutate_branch_to_tag(cvs_symbol)
        return
    if isinstance(cvs_symbol, CVSTag) and isinstance(target, Branch):
        self._mutate_tag_to_branch(cvs_symbol)
def mutate_symbols(self):
    """Force every CVSSymbol to be a tag or a branch like its symbol.

    Raise RuntimeError if an item is neither a CVSRevision nor a
    CVSSymbol."""

    for cvs_item in self.values():
        if isinstance(cvs_item, CVSRevision):
            # This CVSRevision may be affected by the mutation of any
            # CVSSymbols that it references, but there is nothing to
            # do here directly.
            continue

        if not isinstance(cvs_item, CVSSymbol):
            raise RuntimeError('Unknown cvs item type')

        self._mutate_symbol(cvs_item)
def _adjust_tag_parent(self, cvs_tag):
    """Move CVS_TAG to its preferred parent, if that is possible.

    CVS_TAG is an instance of CVSTag.  The tag is only moved if its
    preferred parent is not Trunk and is the symbol of one of the
    branches that sprout from the revision from which the tag
    currently sprouts.  This method must be called in leaf-to-trunk
    order."""

    # The Symbol that cvs_tag would like to have as a parent:
    preferred_parent = Ctx()._symbol_db.get_symbol(
        cvs_tag.symbol.preferred_parent_id)

    if cvs_tag.source_lod == preferred_parent:
        # The preferred parent is already the parent.
        return

    # The CVSRevision that is its direct parent:
    source = self[cvs_tag.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(preferred_parent, Trunk):
        # It is not possible to graft *onto* Trunk:
        return

    # Try to find the preferred parent among the possible parents:
    for branch_id in source.branch_ids:
        parent = self[branch_id]
        if parent.symbol == preferred_parent:
            # We found it!
            break
    else:
        # The preferred parent is not a possible parent in this file.
        return

    assert isinstance(parent, CVSBranch)

    Log().debug('Grafting %s from %s (on %s) onto %s' % (
        cvs_tag, source, source.lod, parent,))

    # Switch parent:
    source.tag_ids.remove(cvs_tag.id)
    parent.tag_ids.append(cvs_tag.id)
    cvs_tag.source_lod = parent.symbol
    cvs_tag.source_id = parent.id
def _adjust_branch_parents(self, cvs_branch):
    """Graft CVS_BRANCH onto its preferred parent if that is possible.

    CVS_BRANCH is an instance of CVSBranch.  This method must be
    called in leaf-to-trunk order.

    Nothing is changed if the preferred parent already is the parent,
    if the preferred parent is Trunk, or if the preferred parent does
    not appear before CVS_BRANCH among the branches sprouting from the
    source revision.  Raise InternalError if the search runs off the
    end of that list without meeting either."""

    # The Symbol that the branch would like to sprout from:
    wanted = Ctx()._symbol_db.get_symbol(
        cvs_branch.symbol.preferred_parent_id)

    if cvs_branch.source_lod == wanted:
        # Already attached to the preferred parent.
        return

    # The item that the branch currently sprouts from.  It has not
    # been adjusted yet, so it is always a CVSRevision:
    source = self[cvs_branch.source_id]
    assert isinstance(source, CVSRevision)

    if isinstance(wanted, Trunk):
        # Grafting *onto* Trunk is not possible.
        return

    # Walk the sibling branches in order, looking for the preferred
    # parent:
    parent = None
    for candidate_id in source.branch_ids:
        candidate = self[candidate_id]
        if candidate.symbol == wanted:
            parent = candidate
            break
        if candidate.symbol == cvs_branch.symbol:
            # Only branches that precede the branch being adjusted
            # count as possible parents, so leave parentage alone.
            return

    if parent is None:
        # The loop is expected to end via one of the exits above.
        raise InternalError(
            'Possible parent search did not terminate as expected')

    assert isinstance(parent, CVSBranch)

    message = 'Grafting %s from %s (on %s) onto %s' % (
        cvs_branch, source, source.lod, parent,)
    Log().debug(message)

    # Detach from the old source and attach to the new parent:
    source.branch_ids.remove(cvs_branch.id)
    parent.branch_ids.append(cvs_branch.id)
    cvs_branch.source_lod = parent.symbol
    cvs_branch.source_id = parent.id
def adjust_parents(self):
    """Move symbols onto their preferred parents where allowed.

    For every line of development yielded by iter_lods(), first each
    of its tags is passed to _adjust_tag_parent() and then each of
    its branches is passed to _adjust_branch_parents().  Those
    methods graft a CVSSymbol onto its preferred parent when that
    parent differs from the current one and is an allowed parent of
    the CVSSymbol in this file."""

    graft_tag = self._adjust_tag_parent
    graft_branch = self._adjust_branch_parents

    for lod in self.iter_lods():
        for tag in lod.cvs_tags:
            graft_tag(tag)

        for branch in lod.cvs_branches:
            graft_branch(branch)
def _get_revision_source(self, cvs_symbol):
    """Return the CVSRevision that is the ultimate source of CVS_SYMBOL.

    Follow source_id links, starting at CVS_SYMBOL, until the item
    reached is a CVSRevision."""

    origin = self[cvs_symbol.source_id]
    while not isinstance(origin, CVSRevision):
        # The source is itself a symbol; keep climbing.
        origin = self[origin.source_id]
    return origin
def refine_symbols(self):
    """Refine the types of the CVSSymbols in this file.

    Adjust the symbol types based on whether the source exists:
    CVSBranch vs. CVSBranchNoop and CVSTag vs. CVSTagNoop.

    For every tag and branch of every line of development, the
    ultimate source revision is looked up with
    _get_revision_source(), and the symbol's class is replaced by the
    entry of cvs_tag_type_map / cvs_branch_type_map that is keyed by
    whether that revision is a CVSRevisionModification."""

    for lod_items in self.iter_lods():
        for cvs_tag in lod_items.cvs_tags:
            source = self._get_revision_source(cvs_tag)
            cvs_tag.__class__ = cvs_tag_type_map[
                isinstance(source, CVSRevisionModification)
                ]

        for cvs_branch in lod_items.cvs_branches:
            source = self._get_revision_source(cvs_branch)
            cvs_branch.__class__ = cvs_branch_type_map[
                isinstance(source, CVSRevisionModification)
                ]
def record_opened_symbols(self):
    """Set CVSRevision.opened_symbols for the surviving revisions.

    For each CVSRevision and CVSBranch held by SELF, opened_symbols
    is reset to a list with one (symbol id, cvs symbol id) tuple per
    id returned by the item's get_cvs_symbol_ids_opened()."""

    for cvs_item in self.values():
        if isinstance(cvs_item, (CVSRevision, CVSBranch)):
            cvs_item.opened_symbols = []
            for cvs_symbol_opened_id in cvs_item.get_cvs_symbol_ids_opened():
                cvs_symbol_opened = self[cvs_symbol_opened_id]
                cvs_item.opened_symbols.append(
                    (cvs_symbol_opened.symbol.id, cvs_symbol_opened.id,)
                    )
def record_closed_symbols(self):
    """Set CVSRevision.closed_symbols for the surviving revisions.

    The symbols closed by a CVSRevision are the symbols opened by
    each of the CVSItems that the CVSRevision closes, i.e. the
    concatenated opened_symbols lists of the items named by
    get_ids_closed().

    This method must be called after record_opened_symbols()."""

    for item in self.values():
        if not isinstance(item, CVSRevision):
            continue

        item.closed_symbols = []
        for closed_id in item.get_ids_closed():
            closed_item = self[closed_id]
            item.closed_symbols.extend(closed_item.opened_symbols)