Count trunk as possible parent of branches from a vendor branch.
[cvs2svn.git] / cvs2svn_lib / symbol_statistics.py
blob352f7b480181a00cce48525098666a6bdfeff5b7
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module gathers and processes statistics about lines of development."""
19 import cPickle
21 from cvs2svn_lib import config
22 from cvs2svn_lib.common import error_prefix
23 from cvs2svn_lib.common import FatalException
24 from cvs2svn_lib.log import logger
25 from cvs2svn_lib.artifact_manager import artifact_manager
26 from cvs2svn_lib.symbol import Trunk
27 from cvs2svn_lib.symbol import IncludedSymbol
28 from cvs2svn_lib.symbol import Branch
29 from cvs2svn_lib.symbol import Tag
30 from cvs2svn_lib.symbol import ExcludedSymbol
class SymbolPlanError(FatalException):
  """Base class of the fatal errors raised about the symbol conversion plan."""
class SymbolPlanException(SymbolPlanError):
  """A SymbolPlanError about the planned conversion of one symbol.

  Members:

    stats -- the statistics object that was passed to the constructor

    symbol -- the planned conversion that was rejected"""

  def __init__(self, stats, symbol, msg):
    """Record STATS and SYMBOL and build the error text around MSG."""

    self.stats = stats
    self.symbol = symbol
    description = (
        'Cannot convert the following symbol to %s: %s\n %s'
        % (symbol, msg, stats,)
        )
    SymbolPlanError.__init__(self, description)
class IndeterminateSymbolException(SymbolPlanException):
  """A SymbolPlanException whose message is 'Indeterminate type'."""

  def __init__(self, stats, symbol):
    """Build the exception for SYMBOL, described by STATS."""

    reason = 'Indeterminate type'
    SymbolPlanException.__init__(self, stats, symbol, reason)
53 class _Stats:
54 """A summary of information about a symbol (tag or branch).
56 Members:
58 lod -- the LineOfDevelopment instance of the lod being described
60 tag_create_count -- the number of files in which this lod appears
61 as a tag
63 branch_create_count -- the number of files in which this lod
64 appears as a branch
66 branch_commit_count -- the number of files in which there were
67 commits on this lod
69 trivial_import_count -- the number of files in which this branch
70 was purely a non-trunk default branch containing exactly one
71 revision.
73 pure_ntdb_count -- the number of files in which this branch was
74 purely a non-trunk default branch (consisting only of
75 non-trunk default branch revisions).
77 branch_blockers -- a set of Symbol instances for any symbols that
78 sprout from a branch with this name.
80 possible_parents -- a map {LineOfDevelopment : count} indicating
81 in how many files each LOD could have served as the parent of
82 self.lod."""
84 def __init__(self, lod):
85 self.lod = lod
86 self.tag_create_count = 0
87 self.branch_create_count = 0
88 self.branch_commit_count = 0
89 self.branch_blockers = set()
90 self.trivial_import_count = 0
91 self.pure_ntdb_count = 0
92 self.possible_parents = { }
94 def register_tag_creation(self):
95 """Register the creation of this lod as a tag."""
97 self.tag_create_count += 1
99 def register_branch_creation(self):
100 """Register the creation of this lod as a branch."""
102 self.branch_create_count += 1
104 def register_branch_commit(self):
105 """Register that there were commit(s) on this branch in one file."""
107 self.branch_commit_count += 1
109 def register_branch_blocker(self, blocker):
110 """Register BLOCKER as preventing this symbol from being deleted.
112 BLOCKER is a tag or a branch that springs from a revision on this
113 symbol."""
115 self.branch_blockers.add(blocker)
117 def register_trivial_import(self):
118 """Register that this branch is a trivial import branch in one file."""
120 self.trivial_import_count += 1
122 def register_pure_ntdb(self):
123 """Register that this branch is a pure import branch in one file."""
125 self.pure_ntdb_count += 1
127 def register_possible_parent(self, lod):
128 """Register that LOD was a possible parent for SELF.lod in a file."""
130 self.possible_parents[lod] = self.possible_parents.get(lod, 0) + 1
132 def register_branch_possible_parents(self, cvs_branch, cvs_file_items):
133 """Register any possible parents of this symbol from CVS_BRANCH."""
135 # This routine is a bottleneck. So we define some local variables
136 # to speed up access to frequently-needed variables.
137 register = self.register_possible_parent
138 parent_cvs_rev = cvs_file_items[cvs_branch.source_id]
140 # The "obvious" parent of a branch is the branch holding the
141 # revision where the branch is rooted:
142 register(parent_cvs_rev.lod)
144 if parent_cvs_rev.ntdbr:
145 # If the parent revision is a vendor branch revision, and it
146 # existed when the vendor branch was the default branch, then
147 # trunk is a possible parent.
148 register(cvs_file_items.trunk)
150 # Any other branches that are rooted at the same revision and
151 # were committed earlier than the branch are also possible
152 # parents:
153 symbol = cvs_branch.symbol
154 for branch_id in parent_cvs_rev.branch_ids:
155 parent_symbol = cvs_file_items[branch_id].symbol
156 # A branch cannot be its own parent, nor can a branch's
157 # parent be a branch that was created after it. So we stop
158 # iterating when we reached the branch whose parents we are
159 # collecting:
160 if parent_symbol == symbol:
161 break
162 register(parent_symbol)
164 def register_tag_possible_parents(self, cvs_tag, cvs_file_items):
165 """Register any possible parents of this symbol from CVS_TAG."""
167 # This routine is a bottleneck. So use local variables to speed
168 # up access to frequently-needed objects.
169 register = self.register_possible_parent
170 parent_cvs_rev = cvs_file_items[cvs_tag.source_id]
172 # The "obvious" parent of a tag is the branch holding the
173 # revision where the branch is rooted:
174 register(parent_cvs_rev.lod)
176 # Branches that are rooted at the same revision are also
177 # possible parents:
178 for branch_id in parent_cvs_rev.branch_ids:
179 parent_symbol = cvs_file_items[branch_id].symbol
180 register(parent_symbol)
182 def is_ghost(self):
183 """Return True iff this lod never really existed."""
185 return (
186 not isinstance(self.lod, Trunk)
187 and self.branch_commit_count == 0
188 and not self.branch_blockers
189 and not self.possible_parents
192 def check_valid(self, symbol):
193 """Check whether SYMBOL is a valid conversion of SELF.lod.
195 It is planned to convert SELF.lod as SYMBOL. Verify that SYMBOL
196 is a TypedSymbol and that the information that it contains is
197 consistent with that stored in SELF.lod. (This routine does not
198 do higher-level tests of whether the chosen conversion is actually
199 sensible.) If there are any problems, raise a
200 SymbolPlanException."""
202 if not isinstance(symbol, (Trunk, Branch, Tag, ExcludedSymbol)):
203 raise IndeterminateSymbolException(self, symbol)
205 if symbol.id != self.lod.id:
206 raise SymbolPlanException(self, symbol, 'IDs must match')
208 if symbol.project != self.lod.project:
209 raise SymbolPlanException(self, symbol, 'Projects must match')
211 if isinstance(symbol, IncludedSymbol) and symbol.name != self.lod.name:
212 raise SymbolPlanException(self, symbol, 'Names must match')
214 def check_preferred_parent_allowed(self, symbol):
215 """Check that SYMBOL's preferred_parent_id is an allowed parent.
217 SYMBOL is the planned conversion of SELF.lod. Verify that its
218 preferred_parent_id is a possible parent of SELF.lod. If not,
219 raise a SymbolPlanException describing the problem."""
221 if isinstance(symbol, IncludedSymbol) \
222 and symbol.preferred_parent_id is not None:
223 for pp in self.possible_parents.keys():
224 if pp.id == symbol.preferred_parent_id:
225 return
226 else:
227 raise SymbolPlanException(
228 self, symbol,
229 'The selected parent is not among the symbol\'s '
230 'possible parents.'
233 def __str__(self):
234 return (
235 '\'%s\' is '
236 'a tag in %d files, '
237 'a branch in %d files, '
238 'a trivial import in %d files, '
239 'a pure import in %d files, '
240 'and has commits in %d files'
241 % (self.lod, self.tag_create_count, self.branch_create_count,
242 self.trivial_import_count, self.pure_ntdb_count,
243 self.branch_commit_count)
246 def __repr__(self):
247 retval = ['%s\n possible parents:\n' % (self,)]
248 parent_counts = self.possible_parents.items()
249 parent_counts.sort(lambda a,b: - cmp(a[1], b[1]))
250 for (symbol, count) in parent_counts:
251 if isinstance(symbol, Trunk):
252 retval.append(' trunk : %d\n' % count)
253 else:
254 retval.append(' \'%s\' : %d\n' % (symbol.name, count))
255 if self.branch_blockers:
256 blockers = list(self.branch_blockers)
257 blockers.sort()
258 retval.append(' blockers:\n')
259 for blocker in blockers:
260 retval.append(' \'%s\'\n' % (blocker,))
261 return ''.join(retval)
class SymbolStatisticsCollector:
  """Collect statistics about lines of development.

  Record a summary of information about each line of development in
  the RCS files for later storage into a database.  The database is
  created in CollectRevsPass and it is used in CollateSymbolsPass (via
  the SymbolStatistics class).

  collect_data._SymbolDataCollector inserts information into instances
  of this class by calling its register_*() methods.

  Its main purpose is to assist in the decisions about which symbols
  can be treated as branches and tags and which may be excluded.

  The data collected by this class can be written to the file
  config.SYMBOL_STATISTICS."""

  def __init__(self):
    """Start with no statistics recorded."""

    # A map { lod -> _Stats } for all lines of development:
    self._stats = { }

  def __getitem__(self, lod):
    """Return the _Stats record for line of development LOD.

    Create and register a new one if necessary."""

    try:
      return self._stats[lod]
    except KeyError:
      stats = _Stats(lod)
      self._stats[lod] = stats
      return stats

  def register(self, cvs_file_items):
    """Register the statistics for each symbol in CVS_FILE_ITEMS."""

    for lod_items in cvs_file_items.iter_lods():
      if lod_items.lod is not None:
        branch_stats = self[lod_items.lod]

        branch_stats.register_branch_creation()

        if lod_items.cvs_revisions:
          branch_stats.register_branch_commit()

        if lod_items.is_trivial_import():
          branch_stats.register_trivial_import()

        if lod_items.is_pure_ntdb():
          branch_stats.register_pure_ntdb()

        for cvs_symbol in lod_items.iter_blockers():
          branch_stats.register_branch_blocker(cvs_symbol.symbol)

        if lod_items.cvs_branch is not None:
          branch_stats.register_branch_possible_parents(
              lod_items.cvs_branch, cvs_file_items
              )

      # Tags are recorded under their own symbol, so they are counted
      # whether or not the lod holding them has a record of its own:
      for cvs_tag in lod_items.cvs_tags:
        tag_stats = self[cvs_tag.symbol]

        tag_stats.register_tag_creation()

        tag_stats.register_tag_possible_parents(cvs_tag, cvs_file_items)

  def purge_ghost_symbols(self):
    """Purge any symbols that don't have any activity.

    Such ghost symbols can arise if a symbol was defined in an RCS
    file but pointed at a non-existent revision."""

    # Iterate over a snapshot, because entries are deleted from the
    # dictionary while looping:
    for stats in list(self._stats.values()):
      if stats.is_ghost():
        logger.warn('Deleting ghost symbol: %s' % (stats.lod,))
        del self._stats[stats.lod]

  def close(self):
    """Store the stats database to the SYMBOL_STATISTICS file.

    The collector cannot be used any more after a successful call."""

    f = open(artifact_manager.get_temp_file(config.SYMBOL_STATISTICS), 'wb')
    try:
      # Protocol -1 selects the highest pickle protocol available.
      cPickle.dump(list(self._stats.values()), f, -1)
    finally:
      # Release the file handle even if pickling fails:
      f.close()
    self._stats = None
class SymbolStatistics:
  """Read and handle line of development statistics.

  The statistics are read from a database created by
  SymbolStatisticsCollector.  This class has methods to process the
  statistics information and help with decisions about:

  1. What tags and branches should be processed/excluded

  2. What tags should be forced to be branches and vice versa (this
     class maintains some statistics to help the user decide)

  3. Are there inconsistencies?

     - A symbol that is sometimes a branch and sometimes a tag

     - A forced branch with commit(s) on it

     - A non-excluded branch depends on an excluded branch

  The data in this class is read from a pickle file."""

  def __init__(self, filename):
    """Read the stats database from FILENAME."""

    # A map { LineOfDevelopment -> _Stats } for all lines of
    # development:
    self._stats = { }

    # A map { LineOfDevelopment.id -> _Stats } for all lines of
    # development:
    self._stats_by_id = { }

    # NOTE(review): unpickling runs code chosen by the file's author;
    # FILENAME is expected to be the file written by
    # SymbolStatisticsCollector -- confirm no untrusted path gets here.
    f = open(filename, 'rb')
    try:
      stats_list = cPickle.load(f)
    finally:
      # Do not leave the handle to the garbage collector:
      f.close()

    for stats in stats_list:
      self._stats[stats.lod] = stats
      self._stats_by_id[stats.lod.id] = stats

  def __len__(self):
    """Return the number of lines of development with statistics."""

    return len(self._stats)

  def __getitem__(self, lod_id):
    """Return the _Stats object for the lod whose id is LOD_ID.

    Raise KeyError if no such lod exists."""

    return self._stats_by_id[lod_id]

  def get_stats(self, lod):
    """Return the _Stats object for LineOfDevelopment instance LOD.

    Raise KeyError if no such lod exists."""

    return self._stats[lod]

  def __iter__(self):
    """Iterate over the _Stats objects of all lines of development."""

    return iter(self._stats.values())

  def _check_blocked_excludes(self, symbol_map):
    """Check for any excluded LODs that are blocked by non-excluded symbols.

    If any are found, describe the problem to logger.error() and raise
    a FatalException."""

    # A list of (lod,[blocker,...]) tuples for excludes that are
    # blocked by the specified non-excluded blockers:
    problems = []

    for lod in symbol_map.values():
      if isinstance(lod, ExcludedSymbol):
        # Symbol is excluded; make sure that its blockers are also
        # excluded:
        lod_blockers = [
            blocker
            for blocker in self.get_stats(lod).branch_blockers
            if isinstance(symbol_map.get(blocker, None), IncludedSymbol)
            ]
        if lod_blockers:
          problems.append((lod, lod_blockers))

    if problems:
      s = []
      for (lod, lod_blockers) in problems:
        s.append(
            '%s: %s cannot be excluded because the following symbols '
            'depend on it:\n'
            % (error_prefix, lod,)
            )
        for blocker in lod_blockers:
          s.append(' %s\n' % (blocker,))
        s.append('\n')
      logger.error(''.join(s))

      raise FatalException()

  def _check_invalid_tags(self, symbol_map):
    """Check for commits on any symbols that are to be converted as tags.

    SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
    indicating how each AbstractSymbol is to be converted.  If there
    is a commit on a symbol, then it cannot be converted as a tag.  If
    any tags with commits are found, output error messages describing
    the problems then raise a FatalException."""

    logger.quiet("Checking for forced tags with commits...")

    invalid_tags = [ ]
    for symbol in symbol_map.values():
      if isinstance(symbol, Tag):
        stats = self.get_stats(symbol)
        if stats.branch_commit_count > 0:
          invalid_tags.append(symbol)

    if not invalid_tags:
      # No problems found:
      return

    s = []
    s.append(
        '%s: The following branches cannot be forced to be tags '
        'because they have commits:\n'
        % (error_prefix,)
        )
    for tag in invalid_tags:
      # The one-element tuple keeps the formatting correct whatever
      # type the name has:
      s.append(' %s\n' % (tag.name,))
    s.append('\n')
    logger.error(''.join(s))

    raise FatalException()

  def check_consistency(self, symbol_map):
    """Check the plan for how to convert symbols for consistency.

    SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
    indicating how each AbstractSymbol is to be converted.  If any
    problems are detected, describe the problem to logger.error() and
    raise a FatalException."""

    import sys

    # We want to do all of the consistency checks even if one of them
    # fails, so that the user gets as much feedback as possible.  Set
    # this variable to True if any errors are found.
    error_found = False

    # Check that the planned preferred parents are OK for all
    # IncludedSymbols:
    for lod in symbol_map.values():
      if isinstance(lod, IncludedSymbol):
        stats = self.get_stats(lod)
        try:
          stats.check_preferred_parent_allowed(lod)
        except SymbolPlanException:
          # sys.exc_info() is used instead of binding the exception in
          # the except clause because the two binding syntaxes
          # ("except X, e" and "except X as e") are each rejected by
          # some Python versions.
          logger.error('%s\n' % (sys.exc_info()[1],))
          error_found = True

    try:
      self._check_blocked_excludes(symbol_map)
    except FatalException:
      error_found = True

    try:
      self._check_invalid_tags(symbol_map)
    except FatalException:
      error_found = True

    if error_found:
      raise FatalException(
          'Please fix the above errors and restart CollateSymbolsPass'
          )

  def exclude_symbol(self, symbol):
    """SYMBOL has been excluded; remove it from our statistics.

    Raise KeyError if there are no statistics for SYMBOL."""

    del self._stats[symbol]
    del self._stats_by_id[symbol.id]

    # Remove references to this symbol from other statistics objects:
    for stats in self._stats.values():
      stats.branch_blockers.discard(symbol)
      stats.possible_parents.pop(symbol, None)