cvs2git: Make the --blobfile argument optional.
[cvs2svn.git] / cvs2svn_lib / symbol_strategy.py
blobbafdeb24799cacf5a4000a2fdca1b1f5daa822c5
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """SymbolStrategy classes determine how to convert symbols."""
19 import re
21 from cvs2svn_lib.common import FatalError
22 from cvs2svn_lib.common import path_join
23 from cvs2svn_lib.common import normalize_svn_path
24 from cvs2svn_lib.log import logger
25 from cvs2svn_lib.symbol import Trunk
26 from cvs2svn_lib.symbol import TypedSymbol
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.symbol_statistics import SymbolPlanError
class StrategyRule:
  """One rule in the chain that decides how a symbol is converted.

  Subclasses must implement get_symbol(); start() and finish() are
  optional hooks whose default implementations do nothing."""

  def start(self, symbol_statistics):
    """Prepare the rule; called once before the first get_symbol() call.

    SYMBOL_STATISTICS is an instance of SymbolStatistics containing
    the statistics for all symbols in all projects.  Subclasses may
    override this hook to set themselves up; the default does nothing."""

    return None

  def get_symbol(self, symbol, stats):
    """Decide what to do with the symbol whose statistics are STATS.

    SYMBOL is the Trunk or Symbol object as determined by the rules
    applied so far.  The hope is that some rule will turn every naked
    Symbol instance into a TypedSymbol.

    If this rule applies to SYMBOL, return a new or modified
    AbstractSymbol object; if it does not apply, return SYMBOL
    unchanged.  The base class provides no decision and raises
    NotImplementedError."""

    raise NotImplementedError()

  def finish(self):
    """Clean up; called once after the last get_symbol() call.

    Subclasses may override this hook to release resources, etc.  The
    default does nothing."""

    return None
class _RegexpStrategyRule(StrategyRule):
  """Base class for rules that act on symbols whose names match a regexp.

  When self.regexp matches the name of a still-undecided symbol, the
  result of self.action(symbol) is returned; any other symbol is
  returned unchanged."""

  def __init__(self, pattern, action):
    """Initialize a _RegexpStrategyRule.

    PATTERN is a string holding a regexp.  It is wrapped so that it is
    anchored at both the beginning and the end, i.e. it has to match a
    complete symbol name for the rule to apply.  A FatalError is raised
    if PATTERN cannot be compiled.

    ACTION is called with the matching symbol to produce the converted
    symbol.  It should be one of the classes Branch, Tag, or
    ExcludedSymbol."""

    anchored = '^(?:' + pattern + ')$'
    try:
      compiled = re.compile(anchored)
    except re.error:
      raise FatalError("%r is not a valid regexp." % (pattern,))

    self.regexp = compiled
    self.action = action

  def log(self, symbol):
    """Report that the rule is being applied to SYMBOL.

    Must be provided by subclasses."""

    raise NotImplementedError()

  def get_symbol(self, symbol, stats):
    """Return self.action(SYMBOL) if SYMBOL is undecided and its name matches.

    Trunk and already-typed symbols are passed through untouched."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    if not self.regexp.match(symbol.name):
      return symbol

    self.log(symbol)
    return self.action(symbol)
class ForceBranchRegexpStrategyRule(_RegexpStrategyRule):
  """Convert every symbol whose name matches the pattern as a branch."""

  def __init__(self, pattern):
    """PATTERN is the regexp string handed to _RegexpStrategyRule."""

    _RegexpStrategyRule.__init__(self, pattern, Branch)

  def log(self, symbol):
    """Tell the logger that SYMBOL becomes a branch because of the regexp."""

    message = (
        'Converting symbol %s as a branch because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
        )
    logger.verbose(message)
class ForceTagRegexpStrategyRule(_RegexpStrategyRule):
  """Convert every symbol whose name matches the pattern as a tag."""

  def __init__(self, pattern):
    """PATTERN is the regexp string handed to _RegexpStrategyRule."""

    _RegexpStrategyRule.__init__(self, pattern, Tag)

  def log(self, symbol):
    """Tell the logger that SYMBOL becomes a tag because of the regexp."""

    message = (
        'Converting symbol %s as a tag because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
        )
    logger.verbose(message)
class ExcludeRegexpStrategyRule(_RegexpStrategyRule):
  """Exclude every symbol whose name matches the pattern."""

  def __init__(self, pattern):
    """PATTERN is the regexp string handed to _RegexpStrategyRule."""

    _RegexpStrategyRule.__init__(self, pattern, ExcludedSymbol)

  def log(self, symbol):
    """Tell the logger that SYMBOL is excluded because of the regexp."""

    message = (
        'Excluding symbol %s because it matches regexp "%s".'
        % (symbol, self.regexp.pattern)
        )
    logger.verbose(message)
class ExcludeTrivialImportBranchRule(StrategyRule):
  """Exclude symbols that are trivial import branches.

  A trivial import branch is a branch that, in every file in which it
  appeared, carried nothing but a single import (no other kinds of
  commits).  Such branches are worthless in most cases."""

  def get_symbol(self, symbol, stats):
    """Return an ExcludedSymbol for trivial import branches.

    Trunk and already-typed symbols, as well as symbols that do not
    qualify, are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    # Qualifies only if it was never created as a tag and every branch
    # creation was a trivial import.
    if not (stats.tag_create_count == 0
            and stats.branch_create_count == stats.trivial_import_count):
      return symbol

    message = (
        'Excluding branch %s because it is a trivial import branch.'
        % (symbol,)
        )
    logger.verbose(message)
    return ExcludedSymbol(symbol)
class ExcludeVendorBranchRule(StrategyRule):
  """Exclude symbols that are pure vendor branches.

  A pure vendor branch is a branch that, in every file in which it
  appeared, carried only imports (no other kinds of commits)."""

  def get_symbol(self, symbol, stats):
    """Return an ExcludedSymbol for pure vendor branches.

    Trunk and already-typed symbols, as well as symbols that do not
    qualify, are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    # Qualifies only if it was never created as a tag and every branch
    # creation is counted in pure_ntdb_count.
    if not (stats.tag_create_count == 0
            and stats.branch_create_count == stats.pure_ntdb_count):
      return symbol

    message = (
        'Excluding branch %s because it is a pure vendor branch.'
        % (symbol,)
        )
    logger.verbose(message)
    return ExcludedSymbol(symbol)
class UnambiguousUsageRule(StrategyRule):
  """Convert symbols that are used only as a tag, or only as a branch."""

  def get_symbol(self, symbol, stats):
    """Return a Tag or Branch if SYMBOL's usage is unambiguous.

    A symbol counts as a tag if it was ever created as a tag, and as a
    branch if it was ever created as a branch or has commits on it.
    Symbols that are both, or neither, are returned unchanged, as are
    Trunk and already-typed symbols."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    used_as_tag = stats.tag_create_count > 0
    used_as_branch = (
        stats.branch_create_count > 0 or stats.branch_commit_count > 0
        )

    if used_as_tag == used_as_branch:
      # Either used both ways (can't decide) or not used at all.
      return symbol

    if used_as_branch:
      logger.verbose(
          'Converting symbol %s as a branch because it is always used '
          'as a branch.'
          % (symbol,)
          )
      return Branch(symbol)

    logger.verbose(
        'Converting symbol %s as a tag because it is always used '
        'as a tag.'
        % (symbol,)
        )
    return Tag(symbol)
class BranchIfCommitsRule(StrategyRule):
  """Convert a symbol as a branch if it ever received a commit."""

  def get_symbol(self, symbol, stats):
    """Return Branch(SYMBOL) if STATS records at least one branch commit.

    Trunk, already-typed symbols and symbols without commits are
    returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    if not (stats.branch_commit_count > 0):
      return symbol

    message = (
        'Converting symbol %s as a branch because there are commits on it.'
        % (symbol,)
        )
    logger.verbose(message)
    return Branch(symbol)
class HeuristicStrategyRule(StrategyRule):
  """Convert a symbol according to its most frequent usage.

  The symbol becomes a branch if it was created as a branch more often
  than as a tag; otherwise (including a tie) it becomes a tag."""

  def get_symbol(self, symbol, stats):
    """Return Tag(SYMBOL) or Branch(SYMBOL) based on creation counts.

    Trunk and already-typed symbols are returned unchanged."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    if stats.branch_create_count > stats.tag_create_count:
      logger.verbose(
          'Converting symbol %s as a branch because it is more often used '
          'as a branch.'
          % (symbol,)
          )
      return Branch(symbol)

    # Ties are resolved in favor of a tag.
    logger.verbose(
        'Converting symbol %s as a tag because it is more often used '
        'as a tag.'
        % (symbol,)
        )
    return Tag(symbol)
class _CatchAllRule(StrategyRule):
  """Common base for rules that apply to every undecided symbol.

  Such a rule is usually placed after a list of more careful rules
  (including a general rule like UnambiguousUsageRule), so that it
  only affects the symbols that were not handled earlier."""

  def __init__(self, action):
    """ACTION is called with a symbol and returns the converted symbol."""

    self._action = action

  def log(self, symbol):
    """Report that the rule is being applied to SYMBOL.

    Must be provided by subclasses."""

    raise NotImplementedError()

  def get_symbol(self, symbol, stats):
    """Apply the action to SYMBOL unless it is Trunk or already typed."""

    if isinstance(symbol, (Trunk, TypedSymbol)):
      return symbol

    self.log(symbol)
    return self._action(symbol)
class AllBranchRule(_CatchAllRule):
  """Convert every remaining symbol as a branch.

  This rule is usually placed after a list of more careful rules
  (including a general rule like UnambiguousUsageRule), so that it
  only affects the symbols that were not handled earlier."""

  def __init__(self):
    _CatchAllRule.__init__(self, Branch)

  def log(self, symbol):
    """Tell the logger that SYMBOL becomes a branch by default."""

    message = (
        'Converting symbol %s as a branch because no other rules applied.'
        % (symbol,)
        )
    logger.verbose(message)
class AllTagRule(_CatchAllRule):
  """Convert every remaining symbol as a tag.

  Conflicts are not a concern here; they are caught later by
  SymbolStatistics.check_consistency().

  This rule is usually placed after a list of more careful rules
  (including a general rule like UnambiguousUsageRule), so that it
  only affects the symbols that were not handled earlier."""

  def __init__(self):
    _CatchAllRule.__init__(self, Tag)

  def log(self, symbol):
    """Tell the logger that SYMBOL becomes a tag by default."""

    message = (
        'Converting symbol %s as a tag because no other rules applied.'
        % (symbol,)
        )
    logger.verbose(message)
class AllExcludedRule(_CatchAllRule):
  """Exclude every remaining symbol.

  This rule is usually placed after a list of more careful rules
  (including a SymbolHintsFileRule or several ManualSymbolRules), so
  that it only affects the symbols that were not handled earlier."""

  def __init__(self):
    _CatchAllRule.__init__(self, ExcludedSymbol)

  def log(self, symbol):
    """Tell the logger that SYMBOL is excluded by default."""

    message = 'Excluding symbol %s by catch-all rule.' % (symbol,)
    logger.verbose(message)
class TrunkPathRule(StrategyRule):
  """Supply the base path for Trunk objects that do not have one yet."""

  def __init__(self, trunk_path):
    """TRUNK_PATH is the value stored into Trunk.base_path."""

    self.trunk_path = trunk_path

  def get_symbol(self, symbol, stats):
    """Set SYMBOL.base_path if SYMBOL is a Trunk without a base path.

    SYMBOL itself is always returned."""

    if not isinstance(symbol, Trunk):
      return symbol

    if symbol.base_path is None:
      symbol.base_path = self.trunk_path

    return symbol
class SymbolPathRule(StrategyRule):
  """Supply base paths for symbol LODs of one particular type."""

  def __init__(self, symbol_type, base_path):
    """SYMBOL_TYPE is the class to act on; BASE_PATH is the parent path.

    A matching symbol gets the path formed by joining BASE_PATH and
    the symbol's name."""

    self.symbol_type = symbol_type
    self.base_path = base_path

  def get_symbol(self, symbol, stats):
    """Set SYMBOL.base_path if SYMBOL has the right type and no path yet.

    SYMBOL itself is always returned."""

    if not isinstance(symbol, self.symbol_type):
      return symbol

    if symbol.base_path is None:
      symbol.base_path = path_join(self.base_path, symbol.name)

    return symbol
class BranchesPathRule(SymbolPathRule):
  """Supply base paths for Branch LODs."""

  def __init__(self, branch_path):
    """BRANCH_PATH is the path under which branches are placed."""

    SymbolPathRule.__init__(self, Branch, branch_path)
class TagsPathRule(SymbolPathRule):
  """Supply base paths for Tag LODs."""

  def __init__(self, tag_path):
    """TAG_PATH is the path under which tags are placed."""

    SymbolPathRule.__init__(self, Tag, tag_path)
class HeuristicPreferredParentRule(StrategyRule):
  """Choose preferred parents heuristically.

  For every TypedSymbol that has no preferred parent yet, prefer the
  line of development that appeared most often as a possible parent of
  the symbol.  Ties are broken in favor of the candidate that compares
  smallest with the '<' operator."""

  def _get_preferred_parent(self, stats):
    """Return the LOD that is most often a possible parent in STATS.

    STATS.possible_parents maps candidate parents to the number of
    times each appeared.  The candidate with the highest count is
    returned; among candidates with equal counts the one that compares
    smallest wins.  Return None if there are no candidates."""

    winner = None
    winner_count = -1
    for (candidate, count) in stats.possible_parents.items():
      if count < winner_count:
        continue
      # Here count >= winner_count, so the second test is the tie-break.
      if count > winner_count or candidate < winner:
        winner = candidate
        winner_count = count

    return winner

  def get_symbol(self, symbol, stats):
    """Fill in SYMBOL.preferred_parent_id if it is still unset.

    Only TypedSymbols are touched.  SYMBOL itself is always returned."""

    if not isinstance(symbol, TypedSymbol):
      return symbol

    if symbol.preferred_parent_id is not None:
      return symbol

    parent = self._get_preferred_parent(stats)
    if parent is None:
      logger.verbose('%s has no preferred parent' % (symbol,))
      return symbol

    symbol.preferred_parent_id = parent.id
    logger.verbose(
        'The preferred parent of %s is %s' % (symbol, parent,)
        )
    return symbol
class ManualTrunkRule(StrategyRule):
  """Override the SVN path of Trunk LODs.

  Members:

    project_id -- (int or None) The id of the project whose trunk
        should be affected by this rule.  If project_id is None, then
        the rule is not project-specific.

    svn_path -- (str) The SVN path that should be used as the base
        directory for this trunk.  This member must not be None,
        though it may be the empty string for a single-project,
        trunk-only conversion.

  """

  def __init__(self, project_id, svn_path):
    """Store PROJECT_ID and the normalized form of SVN_PATH."""

    self.project_id = project_id
    self.svn_path = normalize_svn_path(svn_path, allow_empty=True)

  def get_symbol(self, symbol, stats):
    """Set SYMBOL.base_path if SYMBOL is a Trunk covered by this rule.

    The rule covers every project when self.project_id is None, and
    otherwise only the project whose id equals self.project_id.
    SYMBOL itself is always returned."""

    covered = (
        self.project_id is None
        or self.project_id == stats.lod.project.id
        )

    if covered and isinstance(symbol, Trunk):
      symbol.base_path = self.svn_path

    return symbol
def convert_as_branch(symbol):
  """Log the manual decision and return SYMBOL converted to a Branch."""

  message = (
      'Converting symbol %s as a branch because of manual setting.'
      % (symbol,)
      )
  logger.verbose(message)
  return Branch(symbol)
def convert_as_tag(symbol):
  """Log the manual decision and return SYMBOL converted to a Tag."""

  message = (
      'Converting symbol %s as a tag because of manual setting.'
      % (symbol,)
      )
  logger.verbose(message)
  return Tag(symbol)
def exclude(symbol):
  """Log the manual decision and return SYMBOL as an ExcludedSymbol."""

  message = (
      'Excluding symbol %s because of manual setting.'
      % (symbol,)
      )
  logger.verbose(message)
  return ExcludedSymbol(symbol)
class ManualSymbolRule(StrategyRule):
  """Change how particular symbols are converted.

  Members:

    project_id -- (int or None) The id of the project whose symbol
        should be affected by this rule.  If project_id is None, then
        the rule is not project-specific.

    symbol_name -- (str) The name of the symbol that should be
        affected by this rule.

    conversion -- (callable or None) A callable that converts the
        symbol to its preferred output type.  This should normally be
        one of (convert_as_branch, convert_as_tag, exclude).  If this
        member is None, then this rule does not affect the symbol's
        output type.

    svn_path -- (str or None) The SVN path that should be used as the
        base directory for this symbol, stored in normalized form.  If
        this member is None, then this rule does not affect the
        symbol's base path.

    parent_lod_name -- (str or None) The name of the line of
        development that should be preferred as the parent of this
        symbol.  (The preferred parent is the line of development from
        which the symbol should sprout.)  If this member is set to the
        string '.trunk.', then the symbol will be set to sprout
        directly from trunk.  If this member is set to None, then this
        rule won't affect the symbol's parent.

  """

  def __init__(
        self, project_id, symbol_name, conversion, svn_path, parent_lod_name
        ):
    """Store the rule's settings; SVN_PATH is normalized unless it is None."""

    self.project_id = project_id
    self.symbol_name = symbol_name
    self.conversion = conversion
    if svn_path is None:
      self.svn_path = None
    else:
      self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
    self.parent_lod_name = parent_lod_name

  def _get_parent_by_id(self, parent_lod_name, stats):
    """Return the LOD object for the parent with name PARENT_LOD_NAME.

    STATS is the _Stats object describing a symbol whose parent needs
    to be determined from its name.  Trunk is never matched by name.
    If none of its possible parents has name PARENT_LOD_NAME, raise a
    SymbolPlanError listing the possible parents and their counts."""

    for candidate in stats.possible_parents:
      if isinstance(candidate, Trunk):
        continue
      if candidate.name == parent_lod_name:
        return candidate

    # No candidate matched.  List the alternatives, most frequent first.
    # A stable key-based sort on a list copy gives the same order as the
    # former cmp-based in-place sort, but also works where dict.items()
    # is not a list and cmp() is unavailable.
    parent_counts = list(stats.possible_parents.items())
    parent_counts.sort(key=lambda item: -item[1])

    lines = [
        '%s is not a valid parent for %s;'
            % (parent_lod_name, stats.lod,),
        ' possible parents (with counts):'
        ]
    for (symbol, count) in parent_counts:
      if isinstance(symbol, Trunk):
        lines.append(' .trunk. : %d' % count)
      else:
        lines.append(' %s : %d' % (symbol.name, count))
    raise SymbolPlanError('\n'.join(lines))

  def get_symbol(self, symbol, stats):
    """Apply the configured conversion, parent and path to SYMBOL.

    The rule is skipped (SYMBOL returned unchanged) if it is specific
    to another project, if SYMBOL is a Trunk, or if the name of
    STATS.lod differs from self.symbol_name.  Otherwise return the
    possibly converted symbol.  Raise SymbolPlanError if the requested
    parent is not among the symbol's possible parents."""

    if (self.project_id is not None
        and self.project_id != stats.lod.project.id):
      return symbol

    if isinstance(symbol, Trunk):
      return symbol

    if self.symbol_name != stats.lod.name:
      return symbol

    if self.conversion is not None:
      symbol = self.conversion(symbol)

    if self.parent_lod_name == '.trunk.':
      symbol.preferred_parent_id = stats.lod.project.trunk_id
    elif self.parent_lod_name is not None:
      symbol.preferred_parent_id = self._get_parent_by_id(
          self.parent_lod_name, stats
          ).id

    if self.svn_path is not None:
      symbol.base_path = self.svn_path

    return symbol
class SymbolHintsFileRule(StrategyRule):
  """Apply manual symbol configurations that are read from a file.

  The file is processed line by line.  Lines that are empty or whose
  first non-blank character is '#' are ignored.  Every other line has
  the following whitespace-separated fields:

      <project-id> <symbol-name> <conversion> [<svn-path> [<parent-lod-name>]]

  with these meanings:

      project-id -- the numerical id of the Project to which the
          symbol belongs (numbered starting with 0).  This field can
          be '.' if the rule is not project-specific.

      symbol-name -- the name of the symbol being specified, or
          '.trunk.' if the rule should apply to trunk.

      conversion -- how the symbol should be treated in the
          conversion.  This is one of the following values: 'branch',
          'tag', or 'exclude'.  This field can be '.' if the rule
          shouldn't affect how the symbol is treated in the
          conversion.

      svn-path -- the SVN path that should serve as the root path of
          this LOD.  The path should be expressed as a path relative
          to the SVN root directory, with or without a leading '/'.
          This field can be omitted or '.' if the rule shouldn't
          affect the LOD's SVN path.

      parent-lod-name -- the name of the LOD that should serve as this
          symbol's parent.  This field can be omitted or '.' if the
          rule shouldn't affect the symbol's parent, or it can be
          '.trunk.' to indicate that the symbol should sprout from the
          project's trunk."""

  comment_re = re.compile(r'^(\#|$)')

  conversion_map = {
      'branch' : convert_as_branch,
      'tag' : convert_as_tag,
      'exclude' : exclude,
      '.' : None,
      }

  def __init__(self, filename):
    """FILENAME is the path of the hints file; it is read in start()."""

    self.filename = filename

  def _parse_line(self, line):
    """Build the rule described by LINE, a non-comment line of the file.

    LINE has had its trailing whitespace removed already.  Return a
    ManualTrunkRule or a ManualSymbolRule, or None if the line asks
    for nothing to be changed.  Raise FatalError if the line is
    malformed."""

    fields = line.split()
    extras = fields[3:]
    if len(fields) < 3 or len(extras) > 2:
      raise FatalError(
          'The following line in "%s" cannot be parsed:\n "%s"'
          % (self.filename, line,)
          )

    (project_id, symbol_name, conversion) = fields[:3]

    # Optional fourth field: the SVN path ('.' means "not specified").
    svn_path = None
    if extras:
      svn_path = extras[0]
      if svn_path == '.':
        svn_path = None
      elif svn_path.startswith('/'):
        svn_path = svn_path[1:]

    # Optional fifth field: the parent LOD name.
    parent_lod_name = '.'
    if len(extras) > 1:
      parent_lod_name = extras[1]

    if project_id == '.':
      project_id = None
    else:
      try:
        project_id = int(project_id)
      except ValueError:
        raise FatalError(
            'Illegal project_id in the following line:\n "%s"' % (line,)
            )

    if symbol_name == '.trunk.':
      if conversion not in ['.', 'trunk']:
        raise FatalError('Trunk cannot be converted as a different type')

      if parent_lod_name != '.':
        raise FatalError('Trunk\'s parent cannot be set')

      if svn_path is None:
        # Without a path there is nothing for a trunk rule to do.
        return None

      return ManualTrunkRule(project_id, svn_path)

    try:
      conversion = self.conversion_map[conversion]
    except KeyError:
      raise FatalError(
          'Illegal conversion in the following line:\n "%s"' % (line,)
          )

    if parent_lod_name == '.':
      parent_lod_name = None

    if conversion is None and svn_path is None and parent_lod_name is None:
      # The line does not ask for any change.
      return None

    return ManualSymbolRule(
        project_id, symbol_name, conversion, svn_path, parent_lod_name
        )

  def start(self, symbol_statistics):
    """Read the hints file, build the rules it describes, and start them.

    Raise FatalError if a line of the file cannot be interpreted."""

    self._rules = []

    hints_file = open(self.filename, 'r')
    try:
      for raw_line in hints_file:
        line = raw_line.rstrip()
        if self.comment_re.match(line.lstrip()):
          continue

        rule = self._parse_line(line)
        if rule is not None:
          self._rules.append(rule)
    finally:
      hints_file.close()

    for rule in self._rules:
      rule.start(symbol_statistics)

  def get_symbol(self, symbol, stats):
    """Pass SYMBOL through every rule read from the file, in file order."""

    for rule in self._rules:
      symbol = rule.get_symbol(symbol, stats)

    return symbol

  def finish(self):
    """Finish every rule read from the file, then discard the rule list."""

    for rule in self._rules:
      rule.finish()

    del self._rules