Add a way to specify the MimeMapper mappings to its constructor directly.
[cvs2svn.git] / cvs2svn_lib / symbol_strategy.py
blob9d562a82402fe6ee9e0e80d474256b8170fe38cf
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """SymbolStrategy classes determine how to convert symbols."""
19 import re
21 from cvs2svn_lib.common import FatalError
22 from cvs2svn_lib.common import path_join
23 from cvs2svn_lib.common import normalize_svn_path
24 from cvs2svn_lib.log import Log
25 from cvs2svn_lib.symbol import Trunk
26 from cvs2svn_lib.symbol import TypedSymbol
27 from cvs2svn_lib.symbol import Branch
28 from cvs2svn_lib.symbol import Tag
29 from cvs2svn_lib.symbol import ExcludedSymbol
30 from cvs2svn_lib.symbol_statistics import SymbolPlanError
class StrategyRule:
    """Base class for one rule that may decide how a symbol is converted."""

    def start(self, symbol_statistics):
        """Prepare the rule; called once before the first get_symbol().

        SYMBOL_STATISTICS is an instance of SymbolStatistics containing
        the statistics for all symbols in all projects.  Subclasses may
        override this hook to set themselves up; by default nothing is
        done."""

        return None

    def get_symbol(self, symbol, stats):
        """Return an object describing what to do with the symbol in STATS.

        SYMBOL is the Trunk or Symbol object as determined by the rules
        applied so far.  The hope is that one of the rules turns every
        naked Symbol instance into a TypedSymbol.

        When the rule applies to SYMBOL (whose statistics are collected
        in STATS), return a new or modified AbstractSymbol object;
        when it does not apply, return SYMBOL unchanged.

        Subclasses must override this method; the base implementation
        raises NotImplementedError."""

        raise NotImplementedError

    def finish(self):
        """Clean up; called once after the last get_symbol() call.

        Subclasses may override this hook to do whatever they want to
        release resources, etc.; by default nothing is done."""

        return None
class _RegexpStrategyRule(StrategyRule):
    """A StrategyRule that bases its decisions on regexp matches.

    If self.regexp matches a symbol name, return self.action(symbol);
    otherwise, return the symbol unchanged."""

    def __init__(self, pattern, action):
        """Initialize a _RegexpStrategyRule.

        PATTERN is a string that will be treated as a regexp pattern.
        PATTERN must match a full symbol name for the rule to apply
        (i.e., it is anchored at the beginning and end of the symbol
        name).

        ACTION is a callable describing how the symbol should be
        converted.  It should normally be one of the classes Branch,
        Tag, or ExcludedSymbol.

        If PATTERN matches a symbol name, then get_symbol() returns
        ACTION(symbol); otherwise it returns SYMBOL unchanged.

        Raise FatalError if PATTERN is not a valid regexp."""

        try:
            # Validate PATTERN on its own first, so that a pattern with
            # unbalanced parentheses cannot become valid by accident
            # once it is wrapped in the group below.
            re.compile(pattern)
            # Group the pattern before anchoring it.  Without the group,
            # a top-level alternation such as 'foo|bar' would be
            # compiled as '^foo|bar$', which anchors each alternative on
            # one side only and therefore matches partial symbol names.
            self.regexp = re.compile('^(?:' + pattern + ')$')
        except re.error:
            raise FatalError("%r is not a valid regexp." % (pattern,))

        self.action = action

    def log(self, symbol):
        """Log that this rule applies to SYMBOL.

        Subclasses must override this method; it is called by
        get_symbol() just before the action is applied."""

        raise NotImplementedError()

    def get_symbol(self, symbol, stats):
        """Apply self.action to SYMBOL if its name matches self.regexp.

        Trunk and TypedSymbol instances are returned untouched, as are
        symbols whose names do not match.  STATS is not used."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        if self.regexp.match(symbol.name):
            self.log(symbol)
            return self.action(symbol)

        return symbol
class ForceBranchRegexpStrategyRule(_RegexpStrategyRule):
    """Convert every symbol whose name matches a pattern as a branch."""

    def __init__(self, pattern):
        """PATTERN is the regexp string handed to _RegexpStrategyRule."""

        _RegexpStrategyRule.__init__(self, pattern, action=Branch)

    def log(self, symbol):
        """Report verbosely that SYMBOL is being converted as a branch."""

        logger = Log()
        details = (symbol, self.regexp.pattern)
        logger.verbose(
            'Converting symbol %s as a branch because it matches regexp "%s".'
            % details
            )
class ForceTagRegexpStrategyRule(_RegexpStrategyRule):
    """Convert every symbol whose name matches a pattern as a tag."""

    def __init__(self, pattern):
        """PATTERN is the regexp string handed to _RegexpStrategyRule."""

        _RegexpStrategyRule.__init__(self, pattern, action=Tag)

    def log(self, symbol):
        """Report verbosely that SYMBOL is being converted as a tag."""

        logger = Log()
        details = (symbol, self.regexp.pattern)
        logger.verbose(
            'Converting symbol %s as a tag because it matches regexp "%s".'
            % details
            )
class ExcludeRegexpStrategyRule(_RegexpStrategyRule):
    """Exclude every symbol whose name matches a pattern."""

    def __init__(self, pattern):
        """PATTERN is the regexp string handed to _RegexpStrategyRule."""

        _RegexpStrategyRule.__init__(self, pattern, action=ExcludedSymbol)

    def log(self, symbol):
        """Report verbosely that SYMBOL is being excluded."""

        logger = Log()
        details = (symbol, self.regexp.pattern)
        logger.verbose(
            'Excluding symbol %s because it matches regexp "%s".'
            % details
            )
class ExcludeTrivialImportBranchRule(StrategyRule):
    """Exclude symbols that are trivial import branches.

    A trivial import branch is a branch that, in every file in which it
    appeared, only ever had a single import on it (no other kinds of
    commits).  In most cases these branches are worthless."""

    def get_symbol(self, symbol, stats):
        """Return ExcludedSymbol(SYMBOL) if STATS shows a trivial import.

        The symbol counts as a trivial import branch when it was never
        created as a tag and every branch creation was a trivial import.
        Trunk and TypedSymbol instances are returned unchanged."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        is_trivial_import = (
            stats.tag_create_count == 0
            and stats.branch_create_count == stats.trivial_import_count
            )
        if not is_trivial_import:
            return symbol

        Log().verbose(
            'Excluding branch %s because it is a trivial import branch.'
            % (symbol,)
            )
        return ExcludedSymbol(symbol)
class ExcludeVendorBranchRule(StrategyRule):
    """Exclude symbols that are pure vendor branches.

    A pure vendor branch is a branch that, in every file in which it
    appeared, only ever had imports on it (no other kinds of
    commits)."""

    def get_symbol(self, symbol, stats):
        """Return ExcludedSymbol(SYMBOL) if STATS shows a pure vendor branch.

        The symbol counts as a pure vendor branch when it was never
        created as a tag and stats.branch_create_count equals
        stats.pure_ntdb_count.  Trunk and TypedSymbol instances are
        returned unchanged."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        is_pure_vendor_branch = (
            stats.tag_create_count == 0
            and stats.branch_create_count == stats.pure_ntdb_count
            )
        if not is_pure_vendor_branch:
            return symbol

        Log().verbose(
            'Excluding branch %s because it is a pure vendor branch.'
            % (symbol,)
            )
        return ExcludedSymbol(symbol)
class UnambiguousUsageRule(StrategyRule):
    """Convert symbols that are used only as a tag, or only as a branch."""

    def get_symbol(self, symbol, stats):
        """Return Branch/Tag for SYMBOL when STATS shows unambiguous usage.

        A symbol is considered used as a tag if it was ever created as
        a tag, and used as a branch if it was ever created as a branch
        or ever had a commit on it.  If both or neither apply, SYMBOL
        is returned unchanged, as are Trunk and TypedSymbol
        instances."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        used_as_tag = stats.tag_create_count > 0
        used_as_branch = (
            stats.branch_create_count > 0 or stats.branch_commit_count > 0
            )

        if used_as_tag and used_as_branch:
            # Ambiguous usage; leave the decision to a later rule.
            return symbol

        if used_as_branch:
            Log().verbose(
                'Converting symbol %s as a branch because it is always used '
                'as a branch.'
                % (symbol,)
                )
            return Branch(symbol)

        if used_as_tag:
            Log().verbose(
                'Converting symbol %s as a tag because it is always used '
                'as a tag.'
                % (symbol,)
                )
            return Tag(symbol)

        # The symbol was never used at all.
        return symbol
class BranchIfCommitsRule(StrategyRule):
    """Convert a symbol as a branch if it ever had a commit on it."""

    def get_symbol(self, symbol, stats):
        """Return Branch(SYMBOL) if STATS records any commit on the symbol.

        Trunk and TypedSymbol instances, and symbols without commits,
        are returned unchanged."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        if not stats.branch_commit_count > 0:
            return symbol

        Log().verbose(
            'Converting symbol %s as a branch because there are commits on it.'
            % (symbol,)
            )
        return Branch(symbol)
class HeuristicStrategyRule(StrategyRule):
    """Convert a symbol according to its most frequent usage.

    The symbol becomes a tag or a branch depending on which way it was
    created more often.  When the two counts are equal the symbol is
    converted as a tag."""

    def get_symbol(self, symbol, stats):
        """Return Tag(SYMBOL) or Branch(SYMBOL) based on the counts in STATS.

        Trunk and TypedSymbol instances are returned unchanged."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        if stats.tag_create_count >= stats.branch_create_count:
            Log().verbose(
                'Converting symbol %s as a tag because it is more often used '
                'as a tag.'
                % (symbol,)
                )
            return Tag(symbol)

        Log().verbose(
            'Converting symbol %s as a branch because it is more often used '
            'as a branch.'
            % (symbol,)
            )
        return Branch(symbol)
class AllBranchRule(StrategyRule):
    """Convert every remaining symbol as a branch.

    This rule is usually placed after a list of more careful rules
    (including a general rule like UnambiguousUsageRule), so that it
    only applies to the symbols not handled earlier."""

    def get_symbol(self, symbol, stats):
        """Return Branch(SYMBOL) unless SYMBOL is a Trunk or TypedSymbol."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        Log().verbose(
            'Converting symbol %s as a branch because no other rules applied.'
            % (symbol,)
            )
        return Branch(symbol)
class AllTagRule(StrategyRule):
    """Convert every remaining symbol as a tag.

    Conflicts are not a concern here; they will be caught later by
    SymbolStatistics.check_consistency().

    This rule is usually placed after a list of more careful rules
    (including a general rule like UnambiguousUsageRule), so that it
    only applies to the symbols not handled earlier."""

    def get_symbol(self, symbol, stats):
        """Return Tag(SYMBOL) unless SYMBOL is a Trunk or TypedSymbol."""

        if isinstance(symbol, (Trunk, TypedSymbol)):
            return symbol

        Log().verbose(
            'Converting symbol %s as a tag because no other rules applied.'
            % (symbol,)
            )
        return Tag(symbol)
class TrunkPathRule(StrategyRule):
    """Assign a base path to Trunk objects that do not have one yet."""

    def __init__(self, trunk_path):
        """TRUNK_PATH is the value stored into Trunk.base_path."""

        self.trunk_path = trunk_path

    def get_symbol(self, symbol, stats):
        """Set SYMBOL.base_path if SYMBOL is a Trunk without a base path.

        SYMBOL itself is always returned; STATS is not used."""

        if not isinstance(symbol, Trunk):
            return symbol

        if symbol.base_path is None:
            symbol.base_path = self.trunk_path

        return symbol
class SymbolPathRule(StrategyRule):
    """Assign base paths to symbol LODs of one particular type."""

    def __init__(self, symbol_type, base_path):
        """SYMBOL_TYPE is the class of symbols to affect; BASE_PATH is
        the directory under which their paths are created."""

        self.symbol_type = symbol_type
        self.base_path = base_path

    def get_symbol(self, symbol, stats):
        """Set SYMBOL.base_path to path_join(self.base_path, SYMBOL.name).

        This only happens if SYMBOL is an instance of self.symbol_type
        and has no base path yet.  SYMBOL itself is always returned;
        STATS is not used."""

        if not isinstance(symbol, self.symbol_type):
            return symbol

        if symbol.base_path is None:
            symbol.base_path = path_join(self.base_path, symbol.name)

        return symbol
class BranchesPathRule(SymbolPathRule):
    """A SymbolPathRule that assigns base paths to Branch LODs."""

    def __init__(self, branch_path):
        """BRANCH_PATH is the directory under which branches are put."""

        SymbolPathRule.__init__(
            self, symbol_type=Branch, base_path=branch_path
            )
class TagsPathRule(SymbolPathRule):
    """A SymbolPathRule that assigns base paths to Tag LODs."""

    def __init__(self, tag_path):
        """TAG_PATH is the directory under which tags are put."""

        SymbolPathRule.__init__(
            self, symbol_type=Tag, base_path=tag_path
            )
class HeuristicPreferredParentRule(StrategyRule):
    """Pick preferred parents for TypedSymbols using a heuristic.

    The preferred parent is the symbol that appeared most often as a
    possible parent of the symbol in question.  If several symbols are
    tied, the one that comes first according to the symbols' '<'
    comparison is chosen."""

    def _get_preferred_parent(self, stats):
        """Return the LOD that is most often a possible parent in STATS.

        STATS.possible_parents maps each candidate to the number of
        times it appeared as a possible parent.  Return the candidate
        with the highest count, breaking ties in favor of the candidate
        that compares smallest.  Return None if no candidate was
        selected (for instance because there are no possible
        parents)."""

        top_count = -1
        top_candidate = None
        for (candidate, count) in stats.possible_parents.items():
            if count > top_count:
                is_better = True
            else:
                is_better = (count == top_count and candidate < top_candidate)

            if is_better:
                top_count = count
                top_candidate = candidate

        return top_candidate

    def get_symbol(self, symbol, stats):
        """Fill in SYMBOL.preferred_parent_id if it is still unset.

        Only TypedSymbols whose preferred_parent_id is None are
        touched.  SYMBOL itself is always returned."""

        if not isinstance(symbol, TypedSymbol):
            return symbol

        if symbol.preferred_parent_id is not None:
            return symbol

        parent = self._get_preferred_parent(stats)
        if parent is None:
            Log().verbose('%s has no preferred parent' % (symbol,))
            return symbol

        symbol.preferred_parent_id = parent.id
        Log().verbose(
            'The preferred parent of %s is %s' % (symbol, parent,)
            )
        return symbol
class ManualTrunkRule(StrategyRule):
    """Set the SVN path of Trunk LODs explicitly.

    Members:

      project_id -- (int or None) The id of the project whose trunk
          should be affected by this rule.  If project_id is None,
          then the rule is not project-specific.

      svn_path -- (str) The SVN path that should be used as the base
          directory for this trunk.  This member must not be None,
          though it may be the empty string for a single-project,
          trunk-only conversion.

    """

    def __init__(self, project_id, svn_path):
        """Store PROJECT_ID and the normalized form of SVN_PATH."""

        self.project_id = project_id
        self.svn_path = normalize_svn_path(svn_path, allow_empty=True)

    def get_symbol(self, symbol, stats):
        """Set SYMBOL.base_path if SYMBOL is a Trunk this rule applies to.

        The project is taken from STATS.lod.project.  SYMBOL itself is
        always returned."""

        wrong_project = (
            self.project_id is not None
            and self.project_id != stats.lod.project.id
            )

        if not wrong_project and isinstance(symbol, Trunk):
            symbol.base_path = self.svn_path

        return symbol
def convert_as_branch(symbol):
    """Log the manual decision and return Branch(SYMBOL)."""

    logger = Log()
    logger.verbose(
        'Converting symbol %s as a branch because of manual setting.'
        % (symbol,)
        )
    return Branch(symbol)
def convert_as_tag(symbol):
    """Log the manual decision and return Tag(SYMBOL)."""

    logger = Log()
    logger.verbose(
        'Converting symbol %s as a tag because of manual setting.'
        % (symbol,)
        )
    return Tag(symbol)
def exclude(symbol):
    """Log the manual decision and return ExcludedSymbol(SYMBOL)."""

    logger = Log()
    logger.verbose(
        'Excluding symbol %s because of manual setting.'
        % (symbol,)
        )
    return ExcludedSymbol(symbol)
class ManualSymbolRule(StrategyRule):
    """Change how particular symbols are converted.

    Members:

      project_id -- (int or None) The id of the project whose symbol
          should be affected by this rule.  If project_id is None,
          then the rule is not project-specific.

      symbol_name -- (str) The name of the symbol that should be
          affected by this rule.

      conversion -- (callable or None) A callable that converts the
          symbol to its preferred output type.  This should normally
          be one of (convert_as_branch, convert_as_tag, exclude).  If
          this member is None, then this rule does not affect the
          symbol's output type.

      svn_path -- (str or None) The normalized SVN path that should
          be used as the base directory for this symbol.  If this
          member is None, then this rule does not affect the symbol's
          SVN path.

      parent_lod_name -- (str or None) The name of the line of
          development that should be preferred as the parent of this
          symbol.  (The preferred parent is the line of development
          from which the symbol should sprout.)  If this member is set
          to the string '.trunk.', then the symbol will be set to
          sprout directly from trunk.  If this member is set to None,
          then this rule won't affect the symbol's parent.

    """

    def __init__(
          self, project_id, symbol_name, conversion, svn_path, parent_lod_name
          ):
        """Store the rule's settings, normalizing SVN_PATH if it is given."""

        self.project_id = project_id
        self.symbol_name = symbol_name
        self.conversion = conversion
        if svn_path is None:
            self.svn_path = None
        else:
            self.svn_path = normalize_svn_path(svn_path, allow_empty=True)
        self.parent_lod_name = parent_lod_name

    def _get_parent_by_id(self, parent_lod_name, stats):
        """Return the LOD object for the parent with name PARENT_LOD_NAME.

        STATS is the _Stats object describing a symbol whose parent
        needs to be determined from its name.  Trunk is never matched
        by name.  If none of its possible parents has name
        PARENT_LOD_NAME, raise a SymbolPlanError that lists the
        possible parents, most frequent first."""

        for pp in stats.possible_parents:
            if not isinstance(pp, Trunk) and pp.name == parent_lod_name:
                return pp

        # Sort by descending count.  sorted() with key/reverse is stable
        # and, unlike list.sort(cmp_function) on the result of items(),
        # also works on Python versions where items() returns a view
        # and cmp() no longer exists.
        parent_counts = sorted(
            stats.possible_parents.items(),
            key=lambda item: item[1],
            reverse=True
            )
        lines = [
            '%s is not a valid parent for %s;'
            % (parent_lod_name, stats.lod,),
            ' possible parents (with counts):'
            ]
        for (symbol, count) in parent_counts:
            if isinstance(symbol, Trunk):
                lines.append(' .trunk. : %d' % count)
            else:
                lines.append(' %s : %d' % (symbol.name, count))
        raise SymbolPlanError('\n'.join(lines))

    def get_symbol(self, symbol, stats):
        """Apply the manual settings to SYMBOL if this rule targets it.

        The rule is skipped if it is bound to a different project than
        STATS.lod.project, if SYMBOL is a Trunk, or if STATS.lod.name
        differs from self.symbol_name.  Otherwise the conversion,
        preferred parent and SVN path are applied, each only if it is
        set.  Return the (possibly converted) symbol.

        Raise SymbolPlanError if self.parent_lod_name does not name one
        of the symbol's possible parents."""

        if (self.project_id is not None
            and self.project_id != stats.lod.project.id):
            return symbol

        if isinstance(symbol, Trunk):
            return symbol

        if self.symbol_name != stats.lod.name:
            return symbol

        if self.conversion is not None:
            symbol = self.conversion(symbol)

        if self.parent_lod_name is None:
            pass
        elif self.parent_lod_name == '.trunk.':
            symbol.preferred_parent_id = stats.lod.project.trunk_id
        else:
            symbol.preferred_parent_id = self._get_parent_by_id(
                self.parent_lod_name, stats
                ).id

        if self.svn_path is not None:
            symbol.base_path = self.svn_path

        return symbol
class SymbolHintsFileRule(StrategyRule):
    """Use manual symbol configurations read from a file.

    The input file is line-oriented with the following format:

        <project-id> <symbol-name> <conversion> [<svn-path> [<parent-lod-name>]]

    Where the fields are separated by whitespace and

        project-id -- the numerical id of the Project to which the
            symbol belongs (numbered starting with 0).  This field can
            be '.' if the rule is not project-specific.

        symbol-name -- the name of the symbol being specified, or
            '.trunk.' if the rule should apply to trunk.

        conversion -- how the symbol should be treated in the
            conversion.  This is one of the following values: 'branch',
            'tag', or 'exclude'.  This field can be '.' if the rule
            shouldn't affect how the symbol is treated in the
            conversion.

        svn-path -- the SVN path that should serve as the root path of
            this LOD.  The path should be expressed as a path relative
            to the SVN root directory, with or without a leading '/'.
            This field can be omitted or '.' if the rule shouldn't
            affect the LOD's SVN path.

        parent-lod-name -- the name of the LOD that should serve as this
            symbol's parent.  This field can be omitted or '.' if the
            rule shouldn't affect the symbol's parent, or it can be
            '.trunk.' to indicate that the symbol should sprout from the
            project's trunk.

    Lines that are empty or whose first non-blank character is '#' are
    ignored."""

    comment_re = re.compile(r'^(\#|$)')

    conversion_map = {
        'branch' : convert_as_branch,
        'tag' : convert_as_tag,
        'exclude' : exclude,
        '.' : None,
        }

    def __init__(self, filename):
        """FILENAME is the path of the hints file; it is read in start()."""

        self.filename = filename

    def _parse_line(self, l):
        """Parse L, one line of the hints file with trailing space removed.

        Return the ManualTrunkRule or ManualSymbolRule described by
        the line, or None if the line is blank, a comment, or
        describes a rule that would not change anything.  Raise
        FatalError if the line cannot be parsed or is inconsistent."""

        s = l.lstrip()
        if self.comment_re.match(s):
            return None

        fields = s.split()
        if not 3 <= len(fields) <= 5:
            raise FatalError(
                'The following line in "%s" cannot be parsed:\n "%s"'
                % (self.filename, l,)
                )

        # The two trailing fields are optional; an omitted field means
        # the same as '.'.
        fields.extend(['.'] * (5 - len(fields)))
        (project_id, symbol_name, conversion, svn_path, parent_lod_name) = fields

        if svn_path == '.':
            svn_path = None
        elif svn_path.startswith('/'):
            svn_path = svn_path[1:]

        if project_id == '.':
            project_id = None
        else:
            try:
                project_id = int(project_id)
            except ValueError:
                raise FatalError(
                    'Illegal project_id in the following line:\n "%s"' % (l,)
                    )

        if symbol_name == '.trunk.':
            if conversion not in ['.', 'trunk']:
                raise FatalError('Trunk cannot be converted as a different type')

            if parent_lod_name != '.':
                raise FatalError('Trunk\'s parent cannot be set')

            if svn_path is None:
                # This rule doesn't do anything:
                return None

            return ManualTrunkRule(project_id, svn_path)

        try:
            conversion = self.conversion_map[conversion]
        except KeyError:
            raise FatalError(
                'Illegal conversion in the following line:\n "%s"' % (l,)
                )

        if parent_lod_name == '.':
            parent_lod_name = None

        if conversion is None \
               and svn_path is None \
               and parent_lod_name is None:
            # There is nothing to be done:
            return None

        return ManualSymbolRule(
            project_id, symbol_name,
            conversion, svn_path, parent_lod_name
            )

    def start(self, symbol_statistics):
        """Read the hints file, build the rules it describes, start them.

        Raise FatalError if a line of the file is malformed."""

        self._rules = []

        f = open(self.filename, 'r')
        try:
            for l in f:
                rule = self._parse_line(l.rstrip())
                if rule is not None:
                    self._rules.append(rule)
        finally:
            # Close the file even if a malformed line raises FatalError.
            f.close()

        for rule in self._rules:
            rule.start(symbol_statistics)

    def get_symbol(self, symbol, stats):
        """Pass SYMBOL through every rule read from the file, in file order."""

        for rule in self._rules:
            symbol = rule.get_symbol(symbol, stats)

        return symbol

    def finish(self):
        """Finish every rule read from the file, then discard the rules."""

        for rule in self._rules:
            rule.finish()

        del self._rules