run-tests.py: Only pass the --svnadmin option to cvs2svn when needed.
[cvs2svn.git] / cvs2svn_lib / symbol_transform.py
blob51c9a9b238bdf9733fbb47cd67a61310a98dac16
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2006-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains classes to transform symbol names."""
20 import os
21 import re
23 from cvs2svn_lib.log import logger
24 from cvs2svn_lib.common import FatalError
25 from cvs2svn_lib.common import IllegalSVNPathError
26 from cvs2svn_lib.common import normalize_svn_path
27 from cvs2svn_lib.common import is_branch_revision_number
class SymbolTransform:
    """Base class for objects that rename (or suppress) CVS symbols."""

    def transform(self, cvs_file, symbol_name, revision):
        """Return the name to use for SYMBOL_NAME, which occurs in CVS_FILE.

        Subclasses override this method.  The contract is:

        - If this transform applies, return the transformed symbol name.

        - If it does not apply, return SYMBOL_NAME unchanged.

        - If the symbol should be ignored entirely, return None.  (Note
          that ignoring a branch this way only causes the branch *name*
          to be ignored; the branch contents are still converted.
          Branches should usually be excluded using --exclude instead.)

        REVISION is the CVS revision number to which the symbol was
        attached in the file, as a string (with zeros removed).

        Implementations may use any information in CVS_FILE (including
        CVS_FILE.project) when deciding whether and how to transform
        SYMBOL_NAME.

        This base implementation always raises NotImplementedError."""

        raise NotImplementedError()
class ReplaceSubstringsSymbolTransform(SymbolTransform):
    """Substitute one fixed substring for another in symbol names.

    Every occurrence of the substring is replaced, not just the first."""

    def __init__(self, old, new):
        """Remember that the substring OLD is to be replaced by NEW."""

        self.old, self.new = old, new

    def transform(self, cvs_file, symbol_name, revision):
        """Return SYMBOL_NAME with each self.old replaced by self.new.

        CVS_FILE and REVISION are not used."""

        renamed = symbol_name.replace(self.old, self.new)
        return renamed
class NormalizePathsSymbolTransform(SymbolTransform):
    """Pass symbol names through normalize_svn_path()."""

    def transform(self, cvs_file, symbol_name, revision):
        """Return normalize_svn_path(SYMBOL_NAME).

        If normalize_svn_path() raises IllegalSVNPathError, raise a
        FatalError that names the offending symbol instead.  CVS_FILE and
        REVISION are not used."""

        try:
            return normalize_svn_path(symbol_name)
        except IllegalSVNPathError as e:
            # 'except X as e' is accepted by Python 2.6+ and Python 3,
            # whereas the older 'except X, e' spelling is a syntax error
            # on Python 3.
            raise FatalError('Problem with %s: %s' % (symbol_name, e,))
class CompoundSymbolTransform(SymbolTransform):
    """A SymbolTransform that chains several other SymbolTransforms.

    The contained SymbolTransforms are applied in order, each one
    receiving the name produced by the one before it.  As soon as one of
    them returns None, None is returned and the remaining
    SymbolTransforms are skipped."""

    def __init__(self, symbol_transforms):
        """Initialize a CompoundSymbolTransform.

        SYMBOL_TRANSFORMS is an iterable of SymbolTransform instances; it
        is copied into a list."""

        self.symbol_transforms = list(symbol_transforms)

    def transform(self, cvs_file, symbol_name, revision):
        """Return SYMBOL_NAME after running it through every transform.

        Return None if any of the contained transforms returns None."""

        current_name = symbol_name
        for step in self.symbol_transforms:
            current_name = step.transform(cvs_file, current_name, revision)
            if current_name is None:
                # The symbol is to be ignored; skip the remaining steps.
                return None

        return current_name
class RegexpSymbolTransform(SymbolTransform):
    """Transform symbols by using a regexp textual substitution."""

    def __init__(self, pattern, replacement):
        r"""Create a SymbolTransform that transforms symbols matching PATTERN.

        PATTERN is a regular expression that should match the whole symbol
        name.  REPLACEMENT is the replacement text, which may include
        patterns like r'\1' or r'\g<1>' or r'\g<name>' (where 'name' is a
        reference to a named substring in the pattern of the form
        r'(?P<name>...)')."""

        # Wrap PATTERN in a non-capturing group before anchoring it.
        # Without the group, a pattern with a top-level alternative such
        # as 'a|b' would compile to '^a|b$', which anchors each
        # alternative on one side only and therefore matches partial
        # names.  The group is non-capturing, so group numbers used in
        # REPLACEMENT are unaffected.
        self.pattern = re.compile('^(?:' + pattern + ')$')
        self.replacement = replacement

    def transform(self, cvs_file, symbol_name, revision):
        """Return SYMBOL_NAME with the regexp substitution applied.

        If the pattern does not match SYMBOL_NAME, it is returned
        unchanged.  CVS_FILE and REVISION are not used."""

        return self.pattern.sub(self.replacement, symbol_name)
class SymbolMapper(SymbolTransform):
    """A SymbolTransform that transforms specific symbol definitions.

    The user has to specify the exact CVS filename, symbol name, and
    revision number to be transformed, and the new name (or None if the
    symbol should be ignored).  The mappings can be set via a
    constructor argument or by calling __setitem__()."""

    def __init__(self, items=()):
        """Initialize the mapper.

        ITEMS is an iterable of tuples (cvs_filename, symbol_name,
        revision, new_name) which will be set as mappings."""

        # A map {(cvs_filename, symbol_name, revision) : new_name}:
        self._map = {}

        for (cvs_filename, symbol_name, revision, new_name) in items:
            self[cvs_filename, symbol_name, revision] = new_name

    def __setitem__(self, key, new_name):
        """Set a mapping for a particular file, symbol, and revision.

        KEY is a tuple (cvs_filename, symbol_name, revision).  The
        filename is normalized with os.path.normpath() and
        os.path.normcase() before it is stored.  If a mapping already
        exists for the normalized key, a warning is logged and the
        mapping is overwritten."""

        # Unpack explicitly: tuple parameters in the 'def' line are not
        # supported by Python 3.
        (cvs_filename, symbol_name, revision) = key

        cvs_filename = os.path.normcase(os.path.normpath(cvs_filename))
        key = (cvs_filename, symbol_name, revision)
        if key in self._map:
            logger.warn(
                'Overwriting symbol transform for\n'
                ' filename=%r symbol=%s revision=%s'
                % (cvs_filename, symbol_name, revision,)
                )
        self._map[key] = new_name

    def transform(self, cvs_file, symbol_name, revision):
        """Return the mapped name for this file/symbol/revision.

        If no mapping was registered for the combination, return
        SYMBOL_NAME unchanged.  The mapped name may be None."""

        # cvs_file.rcs_path is guaranteed to already be normalised the way
        # os.path.normpath() normalises paths.  No need to call it again.
        cvs_filename = os.path.normcase(cvs_file.rcs_path)
        return self._map.get(
            (cvs_filename, symbol_name, revision), symbol_name
            )
class SubtreeSymbolMapper(SymbolTransform):
    """A SymbolTransform that transforms symbols within a whole repo subtree.

    The user has to specify a CVS repository path (a filename or
    directory) and the original symbol name.  All symbols under that
    path will be renamed to the specified new name (which can be None if
    the symbol should be ignored).  The mappings can be set via a
    constructor argument or by calling __setitem__().  Only the most
    specific rule is applied."""

    def __init__(self, items=()):
        """Initialize the mapper.

        ITEMS is an iterable of tuples (cvs_path, symbol_name, new_name)
        which will be set as mappings.  cvs_path is a string naming a
        directory within the CVS repository."""

        # A map {symbol_name : {cvs_path : new_name}}:
        self._map = {}

        for (cvs_path, symbol_name, new_name) in items:
            self[cvs_path, symbol_name] = new_name

    def __setitem__(self, key, new_name):
        """Set a mapping for a particular path and symbol.

        KEY is a tuple (cvs_path, symbol_name).  The path is normalized
        with os.path.normpath() and os.path.normcase() before it is
        stored.  If a mapping already exists for the normalized path and
        symbol, a warning is logged and the mapping is overwritten."""

        # Unpack explicitly: tuple parameters in the 'def' line are not
        # supported by Python 3.
        (cvs_path, symbol_name) = key

        symbol_map = self._map.setdefault(symbol_name, {})

        cvs_path = os.path.normcase(os.path.normpath(cvs_path))
        if cvs_path in symbol_map:
            logger.warn(
                'Overwriting symbol transform for\n'
                ' directory=%r symbol=%s'
                % (cvs_path, symbol_name,)
                )
        symbol_map[cvs_path] = new_name

    def transform(self, cvs_file, symbol_name, revision):
        """Return the new name registered for the nearest enclosing path.

        Starting from the file's own path and walking up through its
        parent directories, return the name registered for SYMBOL_NAME
        at the first path that has a rule (this may be None).  If there
        is no rule for SYMBOL_NAME at any of those paths, return
        SYMBOL_NAME unchanged.  REVISION is not used."""

        symbol_map = self._map.get(symbol_name)
        if symbol_map is None:
            # No rules for that symbol name
            return symbol_name

        # cvs_file.rcs_path is guaranteed to already be normalised the way
        # os.path.normpath() normalises paths.  No need to call it again.
        cvs_path = os.path.normcase(cvs_file.rcs_path)
        while cvs_path not in symbol_map:
            parent_path = os.path.dirname(cvs_path)
            if parent_path == cvs_path:
                # dirname() no longer shortens the path, so there is
                # nothing left to try.  No rules found for that path;
                # return symbol name unaltered.
                return symbol_name
            cvs_path = parent_path

        return symbol_map[cvs_path]
class IgnoreSymbolTransform(SymbolTransform):
    """Ignore symbols matching a specified regular expression."""

    def __init__(self, pattern):
        """Create a SymbolTransform that ignores symbols matching PATTERN.

        PATTERN is a regular expression that should match the whole symbol
        name."""

        # Wrap PATTERN in a non-capturing group before anchoring it.
        # Without the group, a pattern with a top-level alternative such
        # as 'a|b' would compile to '^a|b$', which anchors each
        # alternative on one side only and would therefore also ignore
        # names that merely start with 'a'.
        self.pattern = re.compile('^(?:' + pattern + ')$')

    def transform(self, cvs_file, symbol_name, revision):
        """Return None if SYMBOL_NAME matches the pattern, else SYMBOL_NAME.

        CVS_FILE and REVISION are not used."""

        if self.pattern.match(symbol_name):
            return None
        else:
            return symbol_name
class SubtreeSymbolTransform(SymbolTransform):
    """A wrapper around another SymbolTransform, that limits it to a
    specified subtree."""

    def __init__(self, cvs_path, inner_symbol_transform):
        """Constructor.

        CVS_PATH is the path in the repository; it must be a str and is
        normalized with os.path.normpath() and os.path.normcase().
        INNER_SYMBOL_TRANSFORM is the SymbolTransform to wrap."""

        assert isinstance(cvs_path, str)
        self.__subtree = os.path.normcase(os.path.normpath(cvs_path))
        self.__subtree_len = len(self.__subtree)
        # True if the normalized subtree already ends with a path
        # separator (for example the filesystem root).  In that case any
        # path that starts with it lies inside the subtree, and the
        # character following the prefix is not itself a separator.
        self.__subtree_ends_with_sep = self.__subtree.endswith(os.path.sep)
        self.__inner = inner_symbol_transform

    def __does_rule_apply_to(self, cvs_file):
        """Return True iff CVS_FILE is the subtree path or lies below it."""

        # NOTE: This turns out to be a hot path through the code.
        #
        # It used to use logic similar to SubtreeSymbolMapper.transform().  And
        # it used to take 44% of cvs2svn's total runtime on one real-world test.
        # Now it's been optimized, it takes about 2%.
        #
        # This is called about:
        #   (num_files * num_symbols_per_file * num_subtree_symbol_transforms)
        # times.  On a large repository with several of these transforms,
        # that can exceed 100,000,000 calls.

        # cvs_file.rcs_path is guaranteed to already be normalised the way
        # os.path.normpath() normalises paths.  So we don't need to call
        # os.path.normpath() again.  (The os.path.normpath() function does
        # quite a lot, so it's expensive).
        #
        # os.path.normcase is a no-op on POSIX systems (and therefore fast).
        # Even on Windows it's only a memory allocation and case-change, it
        # should be quite fast.
        cvs_path = os.path.normcase(cvs_file.rcs_path)

        # Do most of the work in a single call, without allocating memory.
        if not cvs_path.startswith(self.__subtree):
            # Different prefix.
            # This is the common "no match" case.
            return False

        if len(cvs_path) == self.__subtree_len:
            # Exact match.
            #
            # This is quite rare, as self.__subtree is usually a directory and
            # cvs_path is always a file.
            return True

        if self.__subtree_ends_with_sep:
            # The prefix itself ends at a directory boundary, so cvs_path
            # is below it.  Without this check such a subtree would never
            # match, because the character after the prefix belongs to
            # the next path component.
            return True

        # We know cvs_path starts with self.__subtree, check the next character
        # is a '/' (or if we're on Windows, a '\\').  If so, then cvs_path is a
        # file under the self.__subtree directory tree, so we match.  If not,
        # then it's not a match.
        return cvs_path[self.__subtree_len] == os.path.sep

    def transform(self, cvs_file, symbol_name, revision):
        """Apply the wrapped transform if CVS_FILE is within the subtree.

        Otherwise return SYMBOL_NAME unchanged."""

        if self.__does_rule_apply_to(cvs_file):
            return self.__inner.transform(cvs_file, symbol_name, revision)
        else:
            # Rule does not apply to that path; return symbol name unaltered.
            return symbol_name
class TagOnlyTransform(SymbolTransform):
    """Restrict a wrapped SymbolTransform so that it only affects CVS
    tags; CVS branches are passed through unchanged."""

    def __init__(self, inner_symbol_transform):
        """Constructor.

        INNER_SYMBOL_TRANSFORM is the SymbolTransform to delegate to for
        tags."""

        self.__inner = inner_symbol_transform

    def transform(self, cvs_file, symbol_name, revision):
        """Delegate to the wrapped transform unless REVISION is a branch.

        REVISION is classified with is_branch_revision_number().  For a
        branch, SYMBOL_NAME is returned unchanged."""

        if not is_branch_revision_number(revision):
            # A tag: let the wrapped transform decide.
            return self.__inner.transform(cvs_file, symbol_name, revision)

        # A branch: leave the name alone.
        return symbol_name
class BranchOnlyTransform(SymbolTransform):
    """Restrict a wrapped SymbolTransform so that it only affects CVS
    branches; CVS tags are passed through unchanged."""

    def __init__(self, inner_symbol_transform):
        """Constructor.

        INNER_SYMBOL_TRANSFORM is the SymbolTransform to delegate to for
        branches."""

        self.__inner = inner_symbol_transform

    def transform(self, cvs_file, symbol_name, revision):
        """Delegate to the wrapped transform only if REVISION is a branch.

        REVISION is classified with is_branch_revision_number().  For a
        tag, SYMBOL_NAME is returned unchanged."""

        if not is_branch_revision_number(revision):
            # A tag: leave the name alone.
            return symbol_name

        # A branch: let the wrapped transform decide.
        return self.__inner.transform(cvs_file, symbol_name, revision)