Move the function canonicalize_eol() to common.py.
[cvs2svn.git] / cvs2svn_lib / common.py
blob d60b2b4c49bd433c4acfb331e1c54cef9288d2bb
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains common facilities used by cvs2svn."""
20 import re
21 import time
22 import codecs
24 from cvs2svn_lib.log import Log
# Mode flags to pass when opening databases.  Always use these
# constants rather than spelling out the literal strings.
# NOTE(review): the values look like anydbm-style open flags (read /
# write / always create new) -- confirm against the code that opens
# the databases.
DB_OPEN_READ = 'r'
DB_OPEN_WRITE = 'w'
DB_OPEN_NEW = 'n'


# NOTE(review): presumably a sentinel meaning "no valid Subversion
# revision number" -- confirm against callers.
SVN_INVALID_REVNUM = -1


# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"
class FatalException(Exception):
    """Exception signalling an error from which cvs2svn cannot recover.

    When main() lets this exception escape, the program's outermost
    layer catches it, prints its string representation followed by a
    newline, and terminates the program with an exit code of 1."""
class InternalError(Exception):
    """Exception signalling an internal cvs2svn error (i.e., a bug)."""
class FatalError(FatalException):
    """A FatalException whose message is prefixed with error_prefix."""

    def __init__(self, msg):
        """Store (error_prefix + ': ' + MSG) as the exception message."""

        prefixed_message = '%s: %s' % (error_prefix, msg,)
        FatalException.__init__(self, prefixed_message)
class CommandError(FatalError):
    """A FatalError raised because an invoked command failed.

    The message names the command and its exit status, and quotes the
    command's output if there was any."""

    def __init__(self, command, exit_status, error_output=''):
        """Record COMMAND, EXIT_STATUS and ERROR_OUTPUT and build the message.

        ERROR_OUTPUT is stored unmodified in self.error_output; trailing
        whitespace is stripped only for the message text.  Output that
        is empty or all whitespace is reported as 'no output'."""

        self.command = command
        self.exit_status = exit_status
        self.error_output = error_output

        trimmed_output = error_output.rstrip()
        if not trimmed_output:
            message = (
                'The command %r failed with exit status=%s and no output'
                % (command, exit_status)
                )
        else:
            message = (
                'The command %r failed with exit status=%s\n'
                'and the following output:\n'
                '%s'
                % (command, exit_status, trimmed_output)
                )
        FatalError.__init__(self, message)
def canonicalize_eol(text, eol):
    """Return TEXT with every end-of-line sequence replaced by EOL.

    CRLF pairs and lone CRs are first folded into LF; the LFs are then
    rewritten to EOL unless EOL is already LF."""

    # CRLF must be collapsed before lone CRs, otherwise each CRLF would
    # turn into two line endings.
    normalized = text.replace('\r\n', '\n').replace('\r', '\n')
    if eol == '\n':
        return normalized
    return normalized.replace('\n', eol)
def path_join(*components):
    """Join the pathname COMPONENTS with '/' separators.

    Empty components are skipped, so they never produce leading,
    trailing or doubled separators on their own."""

    non_empty = [component for component in components if component]
    return '/'.join(non_empty)
def path_split(path):
    """Split the svn pathname PATH at its last '/' into (HEAD, TAIL).

    This works like os.path.split(), except that '/' is always the
    separator.  PATH is an svn path and should not start with a '/'.
    HEAD is the part before the last slash and TAIL the part after it.
    TAIL is empty if PATH ends in a slash; HEAD is empty if PATH
    contains no slash; both are empty if PATH is empty."""

    pieces = path.rsplit('/', 1)
    if len(pieces) == 1:
        # No slash at all: the whole path is the tail.
        return ('', path,)
    (head, tail) = pieces
    return (head, tail,)
class IllegalSVNPathError(FatalException):
    """A FatalException raised for a filename or path that is not a
    legal SVN path."""
# Control characters (characters not allowed in Subversion filenames).
# The character class matches any single character in the range
# 0x00-0x1f, or the character 0x7f.  The string literal embeds the raw
# control characters, each preceded by a backslash for the regex engine.
ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')
def verify_svn_filename_legal(filename):
    """Check that FILENAME is acceptable as an SVN filename.

    FILENAME is a single path component of a CVS path.  It is rejected
    if any of the following holds:

    - It is empty.

    - It is '.' or '..'.

    - It contains a control character.

    Raise an IllegalSVNPathError describing the first failed check;
    return None if FILENAME passes all of them."""

    if filename == '':
        raise IllegalSVNPathError("Empty filename component.")

    if filename == '.' or filename == '..':
        raise IllegalSVNPathError(
            "Illegal filename component %r." % (filename,)
            )

    match = ctrl_characters_regexp.search(filename)
    if match is not None:
        # Report the first offending character found.
        bad_character = match.group()
        raise IllegalSVNPathError(
            "Character %r in filename %r is not supported by Subversion."
            % (bad_character, filename,)
            )
160 def verify_svn_path_legal(path):
161 """Verify that PATH is a legitimate SVN path.
163 If not, raise an IllegalSVNPathError."""
165 if path.startswith('/'):
166 raise IllegalSVNPathError("Path %r must not start with '/'." % (path,))
167 head = path
168 while head != '':
169 (head,tail) = path_split(head)
170 try:
171 verify_svn_filename_legal(tail)
172 except IllegalSVNPathError, e:
173 raise IllegalSVNPathError('Problem with path %r: %s' % (path, e,))
def normalize_svn_path(path, allow_empty=False):
    """Return a normalized form of the SVN path PATH (e.g., user input).

    Normalization removes leading, trailing and duplicated '/'
    characters.

    Unless ALLOW_EMPTY is set, raise an IllegalSVNPathError if the
    normalized path is empty."""

    # Splitting on '/' yields an empty component for every superfluous
    # slash; path_join() drops those empty components again.
    components = path.split('/')
    normalized = path_join(*components)
    if normalized or allow_empty:
        return normalized
    raise IllegalSVNPathError("Path is empty")
class PathRepeatedException(Exception):
    """Exception reporting that one path occurs several times.

    The path is available as self.path and the number of occurrences
    as self.count."""

    def __init__(self, path, count):
        self.path = path
        self.count = count
        message = 'Path %s is repeated %d times' % (path, count,)
        Exception.__init__(self, message)
class PathsNestedException(Exception):
    """Exception reporting that one path contains other paths.

    The containing path is available as self.nest and the contained
    paths as self.nestlings."""

    def __init__(self, nest, nestlings):
        self.nest = nest
        self.nestlings = nestlings
        nestling_list = ', '.join(nestlings)
        message = (
            'Path %s contains the following other paths: %s'
            % (nest, nestling_list,)
            )
        Exception.__init__(self, message)
class PathsNotDisjointException(FatalException):
    """A FatalException that bundles several disjointness problems.

    The individual problems are available as self.problems; the
    message lists the string form of each one on its own line."""

    def __init__(self, problems):
        self.problems = problems
        descriptions = [str(problem) for problem in self.problems]
        message = (
            'The following paths are not disjoint:\n'
            ' %s\n'
            % ('\n '.join(descriptions),)
            )
        Exception.__init__(self, message)
def verify_paths_disjoint(*paths):
    """Check that no two of the given paths coincide or nest.

    A path is nested in another one in the sense that 'a/b/c/d' is
    nested in 'a/b'.  If any path is nested in another, or any two
    paths are identical, raise a PathsNotDisjointException holding one
    exception per individual problem: first a PathRepeatedException for
    each repeated path, then a PathsNestedException for each path that
    contains others."""

    def to_components(path):
        # An empty path has no components at all.
        if path:
            return path.split('/')
        return []

    def is_proper_prefix(prefix, candidate):
        """Return True iff the component list PREFIX is strictly shorter
        than CANDIDATE and equal to its leading components."""

        if len(prefix) >= len(candidate):
            return False
        return candidate[:len(prefix)] == prefix

    entries = [(to_components(path), path) for path in paths]
    # Lists compare element by element, and when all overlapping
    # elements are equal the shorter list sorts first.  So after this
    # sort every nest is immediately followed by all of its nestlings.
    entries.sort()

    problems = []

    # Collapse each run of identical paths down to its first copy,
    # remembering how long the run was:
    distinct = []
    repeat_counts = []
    for entry in entries:
        if distinct and distinct[-1][0] == entry[0]:
            repeat_counts[-1] += 1
        else:
            distinct.append(entry)
            repeat_counts.append(1)

    for ((components, path), count) in zip(distinct, repeat_counts):
        if count > 1:
            problems.append(PathRepeatedException(path, count))

    # For each remaining path, gather the run of paths directly after
    # it that it contains:
    for (index, (components, path)) in enumerate(distinct):
        nestlings = []
        end = index + 1
        while (
              end < len(distinct)
              and is_proper_prefix(components, distinct[end][0])
              ):
            nestlings.append(distinct[end][1])
            end += 1
        if nestlings:
            problems.append(PathsNestedException(path, nestlings))

    if problems:
        raise PathsNotDisjointException(problems)
def is_trunk_revision(rev):
    """Return True iff the CVS revision number REV is on trunk.

    REV is a CVS revision number such as '1.6' or '1.6.4.5'.  It is
    considered a trunk revision when it has exactly two dot-separated
    components."""

    components = rev.split('.')
    return len(components) == 2
def is_branch_revision_number(rev):
    """Return True iff REV names a whole branch rather than a revision.

    REV is a CVS revision number in canonical form (i.e., with zeros
    removed).  It is treated as a branch number when it has an odd
    number of dot-separated components."""

    components = rev.split('.')
    return len(components) % 2 == 1
def format_date(date):
    """Format DATE (seconds since the epoch) as an svn-compatible date.

    The result is expressed in UTC and looks like
    '2002-09-29T14:44:59.000000Z'; the fractional seconds are always
    written as zero."""

    utc_time = time.gmtime(date)
    return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_time)
class CVSTextDecoder:
    """Callable that converts CVS strings to Unicode strings.

    self.decoders is a list of (NAME, DECODER) pairs that are tried in
    order.  self.fallback_decoder is either None or one such pair, used
    in 'replace' mode when every entry of self.decoders has failed."""

    def __init__(self, encodings, fallback_encoding=None):
        """Initialize a CVSTextDecoder.

        ENCODINGS is a list of encoding names; they are tried in order,
        in 'strict' mode, as source encodings.

        FALLBACK_ENCODING, if specified, names the encoding to use in
        lossy 'replace' mode once all of ENCODINGS have failed.

        Raise LookupError if any of the named encodings is unknown."""

        self.decoders = []
        for encoding in encodings:
            self.decoders.append((encoding, codecs.getdecoder(encoding)))

        if fallback_encoding is not None:
            self.fallback_decoder = (
                fallback_encoding, codecs.getdecoder(fallback_encoding)
                )
        else:
            self.fallback_decoder = None

    def add_encoding(self, encoding):
        """Append ENCODING to the encodings tried in 'strict' mode.

        ENCODING is the name of an encoding.  Nothing happens if an
        encoding was already registered under that name.  Raise a
        LookupError if ENCODING is unknown."""

        known_names = [name for (name, decoder) in self.decoders]
        if encoding not in known_names:
            self.decoders.append((encoding, codecs.getdecoder(encoding)))

    def set_fallback_encoding(self, encoding):
        """Set the fallback encoding, which is tried in 'replace' mode.

        ENCODING is the name of an encoding, or None to disable the
        fallback.  Raise a LookupError if ENCODING is unknown."""

        if encoding is not None:
            self.fallback_decoder = (encoding, codecs.getdecoder(encoding))
        else:
            self.fallback_decoder = None

    def __call__(self, s):
        """Decode the string S using the configured source encodings.

        Return S as a Unicode string; if S already is a unicode string,
        return it unchanged.  Each encoding that fails is reported to
        the verbose log.

        Raise UnicodeError if none of the source encodings can decode S
        and no fallback encoding was specified."""

        if isinstance(s, unicode):
            return s

        for (name, decoder) in self.decoders:
            try:
                decoded = decoder(s)[0]
            except ValueError:
                # UnicodeError is a ValueError; move on to the next
                # encoding.
                Log().verbose("Encoding '%s' failed for string %r" % (name, s))
            else:
                return decoded

        if self.fallback_decoder is None:
            raise UnicodeError

        (name, decoder) = self.fallback_decoder
        return decoder(s, 'replace')[0]
class Timestamper:
    """Produce monotonic timestamps derived from changeset timestamps."""

    def __init__(self):
        # The timestamp most recently handed out by get():
        self.timestamp = 0.0

        # The latest timestamp still considered reasonable (one day
        # after the time at which this object was created):
        self.max_timestamp = time.time() + 24.0 * 60.0 * 60.0

    def get(self, timestamp, change_expected):
        """Return a reasonable timestamp derived from TIMESTAMP.

        If necessary, push TIMESTAMP into the future so that the result
        is at least one second later than every timestamp returned by
        earlier calls to this method.  A TIMESTAMP later than
        self.max_timestamp is replaced by the previously returned
        timestamp plus one second.

        If CHANGE_EXPECTED is not True, then log a message whenever the
        timestamp has to be changed."""

        # The earliest value that keeps the sequence monotonic:
        successor = self.timestamp + 1.0

        if timestamp > self.max_timestamp:
            # A timestamp in the future is assumed to be bogus.  Shift
            # it backwards in time so that it does not force other
            # timestamps to be pushed even further into the future.
            #
            # Note that this is not nearly a complete solution to the
            # bogus timestamp problem.  A timestamp in the future still
            # affects the ordering of changesets, and a changeset having
            # such a timestamp will not be committed until all
            # changesets with earlier timestamps have been committed,
            # even if other changesets with even earlier timestamps
            # depend on this one.
            self.timestamp = successor
            if not change_expected:
                Log().warn(
                    'Timestamp "%s" is in the future; changed to "%s".'
                    % (time.asctime(time.gmtime(timestamp)),
                       time.asctime(time.gmtime(self.timestamp)),)
                    )
        elif timestamp < successor:
            self.timestamp = successor
            if not change_expected and Log().is_on(Log.VERBOSE):
                Log().verbose(
                    'Timestamp "%s" adjusted to "%s" to ensure monotonicity.'
                    % (time.asctime(time.gmtime(timestamp)),
                       time.asctime(time.gmtime(self.timestamp)),)
                    )
        else:
            self.timestamp = timestamp

        return self.timestamp
436 return self.timestamp