Python 2.2 compatibility: Don't use UserDict.DictMixin.
[cvs2svn.git] / cvs2svn
blob260370401e7fb0b2f873945b4ab3a86df5b06bf2
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 from __future__ import generators
24 import cvs2svn_rcsparse
25 import os
26 import sys
27 import sha
28 import re
29 import time
30 import fileinput
31 import fnmatch
32 import getopt
33 import stat
34 import md5
35 import marshal
36 import errno
37 import popen2
38 import types
39 import ConfigParser
40 try:
41 # Try to get access to a bunch of encodings for use with --encoding.
42 # See http://cjkpython.i18n.org/ for details.
43 import iconv_codec
44 except ImportError:
45 pass
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough.  The message is written
# directly to stderr (and the process exits with status 1) because
# nothing else in this script can be relied upon on an older
# interpreter.
if sys.hexversion < 0x02020000:
  sys.stderr.write("%s: Python 2.2 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)
58 # Pretend we have true booleans on older python versions
59 try:
60 True
61 except:
62 True = 1
63 False = 0
65 # Opening pipes was a mess before Python 2.4, because some methods did
66 # not exist on some platforms, and some behaved differently on others.
67 # Python 2.4 solved this by adding the subprocess module, but since we
68 # cannot require such a new version, we cannot use it directly, but
69 # must implement a simplified Popen using the best means necessary.
71 # The SimplePopen class only has the following members and methods, all
72 # behaving as documented in the subprocess.Popen class:
73 # - stdin
74 # - stdout
75 # - stderr
76 # - wait
try:
  # First try subprocess.Popen...
  import subprocess
  class SimplePopen:
    """SimplePopen implemented on top of subprocess.Popen."""

    def __init__(self, cmd, capture_stderr):
      # stdin and stdout are always piped; stderr is piped only if
      # CAPTURE_STDERR is true.
      if capture_stderr:
        stderr = subprocess.PIPE
      else:
        stderr = None
      self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE, stderr=stderr)
      self.stdin = self._popen.stdin
      self.stdout = self._popen.stdout
      # Note: no stderr attribute is set unless CAPTURE_STDERR is true.
      if capture_stderr:
        self.stderr = self._popen.stderr
      self.wait = self._popen.wait
except ImportError:
  if hasattr(popen2, 'Popen3'):
    # ...then try popen2.Popen3...
    class SimplePopen:
      """SimplePopen implemented on top of popen2.Popen3."""

      def __init__(self, cmd, capture_stderr):
        self._popen3 = popen2.Popen3(cmd, capture_stderr)
        self.stdin = self._popen3.tochild
        self.stdout = self._popen3.fromchild
        # Note: no stderr attribute is set unless CAPTURE_STDERR is true.
        if capture_stderr:
          self.stderr = self._popen3.childerr
        self.wait = self._popen3.wait
  else:
    # ...and if all fails, use popen2.popen3...
    class SimplePopen:
      """SimplePopen implemented on top of popen2.popen3()."""

      def __init__(self, cmd, capture_stderr):
        # This variant needs a command string, so an argument list is
        # flattened first.  CAPTURE_STDERR is not consulted here;
        # stderr is always assigned.
        if type(cmd) != types.StringType:
          cmd = argv_to_command_string(cmd)
        self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')

      def wait(self):
        # Return the first true value returned by closing the three
        # streams, in the order stdout, stdin, stderr.
        return self.stdout.close() or self.stdin.close() or \
            self.stderr.close()
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn.  If anydbm would
#    default to one of them, report the problem and exit with status 1.
import anydbm
if (anydbm._defaultmod.__name__ == 'dumbdbm'
    or anydbm._defaultmod.__name__ == 'dbm'):
  sys.stderr.write(
    error_prefix
    + ': your installation of Python does not contain a suitable\n'
    + 'DBM module -- cvs2svn cannot continue.\n'
    + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to prefer gdbm
#    instead.  Unfortunately, gdbm appears not to be trouble free, either.
#    (The old module is recognized by its bsddb attribute lacking a
#    __version__.)  If gdbm cannot be imported, only a warning is printed.
if hasattr(anydbm._defaultmod, 'bsddb') \
    and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    sys.stderr.write(warning_prefix +
        ': The version of the bsddb module found '
        'on your computer has been reported to malfunction on some datasets, '
        'causing KeyError exceptions. You may wish to upgrade your Python to '
        'version 2.3 or later.\n')
  else:
    anydbm._defaultmod = gdbm
151 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
152 cvs_branch_tag = re.compile('^((?:[0-9]+\\.[0-9]+\\.)+)0\\.([0-9]+)$')
153 rcs_branch_tag = re.compile('^(?:[0-9]+\\.[0-9]+\\.)+[0-9]+$')
155 SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
157 # This really only matches standard '1.1.1.*'-style vendor revisions.
158 # One could conceivably have a file whose default branch is 1.1.3 or
159 # whatever, or was that at some point in time, with vendor revisions
160 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
161 # is the only time this regexp gets used), we'd have no basis for
162 # assuming that the non-standard vendor branch had ever been the
163 # default branch anyway, so we don't want this to match them anyway.
164 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
166 # If this run's output is a repository, then (in the tmpdir) we use
167 # a dumpfile of this name for repository loads.
169 # If this run's output is a dumpfile, then this is default name of
170 # that dumpfile, but in the current directory (unless the user has
171 # specified a dumpfile path, of course, in which case it will be
172 # wherever the user said).
173 DUMPFILE = 'cvs2svn-dump'
175 # This file appears with different suffixes at different stages of
176 # processing. CVS revisions are cleaned and sorted here, for commit
177 # grouping. See design-notes.txt for details.
178 DATAFILE = 'cvs2svn-data'
180 # This file contains a marshalled copy of all the statistics that we
181 # gather throughout the various runs of cvs2svn. The data is stored as a
182 # marshalled dictionary.
183 STATISTICS_FILE = 'cvs2svn-statistics'
185 # This text file contains records (1 per line) that describe svn
186 # filesystem paths that are the opening and closing source revisions
187 # for copies to tags and branches. The format is as follows:
189 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
191 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
192 # SVN_REVNUM are the primary and secondary sorting criteria for
193 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
194 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
195 # A sorted version of the above file.
196 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
198 # This file is a temporary file for storing symbolic_name -> closing
199 # CVSRevision until the end of our pass where we can look up the
200 # corresponding SVNRevNum for the closing revs and write these out to
201 # the SYMBOL_OPENINGS_CLOSINGS.
202 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
204 # Skeleton version of an svn filesystem.
205 # (These supersede and will eventually replace the two above.)
206 # See class SVNRepositoryMirror for how these work.
207 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
208 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
210 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
211 # SYMBOL_OPENINGS_CLOSINGS_SORTED
212 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
214 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
215 # the CVSRevision is the last such that is a source for those symbolic
216 # names. For example, if branch B's number is 1.3.0.2 in this CVS
217 # file, and this file's 1.3 is the latest (by date) revision among
218 # *all* CVS files that is a source for branch B, then the
219 # CVSRevision.unique_key() corresponding to this file at 1.3 would
220 # list at least B in its list.
221 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
223 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
224 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
225 ### the s-revs data in this database.
226 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
228 # Lists all symbolic names that are tags. Keys are strings (symbolic
229 # names), values are ignorable.
230 TAGS_DB = 'cvs2svn-tags.db'
232 # A list of all tags. Each line consists of the tag name and the number
233 # of files in which it exists, separated by a space.
234 TAGS_LIST = 'cvs2svn-tags.txt'
236 # A list of all branches. The file is stored as a plain text file
237 # to make it easy to look at in an editor. Each line contains the
238 # branch name, the number of files where the branch is created, the
239 # commit count, and a list of tags and branches that are defined on
240 # revisions in the branch.
241 BRANCHES_LIST = 'cvs2svn-branches.txt'
243 # These two databases provide a bidirectional mapping between
244 # CVSRevision.unique_key()s and Subversion revision numbers.
246 # The first maps CVSRevision.unique_key() to a number; the values are
247 # not unique.
249 # The second maps Subversion revision numbers to tuples (c_rev_keys,
250 # motivating_revnum, symbolic_name, date).
252 # c_rev_keys is a list of CVSRevision.unique_key()s.
254 # If the SVNCommit is a default branch synchronization,
255 # motivating_revnum is the svn_revnum of the primary SVNCommit that
256 # motivated it; otherwise it is None. (NOTE: Secondary commits that
257 # fill branches and tags also have a motivating commit, but we do not
258 # record it because it is (currently) not needed for anything.)
259 # motivating_revnum is used when generating the log message for the
260 # commit that synchronizes the default branch with trunk.
262 # symbolic_name is the symbolic name associated with the commit (if it
263 # filled a symbolic name) or None otherwise.
265 # date is the date of the commit.
266 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
267 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
269 # How many bytes to read at a time from a pipe. 128 kiB should be
270 # large enough to be efficient without wasting too much memory.
271 PIPE_READ_SIZE = 128 * 1024
273 # Record the default RCS branches, if any, for CVS filepaths.
275 # The keys are CVS filepaths, relative to the top of the repository
276 # and with the ",v" stripped off, so they match the cvs paths used in
277 # Commit.commit(). The values are vendor branch revisions, such as
278 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
279 # represents the highest vendor branch revision thought to have ever
280 # been head of the default branch.
282 # The reason we record a specific vendor revision, rather than a
283 # default branch number, is that there are two cases to handle:
285 # One case is simple. The RCS file lists a default branch explicitly
286 # in its header, such as '1.1.1'. In this case, we know that every
287 # revision on the vendor branch is to be treated as head of trunk at
288 # that point in time.
290 # But there's also a degenerate case. The RCS file does not currently
291 # have a default branch, yet we can deduce that for some period in the
292 # past it probably *did* have one. For example, the file has vendor
293 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
294 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
295 # case, we should record 1.1.1.96 as the last vendor revision to have
296 # been the head of the default branch.
297 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
299 # Records the author and log message for each changeset.
300 # The keys are author+log digests, the same kind used to identify
301 # unique revisions in the .revs, etc files. Each value is a tuple
302 # of two elements: '(author logmessage)'.
303 METADATA_DB = "cvs2svn-metadata.db"
305 # A temporary on-disk hash that maps CVSRevision unique keys to a new
306 # timestamp for that CVSRevision. These new timestamps are created in
307 # pass2, and this hash is used exclusively in pass2.
308 TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"
310 REVS_SUFFIX = '.revs'
311 CLEAN_REVS_SUFFIX = '.c-revs'
312 SORTED_REVS_SUFFIX = '.s-revs'
313 RESYNC_SUFFIX = '.resync'
315 SVN_INVALID_REVNUM = -1
317 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
319 # Things that can happen to a file.
320 OP_NOOP = '-'
321 OP_ADD = 'A'
322 OP_DELETE = 'D'
323 OP_CHANGE = 'C'
325 # A deltatext either does or doesn't represent some change.
326 DELTATEXT_NONEMPTY = 'N'
327 DELTATEXT_EMPTY = 'E'
329 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
331 # Constants used in SYMBOL_OPENINGS_CLOSINGS
332 OPENING = 'O'
333 CLOSING = 'C'
class FatalException(Exception):
  """Signal an error from which the program cannot recover.

  When main() lets this exception escape, the global layer of the
  program catches it, prints its string representation and ends the
  program with an exit code of 1."""
class FatalError(FatalException):
  """A FatalException whose message starts with error_prefix."""

  def __init__(self, msg):
    """Build the message as (error_prefix + ': ' + MSG + '\n')."""

    full_message = '%s: %s\n' % (error_prefix, msg)
    FatalException.__init__(self, full_message)
def temp(basename):
  """Return the path of BASENAME inside the directory Ctx().tmpdir.

  This is just a shorthand that saves horizontal space in the source."""
  tmpdir = Ctx().tmpdir
  return os.path.join(tmpdir, basename)
359 # Since the unofficial set also includes [/\] we need to translate those
360 # into ones that don't conflict with Subversion limitations.
def _clean_symbolic_name(name):
  """Return NAME with the characters that Subversion does not allow
  in a pathname translated: '/' becomes '++' and '\\' becomes '--'."""
  return name.replace('/', '++').replace('\\', '--')
def _path_join(*components):
  """Return the pathname COMPONENTS joined with '/' separators.

  Empty components are left out of the result."""
  non_empty = [component for component in components if component]
  return '/'.join(non_empty)
def _path_split(path):
  """Return the svn pathname PATH split into a pair (HEAD, TAIL).

  This works like os.path.split(), except that '/' is always the path
  separator.  PATH is an svn path and should not start with a '/'.
  HEAD is the part before the last slash and TAIL the part after it.
  TAIL is empty if PATH ends in a slash, HEAD is empty if PATH contains
  no slash, and both are empty if PATH is empty."""

  last_slash = path.rfind('/')
  if last_slash >= 0:
    return (path[:last_slash], path[last_slash + 1:])
  return ('', path)
def to_utf8(value, mode='replace'):
  """Return VALUE converted to a UTF-8 encoded string.

  Each encoding listed in Ctx().encoding is tried in turn as the
  source encoding of VALUE, and MODE is passed on as the error
  handling mode of the decoding step.  A failed attempt is logged at
  LOG_VERBOSE level.  Raise UnicodeError if every source encoding
  fails."""
  ### FIXME: The 'replace' default mode should be an option,
  ### like --encoding is.
  for candidate in Ctx().encoding:
    try:
      decoded = unicode(value, candidate, mode)
      return decoded.encode('utf8')
    except UnicodeError:
      Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
                  % (candidate, value))
  raise UnicodeError
# Matches any single control character (0x00-0x1f, or 0x7f).
ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')

def verify_filename_legal(filename):
  """Check FILENAME for control characters.

  Raise a FatalError naming the first control character found; return
  None if FILENAME contains none."""

  match = ctrl_characters_regexp.search(filename)
  if not match:
    return
  raise FatalError(
      "Character %r in filename %r is not supported by subversion."
      % (match.group(), filename))
def run_command(command):
  """Run COMMAND via os.system() and raise a FatalError if the value
  returned by os.system() is not zero."""
  status = os.system(command)
  if status != 0:
    raise FatalError('Command failed: "%s"' % (command,))
class CommandFailedException(Exception):
  """The exception that check_command_runs() raises when its check of
  a command fails."""
426 def check_command_runs(cmd, cmdname):
427 """Check whether the command CMD can be executed without errors.
429 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
430 name of the command as it should be included in exception error
431 messages.
433 This function checks three things: (1) the command can be run
434 without throwing an OSError; (2) it exits with status=0; (3) it
435 doesn't output anything to stderr. If any of these conditions is
436 not met, raise a CommandFailedException describing the problem."""
438 try:
439 pipe = SimplePopen(cmd, True)
440 except OSError, e:
441 raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
442 pipe.stdin.close()
443 pipe.stdout.read()
444 errmsg = pipe.stderr.read()
445 status = pipe.wait()
446 if status or errmsg:
447 msg = 'error executing %s: status %s' % (cmdname, status,)
448 if errmsg:
449 msg += ', error output:\n%s' % (errmsg,)
450 raise CommandFailedException(msg)
class CVSRepository:
  """A CVS repository from which data can be extracted."""

  def __init__(self, cvs_repos_path):
    """Remember CVS_REPOS_PATH, the top of the CVS repository (at least
    as far as this run is concerned), in normalized form.

    Raise a FatalError if CVS_REPOS_PATH is not an existing directory."""

    if not os.path.isdir(cvs_repos_path):
      raise FatalError("The specified CVS repository path '%s' is not an "
                       "existing directory." % cvs_repos_path)

    self.cvs_repos_path = os.path.normpath(cvs_repos_path)

    # Matches the repository path at the start of a filename, provided
    # that it is followed by a path separator or by the end of the name.
    prefix_pattern = (r'^' + re.escape(self.cvs_repos_path)
                      + r'(' + re.escape(os.sep) + r'|$)')
    self.cvs_prefix_re = re.compile(prefix_pattern)

  def get_cvs_path(self, fname):
    """Return FNAME relative to cvs_repos_path, with ',v' removed.

    FNAME is a filesystem name that has to be within
    self.cvs_repos_path.  The returned name is relative to
    self.cvs_repos_path, has a trailing ',v' stripped off if there is
    one, and has os.sep converted to '/'.  Raise a FatalError if FNAME
    is not within self.cvs_repos_path."""

    prefix = self.cvs_prefix_re.match(fname)
    if prefix is None:
      raise FatalError(
          "get_cvs_path: '%s' is not a sub-path of '%s'"
          % (fname, self.cvs_repos_path))
    relative = fname[prefix.end():]
    if relative.endswith(',v'):
      relative = relative[:-2]
    return relative.replace(os.sep, '/')

  def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
    """Return a command string, and the pipe created using that
    string.  C_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION
    is True, then the substitution of RCS/CVS keywords in the output is
    suppressed.  The pipe returns the text of that CVS Revision.

    This base class only raises NotImplementedError."""
    raise NotImplementedError
494 class CVSRepositoryViaRCS(CVSRepository):
495 """A CVSRepository accessed via RCS."""
497 def __init__(self, cvs_repos_path):
498 CVSRepository.__init__(self, cvs_repos_path)
499 try:
500 check_command_runs([ 'co', '-V' ], 'co')
501 except CommandFailedException, e:
502 raise FatalError('%s\n'
503 'Please check that co is installed and in your PATH\n'
504 '(it is a part of the RCS software).' % (e,))
506 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
507 pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
508 if suppress_keyword_substitution:
509 pipe_cmd.append('-kk')
510 pipe_cmd.append(c_rev.rcs_path())
511 pipe = SimplePopen(pipe_cmd, True)
512 pipe.stdin.close()
513 return pipe_cmd, pipe
516 class CVSRepositoryViaCVS(CVSRepository):
517 """A CVSRepository accessed via CVS."""
519 def __init__(self, cvs_repos_path):
520 CVSRepository.__init__(self, cvs_repos_path)
521 # Ascend above the specified root if necessary, to find the
522 # cvs_repository_root (a directory containing a CVSROOT directory)
523 # and the cvs_module (the path of the conversion root within the
524 # cvs repository) NB: cvs_module must be seperated by '/' *not* by
525 # os.sep .
526 def is_cvs_repository_root(path):
527 return os.path.isdir(os.path.join(path, 'CVSROOT'))
529 self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
530 self.cvs_module = ""
531 while not is_cvs_repository_root(self.cvs_repository_root):
532 # Step up one directory:
533 prev_cvs_repository_root = self.cvs_repository_root
534 self.cvs_repository_root, module_component = \
535 os.path.split(self.cvs_repository_root)
536 if self.cvs_repository_root == prev_cvs_repository_root:
537 # Hit the root (of the drive, on Windows) without finding a
538 # CVSROOT dir.
539 raise FatalError(
540 "the path '%s' is not a CVS repository, nor a path "
541 "within a CVS repository. A CVS repository contains "
542 "a CVSROOT directory within its root directory."
543 % (self.cvs_repos_path,))
545 self.cvs_module = module_component + "/" + self.cvs_module
547 os.environ['CVSROOT'] = self.cvs_repository_root
549 def cvs_ok(global_arguments):
550 check_command_runs(
551 [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
553 self.global_arguments = [ "-q", "-R" ]
554 try:
555 cvs_ok(self.global_arguments)
556 except CommandFailedException, e:
557 self.global_arguments = [ "-q" ]
558 try:
559 cvs_ok(self.global_arguments)
560 except CommandFailedException, e:
561 raise FatalError(
562 '%s\n'
563 'Please check that cvs is installed and in your PATH.' % (e,))
565 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
566 pipe_cmd = [ 'cvs' ] + self.global_arguments + \
567 [ 'co', '-r' + c_rev.rev, '-p' ]
568 if suppress_keyword_substitution:
569 pipe_cmd.append('-kk')
570 pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
571 pipe = SimplePopen(pipe_cmd, True)
572 pipe.stdin.close()
573 return pipe_cmd, pipe
def generate_ignores(c_rev):
  """Return the list of ignore patterns found in the text of C_REV.

  The text is obtained through Ctx().cvs_repository.get_co_pipe().
  Raise a FatalError if that command exits with a non-zero status."""
  # Read in props
  pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
  chunks = [ ]
  while True:
    chunk = pipe.stdout.read(PIPE_READ_SIZE)
    if not chunk:
      break
    chunks.append(chunk)
  pipe.stdout.close()
  error_output = pipe.stderr.read()
  exit_status = pipe.wait()
  if exit_status:
    raise FatalError("The command '%s' failed with exit status: %s\n"
                     "and the following output:\n"
                     "%s" % (pipe_cmd, exit_status, error_output))

  # Tweak props: the patterns are separated by arbitrary whitespace,
  # so splitting on whitespace yields them without any empty entries.
  ignore_vals = [ ]
  for pattern in ''.join(chunks).split():
    if pattern == '!':
      # Reset the list if we encounter a '!'
      # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
      ignore_vals = [ ]
    else:
      ignore_vals.append(pattern)
  return ignore_vals
# Counter behind gen_key().  A plain int is enough: from Python 2.2 on
# (the minimum version this script accepts) ints turn into longs
# automatically instead of overflowing.
gen_key_base = 0

def gen_key():
  """Return a string that has not been returned by gen_key() before.

  The keys are the lowercase hexadecimal representations of a counter
  that starts at zero and is incremented on every call."""
  global gen_key_base
  key = '%x' % gen_key_base
  gen_key_base += 1
  return key
616 # ============================================================================
617 # This code is copied with a few modifications from:
618 # subversion/subversion/bindings/swig/python/svn/core.py
if sys.platform == "win32":
  _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

  def escape_shell_arg(arg):
    """Return ARG quoted for use in a Windows command line."""
    # The (very strange) parsing rules used by the C runtime library are
    # described at:
    # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

    # double up slashes, but only if they are followed by a quote character
    doubled = _escape_shell_arg_re.sub(r'\1\1\2', arg)

    # surround by quotes and escape quotes inside
    return '"' + doubled.replace('"', '"^""') + '"'


  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    # According to cmd's usage notes (cmd /?), it parses the command line
    # by "seeing if the first character is a quote character and if so,
    # stripping the leading character and removing the last quote
    # character."  So to prevent the argument string from being changed
    # we add an extra set of quotes around it here.
    escaped_args = [escape_shell_arg(arg) for arg in argv]
    return '"' + ' '.join(escaped_args) + '"'

else:
  def escape_shell_arg(arg):
    """Return ARG enclosed in single quotes, with every single quote
    inside it written as '\\''."""
    escaped = arg.replace("'", "'\\''")
    return "'" + escaped + "'"

  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    escaped_args = [escape_shell_arg(arg) for arg in argv]
    return ' '.join(escaped_args)
662 # ============================================================================
def format_date(date):
  """Return DATE (seconds since the epoch) formatted as an
  svn-compatible date string, expressed in UTC."""
  # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
  utc_fields = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_fields)
def sort_file(infile, outfile):
  """Sort the lines of INFILE into OUTFILE using the external 'sort'
  command, with Ctx().tmpdir as sort's temporary directory.

  The command is run with LC_ALL set to 'C'; the previous state of
  LC_ALL is restored afterwards, even if the command fails.  A failure
  of the command is reported by run_command()."""
  # sort the log files

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
  # it to 'C'
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect.  The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is
    # no risk that we use that sort by accident.
    run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
  finally:
    # Put LC_ALL back the way it was, whether or not the sort worked.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp
def match_regexp_list(regexp_list, s):
  """Return True if the string S is matched by at least one of the
  compiled regexps in REGEXP_LIST, and False otherwise."""
  for pattern in regexp_list:
    if pattern.match(s):
      return True
  return False
class LF_EOL_Filter:
  """Wrap a stream so that every end-of-line marker read from it
  (CRLF, CR or LF) is converted to a single LF."""

  def __init__(self, stream):
    self.stream = stream
    # True while a CR that ended the previous chunk is being held back,
    # because it may be the first half of a CRLF pair.
    self.carry_cr = False
    self.eof = False

  def read(self, size):
    """Read up to SIZE characters from the underlying stream and return
    them with the end-of-line markers converted.  An empty string is
    returned only at the end of the stream."""
    while True:
      chunk = self.stream.read(size)
      self.eof = (len(chunk) == 0)
      if self.carry_cr:
        # Put back the CR that was held back by the previous read.
        chunk = '\r' + chunk
        self.carry_cr = False
      if not self.eof and chunk.endswith('\r'):
        # Hold a trailing CR back until the next chunk shows whether an
        # LF follows it.
        self.carry_cr = True
        chunk = chunk[:-1]
      chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
      if chunk or self.eof:
        return chunk
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
  """A simple logging facility.  Each line will be timestamped if
  self.use_timestamps is true.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
  __shared_state = {}
  def __init__(self):
    # All instances share one attribute dictionary.  If it has already
    # been filled in by an earlier instance there is nothing left to do.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %M is minutes; %m (which only appears in the date part) is the
    # month.
    self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file.  Only
    messages whose LOG_LEVEL is <= self.log_level will be printed.  If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str,args)) + "\n")
    # Ensure that log output doesn't get out-of-order with respect to
    # stderr output.
    self.logger.flush()
class Cleanup:
  """This singleton class manages any files created by cvs2svn.  When
  you first create a file, call Cleanup.register, passing the
  filename, and the last pass in which you need the file.  After the
  end of that pass, the file is removed, after running an optional
  callback.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}
  def __init__(self):
    # All instances share one attribute dictionary; only the first
    # instance has to fill it in.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    # Maps a pass to a dictionary whose keys are the files to remove
    # at the end of that pass.
    self._log = {}
    # Maps a file to the callback to run before the file is removed.
    self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal.  Registering a given FILE is
    idempotent; you may register as many times as you wish, but it
    will only be cleaned up once.

    Note that if a file is registered multiple times, only the first
    callback registered for that file will be called at cleanup
    time.  Also note that if you register a database file you must
    close the database before cleanup, e.g. using a callback."""
    files_for_pass = self._log.setdefault(which_pass, {})
    files_for_pass[file] = 1
    if callback and file not in self._callbacks:
      self._callbacks[file] = callback

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
    if which_pass not in self._log:
      return
    for file in self._log[which_pass]:
      Log().write(LOG_VERBOSE, "Deleting", file)
      if file in self._callbacks:
        self._callbacks[file]()
      os.unlink(file)
797 # Always use these constants for opening databases.
798 DB_OPEN_READ = 'r'
799 DB_OPEN_NEW = 'n'
class AbstractDatabase:
  """An abstract base class for anydbm-based databases.

  Derived classes supply __getitem__ and __setitem__, which decide how
  values are stored."""

  def __init__(self, filename, mode):
    """Open the anydbm database FILENAME with open mode MODE and keep
    it in self.db."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
      if os.path.isfile(filename):
        os.unlink(filename)
      mode = 'c'

    self.db = anydbm.open(filename, mode)

    # Import implementations for many mapping interface methods.  Note
    # that we specifically do not do this for any method which handles
    # *values*, because our derived classes define __getitem__ and
    # __setitem__ to override the storage of values, and grabbing
    # methods directly from the dbm object would bypass this.
    delegated_names = ('__delitem__', 'keys', '__iter__', 'has_key',
                       '__contains__', 'iterkeys', 'clear')
    for meth_name in delegated_names:
      meth_ref = getattr(self.db, meth_name, None)
      if meth_ref:
        setattr(self, meth_name, meth_ref)

  # The methods below are the fallbacks that are used for whatever the
  # dbm object did not provide in __init__.

  def __delitem__(self, key):
    # gdbm defines a __delitem__ method, but it cannot be assigned.  So
    # this method provides a fallback definition via explicit delegation:
    del self.db[key]

  def __iter__(self):
    for key in self.keys():
      yield key

  def has_key(self, key):
    try:
      self.db[key]
    except KeyError:
      return False
    else:
      return True

  def __contains__(self, key):
    return self.has_key(key)

  def iterkeys(self):
    return self.__iter__()

  def clear(self):
    for key in self.keys():
      del self[key]

  def items(self):
    pairs = []
    for key in self.keys():
      pairs.append((key, self[key]))
    return pairs

  def values(self):
    result = []
    for key in self.keys():
      result.append(self[key])
    return result

  def get(self, key, default=None):
    try:
      value = self[key]
    except KeyError:
      return default
    return value
class SDatabase(AbstractDatabase):
  """A database that can only store strings.

  Values are handed to, and taken from, the underlying dbm object
  without any conversion."""

  def __getitem__(self, key):
    stored = self.db[key]
    return stored

  def __setitem__(self, key, value):
    self.db[key] = value
class Database(AbstractDatabase):
  """A database that uses the marshal module to store built-in types.

  Values are serialized with marshal.dumps() on the way in and
  restored with marshal.loads() on the way out."""

  def __getitem__(self, key):
    serialized = self.db[key]
    return marshal.loads(serialized)

  def __setitem__(self, key, value):
    serialized = marshal.dumps(value)
    self.db[key] = serialized
894 class StatsKeeper:
895 __shared_state = { }
896 def __init__(self):
897 self.__dict__ = self.__shared_state
898 if self.__dict__:
899 return
900 self.filename = temp(STATISTICS_FILE)
901 Cleanup().register(self.filename, pass8)
902 # This can get kinda large, so we don't store it in our data dict.
903 self.repos_files = { }
905 if os.path.exists(self.filename):
906 self.unarchive()
907 else:
908 self.data = { 'cvs_revs_count' : 0,
909 'tags': { },
910 'branches' : { },
911 'repos_size' : 0,
912 'repos_file_count' : 0,
913 'svn_rev_count' : None,
914 'first_rev_date' : 1L<<32,
915 'last_rev_date' : 0,
916 'pass_timings' : { },
917 'start_time' : 0,
918 'end_time' : 0,
921 def log_duration_for_pass(self, duration, pass_num):
922 self.data['pass_timings'][pass_num] = duration
924 def set_start_time(self, start):
925 self.data['start_time'] = start
927 def set_end_time(self, end):
928 self.data['end_time'] = end
930 def _bump_item(self, key, amount=1):
931 self.data[key] += amount
933 def reset_c_rev_info(self):
934 self.data['cvs_revs_count'] = 0
935 self.data['tags'] = { }
936 self.data['branches'] = { }
938 def record_c_rev(self, c_rev):
939 self._bump_item('cvs_revs_count')
941 for tag in c_rev.tags:
942 self.data['tags'][tag] = None
943 for branch in c_rev.branches:
944 self.data['branches'][branch] = None
946 if c_rev.timestamp < self.data['first_rev_date']:
947 self.data['first_rev_date'] = c_rev.timestamp
949 if c_rev.timestamp > self.data['last_rev_date']:
950 self.data['last_rev_date'] = c_rev.timestamp
952 # Only add the size if this is the first time we see the file.
953 if not self.repos_files.has_key(c_rev.fname):
954 self._bump_item('repos_size', c_rev.file_size)
955 self.repos_files[c_rev.fname] = None
957 self.data['repos_file_count'] = len(self.repos_files)
959 def set_svn_rev_count(self, count):
960 self.data['svn_rev_count'] = count
962 def svn_rev_count(self):
963 return self.data['svn_rev_count']
965 def archive(self):
966 open(self.filename, 'w').write(marshal.dumps(self.data))
968 def unarchive(self):
969 self.data = marshal.loads(open(self.filename, 'r').read())
971 def __str__(self):
972 svn_revs_str = ""
973 if self.data['svn_rev_count'] is not None:
974 svn_revs_str = ('Total SVN Commits: %10s\n'
975 % self.data['svn_rev_count'])
977 return ('\n' \
978 'cvs2svn Statistics:\n' \
979 '------------------\n' \
980 'Total CVS Files: %10i\n' \
981 'Total CVS Revisions: %10i\n' \
982 'Total Unique Tags: %10i\n' \
983 'Total Unique Branches: %10i\n' \
984 'CVS Repos Size in KB: %10i\n' \
985 '%s' \
986 'First Revision Date: %s\n' \
987 'Last Revision Date: %s\n' \
988 '------------------' \
989 % (self.data['repos_file_count'],
990 self.data['cvs_revs_count'],
991 len(self.data['tags']),
992 len(self.data['branches']),
993 (self.data['repos_size'] / 1024),
994 svn_revs_str,
995 time.ctime(self.data['first_rev_date']),
996 time.ctime(self.data['last_rev_date']),
999 def timings(self):
1000 passes = self.data['pass_timings'].keys()
1001 passes.sort()
1002 output = 'Timings:\n------------------\n'
1004 def desc(val):
1005 if val == 1: return "second"
1006 return "seconds"
1008 for pass_num in passes:
1009 duration = int(self.data['pass_timings'][pass_num])
1010 p_str = ('pass %d:%6d %s\n'
1011 % (pass_num, duration, desc(duration)))
1012 output += p_str
1014 total = int(self.data['end_time'] - self.data['start_time'])
1015 output += ('total: %6d %s' % (total, desc(total)))
1016 return output
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolicname was
  seen in, and whose value is a list of all symbolicnames that were
  last seen in that revision."""

  def __init__(self, mode):
    """Open the backing Database in MODE and register it for cleanup."""
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  # Once we've gone through all the revs,
  # symbols.keys() will be a list of all tags and branches, and
  # their corresponding values will be a key into the last CVS revision
  # that they were used in.
  def log_revision(self, c_rev):
    """Note C_REV as the latest revision for each symbol it carries.

    Tags are always recorded; branches are skipped when C_REV is a
    delete."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare by value, not identity: an op string that was parsed out
    # of a revs line is not guaranteed to be the very same object as
    # the OP_DELETE constant.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  # Creates an inversion of symbols above--a dictionary of lists (key
  # = CVS rev unique_key: val = list of symbols that close in that
  # rev.
  def create_database(self):
    """Write the inverted self.symbols mapping into the database."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """A Database to store CVSRevision objects and retrieve them by their
  unique_key().

  Revisions are kept in their string form and are turned back into
  CVSRevision objects on retrieval."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (like the MODE
    argument to Database or anydbm.open())."""
    db_file = temp(CVS_REVS_DB)
    self.cvs_revs_db = SDatabase(db_file, mode)
    Cleanup().register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Add C_REV, a CVSRevision, to the database."""
    key = c_rev.unique_key()
    self.cvs_revs_db[key] = str(c_rev)

  def get_revision(self, unique_key):
    """Return the CVSRevision stored under UNIQUE_KEY."""
    line = self.cvs_revs_db[unique_key]
    return CVSRevision(Ctx(), line)
def TagsDatabase(mode):
  """Return a database recording which symbolic names are tags.

  Each key is a tag name; the value has no meaning. The database is
  an SDatabase opened in MODE, and its file is registered with
  Cleanup for pass8."""
  tags_db = SDatabase(temp(TAGS_DB), mode)
  Cleanup().register(temp(TAGS_DB), pass8)
  return tags_db
class Project:
  """A project within a CVS repository."""

  def __init__(self, project_cvs_repos_path,
               trunk_path, branches_path, tags_path):
    """Create a new Project record.

    PROJECT_CVS_REPOS_PATH is the main CVS directory for this project
    (within the filesystem).  TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH
    are the full, normalized directory names in svn for the
    corresponding part of the repository.

    Raise FatalError if PROJECT_CVS_REPOS_PATH does not start with the
    path of the CVS repository."""

    self.project_cvs_repos_path = project_cvs_repos_path
    prefix = Ctx().cvs_repository.cvs_repos_path
    if not self.project_cvs_repos_path.startswith(prefix):
      raise FatalError("Project '%s' must start with '%s'"
                       % (self.project_cvs_repos_path, prefix,))
    # The project's main directory as a cvs_path:
    self.project_cvs_path = self.project_cvs_repos_path[len(prefix):]
    if self.project_cvs_path.startswith(os.sep):
      self.project_cvs_path = self.project_cvs_path[1:]
    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH is a legitimate source for this project.

    Legitimate paths are self.trunk_path or any directory directly
    under self.branches_path."""

    if svn_path == self.trunk_path:
      return True

    (head, tail,) = _path_split(svn_path)
    if head == self.branches_path:
      return True

    return False

  def is_unremovable(self, svn_path):
    """Return True iff the specified path must not be removed."""

    return svn_path in [self.trunk_path, self.branches_path, self.tags_path]

  def get_branch_path(self, branch_name):
    """Return the svnpath for the branch named BRANCH_NAME."""

    return _path_join(self.branches_path, _clean_symbolic_name(branch_name))

  def get_tag_path(self, tag_name):
    """Return the svnpath for the tag named TAG_NAME."""

    return _path_join(self.tags_path, _clean_symbolic_name(tag_name))

  def _relative_name(self, cvs_path):
    """Convert CVS_PATH into a name relative to this project's root directory.

    CVS_PATH has to begin (textually) with self.project_cvs_path.
    Remove prefix and optional '/'.  If CVS_PATH is exactly the project
    path, the result is the empty string.

    Raise FatalError if CVS_PATH does not begin with the project path."""

    if not cvs_path.startswith(self.project_cvs_path):
      raise FatalError(
          "_relative_name: '%s' is not a sub-path of '%s'"
          % (cvs_path, self.project_cvs_path,))
    l = len(self.project_cvs_path)
    # Slice instead of indexing: when CVS_PATH is no longer than the
    # prefix there is no character at position l, and indexing would
    # raise IndexError.
    if cvs_path[l:l + 1] == os.sep:
      l += 1
    return cvs_path[l:]

  def make_trunk_path(self, cvs_path):
    """Return the trunk path for CVS_PATH.

    Return the svn path for this file on trunk."""

    return _path_join(self.trunk_path, self._relative_name(cvs_path))

  def make_branch_path(self, branch_name, cvs_path):
    """Return the svn path for CVS_PATH on branch BRANCH_NAME."""

    return _path_join(self.get_branch_path(branch_name),
                      self._relative_name(cvs_path))
class CVSRevision:
  """One revision of one RCS file.

  An instance can be built either from its individual fields or from
  the single-line string form produced by str(); see __init__."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Raise TypeError if ARGS has any other length.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      # The first 15 fields are single words; everything after them
      # (tags, branches and the file name) is split further below.
      data = args[0].split(' ', 15)
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.mode, self.branch_name, numtags, remainder) = data
      # Patch up data items which are not simple strings
      self.timestamp = int(self.timestamp, 16)
      # "*" stands for a missing timestamp, which is represented as 0.
      for attr in ('prev_timestamp', 'next_timestamp'):
        if getattr(self, attr) == "*":
          setattr(self, attr, 0)
        else:
          setattr(self, attr, int(getattr(self, attr)))
      # For these fields "*" stands for None.
      for attr in ('prev_rev', 'next_rev', 'file_in_attic',
                   'file_executable', 'mode', 'branch_name'):
        if getattr(self, attr) == "*":
          setattr(self, attr, None)
      self.file_size = int(self.file_size)
      numtags = int(numtags)
      # REMAINDER is NUMTAGS tag names, then the branch count, then the
      # branch names and the file name.
      tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
      self.tags = tags_and_numbranches_and_remainder[:-2]
      numbranches = int(tags_and_numbranches_and_remainder[-2])
      remainder = tags_and_numbranches_and_remainder[-1]
      # The file name comes last and is never split, so it may itself
      # contain spaces.
      branches_and_fname = remainder.split(' ', numbranches)
      self.branches = branches_and_fname[:-1]
      self.fname = branches_and_fname[-1]
    else:
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.cvs_repository.get_cvs_path(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(self.branch_name,
                                                     self.cvs_path)
      else:
        self.svn_path = ctx.project.make_trunk_path(self.cvs_path)

  # The 'primary key' of a CVS Revision is the revision number + the
  # filename.  To provide a unique key (say, for a dict), we just glom
  # them together in a string.  By passing in self.prev_rev or
  # self.next_rev, you can get the unique key for their respective
  # CVSRevisions.
  def unique_key(self, revnum="0"):
    """Return the key 'REVNUM/fname', using self.rev if REVNUM is omitted.

    Return None if REVNUM is None (as self.prev_rev or self.next_rev
    may be)."""
    # "0" marks 'no argument given'.  It is compared by value, because
    # identity of equal strings is an implementation detail.
    if revnum == "0":
      revnum = self.rev
    elif revnum is None:
      return None
    return revnum + "/" + self.fname

  def __str__(self):
    """Return the one-line string form accepted by __init__."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
               self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
               self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
               (self.file_executable or "*"),
               self.file_size,
               self.deltatext_code, (self.mode or "*"),
               (self.branch_name or "*"),
               len(self.tags), self.tags and " " or "", " ".join(self.tags),
               len(self.branches), self.branches and " " or "",
               " ".join(self.branches),
               self.fname, ))

  # Returns true if this CVSRevision is the opening CVSRevision for
  # NAME (for this RCS file).
  def opens_symbolic_name(self, name):
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # the branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags:
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches:
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Return the blockers of BRANCH that are not in the hash EXCLUDES.

    The result is a hash used as a set.  It is empty if BRANCH itself
    is not in EXCLUDES."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so the
    values are not used."""
    blocked_branches = { }
    for branch in self.branches:
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches:
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files."""
    f = open(temp(TAGS_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        # Format: name, create count, commit count, then any blockers.
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      # Close explicitly so the data is on disk before it is read back.
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1463 class FileDataCollector(cvs2svn_rcsparse.Sink):
1464 """Class responsible for collecting RCS data for a particular file.
1466 Any collected data that need to be remembered are stored into the
1467 referenced CollectData instance."""
def __init__(self, collect_data, canonical_name, filename):
  """Prepare to receive the RCS data of one file.

  FILENAME is the absolute filesystem path to the file in question,
  and CANONICAL_NAME is FILENAME with the 'Attic' component removed
  (if the file is indeed in the Attic).  Whatever has to outlive this
  object is stored through COLLECT_DATA."""

  self.collect_data = collect_data
  self.fname = canonical_name

  # Per-file metadata is worked out here, once, rather than later on
  # where it would have to be recomputed for every CVS *revision*.
  self.cvs_path = Ctx().cvs_repository.get_cvs_path(self.fname)

  # The two names differ only when the 'Attic' component has been
  # stripped out of the canonical one.
  if canonical_name == filename:
    self.file_in_attic = None
  else:
    self.file_in_attic = 1

  file_stat = os.stat(filename)

  # Size of the file, in bytes.
  self.file_size = file_stat[stat.ST_SIZE]

  # 1 if the owner-executable bit is set, else None.
  if file_stat[stat.ST_MODE] & stat.S_IXUSR:
    self.file_executable = 1
  else:
    self.file_executable = None

  # revision -> [timestamp, author, old-timestamp]
  self.rev_data = { }

  # revision -> the revision preceding it on its line of development.
  # The first revision R on a branch has the revision R sprouted from
  # as its predecessor.  This cannot be worked out arithmetically
  # (because of cvsadmin -o), hence the explicit table.  A revision
  # without predecessor maps to None.
  self.prev_rev = { }

  # The same links followed the other way: revision -> next revision.
  # Unlike self.prev_rev, a revision without successor has no entry.
  self.next_rev = { }

  # revision -> state.  set_revision_info needs the state of a
  # revision's predecessor to tell an add from a change, and by then
  # all revisions of the file are available here.
  self.rev_state = { }

  # branch number (like '1.7.2') -> branch name (like
  # 'Release_1_0_dev').
  self.branch_names = { }

  # RCS flags (used for keyword expansion).
  self.mode = None

  # revision (like '1.7') -> list of the names of the branches that
  # sprout from it (like ['Release_1_0_dev', 'experimental_driver']).
  self.branchlist = { }

  # revision -> list of the tag names that apply to it.
  self.taglist = { }

  # If set, an RCS branch number.  rcsparse calls it the "principal
  # branch" (hence the setter name 'set_principal_branch'), whereas
  # CVS and RCS say "default branch", which is the term used here.
  self.default_branch = None

  # If the RCS file doesn't have a default branch anymore, but does
  # have vendor revisions, then we make an educated guess that those
  # revisions *were* the head of the default branch up until the
  # commit of 1.2, at which point the file's default branch became
  # trunk.  This records the date at which 1.2 was committed.
  self.first_non_vendor_revision_date = None

  # Symbols defined so far for this file (used as a set), kept to
  # catch multiple definitions of one symbol, which happens easily
  # when --symbol-transform is used.
  self.defined_symbols = { }
def set_principal_branch(self, branch):
  """Store BRANCH as this file's default branch."""
  self.default_branch = branch
def set_expansion(self, mode):
  """Store MODE as this file's keyword expansion mode."""
  self.mode = mode
def set_branch_name(self, branch_number, name):
  """Record that BRANCH_NUMBER is the branch number for branch NAME,
  and that NAME sprouts from the revision BRANCH_NUMBER is rooted in.
  BRANCH_NUMBER is an RCS branch number with an odd number of components,
  for example '1.7.2' (never '1.7.0.2').

  If BRANCH_NUMBER already has a name, write a warning to stderr and
  leave everything unchanged."""
  if branch_number in self.branch_names:
    sys.stderr.write("%s: in '%s':\n"
                     " branch '%s' already has name '%s',\n"
                     " cannot also have name '%s', ignoring the latter\n"
                     % (warning_prefix, self.fname, branch_number,
                        self.branch_names[branch_number], name))
    return

  self.branch_names[branch_number] = name
  # The branchlist is keyed on the revision number from which the
  # branch sprouts, so strip off the odd final component.
  last_dot = branch_number.rfind(".")
  sprout_rev = branch_number[:last_dot]
  self.branchlist.setdefault(sprout_rev, []).append(name)
  self.collect_data.symbol_db.register_branch_creation(name)
def rev_to_branch_name(self, revision):
  """Return the name of the branch on which REVISION lies.
  REVISION is a non-branch revision number with an even number of
  components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
  For the convenience of callers, REVISION can also be a trunk
  revision such as '1.2', in which case just return None."""
  if trunk_rev.match(revision):
    return None
  # Dropping the last component leaves the branch number.
  branch_number = revision[:revision.rindex(".")]
  return self.branch_names.get(branch_number)
def define_tag(self, name, revision):
  """Record a bidirectional mapping between symbolic NAME and REVISION.
  REVISION is an unprocessed revision number from the RCS file's
  header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
  This function will determine what kind of symbolic name it is by
  inspection, and record it in the right places."""
  # Run NAME through every symbol transform, in order.
  for (pattern, replacement) in Ctx().symbol_transforms:
    transformed = pattern.sub(replacement, name)
    if transformed != name:
      Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                  % (name, transformed))
      name = transformed

  # A repeated definition is reported and remembered as a fatal
  # error; processing of the symbol nevertheless continues.
  if name in self.defined_symbols:
    err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
          % (error_prefix, name, self.fname)
    sys.stderr.write(err + "\n")
    self.collect_data.fatal_errors.append(err)
  self.defined_symbols[name] = None

  cvs_match = cvs_branch_tag.match(revision)
  if cvs_match:
    branch_number = cvs_match.group(1) + cvs_match.group(2)
    self.set_branch_name(branch_number, name)
  elif rcs_branch_tag.match(revision):
    self.set_branch_name(revision, name)
  else:
    # Not a branch number, so NAME is a tag on REVISION.
    self.taglist.setdefault(revision, []).append(name)
    self.collect_data.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
  """Record the data of REVISION.

  TIMESTAMP, AUTHOR and STATE describe the revision itself, BRANCHES
  lists the revisions that sprout from it, and NEXT is its RCS 'next'
  revision (false if there is none)."""

  # Remember the state for later calculations.
  self.rev_state[revision] = state

  # A list, in case the timestamp has to be adjusted afterwards.
  self.rev_data[revision] = [int(timestamp), author, None]

  on_trunk = trunk_rev.match(revision)

  # In RCS, 'next' always means "where to find the next deltatext
  # needed to retrieve this revision".  On trunk that is what humans
  # would call the *previous* revision (1.3's 'next' is 1.2), whereas
  # on a branch it really is the following one (1.1.2.1's 'next' is
  # 1.1.2.2).  We want human-style links, so the two cases are
  # recorded in opposite directions.
  #
  # For a branch revision this does not set a 'prev' for REVISION
  # itself (an earlier call may already have done so); it sets the
  # 'prev' of NEXT instead.  All revisions of the file are seen
  # before the links are used, so the link for NEXT is in place by
  # the time NEXT itself comes around.
  if on_trunk:
    self.prev_rev[revision] = next
    self.next_rev[next] = revision
  elif next:
    self.prev_rev[next] = revision
    self.next_rev[revision] = next

  for sprout in branches:
    self.prev_rev[sprout] = revision

  # Ratchet up the highest vendor head revision, if necessary.
  if self.default_branch:
    default_branch_root = self.default_branch + "."
    if (revision.startswith(default_branch_root)
        and default_branch_root.count('.') == revision.count('.')):
      # This revision is on the default branch, so record that it is
      # the new highest default branch head revision.
      self.collect_data.default_branches_db[self.cvs_path] = revision
  elif revision == '1.2':
    # No default branch, so make an educated guess: this is probably
    # the time when the file stopped having a default branch.
    self.first_non_vendor_revision_date = timestamp
  else:
    if vendor_revision.match(revision) and (
          (not self.first_non_vendor_revision_date)
          or (timestamp < self.first_non_vendor_revision_date)):
      # We're looking at a vendor revision, and it wasn't
      # committed after this file lost its default branch, so bump
      # the maximum trunk vendor revision in the permanent record.
      self.collect_data.default_branches_db[self.cvs_path] = revision

  if not on_trunk:
    # Branch names were collected from the symbolic name header, but
    # that misses unlabeled branches.  If this branch has no name
    # yet, this is the first encounter with it, so name it now.
    branch_number = revision[:revision.rindex(".")]
    if branch_number not in self.branch_names:
      self.set_branch_name(branch_number, "unlabeled-" + branch_number)

    # Register the commit on this non-trunk branch
    branch_name = self.branch_names[branch_number]
    self.collect_data.symbol_db.register_branch_commit(branch_name)
def tree_completed(self):
  """The revision tree has been parsed.  Analyze it for consistency.

  Every (revision, previous revision) pair recorded in self.prev_rev
  is checked to make sure that the previous revision's timestamp is
  strictly earlier than the revision's own timestamp.  Whenever that
  is not the case, the previous revision (and, transitively, any of
  its own predecessors that are still out of order) is pushed back in
  time so that it ends up one second before its successor.

  For each adjusted revision, self.rev_data[rev][0] receives the new
  timestamp and self.rev_data[rev][2] receives the timestamp the
  revision had before it was adjusted for the first time.  Each
  adjustment is logged at LOG_VERBOSE, and additionally at LOG_WARN
  when it moves the revision by more than COMMIT_THRESHOLD seconds."""

  # Our algorithm depends upon the timestamps on the revisions occurring
  # monotonically over time.  That is, we want to see rev 1.34 occur in
  # time before rev 1.35.  If we inserted 1.35 *first* (due to the time-
  # sorting), and then tried to insert 1.34, we'd be screwed.
  #
  # To perform the analysis, we'll simply visit all of the 'previous'
  # links that we have recorded and validate that the timestamp on the
  # previous revision is before the specified revision.
  #
  # If we have to resync some nodes, then we restart the scan.  Just
  # keep looping as long as we need to restart.
  while 1:
    for current, prev in self.prev_rev.items():
      if not prev:
        # No previous revision exists (i.e. the initial revision).
        continue
      t_c = self.rev_data[current][0]
      t_p = self.rev_data[prev][0]
      if t_p < t_c:
        # This pair is already in chronological order.
        continue

      # The previous revision occurred later than the current revision.
      # Shove the previous revision back in time (and any before it that
      # may need to shift).
      #
      # We sync backwards and not forwards because any given CVS
      # Revision has only one previous revision.  However, a CVS
      # Revision can *be* a previous revision for many other
      # revisions (e.g., a revision that is the source of multiple
      # branches).  This becomes relevant when we do the secondary
      # synchronization in pass 2--we can make certain that we
      # don't resync a revision earlier than its previous
      # revision, but it would be non-trivial to make sure that we
      # don't resync revision R *after* any revisions that have R
      # as a previous revision.
      while t_p >= t_c:
        self.rev_data[prev][0] = t_c - 1 # new timestamp
        # Because the scan restarts after every resync, and because one
        # revision can be the predecessor of several others, the same
        # revision may be pushed back more than once.  Only remember
        # the timestamp it had before the *first* adjustment;
        # overwriting it with an intermediate value would lose the
        # time at which the revision was really committed.  (A false
        # value in slot 2 is what set_revision_info() treats as
        # "never adjusted".)
        if not self.rev_data[prev][2]:
          self.rev_data[prev][2] = t_p # old timestamp
        delta = t_c - 1 - t_p
        msg = "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (self.cvs_path, prev, time.ctime(t_p), delta)
        Log().write(LOG_VERBOSE, msg)
        if (delta > COMMIT_THRESHOLD
            or delta < (COMMIT_THRESHOLD * -1)):
          Log().write(LOG_WARN,
                      "%s: Significant timestamp change for '%s' "
                      "(%d seconds)"
                      % (warning_prefix, self.cvs_path, delta))
        # Step one link further back along the chain of predecessors.
        current = prev
        prev = self.prev_rev[current]
        if not prev:
          break
        t_c -= 1 # self.rev_data[current][0]
        t_p = self.rev_data[prev][0]

      # Timestamps have changed, so pairs that were already visited may
      # no longer be in order: break from the for-loop to restart the
      # scan.
      break
    else:
      # Finished the for-loop (no resyncing was performed).
      return
def set_revision_info(self, revision, log, text):
  """Record everything known about REVISION once its LOG message and
  delta TEXT are available.

  A CVSRevision describing REVISION is built and written, one per
  line, to self.collect_data.revs, and is also handed to
  StatsKeeper().  The (author, log) pair is stored in
  self.collect_data.metadata_db under a digest of the log message and
  the author, unless that digest is already present.  If the
  revision's timestamp was adjusted earlier (a true value in the
  third slot of its self.rev_data entry), a line is appended to
  self.collect_data.resync.

  TEXT is only examined for truth, to classify the deltatext as empty
  or non-empty."""
  timestamp, author, old_ts = self.rev_data[revision]
  digest = sha.new(log + '\0' + author).hexdigest()
  if old_ts:
    # The timestamp on this revision was changed.  Log it for later
    # resynchronization of other files' revisions that occurred
    # for this time and log message.
    resync_line = '%08lx %s %08lx\n' % (old_ts, digest, timestamp)
    self.collect_data.resync.write(resync_line)

  # "...Give back one kadam to honor the Hebrew God whose Ark this is."
  #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
  #
  # If revision 1.1 appears to have been created via 'cvs add'
  # instead of 'cvs import', then this file probably never had a
  # default branch, so retroactively remove its record in the
  # default branches db.  The test is that the log message CVS uses
  # for 1.1 in imports is "Initial revision\n" with no period.
  if revision == '1.1' and log != 'Initial revision\n':
    try:
      del self.collect_data.default_branches_db[self.cvs_path]
    except KeyError:
      pass

  # Get the timestamps of the previous and next revisions; a neighbour
  # without an entry in self.rev_data contributes a timestamp of 0.
  no_data = [0, None, None]
  prev_rev = self.prev_rev[revision]
  prev_timestamp = self.rev_data.get(prev_rev, no_data)[0]

  next_rev = self.next_rev.get(revision)
  next_timestamp = self.rev_data.get(next_rev, no_data)[0]

  # How to tell if a CVSRevision is an add, a change, or a deletion:
  #
  # It's a delete if RCS state is 'dead'
  #
  # It's an add if RCS state is 'Exp.' and
  #      - we either have no previous revision
  #        or
  #      - we have a previous revision whose state is 'dead'
  #
  # Anything else is a change.
  if self.rev_state[revision] == 'dead':
    op = OP_DELETE
  elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
    op = OP_ADD
  else:
    op = OP_CHANGE

  def is_branch_revision(rev):
    """Return True if this revision is not a trunk revision,
    else return False."""
    return rev.count('.') >= 3

  def is_same_line_of_development(rev1, rev2):
    """Return True if rev1 and rev2 are on the same line of
    development (i.e., both on trunk, or both on the same branch);
    return False otherwise.  Either rev1 or rev2 can be None, in
    which case automatically return False."""
    if rev1 is None or rev2 is None:
      return False
    both_on_trunk = (rev1.count('.') == 1 and rev2.count('.') == 1)
    if both_on_trunk:
      return True
    # Otherwise compare everything up to the last dot of each number.
    return rev1[0:rev1.rfind('.')] == rev2[0:rev2.rfind('.')]

  # There can be an odd situation where the tip revision of a branch
  # is alive, but every predecessor on the branch is in state 'dead',
  # yet the revision from which the branch sprouts is alive.  (This
  # is sort of a mirror image of the more common case of adding a
  # file on a branch, in which the first revision on the branch is
  # alive while the revision from which it sprouts is dead.)
  #
  # In this odd situation, we must mark the first live revision on
  # the branch as an OP_CHANGE instead of an OP_ADD, because it
  # reflects, however indirectly, a change w.r.t. the source
  # revision from which the branch sprouts.
  #
  # This is issue #89.
  if is_branch_revision(revision) and self.rev_state[revision] != 'dead':
    # Walk back through the chain of predecessors, looking for a dead
    # revision whose own predecessor is alive and on another line of
    # development.
    cur_num = revision
    prev_num = self.prev_rev.get(cur_num, None)
    while cur_num and prev_num:
      if (not is_same_line_of_development(cur_num, prev_num)
          and self.rev_state[cur_num] == 'dead'
          and self.rev_state[prev_num] != 'dead'):
        op = OP_CHANGE
      cur_num = prev_num
      prev_num = self.prev_rev.get(cur_num, None)

  if text:
    deltatext_code = DELTATEXT_NONEMPTY
  else:
    deltatext_code = DELTATEXT_EMPTY

  tags = self.taglist.get(revision, [])
  branches = self.branchlist.get(revision, [])
  c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
                      next_timestamp, op,
                      prev_rev, revision, next_rev,
                      self.file_in_attic, self.file_executable,
                      self.file_size,
                      deltatext_code, self.fname,
                      self.mode, self.rev_to_branch_name(revision),
                      tags,
                      branches)
  self.collect_data.revs.write(str(c_rev) + "\n")
  StatsKeeper().record_c_rev(c_rev)

  # Store the author and log message only once per digest.
  metadata_db = self.collect_data.metadata_db
  if not metadata_db.has_key(digest):
    metadata_db[digest] = (author, log)
def parse_completed(self):
  """Register every tag and branch of this file with its parent
  branch in the symbol database, then count the file as processed.

  For each symbol recorded in self.taglist or self.branchlist, the
  branch name of the revision it is attached to is looked up with
  self.rev_to_branch_name(); when that name is not None, the symbol
  is registered as a blocker of that branch."""
  symbol_db = self.collect_data.symbol_db
  symbols_by_revision = self.taglist.items() + self.branchlist.items()
  for revision, symbols in symbols_by_revision:
    for symbol in symbols:
      parent_branch = self.rev_to_branch_name(revision)
      if parent_branch is None:
        continue
      symbol_db.register_branch_blocker(parent_branch, symbol)

  self.collect_data.num_files += 1
class CollectData:
  """Repository for data collected by parsing the CVS repository files.

  This class manages the databases into which information collected
  from the CVS repository is stored.  The data are stored into this
  class by FileDataCollector instances, one of which is created for
  each file to be parsed."""

  def __init__(self):
    """Open the output files and databases, and register each of them
    with Cleanup() together with a pass function."""
    # Flat file receiving one line per CVSRevision.
    revs_path = temp(DATAFILE + REVS_SUFFIX)
    self.revs = open(revs_path, 'w')
    Cleanup().register(revs_path, pass2)

    # Flat file receiving one line per revision whose timestamp was
    # adjusted.
    resync_path = temp(DATAFILE + RESYNC_SUFFIX)
    self.resync = open(resync_path, 'w')
    Cleanup().register(resync_path, pass2)

    # Keyed by cvs_path; values are revision numbers.
    default_branches_path = temp(DEFAULT_BRANCHES_DB)
    self.default_branches_db = SDatabase(default_branches_path,
                                         DB_OPEN_NEW)
    Cleanup().register(default_branches_path, pass5)

    # Keyed by digest; values are (author, log) tuples.
    metadata_path = temp(METADATA_DB)
    self.metadata_db = Database(metadata_path, DB_OPEN_NEW)
    Cleanup().register(metadata_path, pass8)

    self.fatal_errors = []
    self.num_files = 0
    self.symbol_db = SymbolDatabase()

    # 1 if we've collected data for at least one file, None otherwise.
    self.found_valid_file = None

  def write_symbol_db(self):
    """Ask the symbol database to write itself out."""
    self.symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive.
  """

  def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and register both with Cleanup()."""
    symbolings_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    self.symbolings = open(symbolings_path, 'w')
    Cleanup().register(symbolings_path, pass6)

    closings_path = temp(SYMBOL_CLOSINGS_TMP)
    self.closings = open(closings_path, 'w')
    Cleanup().register(closings_path, pass5)

    # The keys of this dictionary are *source* cvs_paths for which
    # we've encountered an 'opening' on the default branch.  The
    # values are the (uncleaned) symbolic names that this path has
    # opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later."""
    symbolic_names = c_rev.tags + c_rev.branches
    for sym_name in symbolic_names:
      self._note_default_branch_opening(c_rev, sym_name)

      # A deleted revision cannot serve as a copy source, so it opens
      # nothing.
      if c_rev.op != OP_DELETE:
        self._log(sym_name, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      if c_rev.next_rev is None:
        continue

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      closing_line = '%s %s\n' % (sym_name,
                                  c_rev.unique_key(c_rev.next_rev))
      self.closings.write(closing_line)

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Write out a single line to the symbol_openings_closings file
    representing that SVN_REVNUM of CVS_PATH on BRANCH_NAME is either
    the opening or closing (TYPE) of NAME (a symbolic name).

    A false BRANCH_NAME is written as '*'.

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # '%.8d' zero-pads the revision number to at least eight digits.
    # That *should* be enough.
    fields = (name, svn_revnum, type, branch_name or '*', cvs_path)
    self.symbolings.write('%s %.8d %s %s %s\n' % fields)

  def close(self):
    """Iterate through the closings file, lookup the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file.  Both files are closed by this method."""
    # Use this to get the c_rev of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    # Everything has to be on disk before the file is read back.
    self.closings.close()
    for closing_line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      # Each line is "<symbolic name> <unique revision key>".
      pieces = closing_line.rstrip().split(" ", 1)
      (name, rev_key) = pieces
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      closing_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum,
                closing_rev.cvs_path, closing_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Remember that C_REV.cvs_path has opened SYMBOLIC_NAME, so that
    log_default_branch_closing() can close it later.

    Note that the name is recorded unconditionally; this method does
    not itself check whether C_REV is a default branch revision."""
    names_by_path = self.open_paths_with_default_branches
    names_by_path.setdefault(c_rev.cvs_path, []).append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.cvs_path, then log each name in
    self.open_paths_with_default_branches[C_REV.cvs_path] as a closing
    with SVN_REVNUM as the closing revision number, and forget the
    path."""
    path = c_rev.cvs_path
    opened_names = self.open_paths_with_default_branches.get(path)
    if opened_names is None:
      # Nothing was opened from this path.
      return

    # log each symbol as a closing
    for sym_name in opened_names:
      self._log(sym_name, svn_revnum, path, None, CLOSING)

    # Remove them from the openings list as we're done with them.
    del self.open_paths_with_default_branches[path]
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""

  def __init__(self, mode):
    """Open the revision-mapping databases in MODE and the metadata and
    CVSRevision databases read-only.

    Raise RuntimeError if MODE is neither DB_OPEN_NEW nor
    DB_OPEN_READ."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    # str(svn_revnum) -> (c_rev keys, motivating revnum, name, date)
    svn2cvs_path = temp(SVN_REVNUMS_TO_CVS_REVS)
    self.svn2cvs_db = Database(svn2cvs_path, mode)
    Cleanup().register(svn2cvs_path, pass8)

    # c_rev unique key -> svn_revnum
    cvs2svn_path = temp(CVS_REVS_TO_SVN_REVNUMS)
    self.cvs2svn_db = Database(cvs2svn_path, mode)
    Cleanup().register(cvs2svn_path, pass8)

    # digest -> (author, log message)
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      self.tags_db = TagsDatabase(DB_OPEN_READ)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    stored = self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM)
    return int(stored)

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    record = self.svn2cvs_db.get(str(svn_revnum),
                                 (None, None, None, None))
    (c_rev_keys, motivating_revnum, name, date) = record
    if c_rev_keys is None:
      return None

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      if digest is not None:
        continue
      # Set the author and log message for this commit by using
      # CVSRevision metadata, but only if haven't done so already.
      digest = c_rev.digest
      author, log_msg = self.svn_commit_metadata[digest]
      svn_commit.set_author(author)
      svn_commit.set_log_msg(log_msg)

    svn_commit.set_date(date)

    # If we're doing a trunk-only conversion, we don't need to do any more
    # work.
    if Ctx().trunk_only:
      return svn_commit

    if name:
      if svn_commit.cvs_revs:
        raise SVNCommit.SVNCommitInternalInconsistencyError(
            "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
            "symbolic name ('%s') to fill."
            % (_clean_symbolic_name(name),))
      svn_commit.set_symbolic_name(name)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    if motivating_revnum is not None:
      svn_commit.set_motivating_revnum(motivating_revnum)

    return svn_commit

  def put_svn_commit(self, svn_revnum, cvs_revs,
                     date, name, motivating_revnum):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS and record associated attributes.

    Raise RuntimeError if this PersistenceManager was opened with
    DB_OPEN_READ."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

    unique_keys = []
    for c_rev in cvs_revs:
      unique_key = c_rev.unique_key()
      Log().write(LOG_VERBOSE, " ", unique_key)
      unique_keys.append(unique_key)

    # Forward mapping: one record per Subversion revision.
    self.svn2cvs_db[str(svn_revnum)] = (unique_keys,
                                        motivating_revnum, name, date)

    # Reverse mapping: one record per CVS revision.
    for unique_key in unique_keys:
      self.cvs2svn_db[unique_key] = svn_revnum

    # If it is not a primary commit, then record last_filled.  name is
    # allowed to be None.
    if name or motivating_revnum:
      self.last_filled[name] = svn_revnum
2155 class CVSCommit:
2156 """Each instance of this class contains a number of CVS Revisions
2157 that correspond to one or more Subversion Commits. After all CVS
2158 Revisions are added to the grouping, calling process_revisions will
2159 generate a Subversion Commit (or Commits) for the set of CVS
2160 Revisions in the grouping."""
2162 def __init__(self, digest, author, log):
2163 self.digest = digest
2164 self.author = author
2165 self.log = log
2167 # Symbolic names for which the last source revision has already
2168 # been seen and for which the CVSRevisionAggregator has already
2169 # generated a fill SVNCommit. See self.process_revisions().
2170 self.done_symbols = [ ]
2172 self.files = { }
2173 # Lists of CVSRevisions
2174 self.changes = [ ]
2175 self.deletes = [ ]
2177 # Start out with a t_min higher than any incoming time T, and a
2178 # t_max lower than any incoming T. This way the first T will
2179 # push t_min down to T, and t_max up to T, naturally (without any
2180 # special-casing), and successive times will then ratchet them
2181 # outward as appropriate.
2182 self.t_min = 1L<<32
2183 self.t_max = 0
2185 # This will be set to the SVNCommit that occurs in self._commit.
2186 self.motivating_commit = None
2188 # This is a list of all non-primary commits motivated by the main
2189 # commit. We gather these so that we can set their dates to the
2190 # same date as the primary commit.
2191 self.secondary_commits = [ ]
2193 # State for handling default branches.
2195 # Here is a tempting, but ultimately nugatory, bit of logic, which
2196 # I share with you so you may appreciate the less attractive, but
2197 # refreshingly non-nugatory, logic which follows it:
2199 # If some of the commits in this txn happened on a non-trunk
2200 # default branch, then those files will have to be copied into
2201 # trunk manually after being changed on the branch (because the
2202 # RCS "default branch" appears as head, i.e., trunk, in practice).
2203 # As long as those copies don't overwrite any trunk paths that
2204 # were also changed in this commit, then we can do the copies in
2205 # the same revision, because they won't cover changes that don't
2206 # appear anywhere/anywhen else. However, if some of the trunk dst
2207 # paths *did* change in this commit, then immediately copying the
2208 # branch changes would lose those trunk mods forever. So in this
2209 # case, we need to do at least that copy in its own revision. And
2210 # for simplicity's sake, if we're creating the new revision for
2211 # even one file, then we just do all such copies together in the
2212 # new revision.
2214 # Doesn't that sound nice?
2216 # Unfortunately, Subversion doesn't support copies with sources
2217 # in the current txn. All copies must be based in committed
2218 # revisions. Therefore, we generate the above-described new
2219 # revision unconditionally.
2221 # This is a list of c_revs, and a c_rev is appended for each
2222 # default branch commit that will need to be copied to trunk (or
2223 # deleted from trunk) in some generated revision following the
2224 # "regular" revision.
2225 self.default_branch_cvs_revisions = [ ]
2227 def __cmp__(self, other):
2228 # Commits should be sorted by t_max. If both self and other have
2229 # the same t_max, break the tie using t_min, and lastly, digest.
2230 # If all those are equal, then compare based on ids, to ensure
2231 # that no two instances compare equal.
2232 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
2233 or cmp(self.digest, other.digest) or cmp(id(self), id(other)))
2235 def __hash__(self):
2236 return id(self)
2238 def has_file(self, fname):
2239 return self.files.has_key(fname)
2241 def revisions(self):
2242 return self.changes + self.deletes
2244 def opens_symbolic_name(self, name):
2245 """Returns true if any CVSRevision in this commit is on a tag or a
2246 branch or is the origin of a tag or branch."""
2247 for c_rev in self.revisions():
2248 if c_rev.opens_symbolic_name(name):
2249 return 1
2250 return 0
2252 def add_revision(self, c_rev):
2253 # Record the time range of this commit.
2255 # ### ISSUE: It's possible, though unlikely, that the time range
2256 # of a commit could get gradually expanded to be arbitrarily
2257 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
2258 # problem, and anyway deciding where to break it up would be a
2259 # judgement call. For now, we just print a warning in commit() if
2260 # this happens.
2261 if c_rev.timestamp < self.t_min:
2262 self.t_min = c_rev.timestamp
2263 if c_rev.timestamp > self.t_max:
2264 self.t_max = c_rev.timestamp
2266 if c_rev.op == OP_DELETE:
2267 self.deletes.append(c_rev)
2268 else:
2269 # OP_CHANGE or OP_ADD
2270 self.changes.append(c_rev)
2272 self.files[c_rev.fname] = 1
2274 def _pre_commit(self):
2275 """Generates any SVNCommits that must exist before the main
2276 commit."""
2278 # There may be multiple c_revs in this commit that would cause
2279 # branch B to be filled, but we only want to fill B once. On the
2280 # other hand, there might be multiple branches committed on in
2281 # this commit. Whatever the case, we should count exactly one
2282 # commit per branch, because we only fill a branch once per
2283 # CVSCommit. This list tracks which branches we've already
2284 # counted.
2285 accounted_for_sym_names = [ ]
2287 def fill_needed(c_rev, pm):
2288 """Return 1 if this is the first commit on a new branch (for
2289 this file) and we need to fill the branch; else return 0
2290 (meaning that some other file's first commit on the branch has
2291 already done the fill for us).
2293 If C_REV.op is OP_ADD, only return 1 if the branch that this
2294 commit is on has no last filled revision.
2296 PM is a PersistenceManager to query.
2299 # Different '.' counts indicate that c_rev is now on a different
2300 # line of development (and may need a fill)
2301 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
2302 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
2303 # It should be the case that when we have a file F that
2304 # is added on branch B (thus, F on trunk is in state
2305 # 'dead'), we generate an SVNCommit to fill B iff the branch
2306 # has never been filled before.
2308 # If this c_rev.op == OP_ADD, *and* the branch has never
2309 # been filled before, then fill it now. Otherwise, no need to
2310 # fill it.
2311 if c_rev.op == OP_ADD:
2312 if pm.last_filled.get(c_rev.branch_name, None) is None:
2313 return 1
2314 elif c_rev.op == OP_CHANGE:
2315 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
2316 return 1
2317 elif c_rev.op == OP_DELETE:
2318 if pm.last_filled.get(c_rev.branch_name, None) is None:
2319 return 1
2320 return 0
2322 for c_rev in self.changes + self.deletes:
2323 # If a commit is on a branch, we must ensure that the branch
2324 # path being committed exists (in HEAD of the Subversion
2325 # repository). If it doesn't exist, we will need to fill the
2326 # branch. After the fill, the path on which we're committing
2327 # will exist.
2328 if c_rev.branch_name \
2329 and c_rev.branch_name not in accounted_for_sym_names \
2330 and c_rev.branch_name not in self.done_symbols \
2331 and fill_needed(c_rev, Ctx()._persistence_manager):
2332 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
2333 % c_rev.branch_name)
2334 svn_commit.set_symbolic_name(c_rev.branch_name)
2335 self.secondary_commits.append(svn_commit)
2336 accounted_for_sym_names.append(c_rev.branch_name)
2338 def _commit(self):
2339 """Generates the primary SVNCommit that corresponds to this
2340 CVSCommit."""
2341 # Generate an SVNCommit unconditionally. Even if the only change
2342 # in this CVSCommit is a deletion of an already-deleted file (that
2343 # is, a CVS revision in state 'dead' whose predecessor was also in
2344 # state 'dead'), the conversion will still generate a Subversion
2345 # revision containing the log message for the second dead
2346 # revision, because we don't want to lose that information.
2347 svn_commit = SVNCommit("commit")
2348 self.motivating_commit = svn_commit
2350 for c_rev in self.changes:
2351 svn_commit.add_revision(c_rev)
2352 # Only make a change if we need to. When 1.1.1.1 has an empty
2353 # deltatext, the explanation is almost always that we're looking
2354 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
2355 # such imports, CVS creates an RCS file where 1.1 has the
2356 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
2357 # content as 1.1. There's no reason to reflect this non-change
2358 # in the repository, so we want to do nothing in this case. (If
2359 # we were really paranoid, we could make sure 1.1's log message
2360 # is the CVS-generated "Initial revision\n", but I think the
2361 # conditions below are strict enough.)
2362 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
2363 and (c_rev.rev == "1.1.1.1")):
2364 if c_rev.is_default_branch_revision():
2365 self.default_branch_cvs_revisions.append(c_rev)
2367 for c_rev in self.deletes:
2368 # When a file is added on a branch, CVS not only adds the file
2369 # on the branch, but generates a trunk revision (typically
2370 # 1.1) for that file in state 'dead'. We only want to add
2371 # this revision if the log message is not the standard cvs
2372 # fabricated log message.
2373 if c_rev.prev_rev is None:
2374 # c_rev.branches may be empty if the originating branch
2375 # has been excluded.
2376 if not c_rev.branches:
2377 continue
2378 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2379 % (c_rev.filename(),
2380 c_rev.branches[0]))
2381 author, log_msg = \
2382 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2383 if log_msg == cvs_generated_msg:
2384 continue
2386 svn_commit.add_revision(c_rev)
2387 if c_rev.is_default_branch_revision():
2388 self.default_branch_cvs_revisions.append(c_rev)
2390 # There is a slight chance that we didn't actually register any
2391 # CVSRevisions with our SVNCommit (see loop over self.deletes
2392 # above), so if we have no CVSRevisions, we don't flush the
2393 # svn_commit to disk and roll back our revnum.
2394 if len(svn_commit.cvs_revs) > 0:
2395 svn_commit.flush()
2396 else:
2397 # We will not be flushing this SVNCommit, so rollback the
2398 # SVNCommit revision counter.
2399 SVNCommit.revnum -= 1
2401 if not Ctx().trunk_only:
2402 for c_rev in self.revisions():
2403 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2405 def _post_commit(self):
2406 """Generates any SVNCommits that we can perform now that _commit
2407 has happened. That is, handle non-trunk default branches.
2408 Sometimes an RCS file has a non-trunk default branch, so a commit
2409 on that default branch would be visible in a default CVS checkout
2410 of HEAD. If we don't copy that commit over to Subversion's trunk,
2411 then there will be no Subversion tree which corresponds to that
2412 CVS checkout. Of course, in order to copy the path over, we may
2413 first need to delete the existing trunk there. """
2415 # Only generate a commit if we have default branch revs
2416 if len(self.default_branch_cvs_revisions):
2417 # Generate an SVNCommit for all of our default branch c_revs.
2418 svn_commit = SVNCommit("post-commit default branch(es)")
2419 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2420 for c_rev in self.default_branch_cvs_revisions:
2421 svn_commit.add_revision(c_rev)
2422 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2423 svn_commit.revnum)
2424 self.secondary_commits.append(svn_commit)
2426 def process_revisions(self, done_symbols):
2427 """Process all the CVSRevisions that this instance has, creating
2428 one or more SVNCommits in the process. Generate fill SVNCommits
2429 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2430 fills).
2432 Return the primary SVNCommit that corresponds to this CVSCommit.
2433 The returned SVNCommit is the commit that motivated any other
2434 SVNCommits generated in this CVSCommit."""
2435 self.done_symbols = done_symbols
2436 seconds = self.t_max - self.t_min + 1
2438 Log().write(LOG_VERBOSE, '-' * 60)
2439 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2440 if seconds == 1:
2441 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2442 % time.ctime(self.t_max))
2443 else:
2444 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2445 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2446 % (time.ctime(self.t_max), seconds))
2448 if seconds > COMMIT_THRESHOLD + 1:
2449 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2450 % (warning_prefix, COMMIT_THRESHOLD))
2452 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2453 self._commit()
2454 return self.motivating_commit
2456 self._pre_commit()
2457 self._commit()
2458 self._post_commit()
2460 for svn_commit in self.secondary_commits:
2461 svn_commit.set_date(self.motivating_commit.get_date())
2462 svn_commit.flush()
2464 return self.motivating_commit
class SVNCommit:
  """One commit to the Subversion repository.

  An SVNCommit is one of three kinds:

  1. A commit of one or more CVSRevisions (it cannot fill a symbolic
     name).

  2. A commit that creates or fills a symbolic name (it cannot commit
     CVSRevisions).

  3. A commit that updates trunk to reflect the contents of a
     particular branch (this handles RCS default branches)."""

  # Revision number handed to the next SVNCommit that is created
  # without an explicit one.  Counting starts at 2 because
  # SVNRepositoryMirror uses the first commit to create trunk, tags,
  # and branches.
  revnum = 2

  class SVNCommitInternalInconsistencyError(Exception):
    """Raised when an impossible state is found in the SVNCommit
    databases."""
    pass

  def __init__(self, description="", revnum=None, cvs_revs=None):
    """Create an SVNCommit.

    DESCRIPTION is used for debugging output only.  If REVNUM is
    given (and true), the commit takes that revision number;
    otherwise it takes the current value of the class-wide counter
    SVNCommit.revnum, which is then incremented.  CVS_REVS, if given,
    must be the exact set of CVSRevisions for REVNUM.

    It is an error to pass CVS_REVS without REVNUM, but REVNUM may be
    passed without CVS_REVS; revisions can then be added one at a
    time with add_revision()."""
    self._description = description

    # Revprop metadata.  These start out as placeholders; at least
    # the log message and the date should have been replaced by the
    # time they are used.
    #
    # They are private because they have to be handed out encoded in
    # UTF8 while callers are not required to supply them in UTF8.
    # They are therefore written through the set_*() methods and read
    # back, as a dictionary, through get_revprops().
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0 # Latest date seen so far.

    self.cvs_revs = cvs_revs or []

    if not revnum:
      # No explicit number: claim the next one from the counter.
      revnum = SVNCommit.revnum
      SVNCommit.revnum += 1
    self.revnum = revnum

    # The (uncleaned) symbolic name filled by this commit, if any.
    self.symbolic_name = None

    # For a default branch synchronization commit, the subversion
    # revision number of the *primary* commit in which the default
    # branch changes actually happened; None for any other commit.
    #
    # Several synchronization commits may refer to the same
    # motivating revision number, and one synchronization commit may
    # contain CVSRevisions from several different default branches.
    self.motivating_revnum = None

    # True only if this commit fills a symbolic name that is a tag;
    # None in every other case.
    self.is_tag = None

  def set_symbolic_name(self, symbolic_name):
    "Record SYMBOLIC_NAME as the symbolic name filled by this commit."
    self.symbolic_name = symbolic_name

  def set_motivating_revnum(self, revnum):
    "Record REVNUM as this commit's motivating revision number."
    self.motivating_revnum = revnum

  def set_author(self, author):
    """Set the author to AUTHOR (a locally-encoded string).  There is
    no other way to set an SVNCommit's author."""
    self._author = author

  def set_log_msg(self, msg):
    """Set the log message to MSG (a locally-encoded string).  There
    is no other way to set an SVNCommit's log message."""
    self._log_msg = msg

  def set_date(self, date):
    """Set the commit date to DATE (an integer).

    add_revision() raises the stored date whenever the added
    CVSRevision has a later timestamp, so calling this may be
    unnecessary, and a later add_revision() may replace the value
    set here."""
    self._max_date = date

  def get_date(self):
    """Return the commit date as an integer."""
    return self._max_date

  def get_revprops(self):
    """Return the Subversion revprops for this commit as a dictionary
    with the keys 'svn:author', 'svn:log' and 'svn:date'.

    The author and log message are converted with to_utf8().  If that
    raises UnicodeError, warnings are logged and the unconverted
    values are returned instead."""
    date = format_date(self._max_date)
    try:
      if self._author is None:
        utf8_author = None
      else:
        utf8_author = to_utf8(self._author)
      utf8_log = to_utf8(self.get_log_msg())
    except UnicodeError:
      Log().write(LOG_WARN, '%s: problem encoding author or log message:'
                  % warning_prefix)
      Log().write(LOG_WARN, " author: '%s'" % self._author)
      Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
      Log().write(LOG_WARN, " date: '%s'" % date)
      Log().write(LOG_WARN,
                  "(subversion rev %s) Related files:" % self.revnum)
      for c_rev in self.cvs_revs:
        Log().write(LOG_WARN, " ", c_rev.fname)

      Log().write(LOG_WARN, "Consider rerunning with one or more ",
                  "'--encoding' parameters.\n")
      # Falling back to the original data (of unknown encoding) is
      # better than either quitting or recording nothing at all.
      return { 'svn:author' : self._author,
               'svn:log' : self.get_log_msg(),
               'svn:date' : date }

    return { 'svn:author' : utf8_author,
             'svn:log' : utf8_log,
             'svn:date' : date }

  def add_revision(self, cvs_rev):
    """Append CVS_REV to this commit and move the commit date forward
    to CVS_REV's timestamp if that is later than the current date."""
    self.cvs_revs.append(cvs_rev)
    if self._max_date < cvs_rev.timestamp:
      self._max_date = cvs_rev.timestamp

  def flush(self):
    """Log the creation of this revision and hand the commit to the
    persistence manager."""
    message = ("Creating Subversion r%d (%s)"
               % (self.revnum, self._description))
    Log().write(LOG_NORMAL, message)
    manager = Ctx()._persistence_manager
    manager.put_svn_commit(self.revnum,
                           self.cvs_revs,
                           self._max_date,
                           self.symbolic_name,
                           self.motivating_revnum)

  def __str__(self):
    """Return a human-readable description of this SVNCommit.  The
    text is not meant to be machine-parseable (although nothing stops
    you from trying!)"""
    lines = ["SVNCommit #: " + str(self.revnum)]
    if self.symbolic_name:
      lines.append(" symbolic name: "
                   + _clean_symbolic_name(self.symbolic_name))
    else:
      lines.append(" NO symbolic name")
    lines.append(" debug description: " + self._description)
    lines.append(" cvs_revs:")
    for c_rev in self.cvs_revs:
      lines.append(" " + c_rev.unique_key())
    return "\n".join(lines) + "\n"

  def get_log_msg(self):
    """Return the stored log message for a primary commit, or the
    appropriate manufactured log message for a secondary commit."""
    if self.symbolic_name is not None:
      return self._log_msg_for_symbolic_name_commit()
    if self.motivating_revnum is not None:
      return self._log_msg_for_default_branch_commit()
    return self._log_msg

  def _log_msg_for_symbolic_name_commit(self):
    """Build the log message for a manufactured commit that fills
    self.symbolic_name.  The message speaks of a tag if self.is_tag
    is true and of a branch otherwise."""
    if self.is_tag:
      kind = 'tag'
    else:
      kind = 'branch'

    # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
    cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
    if len(cleaned_symbolic_name) >= 13:
      separator = '\n'
    else:
      separator = ' '

    return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
           % (kind, separator, cleaned_symbolic_name)

  def _log_msg_for_default_branch_commit(self):
    """Build the log message for a manufactured commit that
    synchronizes a non-trunk default branch with trunk."""
    return ('This commit was generated by cvs2svn to compensate for '
            'changes in r%d,\n'
            'which included commits to RCS files with non-trunk default '
            'branches.\n' % self.motivating_revnum)
class CVSRevisionAggregator:
  """Groups CVSRevisions into CVSCommits, each of which represents at
  least one SVNCommit."""

  def __init__(self):
    """Open the databases that are read during aggregation and install
    the symbolings logger, persistence manager and default branches
    database on Ctx()."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # A map { key : CVSCommit } of the CVS commits that are currently
    # being accumulated.  While a CVSCommit still accepts further
    # CVSRevisions its key is CVSRevision.digest.  Once it has been
    # closed (because an inbound commit wanted to affect a file that
    # was already in the CVSCommit), its key is CVSRevision.digest
    # followed by some number of '-' characters.
    self.cvs_commits = {}

    # Commits that are ready to be processed.
    self.ready_queue = [ ]

    # A map { symbol : None } of symbolic names whose last source
    # CVSRevision has already been processed but which have not been
    # closed yet.
    self.pending_symbols = {}

    # Closed symbols: the last CVSRevision that is a source for the
    # symbol has been seen, its final fill has been done, and it never
    # needs to be filled again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit object.  It is
    # kept only for its date, which is given to the SVNCommits created
    # in self._attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def _extract_ready_commits(self, timestamp):
    """Move every active commit that has expired by TIMESTAMP from
    self.cvs_commits to self.ready_queue."""
    expired_keys = [ key
                     for key, commit in self.cvs_commits.items()
                     if commit.t_max + COMMIT_THRESHOLD < timestamp ]
    for key in expired_keys:
      self.ready_queue.append(self.cvs_commits[key])
      del self.cvs_commits[key]

  def _commit_ready_commits(self):
    """Sort self.ready_queue, then process its commits one by one,
    trying to close pending symbols after each of them."""
    self.ready_queue.sort()
    while self.ready_queue:
      cvs_commit = self.ready_queue.pop(0)
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      self._attempt_to_commit_symbols()

  def process_revision(self, c_rev):
    """Add C_REV to the commit it belongs to, after processing every
    accumulated commit that C_REV can no longer be a part of."""
    # Each time a new revision arrives, check whether any of the
    # accumulating commits have become ready for processing.
    self._extract_ready_commits(c_rev.timestamp)

    for digest_key, cvs_commit in self.cvs_commits.items():
      if not cvs_commit.has_file(c_rev.fname):
        continue
      # The inbound revision is on the same file as this pending
      # commit, so close the pending commit to further changes by
      # moving it to a key that no digest can match.  It is not
      # flushed, because other pending commits may be dated before it.
      # ### ISSUE: the has_file() check above is not optimal.
      # It does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits.  The correct fix would require tracking the
      # dependencies between change sets and committing them in
      # proper order.
      closed_key = digest_key + '-'
      # Keep appending until the key is not yet used in the dict.
      while closed_key in self.cvs_commits:
        closed_key += '-'
      self.cvs_commits[closed_key] = cvs_commit
      del self.cvs_commits[digest_key]

    # File the revision under its digest, starting a new CVSCommit if
    # no open one exists for that digest.
    cvs_commit = self.cvs_commits.get(c_rev.digest)
    if cvs_commit is None:
      author, log = self.metadata_db[c_rev.digest]
      cvs_commit = CVSCommit(c_rev.digest, author, log)
      self.cvs_commits[c_rev.digest] = cvs_commit
    cvs_commit.add_revision(c_rev)

    # Whatever is in self.ready_queue now has to be processed, because
    # this latest revision could not possibly be part of any of it.
    self._commit_ready_commits()

    self._add_pending_symbols(c_rev)

  def flush(self):
    """Commit everything that is left in self.cvs_commits, then tell
    the SymbolingsLogger that all commits are done."""
    # 1L << 32, spelled with long() so that the shift is done on a
    # long integer.
    self._extract_ready_commits(long(1) << 32)
    self._commit_ready_commits()

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def _add_pending_symbols(self, c_rev):
    """Add to self.pending_symbols every symbol for which C_REV is
    the last CVSRevision.

    Unless this is a trunk-only conversion, look up the symbolic names
    that C_REV is the last *source* CVSRevision for and add them to
    those left over from earlier passes through the aggregator."""
    if Ctx().trunk_only:
      return
    for sym in self.last_revs_db.get(c_rev.unique_key(), []):
      self.pending_symbols[sym] = None

  def _attempt_to_commit_symbols(self):
    """Generate one SVNCommit for each symbol in self.pending_symbols
    that has no opening CVSRevision in either self.ready_queue or
    self.cvs_commits.values()."""
    pending_commits = self.cvs_commits.values() + self.ready_queue

    def still_opened(sym):
      # True if some pending commit holds a *source* CVSRevision for
      # SYM.
      for cvs_commit in pending_commits:
        if cvs_commit.opens_symbolic_name(sym):
          return 1
      return 0

    closeable_symbols = [ sym
                          for sym in self.pending_symbols
                          if not still_opened(sym) ]

    # Sort so that symbols are always processed in the same order, no
    # matter in which order the dict hands them back.  This makes the
    # tests give the same results on all platforms.
    closeable_symbols.sort()
    for sym in closeable_symbols:
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  It does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""

  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading and copy the
    offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets database is really small and is read and written a
    # fair bit, so it is kept in an in-memory dictionary.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for key in offsets_db:
      self.offsets[key] = offsets_db[key]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME that
    covers its openings and closings up to and including SVN_REVNUM.

    If an opening rev is encountered in this fill but the
    corresponding closing rev takes place later than SVN_REVNUM, the
    closing is not passed to SymbolicNameFillingGuide in this fill
    (and is discarded when it is encountered in a later fill).  That
    is perfectly fine, because a valid fill can still be done without
    the closing--we always try to fill what we can as soon as we
    can."""
    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # A branch may start with a file that was added on a branch, in
    # which case there is no recorded offset and nothing to read.
    if symbolic_name in self.offsets:
      # Continue reading where the records for symbolic_name start
      # (or where the previous fill stopped).
      self.symbolings.seek(self.offsets[symbolic_name])

      while True:
        line_start = self.symbolings.tell()
        line = self.symbolings.readline().rstrip()
        if not line:
          break
        name, revnum, kind, branch_name, cvs_path = line.split(" ", 4)
        if branch_name == '*':
          svn_path = Ctx().project.make_trunk_path(cvs_path)
        else:
          svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
        revnum = int(revnum)
        if revnum > svn_revnum or name != symbolic_name:
          break
        openings_closings_map.register(svn_path, revnum, kind)

      # If anything that was read has been used, remember the start
      # of the last line read so that the next fill resumes there.
      if not openings_closings_map.is_empty():
        self.offsets[symbolic_name] = line_start

    return SymbolicNameFillingGuide(openings_closings_map)
class SvnRevisionRange:
  """The range of subversion revision numbers from which a path can
  be copied.

  self.opening_revnum is the number of the earliest such revision and
  self.closing_revnum is one higher than the number of the last such
  revision.  self.closing_revnum is None as long as no closing has
  been registered."""

  def __init__(self, opening_revnum):
    """Start a range at OPENING_REVNUM with no closing registered."""
    self.opening_revnum = opening_revnum
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing unless one is already
    registered."""
    # With a non-trunk default branch there may be several closings;
    # only the first one encountered counts.
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:CLOSING]', or '[OPENING:]' if there is no
    closing."""
    if self.closing_revnum is not None:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
    return '[%d:]' % (self.opening_revnum,)
class OpeningsClosingsMap:
  """A dictionary of openings and closings for a symbolic name in the
  current SVNCommit.

  The user should call self.register() for the openings and closings
  and then self.get_things() to retrieve the accumulated
  (svn_path, SvnRevisionRange) pairs."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""
    self.name = symbolic_name

    # A dictionary of SVN_PATHS to SvnRevisionRange objects.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.

    SVN_PATH is the source path that needs to be copied into
    self.name, and SVN_REVNUM is either the first svn revision number
    that can be copied from (the opening) or the last (not inclusive)
    svn revision number that can be copied from (the closing).  TYPE
    says whether this is an OPENING or a CLOSING; any other value is
    ignored.

    An opening always starts a new SvnRevisionRange for SVN_PATH.  A
    closing has an effect only if the opening for the same SVN_PATH
    was registered before it; otherwise it is discarded.

    It is not necessary to register a closing for every opening."""
    if type == OPENING:
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return
    if type != CLOSING:
      return
    revision_range = self.things.get(svn_path)
    if revision_range is not None:
      revision_range.add_closing(svn_revnum)

  def is_empty(self):
    """Return true if no openings or closings have been accumulated,
    false otherwise."""
    return not len(self.things)

  def get_things(self):
    """Return a list of (svn_path, SvnRevisionRange) tuples for all
    svn_paths with registered openings or closings."""
    return self.things.items()
class SymbolicNameFillingGuide:
  """A node tree representing the source paths to be copied to fill
  self.name in the current SVNCommit.

  self._node_tree is the root of the directory tree, in the form {
  path_component : subnode }.  Leaf nodes are instances of
  SvnRevisionRange.  Intermediate (directory) nodes are dictionaries
  mapping relative names to subnodes.

  By walking self._node_tree and calling self.get_best_revnum() on
  each node, the caller can determine what subversion revision number
  to copy the path corresponding to that node from.  self._node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, openings_closings_map):
    """Build the node tree from the openings and closings stored in
    OPENINGS_CLOSINGS_MAP, whose name becomes self.name."""
    self.name = openings_closings_map.name

    # The dictionary that holds our node tree as a map { node_key :
    # node }.
    self._node_tree = { }

    for svn_path, svn_revision_range in openings_closings_map.get_things():
      (head, tail) = _path_split(svn_path)
      self._get_node_for_path(head)[tail] = svn_revision_range

  def _get_node_for_path(self, svn_path):
    """Return the directory node (a dictionary) for SVN_PATH, creating
    it and any missing intermediate nodes as needed."""
    # Walk down the path, one node at a time.
    node = self._node_tree
    for component in svn_path.split('/'):
      if component in node:
        node = node[component]
      else:
        child = {}
        node[component] = child
        node = child

    return node

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.

    Return a tuple (REVNUM, MAX_SCORE), where MAX_SCORE is the number
    of SvnRevisionRanges at or under NODE that include REVNUM.
    PREFERRED_REVNUM (which may be None) is chosen if it scores as
    high as the best revision; see best_rev() below.

    Raise FatalError if no revision to copy from can be found."""

    def score_revisions(svn_revision_ranges):
      """Return a list of revisions and scores based on
      SVN_REVISION_RANGES.  The returned list looks like:

         [(REV1 SCORE1), (REV2 SCORE2), ...]

      where the tuples are sorted by revision number.
      SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

      For each svn revision that appears as either an opening_revnum
      or closing_revnum for one of the svn_revision_ranges, output a
      tuple indicating how many of the SvnRevisionRanges include that
      svn_revision in its range.  A score thus indicates that copying
      the corresponding revision (or any following revision up to the
      next revision in the list) of the object in question would yield
      that many correct paths at or underneath the object.  There may
      be other paths underneath it which are not correct and would
      need to be deleted or recopied; those can only be detected by
      descending and examining their scores.

      If SVN_REVISION_RANGES is empty, return the empty list."""
      openings = [ x.opening_revnum
                   for x in svn_revision_ranges ]
      closings = [ x.closing_revnum
                   for x in svn_revision_ranges
                   if x.closing_revnum is not None ]

      # First look for easy out.
      if not openings:
        return []

      # Create a list with both openings (which increment the total)
      # and closings (which decrement the total):
      things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
      # Sort by revision number:
      things.sort()
      # Initialize output list with zeroth element of things.  This
      # element must exist, because it was already verified that
      # openings is not empty.
      scores = [ things[0] ]
      total = scores[-1][1]
      for (rev, change) in things[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      return scores

    def best_rev(scores, preferred_rev):
      """Return a tuple (REV, MAX_SCORE) for the revision with the
      highest score from SCORES, a list returned by score_revisions().
      When the maximum score is shared by multiple revisions, the
      oldest revision is selected, unless PREFERRED_REV is one of the
      possibilities, in which case, it is selected."""
      max_score = 0
      preferred_rev_score = -1
      rev = SVN_INVALID_REVNUM
      if preferred_rev is None:
        # Comparison order of different types is arbitrary.  Do not
        # expect None to compare less than int values below.
        preferred_rev = SVN_INVALID_REVNUM
      for revnum, count in scores:
        if count > max_score:
          max_score = count
          rev = revnum
        if revnum <= preferred_rev:
          # The score that applies at PREFERRED_REV is that of the
          # last listed revision not after it.
          preferred_rev_score = count
      if preferred_rev_score == max_score:
        rev = preferred_rev
      return rev, max_score

    # Aggregate openings and closings from the rev tree
    svn_revision_ranges = self._list_revnums(node)

    # Score the lists
    scores = score_revisions(svn_revision_ranges)

    revnum, max_score = best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # The symbol's name lives on the instance; a bare 'name' is not
      # defined in this scope and would raise NameError instead.
      raise FatalError("failed to find a revision "
                       "to copy from when copying %s" % self.name)
    return revnum, max_score

  def _list_revnums(self, node):
    """Return a list of all the SvnRevisionRanges (including
    duplicates) for all leaf nodes at and under NODE."""
    if isinstance(node, SvnRevisionRange):
      # It is a leaf node.
      return [ node ]

    # It is an intermediate node.
    revnums = []
    for subnode in node.values():
      revnums.extend(self._list_revnums(subnode))
    return revnums

  def get_sources(self):
    """Return the list of sources for this symbolic name, as
    FillSource objects.

    The Project instance defines what are legitimate sources.  Raise
    FatalError if a change occurred outside of the source
    directories."""
    return self._get_sub_sources('', self._node_tree)

  def _get_sub_sources(self, start_svn_path, start_node):
    """Return the list of sources for this symbolic name, starting the
    search at path START_SVN_PATH, which is node START_NODE.  This is
    a helper method, called by get_sources() (see)."""
    project = Ctx().project
    if isinstance(start_node, SvnRevisionRange):
      # This implies that a change was found outside of the
      # legitimate sources.  This should never happen, so report it
      # explicitly; a bare 'raise' has no exception to re-raise here.
      raise FatalError("internal error: change to '%s' is outside of "
                       "the legitimate sources for %s"
                       % (start_svn_path, self.name))
    elif project.is_source(start_svn_path):
      # This is a legitimate source.  Add it to list.
      return [ FillSource(start_svn_path, start_node) ]
    else:
      # This is a directory that is not a legitimate source.  (That's
      # OK because it hasn't changed directly.)  But directories
      # within it have been changed, so we need to search recursively
      # to find their enclosing sources.
      sources = []
      for entry, node in start_node.items():
        svn_path = _path_join(start_svn_path, entry)
        sources.extend(self._get_sub_sources(svn_path, node))

      return sources

  def print_node_tree(self, node, name='/', indent_depth=0):
    """For debugging purposes.  Write to standard output all nodes in
    the tree that are rooted at NODE, which is labelled NAME.
    INDENT_DEPTH is used to indent the output of recursive calls."""
    if not indent_depth:
      sys.stdout.write("TREE %s\n" % ("=" * 75))
    padding = " " * (indent_depth * 2)
    if isinstance(node, SvnRevisionRange):
      sys.stdout.write("TREE: %s %s %s\n" % (padding, name, node))
    else:
      sys.stdout.write("TREE: %s %s\n" % (padding, name))
      for key, value in node.items():
        self.print_node_tree(value, key, (indent_depth + 1))
class FillSource:
  """Representation of a fill source used by the symbol filler in
  SVNRepositoryMirror."""

  def __init__(self, prefix, node):
    """Create a fill source for PREFIX and NODE that has neither a
    score nor a revision number yet."""
    self.prefix = prefix
    self.node = node
    self.score = None
    self.revnum = None

  def set_score(self, score, revnum):
    """Store SCORE and REVNUM on this fill source."""
    self.score = score
    self.revnum = revnum

  def __cmp__(self, other):
    """Compare by score, higher scores first, so that sorting puts
    FillSources in descending score order.  Raise TypeError if either
    side has no score yet."""
    if self.score is None or other.score is None:
      raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose: a higher score sorts first.
    return cmp(other.score, self.score)
3184 class SVNRepositoryMirror:
3185 """Mirror a Subversion Repository as it is constructed, one
3186 SVNCommit at a time. The mirror is skeletal; it does not contain
3187 file contents. The creation of a dumpfile or Subversion repository
3188 is handled by delegates. See self.add_delegate method for how to
3189 set delegates.
3191 The structure of the repository is kept in two databases and one
3192 hash. The revs_db database maps revisions to root node keys, and
3193 the nodes_db database maps node keys to nodes. A node is a hash
3194 from directory names to keys. Both the revs_db and the nodes_db are
3195 stored on disk and each access is expensive.
3197 The nodes_db database only has the keys for old revisions. The
3198 revision that is being constructed is kept in memory in the new_nodes
3199 hash which is cheap to access.
3201 You must invoke _start_commit between SVNCommits.
3203 *** WARNING *** All path arguments to methods in this class CANNOT
3204 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
  """Raised on an attempt to add a path to the repository mirror when
  that path already exists in the youngest revision of the
  repository."""
class SVNRepositoryMirrorUnexpectedOperationError(Exception):
  """Raised when a CVSRevision turns out to have an unexpected
  operation (OP) value."""
class SVNRepositoryMirrorInvalidFillOperationError(Exception):
  """Raised when an empty SymbolicNameFillingGuide is returned during
  a fill where the branch in question already exists."""
def __init__(self):
  """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
  self.delegates = [ ]

  # Start at revision 0 without a root node.  The root node is
  # created by _open_writable_root_node.
  self.youngest = 0
  self.new_root_key = None
  self.new_nodes = { }

  # Counterpart of the 'revisions' table in a Subversion fs.
  self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

  # Counterpart of the 'nodes' table in a Subversion fs.  (No
  # 'representations' or 'strings' table is needed, because only
  # metadata is tracked, not file contents.)
  self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

  if not Ctx().trunk_only:
    ###PERF IMPT: Suck this into memory.
    self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
  """Initialize the repository by creating the trunk directory and,
  unless the conversion is trunk-only, the branches and tags
  directories.  DATE becomes the date of the initial commit.  Call
  this only after all delegates have been added to the repository
  mirror."""
  # A 'fake' SVNCommit gives us the revprops magic of that class.
  initial_commit = SVNCommit("Initialization", 1)
  initial_commit.set_date(date)
  initial_commit.set_log_msg("New repository initialized by cvs2svn.")

  self._start_commit(initial_commit)
  self._mkdir(Ctx().project.trunk_path)
  if Ctx().trunk_only:
    return
  self._mkdir(Ctx().project.branches_path)
  self._mkdir(Ctx().project.tags_path)
def _start_commit(self, svn_commit):
  """Start a new commit for SVN_COMMIT, first ending the previous
  commit if there was one, and notify the delegates."""
  previous_commit_open = self.youngest > 0
  if previous_commit_open:
    self._end_commit()

  # Nothing has been written in the new revision yet.
  self.youngest = svn_commit.revnum
  self.new_root_key = None
  self.new_nodes = { }

  self._invoke_delegates('start_commit', svn_commit)
3275 def _end_commit(self):
3276 """Called at the end of each commit. This method copies the newly
3277 created nodes to the on-disk nodes db."""
3278 if self.new_root_key is None:
3279 # No changes were made in this revision, so we make the root node
3280 # of the new revision be the same as the last one.
3281 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3282 else:
3283 self.revs_db[str(self.youngest)] = self.new_root_key
3284 # Copy the new nodes to the nodes_db
3285 for key, value in self.new_nodes.items():
3286 self.nodes_db[key] = value
3288 def _get_node(self, key):
3289 """Returns the node contents for KEY which may refer to either
3290 self.nodes_db or self.new_nodes."""
3291 if self.new_nodes.has_key(key):
3292 return self.new_nodes[key]
3293 else:
3294 return self.nodes_db[key]
3296 def _open_readonly_node(self, path, revnum):
3297 """Open a readonly node for PATH at revision REVNUM. Returns the
3298 node key and node contents if the path exists, else (None, None)."""
3299 # Get the root key
3300 if revnum == self.youngest:
3301 if self.new_root_key is None:
3302 node_key = self.revs_db[str(self.youngest - 1)]
3303 else:
3304 node_key = self.new_root_key
3305 else:
3306 node_key = self.revs_db[str(revnum)]
3308 for component in path.split('/'):
3309 node_contents = self._get_node(node_key)
3310 node_key = node_contents.get(component, None)
3311 if node_key is None:
3312 return None
3314 return node_key
def _open_writable_root_node(self):
  """Return (key, contents) of a writable root node.

  If the current revision already has a writable root node, return
  that one immediately.  Otherwise create a new one, starting from
  the contents of the root node of the previous revision (or from an
  empty node if self.youngest is less than 2)."""
  root_key = self.new_root_key
  if root_key is not None:
    return root_key, self.new_nodes[root_key]

  if self.youngest >= 2:
    previous_root_key = self.revs_db[str(self.youngest - 1)]
    new_contents = self.nodes_db[previous_root_key]
  else:
    new_contents = { }
  self.new_root_key = gen_key()
  self.new_nodes = { self.new_root_key: new_contents }

  return self.new_root_key, new_contents
def _open_writable_node(self, svn_path, create):
  """Return (key, contents) of a modifiable node for SVN_PATH.

  Every node on the way from the root to SVN_PATH that is not yet in
  self.new_nodes is fetched from self.nodes_db and registered in
  self.new_nodes under a fresh key.  If a path component is missing
  and CREATE is true, it is created as an empty node, and a 'mkdir'
  is sent to the delegates for every created component except the
  last one.  If a component is missing and CREATE is false,
  (None, None) is returned."""
  parent_key, parent_contents = self._open_writable_root_node()

  components = svn_path.split('/')
  last_index = len(components) - 1
  path_so_far = None
  # Descend from the root, one path component at a time.
  for i in range(len(components)):
    component = components[i]
    path_so_far = _path_join(path_so_far, component)
    node_key = parent_contents.get(component, None)
    if node_key is None:
      if not create:
        # The component is missing and we may not create it: give up.
        return None, None
      # The component is missing, so create it.
      node_contents = { }
      node_key = gen_key()
      self.new_nodes[node_key] = node_contents
      parent_contents[component] = node_key
      if i < last_index:
        self._invoke_delegates('mkdir', path_so_far)
    else:
      # The component exists; make sure that it is writable.
      node_contents = self.new_nodes.get(node_key, None)
      if node_contents is None:
        # Fetch the node from the nodes_db and re-register it under a
        # new key, so that the stored node is left alone.
        node_contents = self.nodes_db[node_key]
        node_key = gen_key()
        self.new_nodes[node_key] = node_contents
        parent_contents[component] = node_key

    parent_key = node_key
    parent_contents = node_contents

  return node_key, node_contents
3371 def _path_exists(self, path):
3372 """If PATH exists in self.youngest of the svn repository mirror,
3373 return true, else return None.
3375 PATH must not start with '/'."""
3376 return self._open_readonly_node(path, self.youngest) is not None
3378 def _fast_delete_path(self, parent_path, parent_contents, component):
3379 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3380 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3381 in PARENT_CONTENTS."""
3382 if parent_contents.has_key(component):
3383 del parent_contents[component]
3384 self._invoke_delegates('delete_path',
3385 _path_join(parent_path, component))
def _delete_path(self, svn_path, should_prune=False):
  """Delete SVN_PATH from the tree.  If SHOULD_PRUNE is true, also
  delete every ancestor directory that becomes empty as a result, in
  the way the -P option of 'cvs checkout' does.

  NOTE: Requests to delete the root directory, or any path for which
  Ctx().project.is_unremovable() returns True, are silently ignored,
  whether they were made directly or arise from pruning."""
  if svn_path == '':
    return
  if Ctx().project.is_unremovable(svn_path):
    return

  parent_path, entry = _path_split(svn_path)
  if not parent_path:
    parent_key, parent_contents = self._open_writable_root_node()
  else:
    parent_key, parent_contents = \
        self._open_writable_node(parent_path, False)

  if parent_key is None:
    # The parent directory does not exist, so there is nothing to do.
    return

  self._fast_delete_path(parent_path, parent_contents, entry)
  # Pruning by recursion is O(n^2) in the worst case (n being the
  # depth of SVN_PATH), but that case is probably rare and the
  # constant cost is low.  A further drawback is that one delete is
  # issued per pruned path rather than a single delete for the
  # topmost pruned directory.
  if should_prune and len(parent_contents) == 0:
    self._delete_path(parent_path, True)
3416 def _mkdir(self, path):
3417 """Create PATH in the repository mirror at the youngest revision."""
3418 self._open_writable_node(path, True)
3419 self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
  """Tell the delegates that the svn_path of CVS_REV was changed in
  self.youngest."""
  # The mirror itself only tracks which paths exist.  A change of file
  # contents neither adds nor removes a path, so no node is touched.
  commit_item = SVNCommitItem(cvs_rev, False)
  self._invoke_delegates('change_path', commit_item)
def _add_path(self, cvs_rev):
  """Create the svn_path of CVS_REV (and any missing parent
  directories) in the repository mirror, and send an 'add_path' to
  the delegates."""
  self._open_writable_node(cvs_rev.svn_path, True)
  commit_item = SVNCommitItem(cvs_rev, True)
  self._invoke_delegates('add_path', commit_item)
def _copy_path(self, src_path, dest_path, src_revnum):
  """Copy SRC_PATH as of subversion revision SRC_REVNUM to DEST_PATH.

  In the youngest revision of the repository, the parent of DEST_PATH
  *must* exist, whereas DEST_PATH itself *must not* exist; otherwise
  SVNRepositoryMirrorPathExistsError is raised.

  Return the node key and the contents (a dictionary) of the new node
  at DEST_PATH."""
  # Look up the node that is to be copied.
  src_key = self._open_readonly_node(src_path, src_revnum)
  src_contents = self._get_node(src_key)

  # Open the directory that is to receive the copy.
  dest_parent, dest_basename = _path_split(dest_path)
  dest_parent_contents = self._open_writable_node(dest_parent, False)[1]

  if dest_parent_contents.has_key(dest_basename):
    raise self.SVNRepositoryMirrorPathExistsError(
        "Attempt to add path '%s' to repository mirror " % dest_path
        + "when it already exists in the mirror.")

  # This is a cheap copy: the destination entry simply refers to the
  # source node, so the key and contents of the source are also those
  # of the destination.
  dest_parent_contents[dest_basename] = src_key
  self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

  return src_key, src_contents
def _fill_symbolic_name(self, svn_commit):
  """Perform every copy needed to create as much of the tag or branch
  SVN_COMMIT.symbolic_name as is possible at the current revision of
  the repository mirror.

  When this method returns, the symbolic name is guaranteed to exist
  in the Subversion repository, even if there are no paths under it.
  SVNRepositoryMirrorInvalidFillOperationError is raised if there are
  no sources to copy from and the branch already exists."""
  symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
      svn_commit.symbolic_name, self.youngest)
  sources = symbol_fill.get_sources()

  if sources:
    symbolic_name = svn_commit.symbolic_name
    if self.tags_db.has_key(symbolic_name):
      dest_prefix = Ctx().project.get_tag_path(symbolic_name)
    else:
      dest_prefix = Ctx().project.get_branch_path(symbolic_name)

    dest_key = self._open_writable_node(dest_prefix, False)[0]
    self._fill(symbol_fill, dest_prefix, dest_key, sources)
    return

  # Without sources, we must be dealing with a branch whose first
  # commit is an add (as opposed to a copy).
  dest_path = Ctx().project.get_branch_path(symbol_fill.name)
  if self._path_exists(dest_path):
    raise self.SVNRepositoryMirrorInvalidFillOperationError(
        "Error filling branch '"
        + _clean_symbolic_name(symbol_fill.name) + "'.\n"
        + "Received an empty SymbolicNameFillingGuide and\n"
        + "attempted to create a branch that already exists.")

  # An empty symbol_fill means that the first commit on the branch
  # was to a file added on the branch, and that this is the first fill
  # of that branch.  (This case is covered by test 16.)
  #
  # Create the branch by copying trunk as of the previous revision...
  source_path = Ctx().project.trunk_path
  entries = self._copy_path(source_path, dest_path,
                            svn_commit.revnum - 1)[1]
  # ...and, as the branch is *supposed* to start out empty, delete
  # (without pruning) every entry of the copied directory.
  for entry in entries:
    self._delete_path(dest_path + '/' + entry)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
  """Fill the tag or branch at DEST_PREFIX + PATH with items from
  SOURCES, and recurse into the child items.

  DEST_PREFIX is the prefix of the destination directory, e.g.
  '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
  FillSource classes that are candidates to be copied to the
  destination.  DEST_KEY is the key in self.nodes_db to the
  destination, or None if the destination does not yet exist.

  PATH is the path relative to DEST_PREFIX.  If PATH is None, we
  are at the top level, e.g. '/tags/my_tag'.

  PARENT_SOURCE_PREFIX is the source prefix that was used to copy
  the parent directory, and PREFERRED_REVNUM is an int which is the
  source revision number that the caller (who may have copied the
  destination's parent) used to perform its copy.  If
  PREFERRED_REVNUM is None, then no revision is preferable to any
  other (which probably means that no copies have happened yet).

  PRUNE_OK means that a copy has been made in this recursion, and
  it's safe to prune directories that are not in
  SYMBOL_FILL._node_tree, provided that said directory has a source
  prefix of one of the PARENT_SOURCE_PREFIX.

  PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
  should only be passed in by recursive calls.

  As side effects, SOURCES is sorted in place and every source in it
  has its score and revnum set.  Nothing is returned."""
  # Calculate scores and revnums for all sources
  for source in sources:
    src_revnum, score = symbol_fill.get_best_revnum(source.node,
                                                    preferred_revnum)
    source.set_score(score, src_revnum)

  # Sort the sources in descending score order so that we will make
  # a eventual copy from the source with the highest score.
  # NOTE(review): the ordering is whatever FillSource's comparison
  # defines; that class is not visible here -- confirm that it sorts
  # the highest score first.
  sources.sort()
  copy_source = sources[0]

  src_path = _path_join(copy_source.prefix, path)
  dest_path = _path_join(dest_prefix, path)

  # Figure out if we shall copy to this destination and delete any
  # destination path that is in the way.
  do_copy = 0
  if dest_key is None:
    # The destination does not exist yet, so it has to be copied.
    do_copy = 1
  elif prune_ok and (parent_source_prefix != copy_source.prefix or
                     copy_source.revnum != preferred_revnum):
    # The destination exists, but the best source for it differs (in
    # prefix or revision) from what the parent was copied from.
    # We are about to replace the destination, so we need to remove
    # it before we perform the copy.
    self._delete_path(dest_path)
    do_copy = 1

  if do_copy:
    dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                             copy_source.revnum)
    # From here on a copy has been made, so pruning is allowed for
    # this node and everything below it.
    prune_ok = 1
  else:
    dest_entries = self._get_node(dest_key)

  # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
  # elements and the values are lists of FillSource classes where
  # this path element exists.
  src_entries = {}
  for source in sources:
    # A source whose node is an SvnRevisionRange has no child entries
    # to contribute.
    if isinstance(source.node, SvnRevisionRange):
      continue
    for entry, node in source.node.items():
      src_entries.setdefault(entry, []).append(
        FillSource(source.prefix, node))

  if prune_ok:
    # Delete the entries in DEST_ENTRIES that are not in src_entries.
    delete_list = [ ]
    for entry in dest_entries:
      if not src_entries.has_key(entry):
        delete_list.append(entry)
    if delete_list:
      # The destination node is about to be modified, so make sure
      # that we hold a writable version of it.
      if not self.new_nodes.has_key(dest_key):
        dest_key, dest_entries = self._open_writable_node(dest_path, True)
      # Sort the delete list to get "diffable" dumpfiles.
      delete_list.sort()
      for entry in delete_list:
        self._fast_delete_path(dest_path, dest_entries, entry)

  # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
  src_keys = src_entries.keys()
  src_keys.sort()
  for src_key in src_keys:
    # None here tells the recursive call that the child does not exist
    # in the destination yet.
    next_dest_key = dest_entries.get(src_key, None)
    # sources[0] is copy_source, so the child is told which prefix and
    # revision were chosen for this directory.
    self._fill(symbol_fill, dest_prefix, next_dest_key,
               src_entries[src_key], _path_join(path, src_key),
               copy_source.prefix, sources[0].revnum, prune_ok)
def _synchronize_default_branch(self, svn_commit):
  """Carry the changes that were made on a non-trunk default branch
  over to the trunk of the repository.  CVSCommit._post_commit()
  explains why this is necessary.

  SVNRepositoryMirrorUnexpectedOperationError is raised for a
  CVSRevision whose op is none of OP_ADD, OP_CHANGE and OP_DELETE."""
  for cvs_rev in svn_commit.cvs_revs:
    svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
    if cvs_rev.op == OP_DELETE:
      # The file disappeared from the branch: remove it from trunk.
      self._delete_path(svn_trunk_path)
    elif cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
      # Replace whatever trunk currently holds with a copy of the
      # branch version.
      if self._path_exists(svn_trunk_path):
        self._delete_path(svn_trunk_path)
      self._copy_path(cvs_rev.svn_path, svn_trunk_path,
                      svn_commit.motivating_revnum)
    else:
      raise self.SVNRepositoryMirrorUnexpectedOperationError(
          "Unknown CVSRevision operation '%s' in default branch sync."
          % cvs_rev.op)
def commit(self, svn_commit):
  """Apply the SVNCommit SVN_COMMIT to the repository mirror.

  This starts a new revision (which notifies the delegates via
  self._start_commit()) and then either fills a symbolic name,
  synchronizes the default branch with trunk, or commits the
  CVSRevisions of SVN_COMMIT, depending on the kind of commit."""
  if svn_commit.revnum == 2:
    self._initialize_repository(svn_commit.get_date())

  self._start_commit(svn_commit)

  if svn_commit.symbolic_name:
    Log().write(LOG_VERBOSE, "Filling symbolic name:",
                _clean_symbolic_name(svn_commit.symbolic_name))
    self._fill_symbolic_name(svn_commit)
    return

  if svn_commit.motivating_revnum:
    Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                % svn_commit.motivating_revnum)
    self._synchronize_default_branch(svn_commit)
    return

  # Otherwise, this is a commit of actual CVSRevisions.
  rev_count = len(svn_commit.cvs_revs)
  if rev_count > 1:
    plural = "s"
  else:
    plural = ""
  Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
              % (rev_count, plural))
  for cvs_rev in svn_commit.cvs_revs:
    # See the comment in CVSCommit._commit() for the reason behind
    # this test.  self._path_exists() is somewhat expensive, so it
    # comes last and only runs if the two cheap tests succeed.
    skip_rev = ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
                and (cvs_rev.rev == "1.1.1.1")
                and self._path_exists(cvs_rev.svn_path))
    if not skip_rev:
      if cvs_rev.op == OP_ADD:
        self._add_path(cvs_rev)
      elif cvs_rev.op == OP_CHANGE:
        # Fix for Issue #74:
        #
        # Suppose that file FOO was imported on a non-trunk vendor
        # branch, so that it exists in r1.1 and r1.1.1.1.  Later, FOO
        # is deleted on the default branch (r1.1.1.2); cvs2svn
        # determines that this delete has to happen on trunk as
        # well, so FOO is deleted on trunk.
        #
        # Then r1.2 comes along.  Its op is OP_CHANGE (r1.1 is not
        # 'dead', so a change is assumed), but the file on trunk is
        # gone, and svnadmin blows up when asked to change a file
        # that does not exist.
        #
        # Therefore the path is checked first: a change to a path
        # that does not exist is turned into an add.
        if self._path_exists(cvs_rev.svn_path):
          self._change_path(cvs_rev)
        else:
          self._add_path(cvs_rev)

    if cvs_rev.op == OP_DELETE:
      self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Drop the references to the revs db and the nodes db.  This is
  the callback for the Cleanup.register in self.__init__."""
  self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
  """Append DELEGATE to self.delegates.

  Whenever SVNRepositoryMirror performs a repository action, it calls
  the method of the same name on each registered delegate, in the
  order in which the delegates were added.  See
  SVNRepositoryMirrorDelegate for more information."""
  registered = self.delegates
  registered.append(delegate)
3706 def _invoke_delegates(self, method, *args):
3707 """Iterate through each of our delegates, in the order that they
3708 were added, and call the delegate's method named METHOD with the
3709 arguments in ARGS."""
3710 for delegate in self.delegates:
3711 getattr(delegate, method)(*args)
def finish(self):
  """Complete the last commit, call the 'finish' method of every
  delegate and then release the databases via self.cleanup()."""
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """A wrapper around a CVSRevision object, on which
  Subversion-related data (such as properties) can be hung."""

  def __init__(self, c_rev, svn_props_changed):
    """Wrap C_REV and work out the svn properties of the file.

    SVN_PROPS_CHANGED tells whether the svn: properties are known to
    have changed since the last revision.

    The properties are filled in by the SVNPropertySetters listed in
    Ctx().svn_property_setters; afterwards a couple of them are read
    back for our own purposes."""
    self.c_rev = c_rev

    # Whether the svn properties of this file changed, i.e. whether
    # they have to be written to the dumpfile.
    self.svn_props_changed = svn_props_changed

    # The properties of this item, as a map { key : value }.  A VALUE
    # of None means that the property should not be set.
    self.svn_props = { }
    for setter in Ctx().svn_property_setters:
      setter.set_properties(self)

    # The EOLs must be filtered if an eol-style is set.  (As
    # self.svn_props is initialized for every revision, it could be
    # consulted directly instead.)
    eol_style = self.svn_props.get('svn:eol-style', None)
    self.needs_eol_filter = eol_style is not None

    keywords = self.svn_props.get('svn:keywords', None)
    self.has_keywords = keywords is not None
class SVNRepositoryMirrorDelegate:
  """Abstract base class of all delegates of SVNRepositoryMirror.
  A subclass has to implement every method listed below.

  Each method stands for the Subversion operation that its name
  implies, and each subclass carries the operation out in its own
  way.  Taking add_path as an example: the DumpfileDelegate would
  write a "Node-add:" command to a Subversion dumpfile, the
  StdoutDelegate would merely print that the path is being added to
  the repository, and the RepositoryDelegate would actually cause the
  path to be added to the Subversion repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to start the SVNCommit SVN_COMMIT; see
    the subclass implementation for details."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementation
    for details."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementation
    for details."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are strings and SRC_REVNUM is a
    subversion revision number (int); see the subclass implementation
    for details."""
    raise NotImplementedError()

  def finish(self):
    """Do whatever cleanup is needed once all revisions have been
    committed."""
    raise NotImplementedError()
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that writes every repository action to a Subversion
  dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Open the dumpfile for writing and emit the dumpfile header.

    The dumpfile is created at DUMPFILE_PATH, or at Ctx().dumpfile if
    DUMPFILE_PATH is not given."""
    if not dumpfile_path:
      dumpfile_path = Ctx().dumpfile
    self.dumpfile_path = dumpfile_path

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile header to the file object
    DUMPFILE."""
    # No UUID is written: the CVS repository does not have one, and
    # the Subversion repository will be created with one anyway.
    header = 'SVN-fs-dump-format-version: 2\n\n'
    dumpfile.write(header)

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.  Raise FatalError if
    PATH cannot be converted."""
    # The components are converted one by one, because each of them
    # may use a different encoding.
    utf8_pieces = []
    for piece in path.split('/'):
      try:
        # Unlike log messages, which can be converted with the
        # 'replace' strategy, paths must not suffer any lossiness.
        utf8_pieces.append(to_utf8(piece, 'strict'))
      except UnicodeError:
        raise FatalError(
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with one or more '--encoding' parameters."
            % (path,))
    return '/'.join(utf8_pieces)

  def _string_for_prop(self, name, value):
    """Return the property NAME with value VALUE in the form that is
    needed for the dumpfile."""
    key_part = 'K %d\n%s\n' % (len(name), name)
    value_part = 'V %d\n%s\n' % (len(value), value)
    return key_part + value_part

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""
    self.revision = svn_commit.revnum

    # A new commit typically starts like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # The length headers count everything: not only the data, but
    # also the lengths of the lengths, including the 'K ' and 'V '
    # prefixes.
    #
    # Prop-content-length covers the props only, while Content-length
    # covers everything; this is the generic header form of any
    # dumpfile entity.  Revisions consist of props only, hence both
    # lengths are always equal for them.

    # Build the property section, sorted by property name.
    revprops = svn_commit.get_revprops()
    names = revprops.keys()
    names.sort()
    pieces = []
    for name in names:
      value = revprops[name]
      if value is not None:
        pieces.append(self._string_for_prop(name, value))
    pieces.append('PROPS-END\n')
    all_props = ''.join(pieces)
    props_len = len(all_props)

    # Emit the revision header, followed by the props.
    revision_header = ('Revision-number: %d\n'
                       'Prop-content-length: %d\n'
                       'Content-length: %d\n'
                       '\n'
                       % (self.revision, props_len, props_len))
    self.dumpfile.write(revision_header)
    self.dumpfile.write(all_props)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of the directory PATH."""
    node_path = self._utf8_path(path)
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "\n"
                        "\n" % node_path)

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or the change described by S_ITEM.

    OP must be one of the constants OP_ADD and OP_CHANGE; FatalError
    is raised for anything else, and also if the command that
    produces the file contents fails."""
    # Validate OP and map it to the dumpfile action.
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      raise FatalError("_add_or_change_path() called with bad op ('%s')"
                       % (op,))

    c_rev = s_item.c_rev

    # The property handling below relies on an undocumented, but
    # arguably consistent, feature of the Subversion dumpfile loader:
    # if a node's properties are not mentioned at all (there is no
    # "Prop-content-length:" header, no property is listed and there
    # is no "PROPS-END\n" line), the node's properties are left
    # unchanged.
    #
    # Text contents are treated the same way: a dumpfile record that
    # changes *only* the properties of a node carries neither a
    # "Text-content-length:" nor a "Text-content-md5:" header and no
    # text, and yet the text of the file is not erased by it.
    #
    # This suits cvs2svn well, thanks to some lucky coincidences:
    #
    # For files, properties are only ever set in the first revision,
    # and all later revisions (including those on branches) inherit
    # them.  As file properties never change afterwards, the full set
    # of properties of a file does not have to be remembered.
    #
    # For directories, the only property set is "svn:ignore".  It may
    # change after the first revision, but its value is always derived
    # from a ".cvsignore" file -- CVS does the remembering for us, so
    # the previous value does not have to be preserved either.

    # Build the property section (sorted by name) and its header, if
    # properties are to be written at all.
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      prop_names = svn_props.keys()
      prop_names.sort()
      prop_pieces = []
      for pname in prop_names:
        pvalue = svn_props[pname]
        if pvalue is not None:
          prop_pieces.append(self._string_for_prop(pname, pvalue))
      prop_pieces.append('PROPS-END\n')
      prop_contents = ''.join(prop_pieces)
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # A .cvsignore file is turned into a property of its directory.
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_value = '\n'.join(generate_ignores(c_rev))
      ignore_props = ('K 10\nsvn:ignore\nV %d\n%s\n'
                      % (len(ignore_value), ignore_value)) + 'PROPS-END\n'
      ignore_len = len(ignore_props)

      # Emit the headers of the directory node, then its props.
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_props))

    # Keywords must be stored unexpanded in the repository (otherwise
    # Subversion will get confused), so CVS/RCS must be kept from
    # expanding them if the file has keywords.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
        c_rev, suppress_keyword_substitution=s_item.has_keywords)

    # props_header is empty if there are no props to write.
    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # Remember where the placeholders start.  They are overwritten
    # with the real values once the contents have been copied.
    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Convert all EOLs to LFs on the fly if that is required.
    if s_item.needs_eol_filter:
      data_reader = LF_EOL_Filter(pipe.stdout)
    else:
      data_reader = pipe.stdout

    # Copy the contents of the revision to the dumpfile, keeping track
    # of their length and checksum.
    checksum = md5.new()
    length = 0
    buf = data_reader.read(PIPE_READ_SIZE)
    while buf != '':
      checksum.update(buf)
      length += len(buf)
      self.dumpfile.write(buf)
      buf = data_reader.read(PIPE_READ_SIZE)

    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
      raise FatalError("The command '%s' failed with exit status: %s\n"
                       "and the following output:\n"
                       "%s" % (pipe_cmd, exit_status, error_output))

    # Now patch the placeholders of the length and checksum headers.
    # The text length replaces the 16 zeros at POS; it is padded on
    # the left with spaces.
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write('%16d' % length)
    # The checksum starts after those 16 characters, 1 newline and
    # len('Text-content-md5: ') == 18 characters: 16 + 1 + 18 == 35.
    md5_offset = pos + 35
    self.dumpfile.seek(md5_offset, 0)
    self.dumpfile.write(checksum.hexdigest())
    # The content length starts after 32 more characters of checksum,
    # 1 newline and len('Content-length: ') == 16 characters:
    # 35 + 32 + 1 + 16 == 84.  It counts the property data, the text
    # data and any metadata inside or around them.
    content_length_offset = pos + 84
    self.dumpfile.seek(content_length_offset, 0)
    self.dumpfile.write('%16d' % (length + len(prop_contents)))
    # Return to the end of the dumpfile.
    self.dumpfile.seek(0, 2)

    # Finish the record with two newlines: the first one terminates
    # contents that did not end with a newline themselves, the second
    # one provides a blank line for readability.
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition described by S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change described by S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    node_path = self._utf8_path(path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % node_path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at revision SRC_REVNUM to
    DEST_PATH."""
    # "Node-kind:" is left out on purpose: the loader ignores it for
    # copies anyway and uses the kind of the source instead.
    copy_to = self._utf8_path(dest_path)
    copy_from = self._utf8_path(src_path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (copy_to, src_revnum, copy_from))

  def finish(self):
    """Close the dumpfile once all revisions have been committed."""
    self.dumpfile.close()
4097 class RepositoryDelegate(DumpfileDelegate):
4098 """Creates a new Subversion Repository. DumpfileDelegate does all
4099 of the heavy lifting."""
def __init__(self):
  """Prepare the target repository and start feeding 'svnadmin load'.

  Unless Ctx().existing_svnrepos is set, the repository Ctx().target
  is created with 'svnadmin create', using the file system type given
  by Ctx().fs_type, if any.  Then a temporary dumpfile is opened for
  reading and writing, an 'svnadmin load' process is started, and the
  dumpfile header is written to its standard input.

  FatalError is raised if the header cannot be written to the
  loader."""
  self.svnadmin = Ctx().svnadmin
  self.target = Ctx().target
  if not Ctx().existing_svnrepos:
    Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
    if not Ctx().fs_type:
      # User didn't say what kind repository (bdb, fsfs, etc).
      # We still pass --bdb-txn-nosync.  It's a no-op if the default
      # repository type doesn't support it, but we definitely want
      # it if BDB is the default.
      run_command('%s create %s "%s"' % (self.svnadmin,
                                         "--bdb-txn-nosync",
                                         self.target))
    elif Ctx().fs_type == 'bdb':
      # User explicitly specified bdb.
      #
      # Since this is a BDB repository, pass --bdb-txn-nosync,
      # because it gives us a 4-5x speed boost (if cvs2svn is
      # creating the repository, cvs2svn should be the only program
      # accessing the svn repository (until cvs is done, at least)).
      # But we'll turn no-sync off in self.finish(), unless
      # instructed otherwise.
      run_command('%s create %s %s "%s"' % (self.svnadmin,
                                            "--fs-type=bdb",
                                            "--bdb-txn-nosync",
                                            self.target))
    else:
      # User specified something other than bdb.
      run_command('%s create %s "%s"' % (self.svnadmin,
                                         "--fs-type=%s" % Ctx().fs_type,
                                         self.target))

  # Since the output of this run is a repository, not a dumpfile,
  # the temporary dumpfiles we create should go in the tmpdir.
  DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

  # This is 1 if a commit is in progress, otherwise None.
  self._commit_in_progress = None

  # DumpfileDelegate.__init__ has left a write-only file object in
  # self.dumpfile, with a dumpfile header written to it.  Close that
  # file object explicitly before reopening the file: otherwise the
  # handle is leaked until it happens to be finalized, at which point
  # its buffered header is flushed into the file that the new handle
  # has just truncated.  The reopened file starts out empty; the
  # loader gets its header through the pipe below.
  self.dumpfile.close()
  self.dumpfile = open(self.dumpfile_path, 'w+b')
  self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                   self.target ], True)
  # The output of the loader is not needed.
  self.loader_pipe.stdout.close()
  try:
    self._write_dumpfile_header(self.loader_pipe.stdin)
  except IOError:
    raise FatalError("svnadmin failed with the following output while "
                     "loading the dumpfile:\n"
                     + self.loader_pipe.stderr.read())
4150 def _feed_pipe(self):
4151 """Feed the revision stored in the dumpfile to the svnadmin
4152 load pipe."""
4153 self.dumpfile.seek(0)
4154 while 1:
4155 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
4156 if not len(data):
4157 break
4158 try:
4159 self.loader_pipe.stdin.write(data)
4160 except IOError:
4161 raise FatalError("svnadmin failed with the following output "
4162 "while loading the dumpfile:\n"
4163 + self.loader_pipe.stderr.read())
def start_commit(self, svn_commit):
  """Begin SVN_COMMIT.

  If an earlier commit is still pending, its dumpfile contents are
  first pushed through the 'svnadmin load' pipe.  The dumpfile is
  then emptied for reuse, and DumpfileDelegate.start_commit() is
  invoked for the new commit."""

  if self._commit_in_progress:
    self._feed_pipe()

  # Reuse the same temporary file for every revision.
  scratch = self.dumpfile
  scratch.seek(0)
  scratch.truncate()

  DumpfileDelegate.start_commit(self, svn_commit)
  self._commit_in_progress = 1
def finish(self):
  """Load the last commit into the repository and shut down the loader.

  Feed the final revision to 'svnadmin load', close the pipe, and
  wait for svnadmin to exit.  Raise FatalError (including svnadmin's
  stderr output) if it exits with a non-zero status.  On success the
  temporary dumpfile is removed and, where applicable, transaction
  syncing is re-enabled in the repository's DB_CONFIG file."""

  self._feed_pipe()
  self.dumpfile.close()
  self.loader_pipe.stdin.close()
  error_output = self.loader_pipe.stderr.read()
  exit_status = self.loader_pipe.wait()
  if exit_status:
    raise FatalError('svnadmin load failed with exit status: %s\n'
                     'and the following output:\n'
                     '%s' % (exit_status, error_output,))
  os.remove(self.dumpfile_path)

  # If this is a BDB repository, and we created the repository, and
  # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
  # line in the DB_CONFIG file, because txn syncing should be on by
  # default in BDB repositories.
  #
  # We determine if this is a BDB repository by looking for the
  # DB_CONFIG file, which doesn't exist in FSFS, rather than by
  # checking Ctx().fs_type.  That way this code will Do The Right
  # Thing in all circumstances.
  db_config = os.path.join(self.target, "db/DB_CONFIG")
  if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
      and os.path.exists(db_config)):
    no_sync = 'set_flags DB_TXN_NOSYNC\n'

    # Close the file explicitly instead of relying on garbage
    # collection; it is reopened for writing just below.
    config_file = open(db_config, 'r')
    try:
      contents = config_file.readlines()
    finally:
      config_file.close()

    if no_sync in contents:
      contents[contents.index(no_sync)] = '# ' + no_sync
      config_file = open(db_config, 'w')
      try:
        config_file.writelines(contents)
      finally:
        config_file.close()
    else:
      # The conversion itself has already succeeded at this point, so
      # a DB_CONFIG without the expected line is not worth dying for.
      Log().write(LOG_WARN,
                  "%s: no DB_TXN_NOSYNC line found in '%s'; "
                  "file left unchanged" % (warning_prefix, db_config))
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that only reports, via Log(), what the
  SVNRepositoryMirror is doing.

  Nothing on disk is changed by this class; every callback simply
  logs a line describing the operation that the mirror has announced.
  TOTAL_REVS, passed to the constructor, is shown next to each
  revision number when a commit is started."""

  def __init__(self, total_revs):
    self.total_revs = total_revs

  def _verbose(self, *words):
    """Log WORDS at LOG_VERBOSE level."""
    Log().write(LOG_VERBOSE, *words)

  def start_commit(self, svn_commit):
    """Log the Subversion revision number of SVN_COMMIT, which is
    about to be started."""
    self._verbose("=" * 60)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Log the creation of directory PATH."""
    self._verbose(" New Directory", path)

  def add_path(self, s_item):
    """Log the addition of s_item.c_rev.svn_path."""
    self._verbose(" Adding", s_item.c_rev.svn_path)

  def change_path(self, s_item):
    """Log the modification of s_item.c_rev.svn_path."""
    self._verbose(" Changing", s_item.c_rev.svn_path)

  def delete_path(self, path):
    """Log the deletion of PATH."""
    self._verbose(" Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Log the copy of revision SRC_REVNUM of SRC_PATH to DEST_PATH."""
    self._verbose(" Copying revision", src_revnum, "of", src_path)
    self._verbose(" to", dest_path)

  def finish(self):
    """Log that the repository has been completely created."""
    self._verbose("Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
def pass1():
  """Pass 1: parse every RCS ',v' file in the project's CVS repository.

  Walk the directory tree rooted at
  Ctx().project.project_cvs_repos_path, feed each ',v' file to
  cvs2svn_rcsparse with a FileDataCollector as the sink, and finally
  write out the symbol database.

  Raise FatalException if any fatal error was recorded during the
  walk (invalid ',v' file, or a file present both inside and outside
  of 'Attic'), or if no ',v' file was found at all."""

  OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    """os.path.walk() callback: process the ',v' files among FILES,
    which live in DIRNAME.  BATON is the CollectData instance."""
    cd = baton
    for fname in files:
      verify_filename_legal(fname)
      if not fname.endswith(',v'):
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname.endswith(OS_SEP_PLUS_ATTIC):
        # drop the 'Attic' portion from the pathname for the canonical name.
        canonical_dir = dirname[:-len(OS_SEP_PLUS_ATTIC)]
        fdc = FileDataCollector(cd, os.path.join(canonical_dir, fname),
                                pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        fdc = FileDataCollector(cd, pathname, pathname)
      Log().write(LOG_NORMAL, pathname)

      # Close each RCS file as soon as it has been parsed rather than
      # leaving it to the garbage collector; a repository can contain
      # a very large number of files.  (try/except and try/finally
      # are nested because they cannot be combined in old Pythons.)
      rcs_file = None
      try:
        try:
          rcs_file = open(pathname, 'rb')
          cvs2svn_rcsparse.parse(rcs_file, fdc)
        except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
                RuntimeError):
          err = "%s: '%s' is not a valid ,v file" \
                % (error_prefix, pathname)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        except:
          # Say which file we were working on, then let the exception
          # propagate unchanged.
          Log().write(LOG_WARN,
                      "Exception occurred while parsing %s" % pathname)
          raise
      finally:
        if rcs_file is not None:
          rcs_file.close()

  os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    raise FatalException("Pass 1 complete.\n"
                         + "=" * 75 + "\n"
                         + "Error summary:\n"
                         + "\n".join(cd.fatal_errors) + "\n"
                         + "Exited due to fatal error(s).\n")

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  First validate the symbol-related options against the symbol
  database (blocked exclusions, forced tags that have commits,
  symbols used both as tag and as branch); if any check fails,
  report to stderr and exit with status 1.  Then create the tags
  database, and rewrite the revisions file from pass 1 into the
  'clean revs' file, dropping excluded symbols, applying forced
  tag/branch conversions, and resynchronizing timestamps according
  to the resync file."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Convert the list of regexps to the set of excluded symbol names
  # (queried with has_key() below).
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags:
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      resync.setdefault(digest, []).append([t1_l, t1_u, t2])

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    for val in resync.values():
      val.sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)

  # Filter predicate used to remove all references to excluded tags
  # and branches.  It does not depend on the revision being processed,
  # so it is defined once here rather than once per revision.
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Pick up timestamps of neighboring revisions that were already
    # tweaked earlier in this pass.
    new_prev_ts = None
    if c_rev.prev_rev is not None:
      new_prev_ts = tweaked_timestamps_db.get(
          c_rev.unique_key(c_rev.prev_rev), None)
    if new_prev_ts:
      c_rev.prev_timestamp = new_prev_ts

    new_next_ts = None
    if c_rev.next_rev is not None:
      new_next_ts = tweaked_timestamps_db.get(
          c_rev.unique_key(c_rev.next_rev), None)
    if new_next_ts:
      c_rev.next_timestamp = new_next_ts

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[2] == c_rev.timestamp:
        # This means that either c_rev is the same revision that
        # caused the resync record to exist, or c_rev is a different
        # CVS revision that happens to have the same timestamp.  In
        # either case, we don't have to do anything, so we...
        continue

      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  We probably want to remap the time on this c_rev,
        # unless the remapping would be useless because the new time
        # would fall outside the COMMIT_THRESHOLD window for this
        # commit group.
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if new_timestamp < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # If the new timestamp is later than that of our next revision
        elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is after time of next"
                  + " revision %s (%s):")
          # Report the *next* revision here: it is the one whose
          # timestamp is being compared and printed.
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.next_rev, c_rev.next_timestamp))
          # If resyncing our rev to c_rev.next_timestamp - 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync forward to c_rev.next_timestamp
          # - 1...
          if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.next_timestamp - 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # Fix for Issue #71: Avoid resyncing two consecutive revisions
        # to the same timestamp.
        elif (new_timestamp == c_rev.prev_timestamp
              or new_timestamp == c_rev.next_timestamp):
          continue

        # adjust the time range.  we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 new_timestamp - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp
        tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # Make sure everything is on disk before pass 3 sorts this file.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: sort the cleaned-up CVS revisions file produced by
  pass 2 into the sorted revisions file, and register the sorted
  file for cleanup after pass 5."""

  clean_revs = DATAFILE + CLEAN_REVS_SUFFIX
  sorted_revs = DATAFILE + SORTED_REVS_SUFFIX

  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  sort_file(temp(clean_revs), temp(sorted_revs))
  Cleanup().register(temp(sorted_revs), pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Pass 4: copy the sorted revisions into a database.

  Every line of the sorted revisions file is turned into a
  CVSRevision, which is stored in the CVSRevisionDatabase and
  recorded with StatsKeeper.  Unless this is a trunk-only
  conversion, the LastSymbolicNameDatabase (which contains the last
  CVSRevision that is a source for each tag or branch) is generated
  as well."""

  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)

  if Ctx().trunk_only:
    # Substitute a do-nothing stand-in so that Ctx().trunk_only need
    # not be tested every time around the loop below.
    class DummyLSNDB:
      def log_revision(*args): pass
      def create_database(*args): pass
    last_sym_name_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "Finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    # Strip the trailing newline before parsing.
    c_rev = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Pass 5: generate the SVNCommit <-> CVSRevision mapping databases.

  Each revision in the sorted revisions file is handed to a
  CVSRevisionAggregator; in a trunk-only conversion, revisions that
  are on a branch are left out.  CVSCommit._commit also calls
  SymbolingsLogger to register CVSRevisions that represent an
  opening or closing for a path on a branch or tag.  See
  SymbolingsLogger for more details."""

  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    c_rev = CVSRevision(Ctx(), line[:-1])
    if not Ctx().trunk_only or c_rev.branch_name is None:
      aggregator.process_revision(c_rev)
  aggregator.flush()

  # SVNCommit.revnum is one more than the number of revisions counted.
  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Pass 6: sort the symbol openings/closings file.

  Nothing is sorted in a trunk-only conversion.  The sorted file is
  registered for cleanup after pass 8."""

  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  if not Ctx().trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)

  Log().write(LOG_QUIET, "Done")
def pass7():
  """Pass 7: record, for every symbolic name, the file offset at which
  its entries start in the sorted symbol openings/closings file.

  Nothing is generated in a trunk-only conversion."""

  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    # Not named 'file', so that the builtin of that name stays usable;
    # closed explicitly once the scan is over.
    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      while 1:
        # tell() must be called before readline() so that fpos is the
        # offset of the start of the line.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is needed, but unpacking all three
        # fields also verifies that the line is well-formed.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Pass 8: replay all SVNCommits through an SVNRepositoryMirror.

  The mirror gets a RepositoryDelegate when Ctx().target is set and
  a DumpfileDelegate otherwise (neither in a dry run), plus a
  StdoutDelegate for progress reporting.  Commits are fetched from
  the PersistenceManager by increasing revision number until one is
  missing."""

  svncounter = 2 # Repository initialization is 1.
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if Ctx().target:
    delegate_factory = RepositoryDelegate
    start_message = "Starting Subversion Repository."
  else:
    delegate_factory = DumpfileDelegate
    start_message = "Starting Subversion Dumpfile."

  if not Ctx().dry_run:
    repos.add_delegate(delegate_factory())
  Log().write(LOG_QUIET, start_message)

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes in execution order; pass number N is
# _passes[N - 1].
_passes = [pass1, pass2, pass3, pass4, pass5, pass6, pass7, pass8]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  All instances share a single attribute dictionary, so an attribute
  set through one instance is visible through every other one.  The
  defaults below are installed only by the first instantiation, i.e.
  while the shared dictionary is still empty."""

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # Already initialized by an earlier instantiation.
      return

    # Output locations.
    self.target = None
    self.dumpfile = DUMPFILE
    self.tmpdir = '.'

    # Logging and general behavior.
    self.verbose = 0
    self.quiet = 0
    self.prune = 1
    self.existing_svnrepos = 0
    self.dump_only = 0
    self.dry_run = 0
    self.trunk_only = 0

    # Repository layout and text encoding.
    self.trunk_base = "trunk"
    self.tags_base = "tags"
    self.branches_base = "branches"
    self.encoding = ["ascii"]

    # Property-related options.
    self.mime_types_file = None
    self.auto_props_file = None
    self.auto_props_ignore_case = False
    self.no_default_eol = 0
    self.eol_from_mime_type = 0
    self.keywords_off = 0

    # External tools and miscellaneous options.
    self.use_cvs = None
    self.svnadmin = "svnadmin"
    self.username = None
    self.print_help = 0
    self.skip_cleanup = 0
    self.bdb_txn_nosync = 0
    self.fs_type = None

    # Symbol handling and property setters.
    self.forced_branches = []
    self.forced_tags = []
    self.excludes = []
    self.symbol_transforms = []
    self.svn_property_setters = []
class SVNPropertySetter:
  """Abstract base class for objects that can set properties on a
  SVNCommitItem.  Subclasses must override set_properties()."""

  def set_properties(self, s_item):
    """Set any properties that can be determined for S_ITEM.

    This base implementation always raises NotImplementedError."""

    raise NotImplementedError
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number of each item in its
  cvs2svn:cvs-rev property."""

  def set_properties(self, s_item):
    """Store s_item.c_rev.rev under 'cvs2svn:cvs-rev' and flag the
    item's properties as changed."""

    cvs_rev_number = s_item.c_rev.rev
    s_item.svn_props['cvs2svn:cvs-rev'] = cvs_rev_number
    s_item.svn_props_changed = True
class ExecutablePropertySetter(SVNPropertySetter):
  """Set svn:executable on items whose c_rev.file_executable is true."""

  def set_properties(self, s_item):
    """Set 'svn:executable' to '*' if the CVS file is executable;
    otherwise leave the properties alone."""

    if not s_item.c_rev.file_executable:
      return
    s_item.svn_props['svn:executable'] = '*'
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force svn:eol-style to None for binary files."""

  def set_properties(self, s_item):
    """Set 'svn:eol-style' to None if the CVS mode of the file is
    'b'; otherwise leave the properties alone."""

    if s_item.c_rev.mode != 'b':
      return
    s_item.svn_props['svn:eol-style'] = None
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types.

  The mappings are read from an apache-style mime.types file and are
  kept in self.mappings, keyed by file extension (without the dot)."""

  def __init__(self, mime_types_file):
    """Read the mappings from the file named MIME_TYPES_FILE.

    Lines that start with '#' and lines with fewer than two fields
    are ignored.  If an extension is mapped to two different types, a
    warning is written to stderr and the later mapping wins."""

    self.mappings = { }

    # A private FileInput instance is used (as elsewhere in this
    # file) rather than fileinput.input(), which relies on
    # module-global state.
    for line in fileinput.FileInput(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      fields = line.split()
      if len(fields) < 2:
        continue
      mime_type = fields[0]
      for ext in fields[1:]:
        if self.mappings.has_key(ext) and self.mappings[ext] != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext],
                              mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set 'svn:mime-type' on S_ITEM if a mapping is known for the
    name of its CVS file; otherwise leave the properties alone."""

    basename, extension = os.path.splitext(
        os.path.basename(s_item.c_rev.cvs_path)
        )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type
class AutoPropsPropertySetter(SVNPropertySetter):
  """Set arbitrary svn properties based on an auto-props configuration.

  This class supports case-sensitive or case-insensitive pattern
  matching.  The 'correct' behavior is not quite clear, because
  subversion itself does an inconsistent job of handling case in
  auto-props patterns; see
  http://subversion.tigris.org/issues/show_bug.cgi?id=2036.

  If a property specified in auto-props has already been set to a
  different value, print a warning and leave the old property value
  unchanged."""

  class Pattern:
    """Describes the properties to be set for files matching a pattern."""
    def __init__(self, pattern, propdict):
      # A glob-like pattern:
      self.pattern = pattern
      # A dictionary of properties that should be set:
      self.propdict = propdict

    def match(self, basename):
      """Does the file with the specified basename match pattern?"""
      return fnmatch.fnmatch(basename, self.pattern)

  def __init__(self, configfilename, ignore_case):
    """Read the auto-props patterns from the file CONFIGFILENAME,
    which is in svn config format.  If IGNORE_CASE is true, section
    names, patterns and file names are all lower-cased before being
    compared."""

    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      # Keep ConfigParser from lower-casing the option names, which
      # are the patterns.
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    # Close the configuration file as soon as it has been parsed.
    config_file = open(configfilename)
    try:
      config.readfp(config_file)
    finally:
      config_file.close()

    self.patterns = []
    for section in config.sections():
      if self.transform_case(section) == 'auto-props':
        for pattern in config.options(section):
          value = config.get(section, pattern)
          if value:
            self._add_pattern(pattern, value)

  def squash_case(self, s):
    """Return S converted to lower case."""
    return s.lower()

  def preserve_case(self, s):
    """Return S unchanged."""
    return s

  def _add_pattern(self, pattern, value):
    """Register PATTERN with the properties described by VALUE, a
    ';'-separated list of 'name=value' or bare 'name' items.  A bare
    name is recorded with the value None."""

    props = value.split(';')
    propdict = {}
    for prop in props:
      s = prop.split('=', 1)
      if len(s) == 1:
        propdict[s[0]] = None
      else:
        propdict[s[0]] = s[1]
    self.patterns.append(
        self.Pattern(self.transform_case(pattern), propdict))

  def get_propdict(self, path):
    """Return a dictionary of the properties to be set for PATH,
    merged over all patterns that match its basename.  If two
    patterns disagree about a property, log a warning and keep the
    value that was found first."""

    basename = self.transform_case(os.path.basename(path))
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if propdict.has_key(key):
            if propdict[key] != value:
              Log().write(
                  LOG_WARN,
                  "Contradictory values set for property '%s' for file %s."
                  % (key, path,))
          else:
            propdict[key] = value

    return propdict

  def set_properties(self, s_item):
    """Apply the auto-props for S_ITEM's CVS path to its svn_props.
    Properties that are already set are never overridden; a warning
    is logged if the existing value differs."""

    propdict = self.get_propdict(s_item.c_rev.cvs_path)
    for (k,v) in propdict.items():
      if s_item.svn_props.has_key(k):
        if s_item.svn_props[k] != v:
          Log().write(
              LOG_WARN,
              "Property '%s' already set for file %s."
              % (k, s_item.c_rev.cvs_path,))
      else:
        s_item.svn_props[k] = v
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files the fallback MIME type
  'application/octet-stream' when no svn:mime-type has been set."""

  def set_properties(self, s_item):
    """Set 'svn:mime-type' only if it is absent and the CVS mode of
    the file is 'b'."""

    if s_item.svn_props.has_key('svn:mime-type'):
      return
    if s_item.c_rev.mode == 'b':
      s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive svn:eol-style from svn:mime-type.

  This only applies when svn:mime-type is known and svn:eol-style
  has not been decided yet.  A MIME type starting with 'text/'
  yields the eol-style 'native'; any other MIME type forces the
  eol-style to remain unset (None).  See also issue #39."""

  def set_properties(self, s_item):
    """Set 'svn:eol-style' on S_ITEM as described for the class."""

    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    mime_type = props.get('svn:mime-type', None)
    if mime_type is None:
      return
    if mime_type.startswith("text/"):
      props['svn:eol-style'] = 'native'
    else:
      props['svn:eol-style'] = None
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Supply a default svn:eol-style for items that have none."""

  def __init__(self, value):
    """Remember VALUE as the default eol-style."""

    self.value = value

  def set_properties(self, s_item):
    """Set 'svn:eol-style' to the default value unless the property
    is already present."""

    if s_item.svn_props.has_key('svn:eol-style'):
      return
    s_item.svn_props['svn:eol-style'] = self.value
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords, based on the file's CVS mode, on items for
  which it has not been set yet.  See issue #2."""

  def __init__(self, value):
    """Remember VALUE, which is used as the value of the svn:keywords
    property whenever it is set."""

    self.value = value

  def set_properties(self, s_item):
    """Set 'svn:keywords' if it is absent and the CVS mode of the
    file is None, 'kv' or 'kvl'."""

    if s_item.svn_props.has_key('svn:keywords'):
      return
    if s_item.c_rev.mode in [None, 'kv', 'kvl']:
      s_item.svn_props['svn:keywords'] = self.value
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Run passes START_PASS through END_PASS (1-based, inclusive).  The
  duration of each pass is reported to StatsKeeper.  After each pass
  the per-pass attributes of Ctx() are discarded and, unless
  Ctx().skip_cleanup is set, the Cleanup actions registered for that
  pass are run.  Statistics and timings are logged at the end."""

  cleanup = Cleanup()
  # times[n] is the time at which pass n finished; times[start_pass - 1]
  # is the time at which the first pass was started.
  times = [ None ] * (end_pass + 1)
  times[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())

  for index in range(start_pass - 1, end_pass):
    pass_number = index + 1
    Log().write(LOG_QUIET, '----- pass %d -----' % (pass_number))
    run_pass = _passes[index]
    run_pass()
    times[pass_number] = time.time()
    elapsed = times[pass_number] - times[index]
    StatsKeeper().log_duration_for_pass(elapsed, pass_number)

    # Dispose of items in Ctx() not intended to live past the end of
    # the pass.  These are the names that are longer than two
    # characters and start with exactly one underscore; the
    # name-mangled private attributes of Ctx itself are kept.
    for attr in dir(Ctx()):
      if len(attr) <= 2 or attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        continue
      delattr(Ctx(), attr)

    if not Ctx().skip_cleanup:
      cleanup.cleanup(_passes[index])

  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())
def normalize_ttb_path(opt, path):
  """Normalize a path to be used for --trunk, --tags, or --branches.

  PATH is split at '/' and its components are rejoined with
  _path_join(), with the intent of stripping leading, trailing, and
  duplicated '/'.

  Return the normalized path.  If it is empty, raise FatalError with
  a message that names the option OPT."""

  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))
def verify_paths_disjoint(*paths):
  """Verify that all of the paths in the argument list are disjoint.

  Two paths are not disjoint if they are identical or if one of them
  is nested in the other (in the sense that 'a/b/c/d' is nested in
  'a/b').  Raise FatalError, naming an offending pair, if the paths
  are not disjoint; otherwise return None."""

  decorated = [(path.split('/'), path) for path in paths]
  # If all overlapping elements are equal, a shorter list is
  # considered "less than" a longer one.  Therefore if any paths are
  # nested, this sort will leave at least one such pair adjacent, in
  # the order [nest,nestling].
  decorated.sort()
  for (outer_parts, outer), (inner_parts, inner) in zip(decorated[:-1],
                                                        decorated[1:]):
    # A prefix slice can only equal OUTER_PARTS if INNER_PARTS is at
    # least as long, so no separate length comparison is needed.
    if inner_parts[:len(outer_parts)] == outer_parts:
      raise FatalError("paths %s and %s are not disjoint." % (outer, inner,))
5030 def usage():
5031 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
5032 % os.path.basename(sys.argv[0])
5033 print ' --help, -h print this usage message and exit with success'
5034 print ' --version print the version number'
5035 print ' -q quiet'
5036 print ' -v verbose'
5037 print ' -s PATH path for SVN repos'
5038 print ' -p START[:END] start at pass START, end at pass END of %d' \
5039 % len(_passes)
5040 print ' If only START is given, run only pass START'
5041 print ' (implicitly enables --skip-cleanup)'
5042 print ' --existing-svnrepos load into existing SVN repository'
5043 print ' --dumpfile=PATH name of intermediate svn dumpfile'
5044 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
5045 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
5046 print ' --dry-run do not create a repository or a dumpfile;'
5047 print ' just print what would happen.'
5048 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
5049 print ' (only use this if having problems with RCS)'
5050 print ' --svnadmin=PATH path to the svnadmin program'
5051 print ' --trunk-only convert only trunk commits, not tags nor branches'
5052 print ' --trunk=PATH path for trunk (default: %s)' \
5053 % Ctx().trunk_base
5054 print ' --branches=PATH path for branches (default: %s)' \
5055 % Ctx().branches_base
5056 print ' --tags=PATH path for tags (default: %s)' \
5057 % Ctx().tags_base
5058 print ' --no-prune don\'t prune empty directories'
5059 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
5060 print ' --encoding=ENC encoding of paths and log messages in CVS repos'
5061 print ' Multiple of these options may be passed, where they'
5062 print ' will be treated as an ordered list of encodings to'
5063 print ' attempt (with "ascii" as a hardcoded last resort)'
5064 print ' --force-branch=NAME force NAME to be a branch'
5065 print ' --force-tag=NAME force NAME to be a tag'
5066 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
5067 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
5068 print ' use Python regexp and reference syntax respectively'
5069 print ' --username=NAME username for cvs2svn-synthesized commits'
5070 print ' --skip-cleanup prevent the deletion of intermediate files'
5071 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
5072 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
5073 print ' --cvs-revnums record CVS revision numbers as file properties'
5074 print ' --auto-props=FILE set file properties from the auto-props section'
5075 print ' of a file in svn config format'
5076 print ' --auto-props-ignore-case Ignore case when matching auto-props patterns'
5077 print ' --mime-types=FILE specify an apache-style mime.types file for'
5078 print ' setting svn:mime-type'
5079 print ' --eol-from-mime-type set svn:eol-style from mime type if known'
5080 print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
5081 print ' non-binary files with undetermined mime types'
5082 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
5083 print ' cvs2svn sets svn:keywords on non-binary files to'
5084 print ' "%s")' % SVN_KEYWORDS_VALUE
5086 def main():
5087 # Convenience var, so we don't have to keep instantiating this Borg.
5088 ctx = Ctx()
5090 profiling = None
5091 start_pass = 1
5092 end_pass = len(_passes)
5094 try:
5095 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
5096 [ "help", "create", "trunk=",
5097 "username=", "existing-svnrepos",
5098 "branches=", "tags=", "encoding=",
5099 "force-branch=", "force-tag=", "exclude=",
5100 "use-cvs", "mime-types=",
5101 "auto-props=", "auto-props-ignore-case",
5102 "eol-from-mime-type", "no-default-eol",
5103 "trunk-only", "no-prune", "dry-run",
5104 "dump-only", "dumpfile=", "tmpdir=",
5105 "svnadmin=", "skip-cleanup", "cvs-revnums",
5106 "bdb-txn-nosync", "fs-type=",
5107 "version", "profile",
5108 "keywords-off", "symbol-transform="])
5109 except getopt.GetoptError, e:
5110 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
5111 usage()
5112 sys.exit(1)
5114 for opt, value in opts:
5115 if opt == '--version':
5116 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
5117 sys.exit(0)
5118 elif opt == '-p':
5119 # Don't cleanup if we're doing incrementals.
5120 ctx.skip_cleanup = 1
5121 if value.find(':') > 0:
5122 start_pass, end_pass = map(int, value.split(':'))
5123 else:
5124 end_pass = start_pass = int(value)
5125 if start_pass > len(_passes) or start_pass < 1:
5126 raise FatalError(
5127 'illegal value (%d) for starting pass. Must be 1 through %d.'
5128 % (int(start_pass), len(_passes),))
5129 if end_pass < start_pass or end_pass > len(_passes):
5130 raise FatalError(
5131 'illegal value (%d) for ending pass. Must be %d through %d.'
5132 % (int(end_pass), int(start_pass), len(_passes),))
5133 elif (opt == '--help') or (opt == '-h'):
5134 ctx.print_help = 1
5135 elif opt == '-v':
5136 Log().log_level = LOG_VERBOSE
5137 ctx.verbose = 1
5138 elif opt == '-q':
5139 Log().log_level = LOG_QUIET
5140 ctx.quiet = 1
5141 elif opt == '-s':
5142 ctx.target = value
5143 elif opt == '--existing-svnrepos':
5144 ctx.existing_svnrepos = 1
5145 elif opt == '--dumpfile':
5146 ctx.dumpfile = value
5147 elif opt == '--tmpdir':
5148 ctx.tmpdir = value
5149 elif opt == '--use-cvs':
5150 ctx.use_cvs = 1
5151 elif opt == '--svnadmin':
5152 ctx.svnadmin = value
5153 elif opt == '--trunk-only':
5154 ctx.trunk_only = 1
5155 elif opt == '--trunk':
5156 ctx.trunk_base = normalize_ttb_path(opt, value)
5157 elif opt == '--branches':
5158 ctx.branches_base = normalize_ttb_path(opt, value)
5159 elif opt == '--tags':
5160 ctx.tags_base = normalize_ttb_path(opt, value)
5161 elif opt == '--no-prune':
5162 ctx.prune = None
5163 elif opt == '--dump-only':
5164 ctx.dump_only = 1
5165 elif opt == '--dry-run':
5166 ctx.dry_run = 1
5167 elif opt == '--encoding':
5168 ctx.encoding.insert(-1, value)
5169 elif opt == '--force-branch':
5170 ctx.forced_branches.append(value)
5171 elif opt == '--force-tag':
5172 ctx.forced_tags.append(value)
5173 elif opt == '--exclude':
5174 try:
5175 ctx.excludes.append(re.compile('^' + value + '$'))
5176 except re.error, e:
5177 raise FatalError("'%s' is not a valid regexp." % (value,))
5178 elif opt == '--mime-types':
5179 ctx.mime_types_file = value
5180 elif opt == '--auto-props':
5181 ctx.auto_props_file = value
5182 elif opt == '--auto-props-ignore-case':
5183 ctx.auto_props_ignore_case = True
5184 elif opt == '--eol-from-mime-type':
5185 ctx.eol_from_mime_type = 1
5186 elif opt == '--no-default-eol':
5187 ctx.no_default_eol = 1
5188 elif opt == '--keywords-off':
5189 ctx.keywords_off = 1
5190 elif opt == '--username':
5191 ctx.username = value
5192 elif opt == '--skip-cleanup':
5193 ctx.skip_cleanup = 1
5194 elif opt == '--cvs-revnums':
5195 ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5196 elif opt == '--bdb-txn-nosync':
5197 ctx.bdb_txn_nosync = 1
5198 elif opt == '--fs-type':
5199 ctx.fs_type = value
5200 elif opt == '--create':
5201 sys.stderr.write(warning_prefix +
5202 ': The behaviour produced by the --create option is now the '
5203 'default,\nand passing the option is deprecated.\n')
5204 elif opt == '--profile':
5205 profiling = 1
5206 elif opt == '--symbol-transform':
5207 [pattern, replacement] = value.split(":")
5208 try:
5209 pattern = re.compile(pattern)
5210 except re.error, e:
5211 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5212 ctx.symbol_transforms.append((pattern, replacement,))
5214 if ctx.print_help:
5215 usage()
5216 sys.exit(0)
5218 # Consistency check for options and arguments.
5219 if len(args) == 0:
5220 usage()
5221 sys.exit(1)
5223 if len(args) > 1:
5224 sys.stderr.write(error_prefix +
5225 ": must pass only one CVS repository.\n")
5226 usage()
5227 sys.exit(1)
5229 cvsroot = args[0]
5231 if ctx.use_cvs:
5232 ctx.cvs_repository = CVSRepositoryViaCVS(cvsroot)
5233 else:
5234 ctx.cvs_repository = CVSRepositoryViaRCS(cvsroot)
5236 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5237 raise FatalError("must pass one of '-s' or '--dump-only'.")
5239 def not_both(opt1val, opt1name, opt2val, opt2name):
5240 if opt1val and opt2val:
5241 raise FatalError("cannot pass both '%s' and '%s'."
5242 % (opt1name, opt2name,))
5244 not_both(ctx.target, '-s',
5245 ctx.dump_only, '--dump-only')
5247 not_both(ctx.dump_only, '--dump-only',
5248 ctx.existing_svnrepos, '--existing-svnrepos')
5250 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5251 ctx.existing_svnrepos, '--existing-svnrepos')
5253 not_both(ctx.dump_only, '--dump-only',
5254 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5256 not_both(ctx.quiet, '-q',
5257 ctx.verbose, '-v')
5259 not_both(ctx.fs_type, '--fs-type',
5260 ctx.existing_svnrepos, '--existing-svnrepos')
5262 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5263 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5264 % ctx.fs_type)
5266 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5267 ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
5268 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5270 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5271 raise FatalError("the svn-repos-path '%s' is not an "
5272 "existing directory." % ctx.target)
5274 if not ctx.dump_only and not ctx.existing_svnrepos \
5275 and (not ctx.dry_run) and os.path.exists(ctx.target):
5276 raise FatalError("the svn-repos-path '%s' exists.\n"
5277 "Remove it, or pass '--existing-svnrepos'."
5278 % ctx.target)
5280 if ctx.target and not ctx.dry_run:
5281 # Verify that svnadmin can be executed. The 'help' subcommand
5282 # should be harmless.
5283 try:
5284 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5285 except CommandFailedException, e:
5286 raise FatalError(
5287 '%s\n'
5288 'svnadmin could not be executed. Please ensure that it is\n'
5289 'installed and/or use the --svnadmin option.' % (e,))
5291 ctx.svn_property_setters.append(ExecutablePropertySetter())
5293 ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5295 if ctx.mime_types_file:
5296 ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5298 if ctx.auto_props_file:
5299 ctx.svn_property_setters.append(AutoPropsPropertySetter(
5300 ctx.auto_props_file, ctx.auto_props_ignore_case))
5302 ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5304 if ctx.eol_from_mime_type:
5305 ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5307 if ctx.no_default_eol:
5308 ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5309 else:
5310 ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5312 if not ctx.keywords_off:
5313 ctx.svn_property_setters.append(
5314 KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5316 # Make sure the tmp directory exists. Note that we don't check if
5317 # it's empty -- we want to be able to use, for example, "." to hold
5318 # tempfiles. But if we *did* want check if it were empty, we'd do
5319 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5320 if not os.path.exists(ctx.tmpdir):
5321 os.mkdir(ctx.tmpdir)
5322 elif not os.path.isdir(ctx.tmpdir):
5323 raise FatalError(
5324 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5325 " exists and is not a directory. Please make it be a directory,\n"
5326 " or specify some other directory for temporary files."
5327 % (ctx.tmpdir,))
5329 # But do lock the tmpdir, to avoid process clash.
5330 try:
5331 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5332 except OSError, e:
5333 if e.errno == errno.EACCES:
5334 raise FatalError("Permission denied:"
5335 + " No write access to directory '%s'." % ctx.tmpdir)
5336 if e.errno == errno.EEXIST:
5337 raise FatalError(
5338 "cvs2svn is using directory '%s' for temporary files, but\n"
5339 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5340 " cvs2svn process is currently using '%s' as its temporary\n"
5341 " workspace. If you are certain that is not the case,\n"
5342 " then remove the '%s/cvs2svn.lock' subdirectory."
5343 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5344 raise
5345 try:
5346 if profiling:
5347 import hotshot
5348 prof = hotshot.Profile('cvs2svn.hotshot')
5349 prof.runcall(convert, start_pass, end_pass)
5350 prof.close()
5351 else:
5352 convert(start_pass, end_pass)
5353 finally:
5354 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5355 except: pass
if __name__ == '__main__':
    # Script entry point.  A FatalException is reported by writing its
    # string form to stderr verbatim (nothing is added to it here) and
    # exiting with status 1; any other exception propagates.
    try:
        main()
    except FatalException:
        fatal = sys.exc_info()[1]
        sys.stderr.write(str(fatal))
        sys.exit(1)