Remove some explicit .keys() invocations which are no longer required now that
[cvs2svn.git] / cvs2svn
blob73f0f525e4e47c3397df20a47c447f8016e24074
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
# Version string: 'r' followed by whatever Subversion expanded the
# LastChangedRevision keyword to.  The slice [22:-2] drops the 22
# leading characters (the keyword name, colon and space) and the
# trailing ' $'; with the keyword unexpanded the slice is empty.
VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import fnmatch
30 import string
31 import getopt
32 import stat
33 import md5
34 import marshal
35 import errno
36 import popen2
37 import types
38 import ConfigParser
39 try:
40 # Try to get access to a bunch of encodings for use with --encoding.
41 # See http://cjkpython.i18n.org/ for details.
42 import iconv_codec
43 except ImportError:
44 pass
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough.
if sys.hexversion < 0x02020000:
    # The message used to begin with a stray, unbalanced single quote
    # ("'ERROR: ..."); it now starts directly with the error prefix like
    # every other error message in this file.
    sys.stderr.write("%s: Python 2.2 or higher required, "
                     "see www.python.org.\n" % error_prefix)
    sys.exit(1)
57 # Pretend we have true booleans on older python versions
58 try:
59 True
60 except:
61 True = 1
62 False = 0
# Opening pipes was a mess before Python 2.4, because some methods did
# not exist on some platforms, and some behaved differently on others.
# Python 2.4 solved this by adding the subprocess module, but since we
# cannot require such a new version, we cannot use it directly, but
# must implement a simplified Popen using the best means necessary.
#
# The SimplePopen class only has the following members and methods, all
# behaving as documented in the subprocess.Popen class:
# - stdin
# - stdout
# - stderr
# - wait
try:
    # First try subprocess.Popen...
    import subprocess

    class SimplePopen:
        """SimplePopen built on subprocess.Popen."""

        def __init__(self, cmd, capture_stderr):
            # stderr is only piped (and only exposed as an attribute)
            # when the caller asks for it.
            err_dest = None
            if capture_stderr:
                err_dest = subprocess.PIPE
            child = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=err_dest)
            self._popen = child
            self.stdin = child.stdin
            self.stdout = child.stdout
            if capture_stderr:
                self.stderr = child.stderr
            self.wait = child.wait
except ImportError:
    if hasattr(popen2, 'Popen3'):
        # ...then try popen2.Popen3...
        class SimplePopen:
            """SimplePopen built on popen2.Popen3."""

            def __init__(self, cmd, capture_stderr):
                child = popen2.Popen3(cmd, capture_stderr)
                self._popen3 = child
                self.stdin = child.tochild
                self.stdout = child.fromchild
                if capture_stderr:
                    self.stderr = child.childerr
                self.wait = child.wait
    else:
        # ...and if all fails, use popen2.popen3...
        class SimplePopen:
            """SimplePopen built on the popen2.popen3() function.

            CAPTURE_STDERR is ignored by this variant: stderr is always
            available."""

            def __init__(self, cmd, capture_stderr):
                # popen2.popen3() is handed a command string here, so an
                # argument list is flattened first.
                if type(cmd) is not types.StringType:
                    cmd = argv_to_command_string(cmd)
                (self.stdout, self.stdin, self.stderr) = \
                    popen2.popen3(cmd, mode='b')

            def wait(self):
                """Close the pipes in turn and return the first true
                value that a close() call yields (later pipes are not
                closed once one does)."""
                return (self.stdout.close() or self.stdin.close()
                        or self.stderr.close())
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
    import bsddb3
except ImportError:
    pass
else:
    sys.modules['bsddb'] = sys.modules['bsddb3']

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
    sys.stderr.write(
        error_prefix
        + ': your installation of Python does not contain a suitable\n'
          'DBM module -- cvs2svn cannot continue.\n'
          'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
    sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to prefer gdbm
#    instead.  Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb'):
    if not hasattr(anydbm._defaultmod.bsddb, '__version__'):
        try:
            gdbm = __import__('gdbm')
        except ImportError:
            # No gdbm to fall back on: keep the module we have, but warn.
            sys.stderr.write(
                warning_prefix
                + ': The version of the bsddb module found '
                  'on your computer has been reported to malfunction on some datasets, '
                  'causing KeyError exceptions. You may wish to upgrade your Python to '
                  'version 2.3 or later.\n')
        else:
            anydbm._defaultmod = gdbm
# Compiled patterns for dotted CVS numbers:
#   trunk_rev   -- exactly two numeric components, e.g. '1.7'
#   branch_tag  -- anything ending in '.0.N', e.g. '1.3.0.2'
#   vendor_tag  -- exactly three numeric components, e.g. '1.1.1'
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')
branch_tag = re.compile(r'^[0-9.]+\.0\.[0-9]+$')
vendor_tag = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+$')

SVN_KEYWORDS_VALUE = 'Author Date Id Revision'

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# NOTE(review): the '+' sits outside the second group, so group(2) only
# holds the last digit of the final component -- confirm that no caller
# relies on group(2) before changing it.
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9])+$')
# If this run's output is a repository, then (in the tmpdir) we use
# a dumpfile of this name for repository loads.
#
# If this run's output is a dumpfile, then this is the default name of
# that dumpfile, but in the current directory (unless the user has
# specified a dumpfile path, of course, in which case it will be
# wherever the user said).
DUMPFILE = 'cvs2svn-dump'

# This file appears with different suffixes at different stages of
# processing.  CVS revisions are cleaned and sorted here, for commit
# grouping.  See design-notes.txt for details.
DATAFILE = 'cvs2svn-data'

# This file contains a marshalled copy of all the statistics that we
# gather throughout the various runs of cvs2svn.  The data is stored as
# a marshalled dictionary.
STATISTICS_FILE = 'cvs2svn-statistics'

# This text file contains records (1 per line) that describe svn
# filesystem paths that are the opening and closing source revisions
# for copies to tags and branches.  The format is as follows:
#
#     SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
#
# Where type is either OPENING or CLOSING.  The SYMBOL_NAME and
# SVN_REVNUM are the primary and secondary sorting criteria for
# creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
# A sorted version of the above file.
SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'

# This file is a temporary file for storing symbolic_name -> closing
# CVSRevision until the end of our pass where we can look up the
# corresponding SVNRevNum for the closing revs and write these out to
# the SYMBOL_OPENINGS_CLOSINGS.
SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'

# Skeleton version of an svn filesystem.
# (These supersede and will eventually replace the two above.)
# See class SVNRepositoryMirror for how these work.
SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'

# Offsets pointing to the beginning of each SYMBOLIC_NAME in
# SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'

# Maps CVSRevision.unique_key()s to lists of symbolic names, where
# the CVSRevision is the last such that is a source for those symbolic
# names.  For example, if branch B's number is 1.3.0.2 in this CVS
# file, and this file's 1.3 is the latest (by date) revision among
# *all* CVS files that is a source for branch B, then the
# CVSRevision.unique_key() corresponding to this file at 1.3 would
# list at least B in its list.
SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'

# Maps CVSRevision.unique_key() to corresponding line in s-revs.
###PERF Or, we could map to an offset into s-revs, instead of dup'ing
### the s-revs data in this database.
CVS_REVS_DB = 'cvs2svn-cvs-revs.db'

# Lists all symbolic names that are tags.  Keys are strings (symbolic
# names), values are ignorable.
TAGS_DB = 'cvs2svn-tags.db'

# A list of all tags.  Each line consists of the tag name and the
# number of files in which it exists, separated by a space.
TAGS_LIST = 'cvs2svn-tags.txt'

# A list of all branches.  The file is stored as a plain text file
# to make it easy to look at in an editor.  Each line contains the
# branch name, the number of files where the branch is created, the
# commit count, and a list of tags and branches that are defined on
# revisions in the branch.
BRANCHES_LIST = 'cvs2svn-branches.txt'

# These two databases provide a bidirectional mapping between
# CVSRevision.unique_key()s and Subversion revision numbers.
#
# The first maps CVSRevision.unique_key() to a number; the values are
# not unique.
#
# The second maps a number to a list of CVSRevision.unique_key()s.
CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'

# This database maps svn_revnums to tuples of (symbolic_name, date).
#
# The svn_revnums are the revision numbers of all non-primary
# SVNCommits.  No primary SVNCommit has a key in this database.
#
# The date is stored for all commits in this database.
#
# For commits that fill symbolic names, the symbolic_name is stored.
# For commits that are default branch syncs, the symbolic_name is None.
SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'

# This database maps svn_revnums of a default branch synchronization
# commit to the svn_revnum of the primary SVNCommit that motivated it.
#
# (NOTE: Secondary commits that fill branches and tags also have a
# motivating commit, but we do not record it because it is (currently)
# not needed for anything.)
#
# This mapping is used when generating the log message for the commit
# that synchronizes the default branch with trunk.
MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'

# How many bytes to read at a time from a pipe.  128 kiB should be
# large enough to be efficient without wasting too much memory.
PIPE_READ_SIZE = 128 * 1024

# Record the default RCS branches, if any, for CVS filepaths.
#
# The keys are CVS filepaths, relative to the top of the repository
# and with the ",v" stripped off, so they match the cvs paths used in
# Commit.commit().  The values are vendor branch revisions, such as
# '1.1.1.1', or '1.1.1.2', or '1.1.1.96'.  The vendor branch revision
# represents the highest vendor branch revision thought to have ever
# been head of the default branch.
#
# The reason we record a specific vendor revision, rather than a
# default branch number, is that there are two cases to handle:
#
# One case is simple.  The RCS file lists a default branch explicitly
# in its header, such as '1.1.1'.  In this case, we know that every
# revision on the vendor branch is to be treated as head of trunk at
# that point in time.
#
# But there's also a degenerate case.  The RCS file does not currently
# have a default branch, yet we can deduce that for some period in the
# past it probably *did* have one.  For example, the file has vendor
# revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
# and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2.  In this
# case, we should record 1.1.1.96 as the last vendor revision to have
# been the head of the default branch.
DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'

# Records the author and log message for each changeset.
# The keys are author+log digests, the same kind used to identify
# unique revisions in the .revs, etc files.  Each value is a tuple
# of two elements: '(author logmessage)'.
METADATA_DB = "cvs2svn-metadata.db"

# A temporary on-disk hash that maps CVSRevision unique keys to a new
# timestamp for that CVSRevision.  These new timestamps are created in
# pass2, and this hash is used exclusively in pass2.
TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"

# Suffixes appended to file names at the different processing stages.
REVS_SUFFIX = '.revs'
CLEAN_REVS_SUFFIX = '.c-revs'
SORTED_REVS_SUFFIX = '.s-revs'
RESYNC_SUFFIX = '.resync'

SVN_INVALID_REVNUM = -1

COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs

# Things that can happen to a file.
OP_NOOP = '-'
OP_ADD = 'A'
OP_DELETE = 'D'
OP_CHANGE = 'C'

# A deltatext either does or doesn't represent some change.
DELTATEXT_NONEMPTY = 'N'
DELTATEXT_EMPTY = 'E'

# 9 plus the length of a hex-encoded SHA digest (two characters per
# digest byte).
# NOTE(review): the meaning of the leading 9 is not visible in this part
# of the file -- presumably the width of a fixed prefix; confirm.
DIGEST_END_IDX = 9 + (sha.digestsize * 2)

# Constants used in SYMBOL_OPENINGS_CLOSINGS
OPENING = 'O'
CLOSING = 'C'
class FatalException(Exception):
    """Signal an error from which the program cannot recover.

    When main() lets this exception escape, the global layer of the
    program catches it, prints its string representation, and ends the
    program with an exit code of 1."""
class FatalError(FatalException):
    """A FatalException whose message is prefixed with error_prefix."""

    def __init__(self, msg):
        """Build the message as error_prefix, a colon and a space, MSG,
        and a trailing newline."""
        full_message = '%s: %s\n' % (error_prefix, msg)
        FatalException.__init__(self, full_message)
def temp(basename):
    """Return the path of BASENAME inside Ctx().tmpdir.

    A shorthand that keeps call sites short."""
    tmpdir = Ctx().tmpdir
    return os.path.join(tmpdir, basename)
364 # Since the unofficial set also includes [/\] we need to translate those
365 # into ones that don't conflict with Subversion limitations.
366 def _clean_symbolic_name(name):
367 """Return symbolic name NAME, translating characters that Subversion
368 does not allow in a pathname."""
369 name = name.replace('/','++')
370 name = name.replace('\\','--')
371 return name
373 def _path_join(*components):
374 """Join two or more pathname COMPONENTS, inserting '/' as needed.
375 Empty component are skipped."""
376 return string.join(filter(None, components), '/')
378 def _path_split(path):
379 """Split the svn pathname PATH into a pair, (HEAD, TAIL).
381 This is similar to os.path.split(), but always uses '/' as path
382 separator. PATH is an svn path, which should not start with a '/'.
383 HEAD is everything before the last slash, and TAIL is everything
384 after. If PATH ends in a slash, TAIL will be empty. If there is no
385 slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
386 TAIL are empty."""
388 pos = path.rfind('/')
389 if pos == -1:
390 return ('', path,)
391 else:
392 return (path[:pos], path[pos+1:],)
def to_utf8(value, mode='replace'):
    """Return VALUE re-encoded as UTF-8.

    Each encoding in Ctx().encoding is tried in turn as the source
    encoding of VALUE, with MODE as the codec error-handling mode; the
    first one that decodes VALUE wins.  A failed attempt is logged at
    LOG_VERBOSE level.  Raise UnicodeError if every encoding fails."""
    ### FIXME: The 'replace' default mode should be an option,
    ### like --encoding is.
    for codec_name in Ctx().encoding:
        try:
            decoded = unicode(value, codec_name, mode)
            return decoded.encode('utf8')
        except UnicodeError:
            Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
                        % (codec_name, value))
    raise UnicodeError
def run_command(command):
    """Run the string COMMAND via os.system().

    Raise FatalError if os.system() returns a true (nonzero) status."""
    status = os.system(command)
    if status:
        raise FatalError('Command failed: "%s"' % (command,))
class CommandFailedException(Exception):
    """Raised by check_command_runs() when the checked command fails."""
419 def check_command_runs(cmd, cmdname):
420 """Check whether the command CMD can be executed without errors.
422 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
423 name of the command as it should be included in exception error
424 messages.
426 This function checks three things: (1) the command can be run
427 without throwing an OSError; (2) it exits with status=0; (3) it
428 doesn't output anything to stderr. If any of these conditions is
429 not met, raise a CommandFailedException describing the problem."""
431 try:
432 pipe = SimplePopen(cmd, True)
433 except OSError, e:
434 raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
435 pipe.stdin.close()
436 pipe.stdout.read()
437 errmsg = pipe.stderr.read()
438 status = pipe.wait()
439 if status != 0 or errmsg:
440 msg = 'error executing %s: status %s' % (cmdname, status,)
441 if errmsg:
442 msg += ', error output:\n%s' % (errmsg,)
443 raise CommandFailedException(msg)
class CVSRepository:
    """A CVS repository from which data can be extracted."""

    def __init__(self, cvs_repos_path):
        """Remember CVS_REPOS_PATH, the top of the CVS repository (at
        least as far as this run is concerned).

        Raise FatalError if CVS_REPOS_PATH is not an existing
        directory."""
        if not os.path.isdir(cvs_repos_path):
            raise FatalError("The specified CVS repository path '%s' is not an "
                             "existing directory." % cvs_repos_path)

        self.cvs_repos_path = os.path.normpath(cvs_repos_path)
        # Matches the repository path at the start of a filename, when it
        # is followed by a path separator or by the end of the string.
        prefix_pattern = ''.join(['^', re.escape(self.cvs_repos_path),
                                  '(', re.escape(os.sep), '|$)'])
        self.cvs_prefix_re = re.compile(prefix_pattern)

    def get_cvs_path(self, fname):
        """Return FNAME relative to cvs_repos_path, with ',v' removed.

        FNAME is a filesystem name that has to lie within
        self.cvs_repos_path.  The result is the filename relative to
        self.cvs_repos_path, with a trailing ',v' stripped off if
        present, and with os.sep converted to '/'.

        Raise FatalError if FNAME is not within self.cvs_repos_path."""
        relative, hits = self.cvs_prefix_re.subn('', fname, 1)
        if hits != 1:
            raise FatalError(
                "get_cvs_path: '%s' is not a sub-path of '%s'"
                % (fname, self.cvs_repos_path,))
        if relative[-2:] == ',v':
            relative = relative[:-2]
        return relative.replace(os.sep, '/')

    def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
        """Return a command string, and the pipe created using that
        string.

        C_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION is
        True, then suppress the substitution of RCS/CVS keywords in the
        output.  The pipe returns the text of that CVS Revision.

        Subclasses must override this method; here it only raises
        NotImplementedError."""
        raise NotImplementedError
487 class CVSRepositoryViaRCS(CVSRepository):
488 """A CVSRepository accessed via RCS."""
490 def __init__(self, cvs_repos_path):
491 CVSRepository.__init__(self, cvs_repos_path)
492 try:
493 check_command_runs([ 'co', '-V' ], 'co')
494 except CommandFailedException, e:
495 raise FatalError('%s\n'
496 'Please check that co is installed and in your PATH\n'
497 '(it is a part of the RCS software).' % (e,))
499 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
500 pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
501 if suppress_keyword_substitution:
502 pipe_cmd.append('-kk')
503 pipe_cmd.append(c_rev.rcs_path())
504 pipe = SimplePopen(pipe_cmd, True)
505 pipe.stdin.close()
506 return pipe_cmd, pipe
509 class CVSRepositoryViaCVS(CVSRepository):
510 """A CVSRepository accessed via CVS."""
512 def __init__(self, cvs_repos_path):
513 CVSRepository.__init__(self, cvs_repos_path)
514 # Ascend above the specified root if necessary, to find the
515 # cvs_repository_root (a directory containing a CVSROOT directory)
516 # and the cvs_module (the path of the conversion root within the
517 # cvs repository) NB: cvs_module must be seperated by '/' *not* by
518 # os.sep .
519 def is_cvs_repository_root(path):
520 return os.path.isdir(os.path.join(path, 'CVSROOT'))
522 self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
523 self.cvs_module = ""
524 while not is_cvs_repository_root(self.cvs_repository_root):
525 # Step up one directory:
526 prev_cvs_repository_root = self.cvs_repository_root
527 self.cvs_repository_root, module_component = \
528 os.path.split(self.cvs_repository_root)
529 if self.cvs_repository_root == prev_cvs_repository_root:
530 # Hit the root (of the drive, on Windows) without finding a
531 # CVSROOT dir.
532 raise FatalError(
533 "the path '%s' is not a CVS repository, nor a path "
534 "within a CVS repository. A CVS repository contains "
535 "a CVSROOT directory within its root directory."
536 % (self.cvs_repos_path,))
538 self.cvs_module = module_component + "/" + self.cvs_module
540 os.environ['CVSROOT'] = self.cvs_repository_root
542 def cvs_ok(global_arguments):
543 check_command_runs(
544 [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
546 self.global_arguments = [ "-q", "-R" ]
547 try:
548 cvs_ok(self.global_arguments)
549 except CommandFailedException, e:
550 self.global_arguments = [ "-q" ]
551 try:
552 cvs_ok(self.global_arguments)
553 except CommandFailedException, e:
554 raise FatalError(
555 '%s\n'
556 'Please check that cvs is installed and in your PATH.' % (e,))
558 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
559 pipe_cmd = [ 'cvs' ] + self.global_arguments + \
560 [ 'co', '-r' + c_rev.rev, '-p' ]
561 if suppress_keyword_substitution:
562 pipe_cmd.append('-kk')
563 pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
564 pipe = SimplePopen(pipe_cmd, True)
565 pipe.stdin.close()
566 return pipe_cmd, pipe
def generate_ignores(c_rev):
    """Return the list of ignore patterns held in the file revision
    C_REV.

    The text of C_REV is read through the pipe supplied by
    Ctx().cvs_repository and split on whitespace.  A lone '!' discards
    every pattern collected before it.  Raise FatalError if the
    checkout command exits with a nonzero status."""
    # Read in props
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
    chunks = [ ]
    while True:
        buf = pipe.stdout.read(PIPE_READ_SIZE)
        if not buf:
            break
        chunks.append(buf)
    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
        raise FatalError("The command '%s' failed with exit status: %s\n"
                         "and the following output:\n"
                         "%s" % (pipe_cmd, exit_status, error_output))

    # Tweak props: any run of whitespace separates two patterns, so
    # empty entries never occur.
    ignore_vals = [ ]
    for ignore in ''.join(chunks).split():
        if ignore == '!':
            # Reset the list if we encounter a '!'
            # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
            ignore_vals = [ ]
        else:
            ignore_vals.append(ignore)
    return ignore_vals
# Counter behind gen_key(); it grows by one on every call.
gen_key_base = 0

def gen_key():
    """Return a string that has not been returned by gen_key() before.

    The string is the lowercase hexadecimal form of a counter that is
    incremented on every call."""
    global gen_key_base
    key = '%x' % gen_key_base
    gen_key_base += 1
    return key
# ============================================================================
# This code is copied with a few modifications from:
# subversion/subversion/bindings/swig/python/svn/core.py

if sys.platform == "win32":
    _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

    def escape_shell_arg(arg):
        """Return ARG quoted as a single command-line argument."""
        # The (very strange) parsing rules used by the C runtime library
        # are described at:
        # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

        # double up slashes, but only if they are followed by a quote
        # character
        doubled = _escape_shell_arg_re.sub(r'\1\1\2', arg)

        # surround by quotes and escape quotes inside
        return '"' + doubled.replace('"', '"^""') + '"'

    def argv_to_command_string(argv):
        """Flatten a list of command line arguments into a command string.

        The resulting command string is expected to be passed to the
        system shell which os functions like popen() and system() invoke
        internally.
        """
        # According cmd's usage notes (cmd /?), it parses the command line
        # by "seeing if the first character is a quote character and if
        # so, stripping the leading character and removing the last quote
        # character."  So to prevent the argument string from being
        # changed we add an extra set of quotes around it here.
        quoted = [escape_shell_arg(arg) for arg in argv]
        return '"' + " ".join(quoted) + '"'

else:
    def escape_shell_arg(str):
        """Return STR wrapped in single quotes, with every embedded
        single quote rewritten as '\\'' ."""
        return "'" + str.replace("'", "'\\''") + "'"

    def argv_to_command_string(argv):
        """Flatten a list of command line arguments into a command string.

        The resulting command string is expected to be passed to the
        system shell which os functions like popen() and system() invoke
        internally.
        """
        quoted = [escape_shell_arg(arg) for arg in argv]
        return " ".join(quoted)
# ============================================================================
def format_date(date):
    """Return an svn-compatible date string for DATE (seconds since
    epoch), expressed in UTC."""
    # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
    utc = time.gmtime(date)
    return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc)
def sort_file(infile, outfile):
    """Sort the lines of INFILE into OUTFILE with the external 'sort'
    command, using Ctx().tmpdir for sort's temporary files.

    The sort runs with LC_ALL=C.  The previous value of LC_ALL (or its
    absence) is restored afterwards, even if the command fails.
    run_command() raises FatalError if sort exits with a nonzero
    status."""
    # GNU sort will sort our dates differently (incorrectly!) if our
    # LC_ALL is anything but 'C', so force it to 'C' for the duration of
    # the command and remember what it was before.
    lc_all_tmp = os.environ.get('LC_ALL', None)
    os.environ['LC_ALL'] = 'C'
    try:
        # The -T option to sort has a nice side effect.  The Win32 sort is
        # case insensitive and cannot be used, and since it does not
        # understand the -T option and dies if we try to use it, there is
        # no risk that we use that sort by accident.
        run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
    finally:
        # Without the try/finally a failing sort would leave LC_ALL
        # overridden for the rest of the process.
        if lc_all_tmp is None:
            del os.environ['LC_ALL']
        else:
            os.environ['LC_ALL'] = lc_all_tmp
def match_regexp_list(regexp_list, string):
    """Return True if at least one of the compiled regexps in
    REGEXP_LIST matches STRING (from its beginning, as regexp.match()
    does), and False otherwise."""
    for pattern in regexp_list:
        if pattern.match(string) is not None:
            return True
    return False
class LF_EOL_Filter:
    """Wrap a stream so that every end-of-line marker read from it
    (CRLF, CR or LF) comes out as a single LF."""

    def __init__(self, stream):
        self.stream = stream
        # True while a CR that ended the previous chunk is being held
        # back; it may turn out to be the first half of a CRLF pair.
        self.carry_cr = False
        self.eof = False

    def read(self, size):
        """Read up to SIZE characters from the underlying stream and
        return them with the end-of-line markers converted.

        An empty string is returned only at end of file."""
        while True:
            chunk = self.stream.read(size)
            self.eof = len(chunk) == 0
            if self.carry_cr:
                chunk = '\r' + chunk
                self.carry_cr = False
            if not self.eof and chunk[-1:] == '\r':
                # Hold the trailing CR back until we see what follows it.
                self.carry_cr = True
                chunk = chunk[:-1]
            chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
            # A chunk that consisted of just the held-back CR is empty
            # now; read again rather than signalling EOF by mistake.
            if chunk or self.eof:
                return chunk
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2


class Log:
    """A simple logging facility.

    Each line will be timestamped if self.use_timestamps is true.  This
    class is a Borg (every instance shares the same state), see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    __shared_state = {}

    def __init__(self):
        self.__dict__ = self.__shared_state
        if self.__dict__:
            # The shared state was already set up by an earlier instance.
            return
        self.log_level = LOG_NORMAL
        # Set this to true if you want to see timestamps on each line output.
        self.use_timestamps = None
        self.logger = sys.stdout

    def _timestamp(self):
        """Write a detailed timestamp (no newline) to the log; write()
        calls this at the beginning of a line."""
        # The format used to read '%I:%m:%S', which printed the *month*
        # where the minutes belong and a 12-hour hour without any AM/PM
        # marker.  Use the 24-hour hour and the minutes instead.
        self.logger.write(time.strftime('[%Y-%m-%d %H:%M:%S %Z] - '))

    def write(self, log_level, *args):
        """This is the public method to use for writing to the log.

        Only messages whose LOG_LEVEL is <= self.log_level will be
        printed.  If there are multiple ARGS, they will be separated by
        a space."""
        if log_level > self.log_level:
            return
        if self.use_timestamps:
            self._timestamp()
        self.logger.write(' '.join(map(str, args)) + "\n")
        # Ensure that log output doesn't get out-of-order with respect to
        # stderr output.
        self.logger.flush()
class Cleanup:
    """This singleton class manages any files created by cvs2svn.

    When you first create a file, call Cleanup.register, passing the
    filename, and the last pass that you need the file.  After the end
    of that pass, your file will be cleaned up after running an optional
    callback.  This class is a Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    __shared_state = {}

    def __init__(self):
        self.__dict__ = self.__shared_state
        if self.__dict__:
            # The shared state was already set up by an earlier instance.
            return
        # Maps a pass to a dictionary whose keys are the files to delete
        # after that pass.
        self._log = {}
        # Maps a file to the callback to run just before deleting it.
        self._callbacks = {}

    def register(self, file, which_pass, callback=None):
        """Register FILE for cleanup at the end of WHICH_PASS, running
        function CALLBACK prior to removal.

        Registering a given FILE is idempotent; you may register as many
        times as you wish, but it will only be cleaned up once.

        Note that if a file is registered multiple times, only the first
        callback registered for that file will be called at cleanup
        time.  Also note that if you register a database file you must
        close the database before cleanup, e.g. using a callback."""
        files_for_pass = self._log.setdefault(which_pass, {})
        files_for_pass[file] = 1
        if callback and file not in self._callbacks:
            self._callbacks[file] = callback

    def cleanup(self, which_pass):
        """Clean up all files, and invoke callbacks, for pass WHICH_PASS.

        Nothing happens if no file was registered for WHICH_PASS."""
        for file in self._log.get(which_pass, {}):
            Log().write(LOG_VERBOSE, "Deleting", file)
            callback = self._callbacks.get(file)
            if callback is not None:
                callback()
            os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'


class AbstractDatabase:
    """An abstract base class for anydbm-based databases.

    Subclasses supply __getitem__ and __setitem__ on top of self.db."""

    def __init__(self, filename, mode):
        """Open the anydbm database FILENAME with the given MODE."""
        # pybsddb3 has a bug which prevents it from working with
        # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
        # causes the DB_TRUNCATE flag to be passed, which is disallowed
        # for databases protected by lock and transaction support
        # (bsddb databases use locking from bsddb version 4.2.4 onwards).
        #
        # Therefore, manually perform the removal (we can do this, because
        # we know that for bsddb - but *not* anydbm in general - the
        # database consists of one file with the name we specify, rather
        # than several based on that name).
        using_dbhash = anydbm._defaultmod.__name__ == 'dbhash'
        if using_dbhash and mode == DB_OPEN_NEW:
            if os.path.isfile(filename):
                os.unlink(filename)
            mode = 'c'

        db = anydbm.open(filename, mode)
        self.db = db
        # Forward these two operations straight to the underlying
        # database object.
        self.has_key = db.has_key
        self.__delitem__ = db.__delitem__

    def get(self, key, default=None):
        """Return the value stored under KEY, or DEFAULT if there is
        none.

        bsddb3 doesn't have a get() method, so define one here."""
        try:
            value = self[key]
        except KeyError:
            value = default
        return value
class SDatabase(AbstractDatabase):
    """A database that can only store strings.

    Values go to and from the underlying database unchanged."""

    def __getitem__(self, key):
        stored = self.db[key]
        return stored

    def __setitem__(self, key, value):
        self.db[key] = value
class Database(AbstractDatabase):
    """A database that uses the marshal module to store built-in types.

    Values are marshalled on the way in and unmarshalled on the way
    out."""

    def __getitem__(self, key):
        serialized = self.db[key]
        return marshal.loads(serialized)

    def __setitem__(self, key, value):
        serialized = marshal.dumps(value)
        self.db[key] = serialized
class StatsKeeper:
  """Collect statistics about the conversion and report them.

  Every instance shares a single state dictionary, so counters updated
  through one instance are visible through all the others.  The
  statistics held in self.data can be saved to and restored from
  self.filename with archive() and unarchive()."""

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # An earlier instance has already initialized the shared state.
      return
    self.filename = temp(STATISTICS_FILE)
    Cleanup().register(self.filename, pass8)
    # This can get kinda large, so we don't store it in our data dict.
    self.repos_files = { }

    if os.path.exists(self.filename):
      self.unarchive()
    else:
      self.data = { 'cvs_revs_count' : 0,
                    'tags': { },
                    'branches' : { },
                    'repos_size' : 0,
                    'repos_file_count' : 0,
                    'svn_rev_count' : None,
                    # Sentinel: any timestamp below 2**32 replaces it
                    # in record_c_rev().
                    'first_rev_date' : 2 ** 32,
                    'last_rev_date' : 0,
                    'pass_timings' : { },
                    'start_time' : 0,
                    'end_time' : 0,
                    }

  def log_duration_for_pass(self, duration, pass_num):
    """Record that pass number PASS_NUM took DURATION seconds."""
    self.data['pass_timings'][pass_num] = duration

  def set_start_time(self, start):
    """Record START as the start time of the conversion."""
    self.data['start_time'] = start

  def set_end_time(self, end):
    """Record END as the end time of the conversion."""
    self.data['end_time'] = end

  def _bump_item(self, key, amount=1):
    """Add AMOUNT to the counter stored under KEY in self.data."""
    self.data[key] = self.data[key] + amount

  def reset_c_rev_info(self):
    """Forget the revision count, tags and branches seen so far."""
    self.data['cvs_revs_count'] = 0
    self.data['tags'] = { }
    self.data['branches'] = { }

  def record_c_rev(self, c_rev):
    """Update the statistics with the CVSRevision C_REV."""
    self._bump_item('cvs_revs_count')

    # The dictionaries are used as sets; the values are not used.
    for tag in c_rev.tags:
      self.data['tags'][tag] = None
    for branch in c_rev.branches:
      self.data['branches'][branch] = None

    if c_rev.timestamp < self.data['first_rev_date']:
      self.data['first_rev_date'] = c_rev.timestamp

    if c_rev.timestamp > self.data['last_rev_date']:
      self.data['last_rev_date'] = c_rev.timestamp

    # Only add the size if this is the first time we see the file.
    if c_rev.fname not in self.repos_files:
      self._bump_item('repos_size', c_rev.file_size)
    self.repos_files[c_rev.fname] = None

    self.data['repos_file_count'] = len(self.repos_files)

  def set_svn_rev_count(self, count):
    """Record COUNT as the number of SVN revisions."""
    self.data['svn_rev_count'] = count

  def svn_rev_count(self):
    """Return the recorded number of SVN revisions (None if unset)."""
    return self.data['svn_rev_count']

  def archive(self):
    """Write self.data to self.filename in marshal format."""
    # Marshal output is binary data, so the file must be opened in
    # binary mode; it is closed explicitly so that the data is on disk
    # when this method returns.
    f = open(self.filename, 'wb')
    try:
      f.write(marshal.dumps(self.data))
    finally:
      f.close()

  def unarchive(self):
    """Load self.data from self.filename (see archive())."""
    f = open(self.filename, 'rb')
    try:
      self.data = marshal.loads(f.read())
    finally:
      f.close()

  def __str__(self):
    """Return a multi-line, human-readable summary of the statistics."""
    svn_revs_str = ""
    if self.data['svn_rev_count'] is not None:
      svn_revs_str = ('Total SVN Commits: %10s\n'
                      % self.data['svn_rev_count'])

    return ('\n'                                \
            'cvs2svn Statistics:\n'             \
            '------------------\n'              \
            'Total CVS Files: %10i\n'           \
            'Total CVS Revisions: %10i\n'       \
            'Total Unique Tags: %10i\n'         \
            'Total Unique Branches: %10i\n'     \
            'CVS Repos Size in KB: %10i\n'      \
            '%s'                                \
            'First Revision Date: %s\n'         \
            'Last Revision Date: %s\n'          \
            '------------------'                \
            % (self.data['repos_file_count'],
               self.data['cvs_revs_count'],
               len(self.data['tags']),
               len(self.data['branches']),
               (self.data['repos_size'] / 1024),
               svn_revs_str,
               time.ctime(self.data['first_rev_date']),
               time.ctime(self.data['last_rev_date']),
               ))

  def timings(self):
    """Return a report of the time taken by each pass and in total."""
    passes = list(self.data['pass_timings'])
    passes.sort()
    report = 'Timings:\n------------------\n'

    def desc(val):
      if val == 1: return "second"
      return "seconds"

    for pass_num in passes:
      duration = int(self.data['pass_timings'][pass_num])
      p_str = ('pass %d:%6d %s\n'
               % (pass_num, duration, desc(duration)))
      report = report + p_str

    total = int(self.data['end_time'] - self.data['start_time'])
    report = report + ('total: %6d %s' % (total, desc(total)))
    return report
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolic name was
  seen in, and whose value is a list of all symbolic names that were
  last seen in that revision."""

  def __init__(self, mode):
    """Open the underlying Database in MODE."""
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  def log_revision(self, c_rev):
    """Record C_REV as the latest revision for each of its symbols.

    Once we've gone through all the revs, the keys of self.symbols
    will be all tags and branches, and their corresponding values will
    be the unique key of the last CVS revision that they were used
    in."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare by value, not identity: an op string need not be the
    # very same object as the OP_DELETE constant.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  def create_database(self):
    """Write the inversion of self.symbols to the database.

    The result maps each CVS rev unique_key to the list of symbols
    that close in that rev."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """A Database to store CVSRevision objects and retrieve them by their
  unique_key()."""

  def __init__(self, mode):
    """Initialize an instance, opening database in MODE (like the MODE
    argument to Database or anydbm.open())."""
    self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
    cleanup = Cleanup()
    cleanup.register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Add C_REV, a CVSRevision, to the database.

    The revision is stored in its string form under its unique key."""
    serialized = str(c_rev)
    self.cvs_revs_db[c_rev.unique_key()] = serialized

  def get_revision(self, unique_key):
    """Return the CVSRevision stored under UNIQUE_KEY."""
    ctx = Ctx()
    revs_line = self.cvs_revs_db[unique_key]
    return CVSRevision(ctx, revs_line)
def TagsDatabase(mode):
  """Return an SDatabase, opened in MODE, recording which symbolic
  names are tags.

  Each key is a tag name.
  The value has no meaning, and should be set to None."""
  tags_db = SDatabase(temp(TAGS_DB), mode)
  cleanup = Cleanup()
  cleanup.register(temp(TAGS_DB), pass8)
  return tags_db
class Project:
  """A project within a CVS repository."""

  def __init__(self, project_cvs_repos_path,
               trunk_path, branches_path, tags_path):
    """Create a new Project record.

    PROJECT_CVS_REPOS_PATH is the main CVS directory for this project
    (within the filesystem).  TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH
    are the full, normalized directory names in svn for the
    corresponding part of the repository.

    Raise FatalError if PROJECT_CVS_REPOS_PATH does not start with the
    repository's cvs_repos_path."""

    self.project_cvs_repos_path = project_cvs_repos_path
    prefix = Ctx().cvs_repository.cvs_repos_path
    if not self.project_cvs_repos_path.startswith(prefix):
      raise FatalError("Project '%s' must start with '%s'"
                       % (self.project_cvs_repos_path, prefix,))
    # The project's main directory as a cvs_path:
    self.project_cvs_path = self.project_cvs_repos_path[len(prefix):]
    if self.project_cvs_path.startswith(os.sep):
      self.project_cvs_path = self.project_cvs_path[1:]
    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH is a legitimate source for this project.

    Legitimate paths are self.trunk_path or any directory directly
    under self.branches_path."""

    if svn_path == self.trunk_path:
      return True

    (head, tail,) = _path_split(svn_path)
    if head == self.branches_path:
      return True

    return False

  def is_unremovable(self, svn_path):
    """Return True iff the specified path must not be removed."""

    return svn_path in [self.trunk_path, self.branches_path, self.tags_path]

  def get_branch_path(self, branch_name):
    """Return the svnpath for the branch named BRANCH_NAME."""

    return _path_join(self.branches_path, _clean_symbolic_name(branch_name))

  def get_tag_path(self, tag_name):
    """Return the svnpath for the tag named TAG_NAME."""

    return _path_join(self.tags_path, _clean_symbolic_name(tag_name))

  def _relative_name(self, cvs_path):
    """Convert CVS_PATH into a name relative to this project's root directory.

    CVS_PATH has to begin (textually) with self.project_cvs_path.
    Remove prefix and optional '/'.  If CVS_PATH is exactly the project
    path, the result is the empty string.  Raise FatalError if CVS_PATH
    does not begin with self.project_cvs_path."""

    if not cvs_path.startswith(self.project_cvs_path):
      raise FatalError(
          "_relative_name: '%s' is not a sub-path of '%s'"
          % (cvs_path, self.project_cvs_path,))
    prefix_len = len(self.project_cvs_path)
    # Slice instead of indexing: CVS_PATH may end right after the
    # prefix, in which case there is no separator to skip.
    if cvs_path[prefix_len:prefix_len + 1] == os.sep:
      prefix_len += 1
    return cvs_path[prefix_len:]

  def make_trunk_path(self, cvs_path):
    """Return the trunk path for CVS_PATH.

    Return the svn path for this file on trunk."""

    return _path_join(self.trunk_path, self._relative_name(cvs_path))

  def make_branch_path(self, branch_name, cvs_path):
    """Return the svn path for CVS_PATH on branch BRANCH_NAME."""

    return _path_join(self.get_branch_path(branch_name),
                      self._relative_name(cvs_path))
class CVSRevision:
  """A single revision of a file in a CVS repository."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Raise TypeError if ARGS has any other length.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      # The first 15 fields are single words; everything after them
      # (tags, branches and the file name) is picked apart below.
      data = args[0].split(' ', 15)
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.mode, self.branch_name, numtags, remainder) = data
      # Patch up data items which are not simple strings
      self.timestamp = int(self.timestamp, 16)
      if self.prev_timestamp == "*":
        self.prev_timestamp = 0
      else:
        self.prev_timestamp = int(self.prev_timestamp)
      if self.next_timestamp == "*":
        self.next_timestamp = 0
      else:
        self.next_timestamp = int(self.next_timestamp)
      if self.prev_rev == "*":
        self.prev_rev = None
      if self.next_rev == "*":
        self.next_rev = None
      if self.file_in_attic == "*":
        self.file_in_attic = None
      if self.file_executable == "*":
        self.file_executable = None
      self.file_size = int(self.file_size)
      if self.mode == "*":
        self.mode = None
      if self.branch_name == "*":
        self.branch_name = None
      numtags = int(numtags)
      # REMAINDER is NUMTAGS tag names, then the branch count, then the
      # branch names and finally the file name (which may contain
      # spaces, hence the bounded splits).
      tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
      self.tags = tags_and_numbranches_and_remainder[:-2]
      numbranches = int(tags_and_numbranches_and_remainder[-2])
      remainder = tags_and_numbranches_and_remainder[-1]
      branches_and_fname = remainder.split(' ', numbranches)
      self.branches = branches_and_fname[:-1]
      self.fname = branches_and_fname[-1]
    else:
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.cvs_repository.get_cvs_path(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(self.branch_name,
                                                     self.cvs_path)
      else:
        self.svn_path = ctx.project.make_trunk_path(self.cvs_path)

  def unique_key(self, revnum="0"):
    """Return a key that is unique to this revision of this file.

    The 'primary key' of a CVS Revision is the revision number + the
    filename.  To provide a unique key (say, for a dict), we just glom
    them together in a string.  By passing in self.prev_rev or
    self.next_rev as REVNUM, you can get the unique key for their
    respective CVSRevisions.  If REVNUM is None, return None."""
    # "0" is the 'argument omitted' marker; compare it by value rather
    # than by object identity.
    if revnum == "0":
      revnum = self.rev
    elif revnum is None:
      return None
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision in the format of a line from a revs file
    (without a trailing newline); None and zero fields become "*"."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
               self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
               self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
               (self.file_executable or "*"),
               self.file_size,
               self.deltatext_code, (self.mode or "*"),
               (self.branch_name or "*"),
               len(self.tags), self.tags and " " or "", " ".join(self.tags),
               len(self.branches), self.branches and " " or "",
               " ".join(self.branches),
               self.fname, ))

  def opens_symbolic_name(self, name):
    """Return 1 if this CVSRevision is the opening CVSRevision for
    NAME (for this RCS file), else return 0."""
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # the branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags:
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches:
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """If BRANCH is in the hash EXCLUDES, return a hash of those of
    its blockers that are not themselves in EXCLUDES; otherwise return
    an empty hash.  The hash is used as a set so the values are not
    used."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches:
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches:
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files."""
    # Each line is "<tag> <count>".
    f = open(temp(TAGS_LIST))
    try:
      for line in f:
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    # Each line is "<branch> <create_count> <commit_count> [blocker...]".
    f = open(temp(BRANCHES_LIST))
    try:
      for line in f:
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      # Close explicitly so the data is on disk when write() returns.
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1416 class CollectData(cvs2svn_rcsparse.Sink):
def __init__(self):
  """Open the output files and databases used while collecting data.

  See set_fname() for initializations of the per-file variables."""
  # Output files.
  revs_file = open(temp(DATAFILE + REVS_SUFFIX), 'w')
  self.revs = revs_file
  Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
  resync_file = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
  self.resync = resync_file
  Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)

  # Databases, created afresh.
  self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                       DB_OPEN_NEW)
  Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
  self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
  Cleanup().register(temp(METADATA_DB), pass8)

  # Bookkeeping.
  self.fatal_errors = []
  self.num_files = 0
  self.symbol_db = SymbolDatabase()

  # 1 if we've collected data for at least one file, None otherwise.
  self.found_valid_file = None
def set_fname(self, canonical_name, filename):
  """Prepare to receive data for FILENAME.

  FILENAME is the absolute filesystem path to the file in question,
  and CANONICAL_NAME is FILENAME with the 'Attic' component removed
  (if the file is indeed in the Attic)."""
  self.fname = canonical_name

  # Some file metadata is calculated and saved here, where it is done
  # only once per file, instead of later, where the same calculations
  # would be repeated once per CVS *revision*.

  self.cvs_path = Ctx().cvs_repository.get_cvs_path(self.fname)

  # If the two names differ, the 'Attic' component has been stripped
  # out of canonical_name, i.e. the file lives in the Attic.
  if canonical_name != filename:
    self.file_in_attic = 1
  else:
    self.file_in_attic = None

  file_stat = os.stat(filename)
  # The size of our file in bytes
  self.file_size = file_stat[stat.ST_SIZE]

  # Whether or not the executable bit is set.
  if file_stat[0] & stat.S_IXUSR:
    self.file_executable = 1
  else:
    self.file_executable = None

  # revision -> [timestamp, author, old-timestamp]
  self.rev_data = { }

  # Maps revision number (key) to the revision number of the
  # previous revision along this line of development.
  #
  # For the first revision R on a branch, we consider the revision
  # from which R sprouted to be the 'previous'.
  #
  # Note that this revision can't be determined arithmetically (due
  # to cvsadmin -o, which is why this is necessary).
  #
  # If the key has no previous revision, then store None as key's
  # value.
  self.prev_rev = { }

  # This dict is essentially self.prev_rev with the values mapped in
  # the other direction, so following key -> value will yield you
  # the next revision number.
  #
  # Unlike self.prev_rev, if the key has no next revision, then the
  # key is not present.
  self.next_rev = { }

  # Track the state of each revision so that in set_revision_info,
  # we can determine if our op is an add/change/delete.  We can do
  # this because in set_revision_info, we'll have all of the
  # revisions for a file at our fingertips, and we need to examine
  # the state of our prev_rev to determine if we're an add or a
  # change--without the state of the prev_rev, we are unable to
  # distinguish between an add and a change.
  self.rev_state = { }

  # Hash mapping branch numbers, like '1.7.2', to branch names,
  # like 'Release_1_0_dev'.
  self.branch_names = { }

  # RCS flags (used for keyword expansion).
  self.mode = None

  # Hash mapping revision numbers, like '1.7', to lists of names
  # indicating which branches sprout from that revision, like
  # ['Release_1_0_dev', 'experimental_driver', ...].
  self.branchlist = { }

  # Like self.branchlist, but the values are lists of tag names that
  # apply to the key revision.
  self.taglist = { }

  # If set, this is an RCS branch number -- rcsparse calls this the
  # "principal branch", but CVS and RCS refer to it as the "default
  # branch", so that's what we call it, even though the rcsparse API
  # setter method is still 'set_principal_branch'.
  self.default_branch = None

  # If the RCS file doesn't have a default branch anymore, but does
  # have vendor revisions, then we make an educated guess that those
  # revisions *were* the head of the default branch up until the
  # commit of 1.2, at which point the file's default branch became
  # trunk.  This records the date at which 1.2 was committed.
  self.first_non_vendor_revision_date = None

  # A list of all symbols defined for the current file.  Used to
  # prevent multiple definitions of a symbol, something which can
  # easily happen when --symbol-transform is used.
  self.defined_symbols = { }
def set_principal_branch(self, branch):
  """Record BRANCH as the default branch of the current file."""
  self.default_branch = branch
def set_expansion(self, mode):
  """Record MODE, the RCS flags used for keyword expansion, for the
  current file."""
  self.mode = mode
def set_branch_name(self, branch_number, name):
  """Record that BRANCH_NUMBER is the branch number for branch NAME,
  and that NAME sprouts from BRANCH_NUMBER.

  BRANCH_NUMBER is an RCS branch number with an odd number of
  components, for example '1.7.2' (never '1.7.0.2').  If
  BRANCH_NUMBER already has a name, write a warning to stderr and
  ignore NAME."""
  if branch_number in self.branch_names:
    sys.stderr.write("%s: in '%s':\n"
                     " branch '%s' already has name '%s',\n"
                     " cannot also have name '%s', ignoring the latter\n"
                     % (warning_prefix, self.fname, branch_number,
                        self.branch_names[branch_number], name))
    return

  self.branch_names[branch_number] = name
  # The branchlist is keyed on the revision number from which the
  # branch sprouts, so strip off the odd final component.
  last_dot = branch_number.rfind(".")
  sprout_rev = branch_number[:last_dot]
  self.branchlist.setdefault(sprout_rev, []).append(name)
  self.symbol_db.register_branch_creation(name)
def rev_to_branch_name(self, revision):
  """Return the name of the branch on which REVISION lies.

  REVISION is a non-branch revision number with an even number of
  components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
  For the convenience of callers, REVISION can also be a trunk
  revision such as '1.2', in which case just return None."""
  if not trunk_rev.match(revision):
    # Dropping the last component yields the branch number.
    branch_number = revision[:revision.rindex(".")]
    return self.branch_names.get(branch_number)
  return None
def add_cvs_branch(self, revision, branch_name):
  """Record the root revision and branch revision for BRANCH_NAME,
  based on REVISION.

  REVISION is a CVS branch number having an even number of components
  where the second-to-last is '0'.  For example, if it's '1.7.0.2',
  then record that BRANCH_NAME sprouts from 1.7 and has branch number
  1.7.2."""
  # Cut out the second-to-last component: keep everything before it
  # and re-attach the final ".N".
  final_dot = revision.rfind(".")
  stem = revision[:final_dot]
  branch_number = stem[:stem.rfind(".")] + revision[final_dot:]
  self.set_branch_name(branch_number, branch_name)
def define_tag(self, name, revision):
  """Record a bidirectional mapping between symbolic NAME and REVISION.

  REVISION is an unprocessed revision number from the RCS file's
  header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
  This function will determine what kind of symbolic name it is by
  inspection, and record it in the right places."""
  # Apply the symbol transforms in order, each to the result of the
  # previous one.
  for (pattern, replacement) in Ctx().symbol_transforms:
    newname = pattern.sub(replacement, name)
    if newname == name:
      continue
    Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                % (name, newname))
    name = newname

  # A second definition of the same symbol is recorded as a fatal
  # error, but processing of the symbol continues.
  if name in self.defined_symbols:
    err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
          % (error_prefix, name, self.fname)
    sys.stderr.write(err + "\n")
    self.fatal_errors.append(err)
  self.defined_symbols[name] = None

  if branch_tag.match(revision):
    self.add_cvs_branch(revision, name)
  elif vendor_tag.match(revision):
    self.set_branch_name(revision, name)
  else:
    tags_on_revision = self.taglist.setdefault(revision, [])
    tags_on_revision.append(name)
    self.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
  """Record the data for REVISION of the current file.

  TIMESTAMP (stored as an int), AUTHOR and STATE are saved for later
  passes over the file's revisions.  BRANCHES is iterated as the
  revisions that sprout from REVISION, and NEXT is the RCS 'next'
  revision of REVISION (see the comments below); a false NEXT on a
  non-trunk revision records no link.

  NOTE(review): presumably called by the rcsparse parser once per
  revision in the RCS file -- confirm against cvs2svn_rcsparse.Sink."""

  # Record the state of our revision for later calculations
  self.rev_state[revision] = state

  # store the rev_data as a list in case we have to jigger the timestamp
  self.rev_data[revision] = [int(timestamp), author, None]

  # When on trunk, the RCS 'next' revision number points to what
  # humans might consider to be the 'previous' revision number.  For
  # example, 1.3's RCS 'next' is 1.2.
  #
  # However, on a branch, the RCS 'next' revision number really does
  # point to what humans would consider to be the 'next' revision
  # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
  #
  # In other words, in RCS, 'next' always means "where to find the next
  # deltatext that you need this revision to retrieve".
  #
  # That said, we don't *want* RCS's behavior here, so we determine
  # whether we're on trunk or a branch and set self.prev_rev
  # accordingly.
  #
  # One last thing.  Note that if REVISION is a branch revision,
  # instead of mapping REVISION to NEXT, we instead map NEXT to
  # REVISION.  Since we loop over all revisions in the file before
  # doing anything with the data we gather here, this 'reverse
  # assignment' effectively does the following:
  #
  # 1. Gives us no 'prev' value for REVISION (in this
  # iteration... it may have been set in a previous iteration)
  #
  # 2. Sets the 'prev' value for the revision with number NEXT to
  # REVISION.  So when we come around to the branch revision whose
  # revision value is NEXT, its 'prev' and 'prev_rev' are already
  # set.
  if trunk_rev.match(revision):
    self.prev_rev[revision] = next
    self.next_rev[next] = revision
  elif next:
    self.prev_rev[next] = revision
    self.next_rev[revision] = next

  # Every branch revision listed for REVISION has REVISION as its
  # previous revision.
  for b in branches:
    self.prev_rev[b] = revision

  # Ratchet up the highest vendor head revision, if necessary.
  if self.default_branch:
    default_branch_root = self.default_branch + "."
    # REVISION lies directly on the default branch if it starts with
    # "<default branch>." and has exactly one more component.
    if ((revision.find(default_branch_root) == 0)
        and (default_branch_root.count('.') == revision.count('.'))):
      # This revision is on the default branch, so record that it is
      # the new highest default branch head revision.
      self.default_branches_db[self.cvs_path] = revision
  else:
    # No default branch, so make an educated guess.
    if revision == '1.2':
      # This is probably the time when the file stopped having a
      # default branch, so make a note of it.
      self.first_non_vendor_revision_date = timestamp
    else:
      m = vendor_revision.match(revision)
      if m and ((not self.first_non_vendor_revision_date)
                or (timestamp < self.first_non_vendor_revision_date)):
        # We're looking at a vendor revision, and it wasn't
        # committed after this file lost its default branch, so bump
        # the maximum trunk vendor revision in the permanent record.
        self.default_branches_db[self.cvs_path] = revision

  if not trunk_rev.match(revision):
    # Check for unlabeled branches, record them.  We tried to collect
    # all branch names when we parsed the symbolic name header
    # earlier, of course, but that didn't catch unlabeled branches.
    # If a branch is unlabeled, this is our first encounter with it,
    # so we have to record its data now.
    branch_number = revision[:revision.rindex(".")]
    if not self.branch_names.has_key(branch_number):
      branch_name = "unlabeled-" + branch_number
      self.set_branch_name(branch_number, branch_name)

    # Register the commit on this non-trunk branch
    branch_name = self.branch_names[branch_number]
    self.symbol_db.register_branch_commit(branch_name)
def tree_completed(self):
  """Check the parsed revision tree for timestamp consistency.

  The conversion relies on revision timestamps increasing along each
  line of history: rev 1.34 must be stamped before rev 1.35.  Visit
  every 'previous' link recorded in self.prev_rev and, wherever a
  predecessor is not strictly older than its successor, push the
  predecessor (and any of its own ancestors that then need to shift)
  back in time.  An adjusted entry in self.rev_data gets its new
  timestamp in slot 0 and its original timestamp in slot 2.

  After any repair the scan is restarted from scratch; the method
  returns once a complete scan finds nothing to fix."""
  rescan_needed = True
  while rescan_needed:
    rescan_needed = False
    for child, parent in self.prev_rev.items():
      if not parent:
        # The initial revision has no predecessor to compare against.
        continue
      child_time = self.rev_data[child][0]
      parent_time = self.rev_data[parent][0]
      if parent_time < child_time:
        continue

      # The previous revision occurred no earlier than the current
      # one, so shove it back in time, along with any revisions before
      # it that then need to shift as well.
      #
      # We sync backwards and not forwards because any given CVS
      # Revision has only one previous revision.  However, a CVS
      # Revision can *be* a previous revision for many other
      # revisions (e.g., a revision that is the source of multiple
      # branches).  This becomes relevant when we do the secondary
      # synchronization in pass 2--we can make certain that we
      # don't resync a revision earlier than its previous revision,
      # but it would be non-trivial to make sure that we don't resync
      # revision R *after* any revisions that have R as a previous
      # revision.
      while parent_time >= child_time:
        adjusted_time = child_time - 1
        self.rev_data[parent][0] = adjusted_time  # new timestamp
        self.rev_data[parent][2] = parent_time    # old timestamp
        delta = adjusted_time - parent_time
        msg = "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (self.cvs_path, parent, time.ctime(parent_time), delta)
        Log().write(LOG_VERBOSE, msg)
        if abs(delta) > COMMIT_THRESHOLD:
          warning = "%s: Significant timestamp change for '%s' (%d seconds)"
          Log().write(LOG_WARN,
                      warning % (warning_prefix, self.cvs_path, delta))

        # Step one link further back along the chain.
        child = parent
        parent = self.prev_rev[child]
        if not parent:
          break
        child_time = adjusted_time
        parent_time = self.rev_data[parent][0]

      # Something was resynced, so abandon this scan and start over.
      rescan_needed = True
      break
def set_revision_info(self, revision, log, text):
  """Write out the CVSRevision record for REVISION.

  LOG is the revision's log message; TEXT is its delta text, which is
  only tested for emptiness here.  The method:

    - derives a digest from LOG and the revision's author;
    - if the revision's timestamp was changed earlier (its old
      timestamp is recorded in self.rev_data), writes a line to
      self.resync;
    - drops the file's default-branch record when revision 1.1 does
      not look like the product of 'cvs import';
    - classifies the revision as an add, a change or a delete;
    - writes the resulting CVSRevision to self.revs, records it with
      StatsKeeper, and stores (author, LOG) in self.metadata_db under
      the digest if that digest is not there yet."""
  timestamp, author, old_ts = self.rev_data[revision]
  digest = sha.new(log + '\0' + author).hexdigest()
  if old_ts:
    # The timestamp on this revision was changed.  Log it for later
    # resynchronization of other files' revisions that occurred for
    # this time and log message.
    self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

  # "...Give back one kadam to honor the Hebrew God whose Ark this is."
  #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
  #
  # If revision 1.1 appears to have been created via 'cvs add'
  # instead of 'cvs import', then this file probably never had a
  # default branch, so retroactively remove its record in the
  # default branches db.  The test is that the log message CVS uses
  # for 1.1 in imports is "Initial revision\n" with no period.
  if revision == '1.1' and log != 'Initial revision\n':
    try:
      del self.default_branches_db[self.cvs_path]
    except KeyError:
      pass

  # Timestamps of the neighbouring revisions; 0 stands in when there
  # is no such neighbour.
  prev_rev = self.prev_rev[revision]
  prev_timestamp, _author, _old = \
      self.rev_data.get(prev_rev, [0, None, None])
  next_rev = self.next_rev.get(revision)
  next_timestamp, _author, _old = \
      self.rev_data.get(next_rev, [0, None, None])

  # How to tell if a CVSRevision is an add, a change, or a deletion:
  #
  # It's a delete if the RCS state is 'dead'.
  #
  # It's an add if the RCS state is not 'dead' and
  #   - we either have no previous revision
  #     or
  #   - we have a previous revision whose state is 'dead'.
  #
  # Anything else is a change.
  revision_is_dead = self.rev_state[revision] == 'dead'
  if revision_is_dead:
    op = OP_DELETE
  elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
    op = OP_ADD
  else:
    op = OP_CHANGE

  def on_same_line_of_development(rev_a, rev_b):
    """Return True if REV_A and REV_B are both on trunk or both on
    the same branch, else False.  If either one is None, return
    False."""
    if rev_a is None or rev_b is None:
      return False
    if rev_a.count('.') == 1 and rev_b.count('.') == 1:
      return True
    return rev_a[:rev_a.rfind('.')] == rev_b[:rev_b.rfind('.')]

  # There can be an odd situation where the tip revision of a branch
  # is alive, but every predecessor on the branch is in state 'dead',
  # yet the revision from which the branch sprouts is alive.  (This
  # is sort of a mirror image of the more common case of adding a
  # file on a branch, in which the first revision on the branch is
  # alive while the revision from which it sprouts is dead.)
  #
  # In this odd situation, we must mark the first live revision on
  # the branch as an OP_CHANGE instead of an OP_ADD, because it
  # reflects, however indirectly, a change w.r.t. the source
  # revision from which the branch sprouts.
  #
  # This is issue #89.
  #
  # A revision number with three or more dots is not a trunk revision.
  if revision.count('.') >= 3 and not revision_is_dead:
    cur_num = revision
    prev_num = self.prev_rev.get(cur_num, None)
    while cur_num and prev_num:
      if (not on_same_line_of_development(cur_num, prev_num)
          and self.rev_state[cur_num] == 'dead'
          and self.rev_state[prev_num] != 'dead'):
        op = OP_CHANGE
      cur_num = prev_num
      prev_num = self.prev_rev.get(cur_num, None)

  if text:
    deltatext_code = DELTATEXT_NONEMPTY
  else:
    deltatext_code = DELTATEXT_EMPTY

  c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
                      next_timestamp, op,
                      prev_rev, revision, next_rev,
                      self.file_in_attic, self.file_executable,
                      self.file_size,
                      deltatext_code, self.fname,
                      self.mode, self.rev_to_branch_name(revision),
                      self.taglist.get(revision, []),
                      self.branchlist.get(revision, []))
  self.revs.write(str(c_rev) + "\n")
  StatsKeeper().record_c_rev(c_rev)

  # Remember the author and log message for this digest, once.
  if not self.metadata_db.has_key(digest):
    self.metadata_db[digest] = (author, log)
def parse_completed(self):
  """Finish processing of the current file.

  For every tag and branch symbol recorded in self.taglist and
  self.branchlist, look up the branch name of the revision the symbol
  is attached to and, when there is one, register the symbol as a
  blocker of that branch in the symbol database.  Finally bump
  self.num_files."""
  # Tags first, then branches.
  for symbols_by_revision in (self.taglist, self.branchlist):
    for revision, symbols in symbols_by_revision.items():
      for symbol in symbols:
        parent_branch = self.rev_to_branch_name(revision)
        if parent_branch is None:
          continue
        self.symbol_db.register_branch_blocker(parent_branch, symbol)

  self.num_files += 1
def write_symbol_db(self):
  """Ask the symbol database (self.symbol_db) to write itself out."""
  symbol_db = self.symbol_db
  symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data will later be used to determine valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  Do this by finding "Openings" and "Closings" for each
  file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore, 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  Note that there may be many revisions chronologically
  between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
  perhaps even including on branch BEE itself.  But 1.3 is the next
  revision *on the same line* as 1.2, that is why it is the closing
  revision for those symbolic names of which 1.2 is the opening.

  The reason for doing all this hullabaloo is to make branch and tag
  creation as efficient as possible by minimizing the number of copies
  and deletes per creation.  For example, revisions 1.2 and 1.3 of
  foo.c might correspond to revisions 17 and 30 in Subversion.  That
  means that when creating branch BEE, there is some motivation to do
  the copy from one of 17-30.  Now if there were another file,
  'bar.c', whose opening and closing CVSRevisions for BEE corresponded
  to revisions 24 and 39 in Subversion, we would know that the ideal
  thing would be to copy the branch from somewhere between 24 and 29,
  inclusive.
  """

  def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and register both with Cleanup."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # The keys of this dictionary are *source* cvs_paths for which
    # we've encountered an 'opening' on the default branch.  The
    # values are the (uncleaned) symbolic names that this path has
    # opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log any openings found in C_REV, and if C_REV.next_rev is not
    None, a closing.  The opening uses SVN_REVNUM, but the closing (if
    any) will have its revnum determined later."""
    for symbol in c_rev.tags + c_rev.branches:
      self._note_default_branch_opening(c_rev, symbol)

      # A deleted revision does not open anything.
      if c_rev.op != OP_DELETE:
        self._log(symbol, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      # If our c_rev has a next_rev, then that's the closing rev for
      # this source revision.  Log it to closings for later processing
      # since we don't know the svn_revnum yet.
      if c_rev.next_rev is None:
        continue
      closing_key = c_rev.unique_key(c_rev.next_rev)
      self.closings.write('%s %s\n' % (symbol, closing_key))

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Write out a single line to the symbol_openings_closings file
    representing that SVN_REVNUM of CVS_PATH on BRANCH_NAME is either
    the opening or closing (TYPE) of NAME (a symbolic name).

    A false BRANCH_NAME (e.g. None) is written as '*'.

    TYPE should only be one of the following global constants:
    OPENING or CLOSING."""
    # '%.8d' zero-pads the revision number to at least eight digits,
    # i.e. a fixed width for revisions up to 99,999,999.  That
    # *should* be enough.
    fields = (name, svn_revnum, type, branch_name or '*', cvs_path)
    self.symbolings.write('%s %.8d %s %s %s\n' % fields)

  def close(self):
    """Iterate through the closings file, look up the svn_revnum for
    each closing CVSRevision, and write a proper line out to the
    symbolings file.  Both files are closed by this method."""
    # Use this to get the c_rev of our rev_key
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      # Each line is "<symbolic name> <unique CVSRevision key>".
      (symbol, rev_key) = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(symbol, svn_revnum,
                c_rev.cvs_path, c_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Record SYMBOLIC_NAME as opened by C_REV.cvs_path in
    self.open_paths_with_default_branches.

    The name is recorded unconditionally; this method does not itself
    check whether C_REV is a default branch revision."""
    opened = self.open_paths_with_default_branches.setdefault(
      c_rev.cvs_path, [])
    opened.append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.cvs_path, then log each name in
    self.open_paths_with_default_branches[C_REV.cvs_path] as a closing
    with SVN_REVNUM as the closing revision number, and forget the
    path."""
    path = c_rev.cvs_path
    if path not in self.open_paths_with_default_branches:
      return

    # log each symbol as a closing
    for symbol in self.open_paths_with_default_branches[path]:
      self._log(symbol, svn_revnum, path, None, CLOSING)

    # Remove them from the openings list as we're done with them.
    del self.open_paths_with_default_branches[path]
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""

  def __init__(self, mode):
    """Open the backing databases in MODE.

    Raise RuntimeError if MODE is neither DB_OPEN_NEW nor
    DB_OPEN_READ."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
    Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
    self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
    Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
    self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
    Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)

    # These two are only ever read here, whatever MODE is.
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)

    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      self.tags_db = TagsDatabase(DB_OPEN_READ)
      self.motivating_revnums = SDatabase(temp(MOTIVATING_REVNUMS), mode)
      Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def _require_writable(self):
    """Raise RuntimeError if this manager was opened read-only."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    stored = self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM)
    return int(stored)

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    revnum_key = str(svn_revnum)
    c_rev_keys = self.svn2cvs_db.get(revnum_key, None)
    if c_rev_keys is None:
      return None

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      if digest is not None:
        continue
      # Set the author and log message for this commit by using
      # CVSRevision metadata, but only if we haven't done so already.
      digest = c_rev.digest
      author, log_msg = self.svn_commit_metadata[digest]
      svn_commit.set_author(author)
      svn_commit.set_log_msg(log_msg)

    # If we're doing a trunk-only conversion, we don't need to do any
    # more work.
    if Ctx().trunk_only:
      return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
      svn_commit.set_symbolic_name(name)
      svn_commit.set_date(date)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(revnum_key, None)
    if motivating_revnum:
      svn_commit.set_motivating_revnum(int(motivating_revnum))
      svn_commit.set_date(date)

    # A commit either carries CVSRevisions or fills a symbolic name,
    # never both.
    if svn_commit.cvs_revs and name:
      raise SVNCommit.SVNCommitInternalInconsistencyError(
          "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
          "symbolic name ('%s') to fill."
          % (_clean_symbolic_name(name),))

    return svn_commit

  def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS.

    Raise RuntimeError if this manager was opened read-only."""
    self._require_writable()

    for c_rev in cvs_revs:
      Log().write(LOG_VERBOSE, " ", c_rev.unique_key())

    # Forward mapping: one revnum -> list of CVSRevision keys.
    self.svn2cvs_db[str(svn_revnum)] = [
        c_rev.unique_key() for c_rev in cvs_revs]

    # Reverse mapping: each CVSRevision key -> the revnum.
    for c_rev in cvs_revs:
      self.cvs2svn_db[c_rev.unique_key()] = svn_revnum

  def set_name_and_date(self, svn_revnum, name, date):
    """Associate symbolic name NAME and DATE with SVN_REVNUM.

    NAME is allowed to be None.  SVN_REVNUM is also remembered in
    self.last_filled as the revision in which NAME was last filled.

    Raise RuntimeError if this manager was opened read-only."""
    self._require_writable()
    self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
    self.last_filled[name] = svn_revnum

  def _get_name_and_date(self, svn_revnum):
    """Return a tuple containing the symbolic name and date associated
    with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
    associated with it."""
    nothing_recorded = (None, None)
    return self.svn_commit_names_dates.get(str(svn_revnum), nothing_recorded)

  def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM as the value of SVN_REVNUM.

    Raise RuntimeError if this manager was opened read-only."""
    self._require_writable()
    self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
class CVSCommit:
  """Each instance of this class contains a number of CVS Revisions
  that correspond to one or more Subversion Commits.  After all CVS
  Revisions are added to the grouping, calling process_revisions will
  generate a Subversion Commit (or Commits) for the set of CVS
  Revisions in the grouping."""

  def __init__(self, digest, author, log):
    """Start an empty grouping identified by DIGEST, with the given
    AUTHOR and LOG message."""
    self.digest = digest
    self.author = author
    self.log = log

    # Symbolic names for which the last source revision has already
    # been seen and for which the CVSRevisionAggregator has already
    # generated a fill SVNCommit.  See self.process_revisions().
    self.done_symbols = [ ]

    # Names of the files touched by this grouping (used as a set).
    self.files = { }

    # Lists of CVSRevisions
    self.changes = [ ]
    self.deletes = [ ]

    # Start out with a t_min higher than any incoming time T, and a
    # t_max lower than any incoming T.  This way the first T will
    # push t_min down to T, and t_max up to T, naturally (without any
    # special-casing), and successive times will then ratchet them
    # outward as appropriate.
    self.t_min = 2 ** 32
    self.t_max = 0

    # This will be set to the SVNCommit that occurs in self._commit.
    self.motivating_commit = None

    # This is a list of all non-primary commits motivated by the main
    # commit.  We gather these so that we can set their dates to the
    # same date as the primary commit.
    self.secondary_commits = [ ]

    # State for handling default branches.
    #
    # Here is a tempting, but ultimately nugatory, bit of logic, which
    # I share with you so you may appreciate the less attractive, but
    # refreshingly non-nugatory, logic which follows it:
    #
    # If some of the commits in this txn happened on a non-trunk
    # default branch, then those files will have to be copied into
    # trunk manually after being changed on the branch (because the
    # RCS "default branch" appears as head, i.e., trunk, in practice).
    # As long as those copies don't overwrite any trunk paths that
    # were also changed in this commit, then we can do the copies in
    # the same revision, because they won't cover changes that don't
    # appear anywhere/anywhen else.  However, if some of the trunk dst
    # paths *did* change in this commit, then immediately copying the
    # branch changes would lose those trunk mods forever.  So in this
    # case, we need to do at least that copy in its own revision.  And
    # for simplicity's sake, if we're creating the new revision for
    # even one file, then we just do all such copies together in the
    # new revision.
    #
    # Doesn't that sound nice?
    #
    # Unfortunately, Subversion doesn't support copies with sources
    # in the current txn.  All copies must be based in committed
    # revisions.  Therefore, we generate the above-described new
    # revision unconditionally.
    #
    # This is a list of c_revs, and a c_rev is appended for each
    # default branch commit that will need to be copied to trunk (or
    # deleted from trunk) in some generated revision following the
    # "regular" revision.
    self.default_branch_cvs_revisions = [ ]

  def __cmp__(self, other):
    """Order commits by t_max, breaking ties with t_min and lastly
    with digest."""
    order = cmp(self.t_max, other.t_max)
    if order == 0:
      order = cmp(self.t_min, other.t_min)
    if order == 0:
      order = cmp(self.digest, other.digest)
    return order

  def has_file(self, fname):
    """Return true if a revision of file FNAME has been added to this
    grouping."""
    return fname in self.files

  def revisions(self):
    """Return a new list of all CVSRevisions in this grouping: the
    changes followed by the deletes."""
    return self.changes + self.deletes

  def opens_symbolic_name(self, name):
    """Return 1 if any CVSRevision in this commit reports, through its
    own opens_symbolic_name() method, that it opens NAME; else return
    0."""
    for c_rev in self.revisions():
      if c_rev.opens_symbolic_name(name):
        return 1
    return 0

  def add_revision(self, c_rev):
    """Add C_REV to this grouping, widening the grouping's time range
    to include C_REV.timestamp."""
    # Record the time range of this commit.
    #
    # ### ISSUE: It's possible, though unlikely, that the time range
    # of a commit could get gradually expanded to be arbitrarily
    # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
    # problem, and anyway deciding where to break it up would be a
    # judgement call.  For now, we just print a warning in
    # process_revisions() if this happens.
    stamp = c_rev.timestamp
    if stamp < self.t_min:
      self.t_min = stamp
    if stamp > self.t_max:
      self.t_max = stamp

    if c_rev.op == OP_DELETE:
      bucket = self.deletes
    else:
      # OP_CHANGE or OP_ADD
      bucket = self.changes
    bucket.append(c_rev)

    self.files[c_rev.fname] = 1

  def _pre_commit(self):
    """Generates any SVNCommits that must exist before the main
    commit."""

    # There may be multiple c_revs in this commit that would cause
    # branch B to be filled, but we only want to fill B once.  On the
    # other hand, there might be multiple branches committed on in
    # this commit.  Whatever the case, we should count exactly one
    # commit per branch, because we only fill a branch once per
    # CVSCommit.  This list tracks which branches we've already
    # counted.
    accounted_for_sym_names = [ ]

    def fill_needed(c_rev, pm):
      """Return 1 if this is the first commit on a new branch (for
      this file) and we need to fill the branch; else return 0
      (meaning that some other file's first commit on the branch has
      already done the fill for us).

      If C_REV.op is OP_ADD, only return 1 if the branch that this
      commit is on has no last filled revision.

      PM is a PersistenceManager to query.
      """
      # Different '.' counts indicate that c_rev is now on a different
      # line of development (and may need a fill)
      if c_rev.rev.count('.') == c_rev.prev_rev.count('.'):
        return 0

      svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
      # It should be the case that when we have a file F that
      # is added on branch B (thus, F on trunk is in state
      # 'dead'), we generate an SVNCommit to fill B iff the branch
      # has never been filled before.
      #
      # If this c_rev.op == OP_ADD, *and* the branch has never
      # been filled before, then fill it now.  Otherwise, no need to
      # fill it.  A delete is treated the same way; a change needs a
      # fill when its source was committed after the last fill.
      if c_rev.op == OP_ADD or c_rev.op == OP_DELETE:
        if pm.last_filled.get(c_rev.branch_name, None) is None:
          return 1
      elif c_rev.op == OP_CHANGE:
        if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
          return 1
      return 0

    for c_rev in self.changes + self.deletes:
      # If a commit is on a branch, we must ensure that the branch
      # path being committed exists (in HEAD of the Subversion
      # repository).  If it doesn't exist, we will need to fill the
      # branch.  After the fill, the path on which we're committing
      # will exist.
      branch = c_rev.branch_name
      if not branch:
        continue
      if branch in accounted_for_sym_names or branch in self.done_symbols:
        continue
      if not fill_needed(c_rev, Ctx()._persistence_manager):
        continue

      svn_commit = SVNCommit("pre-commit symbolic name '%s'" % branch)
      svn_commit.set_symbolic_name(branch)
      self.secondary_commits.append(svn_commit)
      accounted_for_sym_names.append(branch)

  def _commit(self):
    """Generates the primary SVNCommit that corresponds to this
    CVSCommit."""
    # Generate an SVNCommit unconditionally.  Even if the only change
    # in this CVSCommit is a deletion of an already-deleted file (that
    # is, a CVS revision in state 'dead' whose predecessor was also in
    # state 'dead'), the conversion will still generate a Subversion
    # revision containing the log message for the second dead
    # revision, because we don't want to lose that information.
    svn_commit = SVNCommit("commit")
    self.motivating_commit = svn_commit

    for c_rev in self.changes:
      svn_commit.add_revision(c_rev)
      # Only make a change if we need to.  When 1.1.1.1 has an empty
      # deltatext, the explanation is almost always that we're looking
      # at an imported file whose 1.1 and 1.1.1.1 are identical.  On
      # such imports, CVS creates an RCS file where 1.1 has the
      # content, and 1.1.1.1 has an empty deltatext, i.e, the same
      # content as 1.1.  There's no reason to reflect this non-change
      # in the repository, so we want to do nothing in this case.  (If
      # we were really paranoid, we could make sure 1.1's log message
      # is the CVS-generated "Initial revision\n", but I think the
      # conditions below are strict enough.)
      is_noop_import = (c_rev.deltatext_code == DELTATEXT_EMPTY
                        and c_rev.rev == "1.1.1.1")
      if not is_noop_import and c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    for c_rev in self.deletes:
      # When a file is added on a branch, CVS not only adds the file
      # on the branch, but generates a trunk revision (typically
      # 1.1) for that file in state 'dead'.  We only want to add
      # this revision if the log message is not the standard cvs
      # fabricated log message.
      if c_rev.prev_rev is None:
        # c_rev.branches may be empty if the originating branch
        # has been excluded.
        if not c_rev.branches:
          continue
        cvs_generated_msg = ('file %s was initially added on branch %s.\n'
                             % (c_rev.filename(),
                                c_rev.branches[0]))
        _author, log_msg = \
            Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
        if log_msg == cvs_generated_msg:
          continue

      svn_commit.add_revision(c_rev)
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    # There is a slight chance that we didn't actually register any
    # CVSRevisions with our SVNCommit (see loop over self.deletes
    # above), so if we have no CVSRevisions, we don't flush the
    # svn_commit to disk and roll back our revnum.
    if svn_commit.cvs_revs:
      svn_commit.flush()
    else:
      # We will not be flushing this SVNCommit, so rollback the
      # SVNCommit revision counter.
      SVNCommit.revnum -= 1

    if not Ctx().trunk_only:
      for c_rev in self.revisions():
        Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)

  def _post_commit(self):
    """Generates any SVNCommits that we can perform now that _commit
    has happened.  That is, handle non-trunk default branches.
    Sometimes an RCS file has a non-trunk default branch, so a commit
    on that default branch would be visible in a default CVS checkout
    of HEAD.  If we don't copy that commit over to Subversion's trunk,
    then there will be no Subversion tree which corresponds to that
    CVS checkout.  Of course, in order to copy the path over, we may
    first need to delete the existing trunk there."""
    # Only generate a commit if we have default branch revs
    if not self.default_branch_cvs_revisions:
      return

    # Generate an SVNCommit for all of our default branch c_revs.
    svn_commit = SVNCommit("post-commit default branch(es)")
    svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
    for c_rev in self.default_branch_cvs_revisions:
      svn_commit.add_revision(c_rev)
      Ctx()._symbolings_logger.log_default_branch_closing(
          c_rev, svn_commit.revnum)
    self.secondary_commits.append(svn_commit)

  def process_revisions(self, done_symbols):
    """Process all the CVSRevisions that this instance has, creating
    one or more SVNCommits in the process.  Generate fill SVNCommits
    only for symbols not in DONE_SYMBOLS (avoids unnecessary
    fills).

    Return the primary SVNCommit that corresponds to this CVSCommit.
    The returned SVNCommit is the commit that motivated any other
    SVNCommits generated in this CVSCommit."""
    self.done_symbols = done_symbols
    seconds = self.t_max - self.t_min + 1

    log = Log()
    log.write(LOG_VERBOSE, '-' * 60)
    log.write(LOG_VERBOSE, 'CVS Revision grouping:')
    if seconds == 1:
      Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
                  % time.ctime(self.t_max))
    else:
      Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
      Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
                  % (time.ctime(self.t_max), seconds))

    if seconds > COMMIT_THRESHOLD + 1:
      Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
                  % (warning_prefix, COMMIT_THRESHOLD))

    # Only do the primary commit if we're trunk-only
    if Ctx().trunk_only:
      self._commit()
      return self.motivating_commit

    self._pre_commit()
    self._commit()
    self._post_commit()

    # Secondary commits share the date of the commit that motivated
    # them.
    for svn_commit in self.secondary_commits:
      svn_commit.set_date(self.motivating_commit.get_date())
      svn_commit.flush()

    return self.motivating_commit
2427 class SVNCommit:
2428 """This represents one commit to the Subversion Repository. There
2429 are three types of SVNCommits:
2431 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2433 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2435 3. Updates trunk to reflect the contents of a particular branch
2436 (this is to handle RCS default branches)."""
2438 # The revision number to assign to the next new SVNCommit.
2439 # We start at 2 because SVNRepositoryMirror uses the first commit
2440 # to create trunk, tags, and branches.
2441 revnum = 2
class SVNCommitInternalInconsistencyError(Exception):
  """Raised when the SVNCommit databases describe a state that should
  be impossible."""
def __init__(self, description="", revnum=None, cvs_revs=None):
  """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.
  If REVNUM, the SVNCommit will correspond to that revision number;
  and if CVS_REVS, then they must be the exact set of CVSRevisions for
  REVNUM.

  If REVNUM is not given, the next number from the class-wide counter
  SVNCommit.revnum is used and the counter is advanced.

  It is an error to pass CVS_REVS without REVNUM, but you may pass
  REVNUM without CVS_REVS, and then add a revision at a time by
  invoking add_revision()."""
  self._description = description

  # Revprop metadata for this commit.
  #
  # These initial values are placeholders.  At least the log and the
  # date should be different by the time these are used.
  #
  # They are private because their values should be returned encoded
  # in UTF8, but callers aren't required to set them in UTF8.
  # Therefore, accessor methods are used to set them, and
  # self.get_revprops() is used to get them, in dictionary form.
  self._author = Ctx().username
  self._log_msg = "This log message means an SVNCommit was used too soon."
  self._max_date = 0  # Latest date seen so far.

  self.cvs_revs = cvs_revs or []

  if not revnum:
    # No explicit number: claim the next one from the shared counter.
    revnum = SVNCommit.revnum
    SVNCommit.revnum = revnum + 1
  self.revnum = revnum

  # The (uncleaned) symbolic name that is filled in this SVNCommit,
  # if any.
  self.symbolic_name = None

  # If this commit is a default branch synchronization, this
  # variable represents the subversion revision number of the
  # *primary* commit where the default branch changes actually
  # happened.  It is None otherwise.
  #
  # It is possible for multiple synchronization commits to refer to
  # the same motivating commit revision number, and it is possible
  # for a single synchronization commit to contain CVSRevisions on
  # multiple different default branches.
  self.motivating_revnum = None

  # is_tag is true only if this commit is a fill of a symbolic name
  # that is a tag, None in all other cases.
  self.is_tag = None
2497 def set_symbolic_name(self, symbolic_name):
2498 "Set self.symbolic_name to SYMBOLIC_NAME."
2499 self.symbolic_name = symbolic_name
2501 def set_motivating_revnum(self, revnum):
2502 "Set self.motivating_revnum to REVNUM."
2503 self.motivating_revnum = revnum
2505 def set_author(self, author):
2506 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2507 This is the only way to set an SVNCommit's author."""
2508 self._author = author
2510 def set_log_msg(self, msg):
2511 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2512 This is the only way to set an SVNCommit's log message."""
2513 self._log_msg = msg
2515 def set_date(self, date):
2516 """Set this SVNCommit's date to DATE (an integer).
2517 Note that self.add_revision() updates this automatically based on
2518 a CVSRevision; so you may not need to call this at all, and even
2519 if you do, the value may be overwritten by a later call to
2520 self.add_revision()."""
2521 self._max_date = date
2523 def get_date(self):
2524 """Returns this SVNCommit's date as an integer."""
2525 return self._max_date
2527 def get_revprops(self):
2528 """Return the Subversion revprops for this SVNCommit."""
2529 date = format_date(self._max_date)
2530 try:
2531 utf8_author = None
2532 if self._author is not None:
2533 utf8_author = to_utf8(self._author)
2534 utf8_log = to_utf8(self.get_log_msg())
2535 return { 'svn:author' : utf8_author,
2536 'svn:log' : utf8_log,
2537 'svn:date' : date }
2538 except UnicodeError:
2539 Log().write(LOG_WARN, '%s: problem encoding author or log message:'
2540 % warning_prefix)
2541 Log().write(LOG_WARN, " author: '%s'" % self._author)
2542 Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
2543 Log().write(LOG_WARN, " date: '%s'" % date)
2544 Log().write(LOG_WARN,
2545 "(subversion rev %s) Related files:" % self.revnum)
2546 for c_rev in self.cvs_revs:
2547 Log().write(LOG_WARN, " ", c_rev.fname)
2549 Log().write(LOG_WARN, "Consider rerunning with (for example)",
2550 "'--encoding=latin1'.\n")
2551 # It's better to fall back to the original (unknown encoding) data
2552 # than to either 1) quit or 2) record nothing at all.
2553 return { 'svn:author' : self._author,
2554 'svn:log' : self.get_log_msg(),
2555 'svn:date' : date }
2557 def add_revision(self, cvs_rev):
2558 self.cvs_revs.append(cvs_rev)
2559 if cvs_rev.timestamp > self._max_date:
2560 self._max_date = cvs_rev.timestamp
2562 def _is_primary_commit(self):
2563 """Return true if this is a primary SVNCommit, false otherwise."""
2564 return not (self.symbolic_name or self.motivating_revnum)
2566 def flush(self):
2567 Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
2568 % (self.revnum, self._description))
2569 Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)
2571 if self.motivating_revnum is not None:
2572 Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
2573 self.motivating_revnum)
2575 # If we're not a primary commit, then store our date and/or our
2576 # symbolic_name
2577 if not self._is_primary_commit():
2578 Ctx()._persistence_manager.set_name_and_date(
2579 self.revnum, self.symbolic_name, self._max_date)
2581 def __str__(self):
2582 """ Print a human-readable description of this SVNCommit. This
2583 description is not intended to be machine-parseable (although
2584 we're not going to stop you if you try!)"""
2586 ret = "SVNCommit #: " + str(self.revnum) + "\n"
2587 if self.symbolic_name:
2588 ret += (" symbolic name: " + _clean_symbolic_name(self.symbolic_name)
2589 + "\n")
2590 else:
2591 ret += " NO symbolic name\n"
2592 ret += " debug description: " + self._description + "\n"
2593 ret += " cvs_revs:\n"
2594 for c_rev in self.cvs_revs:
2595 ret += " " + c_rev.unique_key() + "\n"
2596 return ret
2598 def get_log_msg(self):
2599 """Returns the actual log message for a primary commit, and the
2600 appropriate manufactured log message for a secondary commit."""
2601 if self.symbolic_name is not None:
2602 return self._log_msg_for_symbolic_name_commit()
2603 elif self.motivating_revnum is not None:
2604 return self._log_msg_for_default_branch_commit()
2605 else:
2606 return self._log_msg
2608 def _log_msg_for_symbolic_name_commit(self):
2609 """Creates a log message for a manufactured commit that fills
2610 self.symbolic_name. If self.is_tag is true, write the log message
2611 as though for a tag, else write it as though for a branch."""
2612 type = 'branch'
2613 if self.is_tag:
2614 type = 'tag'
2616 # In Python 2.2.3, we could use textwrap.fill(). Oh well :-).
2617 space_or_newline = ' '
2618 cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
2619 if len(cleaned_symbolic_name) >= 13:
2620 space_or_newline = '\n'
2622 return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
2623 % (type, space_or_newline, cleaned_symbolic_name)
2625 def _log_msg_for_default_branch_commit(self):
2626 """Creates a log message for a manufactured commit that
2627 synchronizes a non-trunk default branch with trunk."""
2628 msg = 'This commit was generated by cvs2svn to compensate for ' \
2629 'changes in r%d,\n' \
2630 'which included commits to RCS files with non-trunk default ' \
2631 'branches.\n' % self.motivating_revnum
2632 return msg
class CVSRevisionAggregator:
  """Collects CVSRevisions into CVSCommits, each of which represents
  at least one SVNCommit."""

  def __init__(self):
    """Open the databases that are read during aggregation and
    install a fresh SymbolingsLogger, PersistenceManager and default
    branches database on the Ctx() object."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # Map { key : CVSCommit } of the CVS commits that are currently
    # being accumulated.  While a CVSCommit still accepts further
    # CVSRevisions its key is CVSRevision.digest.  Once it has been
    # closed (because an inbound commit wanted to affect a file that
    # the CVSCommit already contains) its key is CVSRevision.digest
    # followed by some number of '-' characters.
    self.cvs_commits = {}

    # Map { symbol : None } of the symbolic names whose last source
    # CVSRevision has been processed but which are not closed yet.
    self.pending_symbols = {}

    # List of closed symbols: the last CVSRevision that is a source
    # for the symbol has been seen and the final fill has been done,
    # so the symbol never needs to be filled again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit object.  It is
    # kept only for its date, which is copied onto the SVNCommits
    # created in self._attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def _extract_ready_commits(self, timestamp):
    """Remove from self.cvs_commits, and return as a list, every
    active commit that has expired by TIMESTAMP."""
    expired = [ ]
    # items() is a list snapshot, so deleting inside the loop is safe.
    for key, candidate in self.cvs_commits.items():
      if candidate.t_max + COMMIT_THRESHOLD < timestamp:
        expired.append(candidate)
        del self.cvs_commits[key]
    return expired

  def process_revision(self, c_rev):
    """Feed the CVSRevision C_REV into the aggregator.

    Commits that have expired by C_REV's timestamp are processed,
    pending commits that already touch C_REV's file are closed, and
    C_REV is added to the open commit for its digest (which is created
    if necessary)."""
    # Each time a new revision arrives, collect the accumulating
    # commits that are ready for processing.
    ready_queue = self._extract_ready_commits(c_rev.timestamp)

    # items() is a list snapshot, so re-keying inside the loop is safe.
    for digest_key, pending_commit in self.cvs_commits.items():
      # A pending commit that is on the same file as the inbound
      # revision is closed to further changes.  It is not flushed,
      # though, since other pending commits may be dated before it.
      # ### ISSUE: the has_file() check below is not optimal.
      # It does fix the dataloss bug where revisions would get lost
      # if checked in too quickly, but it can also break apart the
      # commits.  The correct fix would require tracking the
      # dependencies between change sets and committing them in
      # proper order.
      if not pending_commit.has_file(c_rev.fname):
        continue
      # Append '-' until the key is not in use in self.cvs_commits.
      closed_key = digest_key + '-'
      while closed_key in self.cvs_commits:
        closed_key = closed_key + '-'
      self.cvs_commits[closed_key] = pending_commit
      del self.cvs_commits[digest_key]

    # Put the revision into the still-open commit for its digest.
    cvs_commit = self.cvs_commits.get(c_rev.digest)
    if cvs_commit is None:
      author, log = self.metadata_db[c_rev.digest]
      cvs_commit = CVSCommit(c_rev.digest, author, log)
      self.cvs_commits[c_rev.digest] = cvs_commit
    cvs_commit.add_revision(c_rev)

    if not ready_queue:
      # Even when no commits are ready, _add_pending_symbols() and
      # _attempt_to_commit_symbols() must run for this c_rev.
      self._add_pending_symbols(c_rev)
      self._attempt_to_commit_symbols(ready_queue)
      return

    # Everything in ready_queue has to be processed now, because this
    # latest rev cannot possibly belong to any of it.  Put the commits
    # into time order, then process them.
    ready_queue.sort()

    while ready_queue:
      cvs_commit = ready_queue.pop(0)
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      self._add_pending_symbols(c_rev)
      self._attempt_to_commit_symbols(ready_queue)

  def flush(self):
    """Commit everything that is left in self.cvs_commits, then tell
    the SymbolingsLogger that all commits are done."""
    ready_queue = [ (commit, key)
                    for key, commit in self.cvs_commits.items() ]

    ready_queue.sort()
    while ready_queue:
      (cvs_commit, key) = ready_queue.pop(0)
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      del self.cvs_commits[key]
      self._attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def _add_pending_symbols(self, c_rev):
    """Add to self.pending_symbols every symbol for which C_REV is the
    last CVSRevision.

    Unless this is a trunk-only conversion, the symbolic names for
    which C_REV is the last *source* CVSRevision are looked up and
    added to those left over from earlier passes through the
    aggregator."""
    if Ctx().trunk_only:
      return
    for symbol in self.last_revs_db.get(c_rev.unique_key(), []):
      self.pending_symbols[symbol] = None

  def _attempt_to_commit_symbols(self, queued_commits):
    """Create one SVNCommit for every symbol in self.pending_symbols
    that has no opening CVSRevision in either QUEUED_COMMITS or
    self.cvs_commits.values()."""
    # Collect the pending symbols that have no *source* CVSRevisions
    # in the pending commit queue (self.cvs_commits) or in
    # queued_commits:
    closeable = []
    for symbol in self.pending_symbols:
      for candidate in self.cvs_commits.values() + queued_commits:
        if candidate.opens_symbolic_name(symbol):
          break
      else:
        # No commit opens this symbol, so it can be closed.
        closeable.append(symbol)

    # Sorting makes the processing order independent of the order in
    # which the dict hashing algorithm returns the symbols, so that
    # the tests get the same results on all platforms.
    closeable.sort()
    for symbol in closeable:
      svn_commit = SVNCommit("closing tag/branch '%s'" % symbol)
      svn_commit.set_symbolic_name(symbol)
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(symbol)
      del self.pending_symbols[symbol]
class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  It does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""

  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading and load the
    offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets_db is really small and is read and written a fair
    # bit, so it is copied into an in-memory dictionary.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for symbol in offsets_db.db.keys():
      self.offsets[symbol] = offsets_db[symbol]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME, built
    from the openings and closings recorded up to and including
    SVN_REVNUM.

    If an opening rev is encountered in this fill but the matching
    closing rev takes place later than SVN_REVNUM, the closing is not
    passed to the SymbolicNameFillingGuide in this fill (and is
    discarded when encountered in a later fill).  That is perfectly
    fine, because a valid fill can still be done without the
    closing--we always try to fill what we can as soon as we can."""
    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # It's possible to have a branch start with a file that was added
    # on a branch; in that case there is no offset for the symbol.
    if symbolic_name not in self.offsets:
      return SymbolicNameFillingGuide(openings_closings_map)

    # Move the read position of self.symbolings to the offset stored
    # for symbolic_name.
    self.symbolings.seek(self.offsets[symbolic_name])

    while True:
      line_start = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      line_name, rev_text, kind, branch_name, cvs_path = line.split(" ", 4)
      if branch_name == '*':
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      else:
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      line_revnum = int(rev_text)
      if line_revnum > svn_revnum or line_name != symbolic_name:
        break
      openings_closings_map.register(svn_path, line_revnum, kind)

    # If anything that was read got used, remember the offset of the
    # beginning of the line read last as the new offset for the
    # symbol.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = line_start

    return SymbolicNameFillingGuide(openings_closings_map)
class SvnRevisionRange:
  """The range of subversion revision numbers from which a path can
  be copied.

  self.opening_revnum is the number of the earliest such revision and
  self.closing_revnum is one higher than the number of the last such
  revision.  self.closing_revnum stays None as long as no closing has
  been registered."""

  def __init__(self, opening_revnum):
    """Create a range that opens at OPENING_REVNUM and has no closing
    yet."""
    self.closing_revnum = None
    self.opening_revnum = opening_revnum

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing, unless a closing has
    already been registered."""
    # With a non-trunk default branch there may be multiple closings;
    # only the first one encountered counts.
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:]' or, once closed, '[OPENING:CLOSING]'."""
    if self.closing_revnum is not None:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
    return '[%d:]' % (self.opening_revnum,)
class OpeningsClosingsMap:
  """A dictionary of openings and closings for a symbolic name in the
  current SVNCommit.

  The user should call self.register() for each opening and closing
  and can then retrieve what was collected with self.get_things()."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""
    self.name = symbolic_name

    # Maps each SVN_PATH to its SvnRevisionRange object.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.

    SVN_PATH is the source path that needs to be copied into
    self.name, and SVN_REVNUM is either the first svn revision number
    that we can copy from (our opening), or the last (not inclusive)
    svn revision number that we can copy from (our closing).  TYPE
    indicates whether this is an opening or a closing.

    The opening for a given SVN_PATH must be passed before the closing
    for it to have any effect... any closing encountered before a
    corresponding opening will be discarded.

    It is not necessary to pass a corresponding closing for every
    opening."""
    if type == OPENING:
      # An opening is always recorded; it starts a new range.
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return
    # A closing is only recorded for a path whose opening is already
    # registered.
    if type == CLOSING and svn_path in self.things:
      self.things[svn_path].add_closing(svn_revnum)

  def is_empty(self):
    """Return true if no opening or closing has been accumulated,
    false otherwise."""
    return not self.things

  def get_things(self):
    """Return a list of (svn_path, SvnRevisionRange) tuples for all
    svn_paths with registered openings or closings."""
    return self.things.items()
class SymbolicNameFillingGuide:
  """A node tree representing the source paths to be copied to fill
  self.name in the current SVNCommit.

  self._node_tree is the root of the directory tree, in the form {
  path_component : subnode }.  Leaf nodes are instances of
  SvnRevisionRange.  Intermediate (directory) nodes are dictionaries
  mapping relative names to subnodes.

  By walking self._node_tree and calling self.get_best_revnum() on
  each node, the caller can determine what subversion revision number
  to copy the path corresponding to that node from.  self._node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, openings_closings_map):
    """Initialize a SymbolicNameFillingGuide for the symbolic name of
    OPENINGS_CLOSINGS_MAP and store into it the openings and closings
    from OPENINGS_CLOSINGS_MAP."""
    self.name = openings_closings_map.name

    # The dictionary that holds our node tree as a map { node_key :
    # node }.
    self._node_tree = { }

    for svn_path, svn_revision_range in openings_closings_map.get_things():
      (head, tail) = _path_split(svn_path)
      self._get_node_for_path(head)[tail] = svn_revision_range

    #self.print_node_tree(self._node_tree)

  def _get_node_for_path(self, svn_path):
    """Return the (dictionary) node for SVN_PATH, creating new nodes
    as needed."""
    # Walk down the path, one component at a time.
    node = self._node_tree
    for component in svn_path.split('/'):
      if component not in node:
        node[component] = {}
      node = node[component]
    return node

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.

    Return a tuple (REVNUM, MAX_SCORE), where REVNUM is the chosen
    subversion revision number and MAX_SCORE is the score of that
    revision as computed by score_revisions() below.

    PREFERRED_REVNUM is passed to best_rev and used to calculate the
    best revnum.

    Raise FatalError if no revision to copy from can be found."""

    def score_revisions(svn_revision_ranges):
      """Return a list of revisions and scores based on
      SVN_REVISION_RANGES.  The returned list looks like:

         [(REV1 SCORE1), (REV2 SCORE2), ...]

      where the tuples are sorted by revision number.
      SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

      For each svn revision that appears as either an opening_revnum
      or closing_revnum for one of the svn_revision_ranges, output a
      tuple indicating how many of the SvnRevisionRanges include that
      svn_revision in its range.  A score thus indicates that copying
      the corresponding revision (or any following revision up to the
      next revision in the list) of the object in question would yield
      that many correct paths at or underneath the object.  There may
      be other paths underneath it which are not correct and would
      need to be deleted or recopied; those can only be detected by
      descending and examining their scores.

      If SVN_REVISION_RANGES is empty, return the empty list."""
      openings = [ x.opening_revnum
                   for x in svn_revision_ranges ]
      closings = [ x.closing_revnum
                   for x in svn_revision_ranges
                   if x.closing_revnum is not None ]

      # First look for easy out.
      if not openings:
        return []

      # Create a list with both openings (which increment the total)
      # and closings (which decrement the total):
      things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
      # Sort by revision number:
      things.sort()
      # Initialize output list with zeroth element of things.  This
      # element must exist, because it was already verified that
      # openings is not empty.
      scores = [ things[0] ]
      total = scores[-1][1]
      for (rev, change) in things[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      return scores

    def best_rev(scores, preferred_rev):
      """Return (REV, MAX_SCORE), the revision with the highest score
      from SCORES (a list returned by score_revisions()) together with
      that score.  When the maximum score is shared by multiple
      revisions, the oldest revision is selected, unless PREFERRED_REV
      is one of the possibilities, in which case, it is selected."""
      max_score = 0
      preferred_rev_score = -1
      rev = SVN_INVALID_REVNUM
      if preferred_rev is None:
        # Comparison order of different types is arbitrary.  Do not
        # expect None to compare less than int values below.
        preferred_rev = SVN_INVALID_REVNUM
      for revnum, count in scores:
        if count > max_score:
          max_score = count
          rev = revnum
        if revnum <= preferred_rev:
          preferred_rev_score = count
      if preferred_rev_score == max_score:
        rev = preferred_rev
      return rev, max_score

    # Aggregate openings and closings from the rev tree
    svn_revision_ranges = self._list_revnums(node)

    # Score the lists
    scores = score_revisions(svn_revision_ranges)

    revnum, max_score = best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # Report the symbolic name being filled.  (This used to format
      # an undefined local, which turned the intended FatalError into
      # a NameError.)
      raise FatalError("failed to find a revision to copy from when "
                       "copying %s" % self.name)
    return revnum, max_score

  def _list_revnums(self, node):
    """Return a list of all the SvnRevisionRanges (including
    duplicates) for all leaf nodes at and under NODE."""
    if isinstance(node, SvnRevisionRange):
      # It is a leaf node.
      return [ node ]

    # It is an intermediate node; collect from every subnode.
    revnums = []
    for subnode in node.values():
      revnums.extend(self._list_revnums(subnode))
    return revnums

  def get_sources(self):
    """Return the list of sources for this symbolic name.

    The Project instance defines what are legitimate sources.  Raise
    an exception if a change occurred outside of the source
    directories."""
    return self._get_sub_sources('', self._node_tree)

  def _get_sub_sources(self, start_svn_path, start_node):
    """Return the list of sources for this symbolic name, starting the
    search at path START_SVN_PATH, which is node START_NODE.  This is
    a helper method, called by get_sources() (see).

    Raise RuntimeError if a leaf node is reached without passing
    through a legitimate source."""
    project = Ctx().project
    if isinstance(start_node, SvnRevisionRange):
      # This implies that a change was found outside of the
      # legitimate sources.  This should never happen.  (A bare
      # 'raise' was used here, but there is no active exception to
      # re-raise, so say explicitly what went wrong.)
      raise RuntimeError("change found outside of the legitimate sources "
                         "at '%s' while filling '%s'"
                         % (start_svn_path, self.name))
    elif project.is_source(start_svn_path):
      # This is a legitimate source.  Add it to list.
      return [ FillSource(start_svn_path, start_node) ]
    else:
      # This is a directory that is not a legitimate source.  (That's
      # OK because it hasn't changed directly.)  But directories
      # within it have been changed, so we need to search recursively
      # to find their enclosing sources.
      sources = []
      for entry, node in start_node.items():
        svn_path = _path_join(start_svn_path, entry)
        sources.extend(self._get_sub_sources(svn_path, node))

      return sources

  def print_node_tree(self, node, name='/', indent_depth=0):
    """For debugging purposes.  Prints all nodes in TREE that are
    rooted at NODE.  INDENT_DEPTH is used to indent the output of
    recursive calls."""
    # Lines are written with sys.stdout.write() rather than the print
    # statement; the items are joined with single spaces exactly as
    # print would separate them.
    if not indent_depth:
      sys.stdout.write(" ".join(["TREE", "=" * 75]) + "\n")
    indent = " " * (indent_depth * 2)
    if isinstance(node, SvnRevisionRange):
      sys.stdout.write(" ".join(["TREE:", indent, str(name), str(node)])
                       + "\n")
    else:
      sys.stdout.write(" ".join(["TREE:", indent, str(name)]) + "\n")
      for key, value in node.items():
        self.print_node_tree(value, key, (indent_depth + 1))
class FillSource:
  """Representation of a fill source used by the symbol filler in
  SVNRepositoryMirror."""

  def __init__(self, prefix, node):
    """Create an unscored fill source from PREFIX and NODE.  The score
    and revnum stay None until set_score() is called."""
    self.prefix = prefix
    self.node = node
    self.score = None
    self.revnum = None

  def set_score(self, score, revnum):
    """Set the SCORE and REVNUM."""
    self.score = score
    self.revnum = revnum

  def __cmp__(self, other):
    """Comparison operator used to sort FillSources in descending
    score order.

    Raise TypeError if either FillSource has not been scored yet."""
    if self.score is None or other.score is None:
      # Call form of raise instead of the deprecated
      # 'raise Class, value' spelling; the exception is the same.
      raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped so that higher scores sort first.
    return cmp(other.score, self.score)
3162 class SVNRepositoryMirror:
3163 """Mirror a Subversion Repository as it is constructed, one
3164 SVNCommit at a time. The mirror is skeletal; it does not contain
3165 file contents. The creation of a dumpfile or Subversion repository
3166 is handled by delegates. See self.add_delegate method for how to
3167 set delegates.
3169 The structure of the repository is kept in two databases and one
3170 hash. The revs_db database maps revisions to root node keys, and
3171 the nodes_db database maps node keys to nodes. A node is a hash
3172 from directory names to keys. Both the revs_db and the nodes_db are
3173 stored on disk and each access is expensive.
3175 The nodes_db database only has the keys for old revisions. The
revision that is being constructed is kept in memory in the new_nodes
3177 hash which is cheap to access.
3179 You must invoke _start_commit between SVNCommits.
3181 *** WARNING *** All path arguments to methods in this class CANNOT
3182 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
  """Raised on an attempt to add a path to the repository mirror when
  that path already exists in the youngest revision of the
  repository."""
class SVNRepositoryMirrorUnexpectedOperationError(Exception):
  """Raised when a CVSRevision turns out to have an unexpected
  operation (OP) value."""
class SVNRepositoryMirrorInvalidFillOperationError(Exception):
  """Raised when an empty SymbolicNameFillingGuide is returned during
  a fill although the branch in question already exists."""
def __init__(self):
  """Create the mirror's databases and in-memory state so that it is
  ready to receive SVNCommits."""
  self.delegates = [ ]

  # Start at revision 0 without a root node.  The root node will be
  # created by _open_writable_root_node.
  self.youngest = 0
  self.new_root_key = None
  self.new_nodes = { }

  # Counterpart of the 'revisions' table in a Subversion fs.
  self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

  # Counterpart of the 'nodes' table in a Subversion fs.  (No
  # 'representations' or 'strings' table is needed, because only
  # metadata is tracked, not file contents.)
  self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

  if Ctx().trunk_only:
    # A trunk-only conversion needs neither tags nor symbolings.
    return

  ###PERF IMPT: Suck this into memory.
  self.tags_db = TagsDatabase(DB_OPEN_READ)
  self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
  """Create the directories for trunk and, unless the conversion is
  trunk-only, for branches and tags, in a commit dated DATE.  Call
  this only after all delegates have been added to the repository
  mirror."""
  # A 'fake' SVNCommit (revision 1) lets us reuse its revprops magic.
  initial_commit = SVNCommit("Initialization", 1)
  initial_commit.set_date(date)
  initial_commit.set_log_msg("New repository initialized by cvs2svn.")

  self._start_commit(initial_commit)
  self._mkdir(Ctx().project.trunk_path)
  if Ctx().trunk_only:
    return
  self._mkdir(Ctx().project.branches_path)
  self._mkdir(Ctx().project.tags_path)
3242 def _start_commit(self, svn_commit):
3243 """Start a new commit."""
3244 if self.youngest > 0:
3245 self._end_commit()
3247 self.youngest = svn_commit.revnum
3248 self.new_root_key = None
3249 self.new_nodes = { }
3251 self._invoke_delegates('start_commit', svn_commit)
3253 def _end_commit(self):
3254 """Called at the end of each commit. This method copies the newly
3255 created nodes to the on-disk nodes db."""
3256 if self.new_root_key is None:
3257 # No changes were made in this revision, so we make the root node
3258 # of the new revision be the same as the last one.
3259 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3260 else:
3261 self.revs_db[str(self.youngest)] = self.new_root_key
3262 # Copy the new nodes to the nodes_db
3263 for key, value in self.new_nodes.items():
3264 self.nodes_db[key] = value
3266 def _get_node(self, key):
3267 """Returns the node contents for KEY which may refer to either
3268 self.nodes_db or self.new_nodes."""
3269 if self.new_nodes.has_key(key):
3270 return self.new_nodes[key]
3271 else:
3272 return self.nodes_db[key]
3274 def _open_readonly_node(self, path, revnum):
3275 """Open a readonly node for PATH at revision REVNUM. Returns the
3276 node key and node contents if the path exists, else (None, None)."""
3277 # Get the root key
3278 if revnum == self.youngest:
3279 if self.new_root_key is None:
3280 node_key = self.revs_db[str(self.youngest - 1)]
3281 else:
3282 node_key = self.new_root_key
3283 else:
3284 node_key = self.revs_db[str(revnum)]
3286 for component in path.split('/'):
3287 node_contents = self._get_node(node_key)
3288 node_key = node_contents.get(component, None)
3289 if node_key is None:
3290 return None
3292 return node_key
3294 def _open_writable_root_node(self):
3295 """Open a writable root node. The current root node is returned
3296 immeditely if it is already writable. If not, create a new one by
3297 copying the contents of the root node of the previous version."""
3298 if self.new_root_key is not None:
3299 return self.new_root_key, self.new_nodes[self.new_root_key]
3301 if self.youngest < 2:
3302 new_contents = { }
3303 else:
3304 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
3305 self.new_root_key = gen_key()
3306 self.new_nodes = { self.new_root_key: new_contents }
3308 return self.new_root_key, new_contents
def _open_writable_node(self, svn_path, create):
  """Return (key, contents) of a modifiable node for SVN_PATH.

  Every directory on the way to SVN_PATH is made writable too.  If a
  component of SVN_PATH is missing it is created when CREATE is true
  (emitting a 'mkdir' to the delegates for each created component
  except the last one); otherwise (None, None) is returned."""
  node_contents = self._open_writable_root_node()[1]

  components = svn_path.split('/')
  # Number of components that still follow the one being processed.
  remaining = len(components)
  path_so_far = None
  for component in components:
    remaining = remaining - 1
    path_so_far = _path_join(path_so_far, component)
    child_key = node_contents.get(component, None)

    if child_key is None:
      if not create:
        # The component is missing and we may not create it.
        return None, None
      # Create the missing component as an empty node.
      child_contents = { }
      child_key = gen_key()
      self.new_nodes[child_key] = child_contents
      node_contents[component] = child_key
      if remaining > 0:
        # Only intermediate directories are announced here.
        self._invoke_delegates('mkdir', path_so_far)
    else:
      child_contents = self.new_nodes.get(child_key, None)
      if child_contents is None:
        # The node only lives in nodes_db so far: load it and register
        # it under a fresh key so that it can be modified.
        child_contents = self.nodes_db[child_key]
        child_key = gen_key()
        self.new_nodes[child_key] = child_contents
        node_contents[component] = child_key

    node_contents = child_contents

  return child_key, child_contents
3349 def _path_exists(self, path):
3350 """If PATH exists in self.youngest of the svn repository mirror,
3351 return true, else return None.
3353 PATH must not start with '/'."""
3354 return self._open_readonly_node(path, self.youngest) is not None
3356 def _fast_delete_path(self, parent_path, parent_contents, component):
3357 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3358 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3359 in PARENT_CONTENTS."""
3360 if parent_contents.has_key(component):
3361 del parent_contents[component]
3362 self._invoke_delegates('delete_path',
3363 _path_join(parent_path, component))
3365 def _delete_path(self, svn_path, should_prune=False):
3366 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
3367 all ancestor directories that are made empty when SVN_PATH is deleted.
3368 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
3370 NOTE: This function ignores requests to delete the root directory
3371 or any directory for which Ctx().project.is_unremovable() returns
3372 True, either directly or by pruning."""
3374 if svn_path == '' or Ctx().project.is_unremovable(svn_path):
3375 return
3377 (parent_path, entry,) = _path_split(svn_path)
3378 if parent_path:
3379 parent_key, parent_contents = \
3380 self._open_writable_node(parent_path, False)
3381 else:
3382 parent_key, parent_contents = self._open_writable_root_node()
3384 if parent_key is not None:
3385 self._fast_delete_path(parent_path, parent_contents, entry)
3386 # The following recursion makes pruning an O(n^2) operation in the
3387 # worst case (where n is the depth of SVN_PATH), but the worst case
3388 # is probably rare, and the constant cost is pretty low. Another
3389 # drawback is that we issue a delete for each path and not just
3390 # a single delete for the topmost directory pruned.
3391 if should_prune and len(parent_contents) == 0:
3392 self._delete_path(parent_path, True)
3394 def _mkdir(self, path):
3395 """Create PATH in the repository mirror at the youngest revision."""
3396 self._open_writable_node(path, True)
3397 self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
  """Report a content change of CVS_REV's svn_path in self.youngest
  to the delegates."""
  # The mirror itself is left untouched: it only tracks which paths
  # exist, and changing a file's content neither adds nor removes a
  # path.
  commit_item = SVNCommitItem(cvs_rev, False)
  self._invoke_delegates('change_path', commit_item)
def _add_path(self, cvs_rev):
  """Add CVS_REV's svn_path to the repository mirror and report the
  addition to the delegates."""
  # Create the path (and any missing parents) in the mirror first.
  self._open_writable_node(cvs_rev.svn_path, True)
  commit_item = SVNCommitItem(cvs_rev, True)
  self._invoke_delegates('add_path', commit_item)
def _copy_path(self, src_path, dest_path, src_revnum):
  """Copy SRC_PATH as of subversion revision SRC_REVNUM to DEST_PATH.

  In the youngest revision of the repository the parent of DEST_PATH
  *must* exist, whereas DEST_PATH itself *must not* exist; otherwise
  self.SVNRepositoryMirrorPathExistsError is raised.

  Returns a tuple (key, contents) describing the new node at
  DEST_PATH, where contents is a dictionary."""
  # Find the node that is being copied.
  src_key = self._open_readonly_node(src_path, src_revnum)
  src_contents = self._get_node(src_key)

  # Open the directory that will receive the copy.
  dest_parent, dest_basename = _path_split(dest_path)
  dest_parent_contents = self._open_writable_node(dest_parent, False)[1]

  if dest_basename in dest_parent_contents:
    msg = "Attempt to add path '%s' to repository mirror " % dest_path
    msg = msg + "when it already exists in the mirror."
    raise self.SVNRepositoryMirrorPathExistsError(msg)

  # The copy is cheap: the destination entry simply refers to the
  # source node, so the source key and contents describe the
  # destination as well.
  dest_parent_contents[dest_basename] = src_key
  self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

  return src_key, src_contents
def _fill_symbolic_name(self, svn_commit):
  """Perform all the copies needed to create as much of the tag or
  branch SVN_COMMIT.symbolic_name as the current revision of the
  repository mirror allows.

  By the end of this call the symbolic name is guaranteed to exist in
  the Subversion repository, even if there are no paths under it."""
  symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
      svn_commit.symbolic_name, self.youngest)
  sources = symbol_fill.get_sources()

  if sources:
    # There is something to copy: choose the tag or branch location
    # and fill it from the sources.
    if self.tags_db.has_key(svn_commit.symbolic_name):
      dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
    else:
      dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)

    dest_key = self._open_writable_node(dest_prefix, False)[0]
    self._fill(symbol_fill, dest_prefix, dest_key, sources)
    return

  # No sources.  This can only happen for a branch whose first commit
  # is an add (as opposed to a copy).
  dest_path = Ctx().project.get_branch_path(symbol_fill.name)
  if self._path_exists(dest_path):
    msg = "Error filling branch '" \
          + _clean_symbolic_name(symbol_fill.name) + "'.\n"
    msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
    msg = msg + "attempted to create a branch that already exists."
    raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)

  # An empty filling guide means that the first commit on the branch
  # was to a file added on the branch, and that this is the first fill
  # of that branch.  (This case is covered by test 16.)
  #
  # The branch is created by copying trunk from the revision just
  # before this one...
  source_path = Ctx().project.trunk_path
  entries = self._copy_path(source_path, dest_path,
                            svn_commit.revnum - 1)[1]
  # ...and since the branch is *supposed* to be empty, everything that
  # came along with the copy is deleted again (without pruning).
  for entry in entries:
    self._delete_path(dest_path + '/' + entry)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
  """Fill the tag or branch at DEST_PREFIX + PATH with items from
  SOURCES, then recurse into the child items.

  DEST_PREFIX is the prefix of the destination directory, e.g.
  '/tags/my_tag' or '/branches/my_branch'.  SOURCES is a list of
  FillSource instances that are candidates to be copied to the
  destination.  DEST_KEY is the key in self.nodes_db of the
  destination, or None if the destination does not exist yet.

  PATH is the path relative to DEST_PREFIX; None means the top level,
  e.g. '/tags/my_tag'.

  PARENT_SOURCE_PREFIX is the source prefix that was used to copy the
  parent directory.  PREFERRED_REVNUM is the (int) source revision
  number the caller used for its own copy; None means that no
  revision is preferable to any other (which probably means that no
  copies have happened yet).

  PRUNE_OK means that a copy has been made in this recursion, so it
  is safe to prune directories that are not in
  SYMBOL_FILL._node_tree, provided that said directory has a source
  prefix of one of the PARENT_SOURCE_PREFIX.

  PATH, PARENT_SOURCE_PREFIX, PRUNE_OK and PREFERRED_REVNUM should
  only be passed in by recursive calls."""
  # Score every candidate source and record its best revision number.
  for candidate in sources:
    best_revnum, best_score = symbol_fill.get_best_revnum(candidate.node,
                                                          preferred_revnum)
    candidate.set_score(best_score, best_revnum)

  # After sorting, the first source is the one to copy from.  (The
  # ordering of FillSource objects is defined outside this method; it
  # is expected to put the highest score first.)
  sources.sort()
  copy_source = sources[0]

  src_path = _path_join(copy_source.prefix, path)
  dest_path = _path_join(dest_prefix, path)

  # Decide whether to copy to this destination, removing a
  # destination path that is in the way.
  if dest_key is None:
    must_copy = 1
  elif prune_ok and (parent_source_prefix != copy_source.prefix or
                     copy_source.revnum != preferred_revnum):
    # The existing destination is about to be replaced, so it has to
    # go before the copy is made.
    self._delete_path(dest_path)
    must_copy = 1
  else:
    must_copy = 0

  if must_copy:
    dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                             copy_source.revnum)
    prune_ok = 1
  else:
    dest_entries = self._get_node(dest_key)

  # Map each child path element to the list of FillSources in which
  # that element exists.
  src_entries = {}
  for candidate in sources:
    if not isinstance(candidate.node, SvnRevisionRange):
      for entry, node in candidate.node.items():
        src_entries.setdefault(entry, []).append(
            FillSource(candidate.prefix, node))

  if prune_ok:
    # Remove the destination entries that no source provides.
    delete_list = [entry for entry in dest_entries
                   if entry not in src_entries]
    if delete_list:
      if dest_key not in self.new_nodes:
        dest_key, dest_entries = self._open_writable_node(dest_path, True)
      # Delete in sorted order to get "diffable" dumpfiles.
      delete_list.sort()
      for entry in delete_list:
        self._fast_delete_path(dest_path, dest_entries, entry)

  # Recurse into the children in alphabetical order.
  src_keys = list(src_entries.keys())
  src_keys.sort()
  for src_key in src_keys:
    next_dest_key = dest_entries.get(src_key, None)
    self._fill(symbol_fill, dest_prefix, next_dest_key,
               src_entries[src_key], _path_join(path, src_key),
               copy_source.prefix, copy_source.revnum, prune_ok)
def _synchronize_default_branch(self, svn_commit):
  """Propagate the changes that happened on a non-trunk default
  branch to the trunk of the repository.  See
  CVSCommit._post_commit() for details on why this is necessary."""
  for cvs_rev in svn_commit.cvs_revs:
    svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
    op = cvs_rev.op
    if op == OP_ADD or op == OP_CHANGE:
      # Replace whatever is on trunk with a copy from the branch.
      if self._path_exists(svn_trunk_path):
        self._delete_path(svn_trunk_path)
      self._copy_path(cvs_rev.svn_path, svn_trunk_path,
                      svn_commit.motivating_revnum)
    elif op == OP_DELETE:
      # The file disappeared on the branch, so remove it from trunk.
      self._delete_path(svn_trunk_path)
    else:
      msg = ("Unknown CVSRevision operation '%s' in default branch sync."
             % cvs_rev.op)
      raise self.SVNRepositoryMirrorUnexpectedOperationError(msg)
def commit(self, svn_commit):
  """Add the SVNCommit SVN_COMMIT to the repository mirror, advancing
  the repository revision number and changing the repository.  The
  delegates are notified through self._start_commit()."""
  if svn_commit.revnum == 2:
    self._initialize_repository(svn_commit.get_date())

  self._start_commit(svn_commit)

  if svn_commit.symbolic_name:
    Log().write(LOG_VERBOSE, "Filling symbolic name:",
                _clean_symbolic_name(svn_commit.symbolic_name))
    self._fill_symbolic_name(svn_commit)
    return

  if svn_commit.motivating_revnum:
    Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                % svn_commit.motivating_revnum)
    self._synchronize_default_branch(svn_commit)
    return

  # Otherwise this commit actually commits CVSRevisions.
  rev_count = len(svn_commit.cvs_revs)
  plural = ""
  if rev_count > 1:
    plural = "s"
  Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
              % (rev_count, plural))

  for cvs_rev in svn_commit.cvs_revs:
    # See the comment in CVSCommit._commit() for what this test is all
    # about.  self._path_exists() is somewhat expensive, so it is only
    # asked when the two cheap tests before it have succeeded.
    skip_rev = ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
                and (cvs_rev.rev == "1.1.1.1")
                and self._path_exists(cvs_rev.svn_path))
    if not skip_rev:
      if cvs_rev.op == OP_ADD:
        self._add_path(cvs_rev)
      elif cvs_rev.op == OP_CHANGE:
        # Fix for Issue #74:
        #
        # A file FOO is imported on a non-trunk vendor branch, so it
        # exists in r1.1 and r1.1.1.1.  Later FOO is deleted on the
        # default branch (r1.1.1.2); cvs2svn determines that the
        # delete must happen on trunk as well, so FOO is deleted
        # there.  Then r1.2 comes along with op OP_CHANGE (r1.1 is
        # not 'dead', so a change is assumed).  Because the trunk
        # file is gone, svnadmin would blow up: a file that does not
        # exist cannot be changed.
        #
        # So the path is checked first: if it does not exist the
        # revision is added, otherwise it is business as usual.
        if self._path_exists(cvs_rev.svn_path):
          self._change_path(cvs_rev)
        else:
          self._add_path(cvs_rev)

    if cvs_rev.op == OP_DELETE:
      self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Drop the references to the revs and nodes databases.

  Registered as a cleanup callback (see the Cleanup.register call in
  self.__init__)."""
  self.revs_db = None
  self.nodes_db = None
def add_delegate(self, delegate):
  """Append DELEGATE to self.delegates.

  Whenever SVNRepositoryMirror performs a repository action, it calls
  the method of the same name on each registered delegate.  Delegates
  are called in the order in which they were added.  See
  SVNRepositoryMirrorDelegate for more information."""
  registered = self.delegates
  registered.append(delegate)
3684 def _invoke_delegates(self, method, *args):
3685 """Iterate through each of our delegates, in the order that they
3686 were added, and call the delegate's method named METHOD with the
3687 arguments in ARGS."""
3688 for delegate in self.delegates:
3689 getattr(delegate, method)(*args)
def finish(self):
  """Close the last commit, call the delegates' finish method and
  release the databases."""
  # The final commit is still open at this point.
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """Wraps a CVSRevision object so that Subversion-related data (such
  as properties) can be attached to it."""

  def __init__(self, c_rev, svn_props_changed):
    """Wrap C_REV and work out its Subversion properties.

    SVN_PROPS_CHANGED tells whether the svn: properties are known to
    have changed since the last revision.

    The properties are filled in by the SVNPropertySetters listed in
    Ctx().svn_property_setters; afterwards a couple of them are read
    back for our own purposes."""
    self.c_rev = c_rev

    # Whether the svn properties changed for this file, i.e. whether
    # they have to be written to the dumpfile.
    self.svn_props_changed = svn_props_changed

    # The properties of this item as a map { key : value }.  A value
    # of None means that the property should not be set.
    self.svn_props = { }

    for setter in Ctx().svn_property_setters:
      setter.set_properties(self)

    # Remember whether the EOLs have to be filtered.  (self.svn_props
    # could be consulted directly instead, since it is initialized for
    # each revision.)
    eol_style = self.svn_props.get('svn:eol-style', None)
    self.needs_eol_filter = eol_style is not None

    keywords = self.svn_props.get('svn:keywords', None)
    self.has_keywords = keywords is not None
class SVNPropertySetter:
  """Abstract base class for objects that set properties on an
  SVNCommitItem."""

  def set_properties(self, s_item):
    """Set whatever properties can be determined for S_ITEM.

    Must be overridden; this implementation always raises
    NotImplementedError."""
    raise NotImplementedError()
class SVNRepositoryMirrorDelegate:
  """Abstract superclass for any delegate to SVNRepositoryMirror.
  Subclasses must implement all of the methods below.

  Each method stands for one Subversion operation, which a subclass
  carries out in its own way.  For add_path, for example, the
  DumpfileDelegate would write out a "Node-add:" command to a
  Subversion dumpfile, the StdoutDelegate would merely print that the
  path is being added to the repository, and the RepositoryDelegate
  would actually cause the path to be added to the Subversion
  repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to start the SVNCommit SVN_COMMIT; see
    the subclass implementation for details."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are both strings and SRC_REVNUM is a
    subversion revision number (int); see the subclass implementation
    for details."""
    raise NotImplementedError()

  def finish(self):
    """Do whatever cleanup is needed after all revisions have been
    committed."""
    raise NotImplementedError()
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that records every repository action in a Subversion
  dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Open the dumpfile and write its header.

    DUMPFILE_PATH is the file to write; when it is empty or None,
    Ctx().dumpfile is used instead."""
    self.dumpfile_path = dumpfile_path or Ctx().dumpfile
    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile header to the file object DUMPFILE."""
    # No UUID is written: the CVS repository does not have one, and
    # the Subversion repository will be created with one anyway.
    header = 'SVN-fs-dump-format-version: 2\n\n'
    dumpfile.write(header)

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.

    Raises FatalError if a component of PATH cannot be converted."""
    # The components are converted one by one, because each of them
    # may use a different encoding.
    converted = []
    for piece in path.split('/'):
      try:
        # Log messages can be converted with the 'replace' strategy,
        # but paths must not lose any information.
        converted.append(to_utf8(piece, 'strict'))
      except UnicodeError:
        raise FatalError(
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with (for example) '--encoding=latin1'."
            % (path,))
    return '/'.join(converted)

  def _string_for_prop(self, name, value):
    """Return the property NAME=VALUE in the form needed for the
    dumpfile."""
    template = 'K %d\n%s\nV %d\n%s\n'
    return template % (len(name), name, len(value), value)

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""
    self.revision = svn_commit.revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # The length headers count everything: not just the data, but also
    # the lengths of the lengths, including the 'K ' and 'V '
    # prefixes.
    #
    # Prop-content-length covers just the props, whereas
    # Content-length covers everything; that is the generic header
    # form of any entity in a dumpfile.  Revisions only have props, so
    # for them the two lengths are always the same.

    # Build the property section, sorted by property name; properties
    # whose value is None are left out.
    props = svn_commit.get_revprops()
    prop_names = list(props.keys())
    prop_names.sort()
    prop_strings = [self._string_for_prop(name, props[name])
                    for name in prop_names
                    if props[name] is not None]
    all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n'
    total_len = len(all_prop_strings)

    # Write the revision header, followed by the props.
    revision_header = ('Revision-number: %d\n'
                       'Prop-content-length: %d\n'
                       'Content-length: %d\n'
                       '\n'
                       % (self.revision, total_len, total_len))
    self.dumpfile.write(revision_header)
    self.dumpfile.write(all_prop_strings)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of directory PATH."""
    record = ("Node-path: %s\n"
              "Node-kind: dir\n"
              "Node-action: add\n"
              "\n"
              "\n" % self._utf8_path(path))
    self.dumpfile.write(record)

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.
    OP is either the constant OP_ADD or OP_CHANGE; any other value
    raises FatalError."""
    # Validate OP and translate it to a dumpfile action.
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      raise FatalError("_add_or_change_path() called with bad op ('%s')"
                       % (op,))

    c_rev = s_item.c_rev

    # The property handling relies on an undocumented but consistent
    # feature of the Subversion dumpfile loader: when a node's
    # properties are not mentioned at all (no "Prop-content-length:"
    # header, no properties, no "PROPS-END\n" line), the node's
    # properties are left unchanged.
    #
    # This matches the way dumpfiles treat text content.  A change to
    # *just* the properties of a node that already has text from an
    # earlier revision carries no "Text-content-length:" or
    # "Text-content-md5:" header and no text, yet the file's text is
    # not erased; it simply stays as it was.
    #
    # That works out well for cvs2svn:
    #
    # For files, properties are only ever set in the first revision
    # and inherited by all later revisions (including on branches).
    # They are never changed afterwards, so the full set of properties
    # of a file does not have to be remembered once it has been set.
    #
    # For directories, the only property set is "svn:ignore".  It may
    # change after the first revision, but always based on the
    # contents of a ".cvsignore" file, so CVS does the remembering and
    # the previous value does not have to be preserved here either.

    # Build the (sorted-by-name) property section, if there is one.
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      prop_names = list(svn_props.keys())
      prop_names.sort()
      prop_chunks = []
      for prop_name in prop_names:
        prop_value = svn_props[prop_name]
        if prop_value is not None:
          prop_chunks.append(self._string_for_prop(prop_name, prop_value))
      prop_chunks.append('PROPS-END\n')
      prop_contents = ''.join(prop_chunks)
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)
    else:
      prop_contents = ''
      props_header = ''

    # A .cvsignore file is additionally turned into the svn:ignore
    # property of its directory.
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_value = '\n'.join(generate_ignores(c_rev))
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n'
                         % (len(ignore_value), ignore_value))
      ignore_contents = ignore_contents + 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # Headers first, then the props.
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_contents))

    # If the file has keywords, CVS/RCS must not expand them: they
    # have to be unexpanded in the repository, or Subversion will get
    # confused.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
        c_rev, suppress_keyword_substitution=s_item.has_keywords)

    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # Remember where the placeholders start; they are patched once the
    # real length and checksum are known.
    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert a filter that converts all EOLs to LFs if necessary.
    data_reader = pipe.stdout
    if s_item.needs_eol_filter:
      data_reader = LF_EOL_Filter(pipe.stdout)

    # Copy the revision contents, computing length and checksum on the
    # way.
    checksum = md5.new()
    length = 0
    while True:
      buf = data_reader.read(PIPE_READ_SIZE)
      if not buf:
        break
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)

    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
      raise FatalError("The command '%s' failed with exit status: %s\n"
                       "and the following output:\n"
                       "%s" % (pipe_cmd, exit_status, error_output))

    # Go back and patch the length and checksum headers.  The 16 zeros
    # left for the text length are replaced by the real length, padded
    # on the left with spaces:
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write('%16d' % length)
    # 16... + 1 newline + len('Text-content-md5: ') == 35
    self.dumpfile.seek(pos + 35, 0)
    self.dumpfile.write(checksum.hexdigest())
    # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
    self.dumpfile.seek(pos + 84, 0)
    # The content length covers the property data, the text data and
    # any metadata around or inside them.
    self.dumpfile.write('%16d' % (length + len(prop_contents)))
    # Return to the end of the stream.
    self.dumpfile.seek(0, 2)

    # Finish the record with two newlines: one to terminate contents
    # that did not end in a newline themselves, and one to provide a
    # blank line for readability.
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    record = ('Node-path: %s\n'
              'Node-action: delete\n'
              '\n' % self._utf8_path(path))
    self.dumpfile.write(record)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
    # "Node-kind:" is not needed for copies; the loader ignores it
    # anyway and uses the kind of the source instead.
    utf8_dest_path = self._utf8_path(dest_path)
    utf8_src_path = self._utf8_path(src_path)
    record = ('Node-path: %s\n'
              'Node-action: add\n'
              'Node-copyfrom-rev: %d\n'
              'Node-copyfrom-path: /%s\n'
              '\n'
              % (utf8_dest_path, src_revnum, utf8_src_path))
    self.dumpfile.write(record)

  def finish(self):
    """Close the dumpfile once all revisions have been committed."""
    self.dumpfile.close()
4084 class RepositoryDelegate(DumpfileDelegate):
4085 """Creates a new Subversion Repository. DumpfileDelegate does all
4086 of the heavy lifting."""
def __init__(self):
  """Create the target repository if necessary and start the
  'svnadmin load' process that the commits will be fed to.

  Unless Ctx().existing_svnrepos is set, a new repository is created
  at Ctx().target with 'svnadmin create'.  The revisions are staged
  one at a time in a temporary dumpfile.

  Raises FatalError if the dumpfile header cannot be written to the
  svnadmin pipe."""
  self.svnadmin = Ctx().svnadmin
  self.target = Ctx().target
  if not Ctx().existing_svnrepos:
    Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
    if not Ctx().fs_type:
      # User didn't say what kind repository (bdb, fsfs, etc).
      # We still pass --bdb-txn-nosync.  It's a no-op if the default
      # repository type doesn't support it, but we definitely want
      # it if BDB is the default.
      run_command('%s create %s "%s"' % (self.svnadmin,
                                         "--bdb-txn-nosync",
                                         self.target))
    elif Ctx().fs_type == 'bdb':
      # User explicitly specified bdb.
      #
      # Since this is a BDB repository, pass --bdb-txn-nosync,
      # because it gives us a 4-5x speed boost (if cvs2svn is
      # creating the repository, cvs2svn should be the only program
      # accessing the svn repository (until cvs is done, at least)).
      # But we'll turn no-sync off in self.finish(), unless
      # instructed otherwise.
      run_command('%s create %s %s "%s"' % (self.svnadmin,
                                            "--fs-type=bdb",
                                            "--bdb-txn-nosync",
                                            self.target))
    else:
      # User specified something other than bdb.
      run_command('%s create %s "%s"' % (self.svnadmin,
                                         "--fs-type=%s" % Ctx().fs_type,
                                         self.target))

  # Since the output of this run is a repository, not a dumpfile,
  # the temporary dumpfiles we create should go in the tmpdir.
  DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

  # This is 1 if a commit is in progress, otherwise None.
  self._commit_in_progress = None

  # DumpfileDelegate.__init__ has opened the dumpfile write-only and
  # written a header into it.  We need a read/write handle instead,
  # and the header goes to the loader pipe, not into the staging file.
  # Close the first handle explicitly before reopening: if it were
  # merely dropped, its buffered header would be flushed into the file
  # whenever the object happens to be finalized, possibly on top of
  # data written through the new handle.
  self.dumpfile.close()
  self.dumpfile = open(self.dumpfile_path, 'w+b')
  self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                   self.target ], True)
  self.loader_pipe.stdout.close()
  try:
    self._write_dumpfile_header(self.loader_pipe.stdin)
  except IOError:
    raise FatalError("svnadmin failed with the following output while "
                     "loading the dumpfile:\n"
                     + self.loader_pipe.stderr.read())
4137 def _feed_pipe(self):
4138 """Feed the revision stored in the dumpfile to the svnadmin
4139 load pipe."""
4140 self.dumpfile.seek(0)
4141 while 1:
4142 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
4143 if not len(data):
4144 break
4145 try:
4146 self.loader_pipe.stdin.write(data)
4147 except IOError:
4148 raise FatalError("svnadmin failed with the following output "
4149 "while loading the dumpfile:\n"
4150 + self.loader_pipe.stderr.read())
4152 def start_commit(self, svn_commit):
4153 """Start a new commit. If a commit is already in progress, close
4154 the dumpfile, load it into the svn repository, open a new
4155 dumpfile, and write the header into it."""
4156 if self._commit_in_progress:
4157 self._feed_pipe()
4158 self.dumpfile.seek(0)
4159 self.dumpfile.truncate()
4160 DumpfileDelegate.start_commit(self, svn_commit)
4161 self._commit_in_progress = 1
4163 def finish(self):
4164 """Loads the last commit into the repository."""
4165 self._feed_pipe()
4166 self.dumpfile.close()
4167 self.loader_pipe.stdin.close()
4168 error_output = self.loader_pipe.stderr.read()
4169 exit_status = self.loader_pipe.wait()
4170 if exit_status:
4171 raise FatalError('svnadmin load failed with exit status: %s\n'
4172 'and the following output:\n'
4173 '%s' % (exit_status, error_output,))
4174 os.remove(self.dumpfile_path)
4176 # If this is a BDB repository, and we created the repository, and
4177 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
4178 # line in the DB_CONFIG file, because txn syncing should be on by
4179 # default in BDB repositories.
4181 # We determine if this is a BDB repository by looking for the
4182 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
4183 # checking Ctx().fs_type. That way this code will Do The Right
4184 # Thing in all circumstances.
4185 db_config = os.path.join(self.target, "db/DB_CONFIG")
4186 if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
4187 and os.path.exists(db_config)):
4188 no_sync = 'set_flags DB_TXN_NOSYNC\n'
4190 contents = open(db_config, 'r').readlines()
4191 index = contents.index(no_sync)
4192 contents[index] = '# ' + no_sync
4193 contents = open(db_config, 'w').writelines(contents)
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """Reports, through Log(), what the SVNRepositoryMirror is doing.

  This delegate only emits log messages; none of its methods touch
  the disk."""

  def __init__(self, total_revs):
    """Remember TOTAL_REVS, the revision count shown in progress lines."""
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Log a separator line and the revision number of SVN_COMMIT,
    together with the total number of revisions."""
    separator = "=" * 60
    Log().write(LOG_VERBOSE, separator)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Log that directory PATH is being created."""
    Log().write(LOG_VERBOSE, " New Directory", path)

  def add_path(self, s_item):
    """Log that s_item.c_rev.svn_path is being added."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, s_item):
    """Log that s_item.c_rev.svn_path is being changed."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Log that PATH is being deleted."""
    Log().write(LOG_VERBOSE, " Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Log, on two lines, that revision SRC_REVNUM of SRC_PATH is
    being copied to DEST_PATH."""
    Log().write(LOG_VERBOSE,
                " Copying revision", src_revnum, "of", src_path)
    Log().write(LOG_VERBOSE,
                " to", dest_path)

  def finish(self):
    """Log that the repository has been created."""
    Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
def pass1():
  """Pass 1: walk the CVS repository and parse every ',v' file.

  Each file is handed to cvs2svn_rcsparse with a CollectData instance
  as the sink.  Problems that make the conversion impossible are
  collected in cd.fatal_errors and reported together at the end.

  Raises FatalException if any fatal error was recorded or if no ',v'
  file was found at all."""
  OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    """os.path.walk() callback: process the ',v' files among FILES
    (entries of directory DIRNAME).  BATON is the CollectData sink."""
    cd = baton
    for fname in files:
      if not fname.endswith(',v'):
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname.endswith(OS_SEP_PLUS_ATTIC):
        # drop the 'Attic' portion from the pathname for the canonical name.
        canonical_dir = dirname[:-len(OS_SEP_PLUS_ATTIC)]
        cd.set_fname(os.path.join(canonical_dir, fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)

      # Close the RCS file explicitly instead of relying on garbage
      # collection; a repository can contain a very large number of files.
      rcs_file = None
      try:
        try:
          rcs_file = open(pathname, 'rb')
          cvs2svn_rcsparse.parse(rcs_file, cd)
        except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
                RuntimeError):
          err = "%s: '%s' is not a valid ,v file" \
                % (error_prefix, pathname)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        except:
          # Anything else is unexpected: say which file triggered it
          # and let the exception propagate.
          Log().write(LOG_WARN,
                      "Exception occurred while parsing %s" % pathname)
          raise
      finally:
        if rcs_file is not None:
          rcs_file.close()

  os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if cd.fatal_errors:
    raise FatalException("Pass 1 complete.\n"
                         + "=" * 75 + "\n"
                         + "Error summary:\n"
                         + "\n".join(cd.fatal_errors) + "\n"
                         + "Exited due to fatal error(s).\n")

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  First validate the symbol configuration (blocked exclusions, forced
  tags that have commits, tag/branch mismatches) and exit with status
  1 if any check fails.  Then create the tags database and rewrite the
  revisions file: revisions on excluded branches are dropped, excluded
  symbols are removed, forced tags/branches are converted, and
  timestamps are resynchronized according to the resync data file."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Expand the --exclude patterns into the excluded symbols; the result
  # is queried with has_key() below.
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
      sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = [mismatch for mismatch in mismatches
                if is_not_forced(mismatch)]
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags:
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      resync.setdefault(digest, []).append([t1_l, t1_u, t2])

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    for val in resync.values():
      val.sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)

  def resync_timestamp(c_rev):
    """See if C_REV is "near" any of the resync records we have
    recorded for its digest [of the log message] and, if so, remap its
    timestamp.  At most one record is applied; the new timestamp is
    also stored in tweaked_timestamps_db."""
    for record in resync.get(c_rev.digest, []):
      if record[2] == c_rev.timestamp:
        # This means that either c_rev is the same revision that
        # caused the resync record to exist, or c_rev is a different
        # CVS revision that happens to have the same timestamp.  In
        # either case, we don't have to do anything, so we...
        continue

      if not (record[0] <= c_rev.timestamp <= record[1]):
        continue

      # bingo!  We probably want to remap the time on this c_rev,
      # unless the remapping would be useless because the new time
      # would fall outside the COMMIT_THRESHOLD window for this
      # commit group.
      new_timestamp = record[2]
      # If the new timestamp is earlier than that of our previous revision
      if new_timestamp < c_rev.prev_timestamp:
        desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                + " to time %s, which is before previous the time of"
                + " revision %s (%s):")
        Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                      c_rev.cvs_path, new_timestamp,
                                      c_rev.prev_rev, c_rev.prev_timestamp))
        # If resyncing our rev to c_rev.prev_timestamp + 1 will place
        # the timestamp of c_rev within COMMIT_THRESHOLD of the
        # attempted resync time, then sync back to c_rev.prev_timestamp
        # + 1...
        if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
          new_timestamp = c_rev.prev_timestamp + 1
          Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                        new_timestamp))
        else:
          Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                      warning_prefix)
          continue

      # If the new timestamp is later than that of our next revision
      elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
        desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                + " to time %s, which is after time of next"
                + " revision %s (%s):")
        # Report the *next* revision here; it is the one whose
        # timestamp is being compared against.
        Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                      c_rev.cvs_path, new_timestamp,
                                      c_rev.next_rev, c_rev.next_timestamp))
        # If resyncing our rev to c_rev.next_timestamp - 1 will place
        # the timestamp of c_rev within COMMIT_THRESHOLD of the
        # attempted resync time, then sync forward to c_rev.next_timestamp
        # - 1...
        if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
          new_timestamp = c_rev.next_timestamp - 1
          Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                        new_timestamp))
        else:
          Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                      warning_prefix)
          continue

      # Fix for Issue #71: Avoid resyncing two consecutive revisions
      # to the same timestamp.
      elif (new_timestamp == c_rev.prev_timestamp
            or new_timestamp == c_rev.next_timestamp):
        continue

      # adjust the time range.  we want the COMMIT_THRESHOLD from the
      # bounds of the earlier/latest commit in this group.
      record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
      record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

      msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
            % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
               new_timestamp - c_rev.timestamp)
      Log().write(LOG_VERBOSE, msg)

      c_rev.timestamp = new_timestamp
      tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

      # stop looking for hits
      return

  # Make sure the cleaned revisions file is closed (and therefore
  # flushed) when this pass ends; the next pass sorts that file.
  try:
    # process the revisions file, looking for items to clean up
    for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
      c_rev = CVSRevision(Ctx(), line[:-1])

      # Skip this entire revision if it's on an excluded branch
      if excludes.has_key(c_rev.branch_name):
        continue

      new_prev_ts = None
      if c_rev.prev_rev is not None:
        new_prev_ts = tweaked_timestamps_db.get(
            c_rev.unique_key(c_rev.prev_rev), None)
      if new_prev_ts:
        c_rev.prev_timestamp = new_prev_ts

      new_next_ts = None
      if c_rev.next_rev is not None:
        new_next_ts = tweaked_timestamps_db.get(
            c_rev.unique_key(c_rev.next_rev), None)
      if new_next_ts:
        c_rev.next_timestamp = new_next_ts

      # Remove all references to excluded tags and branches
      c_rev.branches = [symbol for symbol in c_rev.branches
                        if not excludes.has_key(symbol)]
      c_rev.tags = [symbol for symbol in c_rev.tags
                    if not excludes.has_key(symbol)]

      # Convert all branches that are forced to be tags
      for forced_tag in Ctx().forced_tags:
        if forced_tag in c_rev.branches:
          c_rev.branches.remove(forced_tag)
          c_rev.tags.append(forced_tag)

      # Convert all tags that are forced to be branches
      for forced_branch in Ctx().forced_branches:
        if forced_branch in c_rev.tags:
          c_rev.tags.remove(forced_branch)
          c_rev.branches.append(forced_branch)

      resync_timestamp(c_rev)

      output.write(str(c_rev) + "\n")
  finally:
    output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: sort the cleaned revisions file into the sorted revisions
  file, and register the sorted file with Cleanup() against pass5."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  clean_revs = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(clean_revs, sorted_revs)
  Cleanup().register(sorted_revs, pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Pass 4: copy the sorted revisions from the flat file into a
  CVSRevisionDatabase.

  Unless this is a trunk-only conversion, also build the
  LastSymbolicNameDatabase, which contains the last CVSRevision that
  is a source for each tag or branch."""

  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A stand-in whose methods do nothing, so that Ctx().trunk_only
    # need not be tested on every trip around the loop below.
    class DummyLSNDB:
      def log_revision(*args):
        pass
      def create_database(*args):
        pass
    last_sym_name_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "Finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    # Each line, minus its trailing newline, describes one CVSRevision.
    c_rev = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Pass 5: generate the SVNCommit-to-CVSRevision mapping databases.

  Every sorted revision is handed to a CVSRevisionAggregator, except
  that in a trunk-only conversion revisions with a branch name are
  skipped.  (Per the original notes, CVSCommit._commit also calls
  SymbolingsLogger to register CVSRevisions that represent an opening
  or closing for a path on a branch or tag; see SymbolingsLogger.)"""

  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for line in sorted_revs:
    c_rev = CVSRevision(Ctx(), line[:-1])
    if not Ctx().trunk_only or c_rev.branch_name is None:
      aggregator.process_revision(c_rev)
  aggregator.flush()

  # SVNCommit.revnum minus one is recorded as the revision count.
  svn_rev_count = SVNCommit.revnum - 1
  StatsKeeper().set_svn_rev_count(svn_rev_count)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Pass 6: sort the symbol openings/closings file.

  Nothing is sorted in a trunk-only conversion.  The sorted file is
  registered with Cleanup() against pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  if not Ctx().trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(sorted_path, pass8)
  Log().write(LOG_QUIET, "Done")
def pass7():
  """Pass 7: record, for every symbolic name, the file offset at which
  it first appears in the sorted openings/closings file.  Nothing is
  generated in a trunk-only conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      # readline() is used rather than iterating over the file because
      # tell() has to report the offset of the line about to be read.
      while 1:
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is used; the three-way unpacking is kept
        # so that a line with fewer fields still raises ValueError.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Pass 8: replay the stored SVN commits through an
  SVNRepositoryMirror.

  Output goes to a RepositoryDelegate when Ctx().target is set and to
  a DumpfileDelegate otherwise; in a dry run neither is attached.  A
  StdoutDelegate is always attached."""
  svncounter = 2 # Repository initialization is 1.
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if Ctx().target:
    if not Ctx().dry_run:
      repos.add_delegate(RepositoryDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Repository.")
  else:
    if not Ctx().dry_run:
      repos.add_delegate(DumpfileDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Commit revisions in increasing order until the persistence manager
  # returns a false value for the next revision number.
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes, in execution order.  The list is indexed from
# zero, so pass number N is found at _passes[N - 1].
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  All instances share one attribute dictionary, so a value set through
  any instance is visible through every other one."""

  __shared_state = { }

  def __init__(self):
    """Attach to the shared state, filling in the defaults the first
    time (that is, while the shared dictionary is still empty)."""
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # Already populated by an earlier instance.
      return

    # Else, initialize to defaults.

    # Output and working files.
    self.target = None
    self.dumpfile = DUMPFILE
    self.tmpdir = '.'
    self.svnadmin = "svnadmin"
    self.existing_svnrepos = 0
    self.bdb_txn_nosync = 0
    self.fs_type = None

    # Verbosity and run mode.
    self.verbose = 0
    self.quiet = 0
    self.print_help = 0
    self.dump_only = 0
    self.dry_run = 0
    self.skip_cleanup = 0
    self.use_cvs = None
    self.username = None

    # Repository layout.
    self.prune = 1
    self.trunk_only = 0
    self.trunk_base = "trunk"
    self.tags_base = "tags"
    self.branches_base = "branches"

    # Encodings and file properties.
    self.encoding = ["ascii"]
    self.mime_types_file = None
    self.auto_props_file = None
    self.auto_props_ignore_case = False
    self.no_default_eol = 0
    self.eol_from_mime_type = 0
    self.keywords_off = 0
    self.svn_property_setters = []

    # Symbol handling.
    self.forced_branches = []
    self.forced_tags = []
    self.excludes = []
    self.symbol_transforms = []
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev property."""

  def set_properties(self, s_item):
    """Store s_item.c_rev.rev in S_ITEM's properties and flag the
    properties as changed."""
    cvs_revision = s_item.c_rev.rev
    s_item.svn_props['cvs2svn:cvs-rev'] = cvs_revision
    s_item.svn_props_changed = True
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types."""

  def __init__(self, mime_types_file):
    """Read the extension-to-MIME-type table from MIME_TYPES_FILE.

    Lines starting with '#' and lines with fewer than two fields are
    ignored.  When an extension is listed with two different types a
    warning is written to stderr and the later entry wins."""
    self.mappings = { }

    # Use a private FileInput instance, like the rest of this file,
    # rather than the module-global state behind fileinput.input().
    for line in fileinput.FileInput(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      extensions = line.split()
      if len(extensions) < 2:
        continue
      mime_type = extensions.pop(0)
      for ext in extensions:
        if ext in self.mappings and self.mappings[ext] != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext],
                              mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM if its file name has a mapping."""
    basename, extension = os.path.splitext(
        os.path.basename(s_item.c_rev.cvs_path)
        )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type
class AutoPropsPropertySetter(SVNPropertySetter):
  """Set arbitrary svn properties based on an auto-props configuration.

  This class supports case-sensitive or case-insensitive pattern
  matching.  The 'correct' behavior is not quite clear,
  because subversion itself does an inconsistent job of handling case
  in auto-props patterns; see
  http://subversion.tigris.org/issues/show_bug.cgi?id=2036."""

  class Pattern:
    """Describes the properties to be set for files matching a pattern."""
    def __init__(self, pattern, propdict):
      # A glob-like pattern:
      self.pattern = pattern
      # A dictionary of properties that should be set:
      self.propdict = propdict

    def match(self, basename):
      """Does the file with the specified basename match pattern?"""
      return fnmatch.fnmatch(basename, self.pattern)

  def __init__(self, configfilename, ignore_case):
    """Load the patterns of every 'auto-props' section found in
    CONFIGFILENAME.  If IGNORE_CASE is true, section names, patterns
    and file names are lower-cased before they are compared."""
    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      # Stop ConfigParser from transforming option names, which are
      # the patterns here.
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    # Close the configuration file explicitly once it has been parsed.
    config_file = open(configfilename)
    try:
      config.readfp(config_file)
    finally:
      config_file.close()

    self.patterns = []
    for section in config.sections():
      if self.transform_case(section) == 'auto-props':
        for (pattern, value) in config.items(section):
          if value:
            self._add_pattern(pattern, value)

  def squash_case(self, s):
    """Return S in lower case."""
    return s.lower()

  def preserve_case(self, s):
    """Return S unchanged."""
    return s

  def _add_pattern(self, pattern, value):
    """Register PATTERN with the properties described by VALUE, a
    ';'-separated list of 'name=value' or bare 'name' entries.  A bare
    name is stored with the value None."""
    props = value.split(';')
    propdict = {}
    for prop in props:
      s = prop.split('=', 1)
      if len(s) == 1:
        propdict[s[0]] = None
      else:
        propdict[s[0]] = s[1]
    self.patterns.append(
        self.Pattern(self.transform_case(pattern), propdict))

  def get_propdict(self, path):
    """Return the properties to set for PATH as a dictionary.

    The properties of every pattern matching the basename of PATH are
    merged.  If two patterns disagree about a property, the value seen
    first is kept and a warning is logged."""
    basename = self.transform_case(os.path.basename(path))
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if key in propdict:
            if propdict[key] != value:
              Log().write(
                  LOG_WARN,
                  "Contradictory values set for property '%s' for file %s."
                  % (key, path,))
          else:
            propdict[key] = value

    return propdict

  def set_properties(self, s_item):
    """Apply the auto-props of S_ITEM's file without overriding
    properties that are already set; a warning is logged when an
    existing value differs from the auto-props value."""
    propdict = self.get_propdict(s_item.c_rev.cvs_path)
    for (k,v) in propdict.items():
      if k in s_item.svn_props:
        if s_item.svn_props[k] != v:
          Log().write(
              LOG_WARN,
              "Property '%s' already set for file %s."
              % (k, s_item.c_rev.cvs_path,))
      else:
        s_item.svn_props[k] = v
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files a fallback mime type when none has been set."""

  def set_properties(self, s_item):
    """Set svn:mime-type to application/octet-stream if the property
    is absent and the CVS mode of the revision is 'b'."""
    if 'svn:mime-type' in s_item.svn_props:
      return
    if s_item.c_rev.mode == 'b':
      s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force the eol-style of binary files to None."""

  def set_properties(self, s_item):
    """Store None as svn:eol-style when the CVS mode of the revision
    is 'b'; other revisions are left alone."""
    is_binary = (s_item.c_rev.mode == 'b')
    if is_binary:
      s_item.svn_props['svn:eol-style'] = None
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive the eol-style from the mime type, if it is not yet known.

  The mime-type must already have been set for this to have any
  effect.  See also issue #39."""

  def set_properties(self, s_item):
    """Set svn:eol-style to 'native' for text/* mime types and to None
    for any other mime type.  Nothing is done if svn:eol-style is
    already present or if no mime type is known."""
    props = s_item.svn_props
    if 'svn:eol-style' in props:
      return
    mime_type = props.get('svn:mime-type', None)
    if mime_type is None:
      return
    if mime_type.startswith("text/"):
      props['svn:eol-style'] = 'native'
    else:
      props['svn:eol-style'] = None
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Supply a default eol-style where none has been set yet."""

  def __init__(self, value):
    """Remember VALUE, the eol-style to use as the default."""
    self.value = value

  def set_properties(self, s_item):
    """Set svn:eol-style to the default unless it is already present."""
    if 'svn:eol-style' in s_item.svn_props:
      return
    s_item.svn_props['svn:eol-style'] = self.value
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords according to the CVS mode of the file.  See
  issue #2."""

  def __init__(self, value):
    """Remember VALUE, the svn:keywords value to apply."""
    self.value = value

  def set_properties(self, s_item):
    """Set svn:keywords if it is absent and the CVS mode of the
    revision is None, 'kv' or 'kvl'."""
    if 'svn:keywords' in s_item.svn_props:
      return
    if s_item.c_rev.mode in [None, 'kv', 'kvl']:
      s_item.svn_props['svn:keywords'] = self.value
class ExecutablePropertySetter(SVNPropertySetter):
  """Mirror c_rev.file_executable in the svn:executable property."""

  def set_properties(self, s_item):
    """Set svn:executable to '*' when s_item.c_rev.file_executable is
    true; otherwise leave the properties untouched."""
    if not s_item.c_rev.file_executable:
      return
    s_item.svn_props['svn:executable'] = '*'
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Run passes START_PASS through END_PASS (1-based, inclusive) in
  order, record how long each one took, and log the statistics
  afterwards."""

  cleanup = Cleanup()
  # timestamps[n] is the time at which pass n finished; the slot just
  # before the first pass holds the starting time.
  timestamps = [ None ] * (end_pass + 1)
  timestamps[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())
  for index in range(start_pass - 1, end_pass):
    pass_number = index + 1
    pass_function = _passes[index]
    Log().write(LOG_QUIET, '----- pass %d -----' % (pass_number))
    pass_function()
    timestamps[pass_number] = time.time()
    duration = timestamps[pass_number] - timestamps[index]
    StatsKeeper().log_duration_for_pass(duration, pass_number)
    # Dispose of items in Ctx() not intended to live past the end of the pass
    # (Identified by exactly one leading underscore)
    for attr in dir(Ctx()):
      if len(attr) <= 2:
        continue
      if (attr.startswith('_') and not attr.startswith('__')
          and not attr.startswith("_Ctx__")):
        delattr(Ctx(), attr)
    if not Ctx().skip_cleanup:
      cleanup.cleanup(pass_function)
  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())
def normalize_ttb_path(opt, path):
  """Return PATH normalized for use with --trunk, --tags, or --branches.

  PATH is split on '/' and its components are re-joined with
  _path_join(); per the original notes this strips leading, trailing,
  and duplicated '/'.

  OPT is the name of the option, used only in the error message.
  Raises FatalError if the normalized path is empty."""

  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))
def verify_paths_disjoint(*paths):
  """Check that no two of PATHS are identical or nested.

  A path is nested in another one in the sense that 'a/b/c/d' is
  nested in 'a/b'.  Raises FatalError naming the first offending pair
  that is found."""

  decorated = [(path.split('/'), path) for path in paths]
  # If all overlapping elements are equal, a shorter list is
  # considered "less than" a longer one.  Therefore if any paths are
  # nested, this sort will leave at least one such pair adjacent, in
  # the order [nest,nestling].
  decorated.sort()
  previous = None
  for current in decorated:
    if previous is not None:
      outer_parts, outer = previous
      inner_parts, inner = current
      # The slice can only equal outer_parts when inner_parts is at
      # least as long, so no separate length test is needed.
      if inner_parts[:len(outer_parts)] == outer_parts:
        raise FatalError("paths %s and %s are not disjoint."
                         % (outer, inner,))
    previous = current
4998 def usage():
4999 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
5000 % os.path.basename(sys.argv[0])
5001 print ' --help, -h print this usage message and exit with success'
5002 print ' --version print the version number'
5003 print ' -q quiet'
5004 print ' -v verbose'
5005 print ' -s PATH path for SVN repos'
5006 print ' -p START[:END] start at pass START, end at pass END of %d' \
5007 % len(_passes)
5008 print ' If only START is given, run only pass START'
5009 print ' (implicitly enables --skip-cleanup)'
5010 print ' --existing-svnrepos load into existing SVN repository'
5011 print ' --dumpfile=PATH name of intermediate svn dumpfile'
5012 print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
5013 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
5014 print ' --dry-run do not create a repository or a dumpfile;'
5015 print ' just print what would happen.'
5016 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
5017 print ' (only use this if having problems with RCS)'
5018 print ' --svnadmin=PATH path to the svnadmin program'
5019 print ' --trunk-only convert only trunk commits, not tags nor branches'
5020 print ' --trunk=PATH path for trunk (default: %s)' \
5021 % Ctx().trunk_base
5022 print ' --branches=PATH path for branches (default: %s)' \
5023 % Ctx().branches_base
5024 print ' --tags=PATH path for tags (default: %s)' \
5025 % Ctx().tags_base
5026 print ' --no-prune don\'t prune empty directories'
5027 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
5028 print ' --encoding=ENC encoding of paths and log messages in CVS repos'
5029 print ' Multiple of these options may be passed, where they'
5030 print ' will be treated as an ordered list of encodings to'
5031 print ' attempt (with "ascii" as a hardcoded last resort)'
5032 print ' --force-branch=NAME force NAME to be a branch'
5033 print ' --force-tag=NAME force NAME to be a tag'
5034 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
5035 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
5036 print ' use Python regexp and reference syntax respectively'
5037 print ' --username=NAME username for cvs2svn-synthesized commits'
5038 print ' --skip-cleanup prevent the deletion of intermediate files'
5039 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
5040 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
5041 print ' --cvs-revnums record CVS revision numbers as file properties'
5042 print ' --auto-props=FILE set file properties from the auto-props section'
5043 print ' of a file in svn config format'
5044 print ' --auto-props-ignore-case Ignore case when matching auto-props patterns'
5045 print ' --mime-types=FILE specify an apache-style mime.types file for'
5046 print ' setting svn:mime-type'
5047 print ' --eol-from-mime-type set svn:eol-style from mime type if known'
5048 print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
5049 print ' non-binary files with undetermined mime types'
5050 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
5051 print ' cvs2svn sets svn:keywords on non-binary files to'
5052 print ' "%s")' % SVN_KEYWORDS_VALUE
5054 def main():
5055 # Convenience var, so we don't have to keep instantiating this Borg.
5056 ctx = Ctx()
5058 profiling = None
5059 start_pass = 1
5060 end_pass = len(_passes)
5062 try:
5063 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
5064 [ "help", "create", "trunk=",
5065 "username=", "existing-svnrepos",
5066 "branches=", "tags=", "encoding=",
5067 "force-branch=", "force-tag=", "exclude=",
5068 "use-cvs", "mime-types=",
5069 "auto-props=", "auto-props-ignore-case",
5070 "eol-from-mime-type", "no-default-eol",
5071 "trunk-only", "no-prune", "dry-run",
5072 "dump-only", "dumpfile=", "tmpdir=",
5073 "svnadmin=", "skip-cleanup", "cvs-revnums",
5074 "bdb-txn-nosync", "fs-type=",
5075 "version", "profile",
5076 "keywords-off", "symbol-transform="])
5077 except getopt.GetoptError, e:
5078 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
5079 usage()
5080 sys.exit(1)
5082 for opt, value in opts:
5083 if opt == '--version':
5084 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
5085 sys.exit(0)
5086 elif opt == '-p':
5087 # Don't cleanup if we're doing incrementals.
5088 ctx.skip_cleanup = 1
5089 if value.find(':') > 0:
5090 start_pass, end_pass = map(int, value.split(':'))
5091 else:
5092 end_pass = start_pass = int(value)
5093 if start_pass > len(_passes) or start_pass < 1:
5094 raise FatalError(
5095 'illegal value (%d) for starting pass. Must be 1 through %d.'
5096 % (int(start_pass), len(_passes),))
5097 if end_pass < start_pass or end_pass > len(_passes):
5098 raise FatalError(
5099 'illegal value (%d) for ending pass. Must be %d through %d.'
5100 % (int(end_pass), int(start_pass), len(_passes),))
5101 elif (opt == '--help') or (opt == '-h'):
5102 ctx.print_help = 1
5103 elif opt == '-v':
5104 Log().log_level = LOG_VERBOSE
5105 ctx.verbose = 1
5106 elif opt == '-q':
5107 Log().log_level = LOG_QUIET
5108 ctx.quiet = 1
5109 elif opt == '-s':
5110 ctx.target = value
5111 elif opt == '--existing-svnrepos':
5112 ctx.existing_svnrepos = 1
5113 elif opt == '--dumpfile':
5114 ctx.dumpfile = value
5115 elif opt == '--tmpdir':
5116 ctx.tmpdir = value
5117 elif opt == '--use-cvs':
5118 ctx.use_cvs = 1
5119 elif opt == '--svnadmin':
5120 ctx.svnadmin = value
5121 elif opt == '--trunk-only':
5122 ctx.trunk_only = 1
5123 elif opt == '--trunk':
5124 ctx.trunk_base = normalize_ttb_path(opt, value)
5125 elif opt == '--branches':
5126 ctx.branches_base = normalize_ttb_path(opt, value)
5127 elif opt == '--tags':
5128 ctx.tags_base = normalize_ttb_path(opt, value)
5129 elif opt == '--no-prune':
5130 ctx.prune = None
5131 elif opt == '--dump-only':
5132 ctx.dump_only = 1
5133 elif opt == '--dry-run':
5134 ctx.dry_run = 1
5135 elif opt == '--encoding':
5136 ctx.encoding.insert(-1, value)
5137 elif opt == '--force-branch':
5138 ctx.forced_branches.append(value)
5139 elif opt == '--force-tag':
5140 ctx.forced_tags.append(value)
5141 elif opt == '--exclude':
5142 try:
5143 ctx.excludes.append(re.compile('^' + value + '$'))
5144 except re.error, e:
5145 raise FatalError("'%s' is not a valid regexp." % (value,))
5146 elif opt == '--mime-types':
5147 ctx.mime_types_file = value
5148 elif opt == '--auto-props':
5149 ctx.auto_props_file = value
5150 elif opt == '--auto-props-ignore-case':
5151 ctx.auto_props_ignore_case = True
5152 elif opt == '--eol-from-mime-type':
5153 ctx.eol_from_mime_type = 1
5154 elif opt == '--no-default-eol':
5155 ctx.no_default_eol = 1
5156 elif opt == '--keywords-off':
5157 ctx.keywords_off = 1
5158 elif opt == '--username':
5159 ctx.username = value
5160 elif opt == '--skip-cleanup':
5161 ctx.skip_cleanup = 1
5162 elif opt == '--cvs-revnums':
5163 ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5164 elif opt == '--bdb-txn-nosync':
5165 ctx.bdb_txn_nosync = 1
5166 elif opt == '--fs-type':
5167 ctx.fs_type = value
5168 elif opt == '--create':
5169 sys.stderr.write(warning_prefix +
5170 ': The behaviour produced by the --create option is now the '
5171 'default,\nand passing the option is deprecated.\n')
5172 elif opt == '--profile':
5173 profiling = 1
5174 elif opt == '--symbol-transform':
5175 [pattern, replacement] = value.split(":")
5176 try:
5177 pattern = re.compile(pattern)
5178 except re.error, e:
5179 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5180 ctx.symbol_transforms.append((pattern, replacement,))
5182 if ctx.print_help:
5183 usage()
5184 sys.exit(0)
5186 # Consistency check for options and arguments.
5187 if len(args) == 0:
5188 usage()
5189 sys.exit(1)
5191 if len(args) > 1:
5192 sys.stderr.write(error_prefix +
5193 ": must pass only one CVS repository.\n")
5194 usage()
5195 sys.exit(1)
5197 cvsroot = args[0]
5199 if ctx.use_cvs:
5200 ctx.cvs_repository = CVSRepositoryViaCVS(cvsroot)
5201 else:
5202 ctx.cvs_repository = CVSRepositoryViaRCS(cvsroot)
5204 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5205 raise FatalError("must pass one of '-s' or '--dump-only'.")
5207 def not_both(opt1val, opt1name, opt2val, opt2name):
5208 if opt1val and opt2val:
5209 raise FatalError("cannot pass both '%s' and '%s'."
5210 % (opt1name, opt2name,))
5212 not_both(ctx.target, '-s',
5213 ctx.dump_only, '--dump-only')
5215 not_both(ctx.dump_only, '--dump-only',
5216 ctx.existing_svnrepos, '--existing-svnrepos')
5218 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5219 ctx.existing_svnrepos, '--existing-svnrepos')
5221 not_both(ctx.dump_only, '--dump-only',
5222 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5224 not_both(ctx.quiet, '-q',
5225 ctx.verbose, '-v')
5227 not_both(ctx.fs_type, '--fs-type',
5228 ctx.existing_svnrepos, '--existing-svnrepos')
5230 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5231 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5232 % ctx.fs_type)
5234 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5235 ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
5236 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5238 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5239 raise FatalError("the svn-repos-path '%s' is not an "
5240 "existing directory." % ctx.target)
5242 if not ctx.dump_only and not ctx.existing_svnrepos \
5243 and (not ctx.dry_run) and os.path.exists(ctx.target):
5244 raise FatalError("the svn-repos-path '%s' exists.\n"
5245 "Remove it, or pass '--existing-svnrepos'."
5246 % ctx.target)
5248 if ctx.target and not ctx.dry_run:
5249 # Verify that svnadmin can be executed. The 'help' subcommand
5250 # should be harmless.
5251 try:
5252 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5253 except CommandFailedException, e:
5254 raise FatalError(
5255 '%s\n'
5256 'svnadmin could not be executed. Please ensure that it is\n'
5257 'installed and/or use the --svnadmin option.' % (e,))
5259 if ctx.mime_types_file:
5260 ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5262 if ctx.auto_props_file:
5263 ctx.svn_property_setters.append(AutoPropsPropertySetter(
5264 ctx.auto_props_file, ctx.auto_props_ignore_case))
5266 ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5267 ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5269 if ctx.eol_from_mime_type:
5270 ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5272 if ctx.no_default_eol:
5273 ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5274 else:
5275 ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5277 if not ctx.keywords_off:
5278 ctx.svn_property_setters.append(
5279 KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5281 ctx.svn_property_setters.append(ExecutablePropertySetter())
5283 # Make sure the tmp directory exists. Note that we don't check if
5284 # it's empty -- we want to be able to use, for example, "." to hold
5285 # tempfiles. But if we *did* want check if it were empty, we'd do
5286 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5287 if not os.path.exists(ctx.tmpdir):
5288 os.mkdir(ctx.tmpdir)
5289 elif not os.path.isdir(ctx.tmpdir):
5290 raise FatalError(
5291 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5292 " exists and is not a directory. Please make it be a directory,\n"
5293 " or specify some other directory for temporary files."
5294 % (ctx.tmpdir,))
5296 # But do lock the tmpdir, to avoid process clash.
5297 try:
5298 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5299 except OSError, e:
5300 if e.errno == errno.EACCES:
5301 raise FatalError("Permission denied:"
5302 + " No write access to directory '%s'." % ctx.tmpdir)
5303 if e.errno == errno.EEXIST:
5304 raise FatalError(
5305 "cvs2svn is using directory '%s' for temporary files, but\n"
5306 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5307 " cvs2svn process is currently using '%s' as its temporary\n"
5308 " workspace. If you are certain that is not the case,\n"
5309 " then remove the '%s/cvs2svn.lock' subdirectory."
5310 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5311 raise
5312 try:
5313 if profiling:
5314 import hotshot
5315 prof = hotshot.Profile('cvs2svn.hotshot')
5316 prof.runcall(convert, start_pass, end_pass)
5317 prof.close()
5318 else:
5319 convert(start_pass, end_pass)
5320 finally:
5321 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5322 except: pass
5325 if __name__ == '__main__':
5326 try:
5327 main()
5328 except FatalException, e:
5329 sys.stderr.write(str(e))
5330 sys.exit(1)