* cvs2svn (Project): Store the project's root as a cvs_path, too.
[cvs2svn.git] / cvs2svn
blob35b3d79a4a5ecfe8da23d1f360a86cad866baf61
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import md5
33 import marshal
34 import errno
35 import popen2
36 import types
37 try:
38 # Try to get access to a bunch of encodings for use with --encoding.
39 # See http://cjkpython.i18n.org/ for details.
40 import iconv_codec
41 except ImportError:
42 pass
# Warnings and errors start with these strings. They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough. The message follows the
# usual "<prefix>: <text>" layout (no stray quote before the prefix).
if sys.hexversion < 0x2000000:
  sys.stderr.write("%s: Python 2.0 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)
55 # Pretend we have true booleans on older python versions
56 try:
57 True
58 except:
59 True = 1
60 False = 0
62 # Opening pipes was a mess before Python 2.4, because some methods did
63 # not exist on some platforms, and some behaved differently on others.
64 # Python 2.4 solved this by adding the subprocess module, but since we
65 # cannot require such a new version, we cannot use it directly, but
66 # must implement a simplified Popen using the best means necessary.
68 # The SimplePopen class only has the following members and methods, all
69 # behaving as documented in the subprocess.Popen class:
70 # - stdin
71 # - stdout
72 # - stderr
73 # - wait
try:
  # First try subprocess.Popen...
  import subprocess
  class SimplePopen:
    """Run CMD via subprocess.Popen with stdin and stdout piped.

    stderr is piped only if CAPTURE_STDERR is true; otherwise the
    stderr attribute is None."""
    def __init__(self, cmd, capture_stderr):
      if capture_stderr:
        stderr = subprocess.PIPE
      else:
        stderr = None
      self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE, stderr=stderr)
      self.stdin = self._popen.stdin
      self.stdout = self._popen.stdout
      # Popen.stderr is None when stderr was not captured. Always
      # define the attribute so that all SimplePopen variants expose
      # the same members.
      self.stderr = self._popen.stderr
      self.wait = self._popen.wait
except ImportError:
  if hasattr(popen2, 'Popen3'):
    # ...then try popen2.Popen3...
    class SimplePopen:
      """Run CMD via popen2.Popen3.

      The stderr attribute is whatever Popen3 provides as childerr."""
      def __init__(self, cmd, capture_stderr):
        self._popen3 = popen2.Popen3(cmd, capture_stderr)
        self.stdin = self._popen3.tochild
        self.stdout = self._popen3.fromchild
        # Assigned unconditionally so the attribute always exists.
        self.stderr = self._popen3.childerr
        self.wait = self._popen3.wait
  else:
    # ...and if all fails, use popen2.popen3...
    class SimplePopen:
      """Run CMD via popen2.popen3; stderr is always captured here,
      regardless of CAPTURE_STDERR."""
      def __init__(self, cmd, capture_stderr):
        # popen2.popen3 is handed a command string here, so flatten an
        # argument list first.
        if not isinstance(cmd, types.StringType):
          cmd = argv_to_command_string(cmd)
        self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')
      def wait(self):
        """Close all three pipes and return the first true result of
        the close() calls, if any."""
        return self.stdout.close() or self.stdin.close() or \
               self.stderr.close()
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
except ImportError:
  pass
else:
  sys.modules['bsddb'] = sys.modules['bsddb3']

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
  unsuitable_dbm_message = (
      error_prefix
      + ': your installation of Python does not contain a suitable\n'
      + 'DBM module -- cvs2svn cannot continue.\n'
      + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
  sys.stderr.write(unsuitable_dbm_message)
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to prefer gdbm
#    instead. Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb'):
  if not hasattr(anydbm._defaultmod.bsddb, '__version__'):
    try:
      gdbm = __import__('gdbm')
    except ImportError:
      # No gdbm to fall back on: keep the default module, but warn.
      sys.stderr.write(warning_prefix +
        ': The version of the bsddb module found '
        'on your computer has been reported to malfunction on some datasets, '
        'causing KeyError exceptions. You may wish to upgrade your Python to '
        'version 2.3 or later.\n')
    else:
      anydbm._defaultmod = gdbm
148 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
149 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
150 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
152 SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
154 # This really only matches standard '1.1.1.*'-style vendor revisions.
155 # One could conceivably have a file whose default branch is 1.1.3 or
156 # whatever, or was that at some point in time, with vendor revisions
157 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
158 # is the only time this regexp gets used), we'd have no basis for
159 # assuming that the non-standard vendor branch had ever been the
160 # default branch anyway, so we don't want this to match them anyway.
161 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
163 # If this run's output is a repository, then (in the tmpdir) we use
164 # a dumpfile of this name for repository loads.
166 # If this run's output is a dumpfile, then this is default name of
167 # that dumpfile, but in the current directory (unless the user has
168 # specified a dumpfile path, of course, in which case it will be
169 # wherever the user said).
170 DUMPFILE = 'cvs2svn-dump'
172 # This file appears with different suffixes at different stages of
173 # processing. CVS revisions are cleaned and sorted here, for commit
174 # grouping. See design-notes.txt for details.
175 DATAFILE = 'cvs2svn-data'
177 # This file contains a marshalled copy of all the statistics that we
178 # gather throughout the various runs of cvs2svn. The data is stored as a
179 # marshalled dictionary.
180 STATISTICS_FILE = 'cvs2svn-statistics'
182 # This text file contains records (1 per line) that describe svn
183 # filesystem paths that are the opening and closing source revisions
184 # for copies to tags and branches. The format is as follows:
186 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
188 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
189 # SVN_REVNUM are the primary and secondary sorting criteria for
190 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
191 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
192 # A sorted version of the above file.
193 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
195 # This file is a temporary file for storing symbolic_name -> closing
196 # CVSRevision until the end of our pass where we can look up the
197 # corresponding SVNRevNum for the closing revs and write these out to
198 # the SYMBOL_OPENINGS_CLOSINGS.
199 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
201 # Skeleton version of an svn filesystem.
202 # (These supersede and will eventually replace the two above.)
203 # See class SVNRepositoryMirror for how these work.
204 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
205 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
207 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
208 # SYMBOL_OPENINGS_CLOSINGS_SORTED
209 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
211 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
212 # the CVSRevision is the last such that is a source for those symbolic
213 # names. For example, if branch B's number is 1.3.0.2 in this CVS
214 # file, and this file's 1.3 is the latest (by date) revision among
215 # *all* CVS files that is a source for branch B, then the
216 # CVSRevision.unique_key() corresponding to this file at 1.3 would
217 # list at least B in its list.
218 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
220 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
221 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
222 ### the s-revs data in this database.
223 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
225 # Lists all symbolic names that are tags. Keys are strings (symbolic
226 # names), values are ignorable.
227 TAGS_DB = 'cvs2svn-tags.db'
229 # A list of all tags. Each line consists of the tag name and the number
230 # of files in which it exists, separated by a space.
231 TAGS_LIST = 'cvs2svn-tags.txt'
233 # A list of all branches. The file is stored as a plain text file
234 # to make it easy to look at in an editor. Each line contains the
235 # branch name, the number of files where the branch is created, the
236 # commit count, and a list of tags and branches that are defined on
237 # revisions in the branch.
238 BRANCHES_LIST = 'cvs2svn-branches.txt'
240 # These two databases provide a bidirectional mapping between
241 # CVSRevision.unique_key()s and Subversion revision numbers.
243 # The first maps CVSRevision.unique_key() to a number; the values are
244 # not unique.
246 # The second maps a number to a list of CVSRevision.unique_key()s.
247 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
248 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
250 # This database maps svn_revnums to tuples of (symbolic_name, date).
252 # The svn_revnums are the revision numbers of all non-primary
253 # SVNCommits. No primary SVNCommit has a key in this database.
255 # The date is stored for all commits in this database.
257 # For commits that fill symbolic names, the symbolic_name is stored.
258 # For commits that are default branch syncs, the symbolic_name is None.
259 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
261 # This database maps svn_revnums of a default branch synchronization
262 # commit to the svn_revnum of the primary SVNCommit that motivated it.
264 # (NOTE: Secondary commits that fill branches and tags also have a
265 # motivating commit, but we do not record it because it is (currently)
266 # not needed for anything.)
268 # This mapping is used when generating the log message for the commit
269 # that synchronizes the default branch with trunk.
270 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
272 # How many bytes to read at a time from a pipe. 128 kiB should be
273 # large enough to be efficient without wasting too much memory.
274 PIPE_READ_SIZE = 128 * 1024
276 # Record the default RCS branches, if any, for CVS filepaths.
278 # The keys are CVS filepaths, relative to the top of the repository
279 # and with the ",v" stripped off, so they match the cvs paths used in
280 # Commit.commit(). The values are vendor branch revisions, such as
281 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
282 # represents the highest vendor branch revision thought to have ever
283 # been head of the default branch.
285 # The reason we record a specific vendor revision, rather than a
286 # default branch number, is that there are two cases to handle:
288 # One case is simple. The RCS file lists a default branch explicitly
289 # in its header, such as '1.1.1'. In this case, we know that every
290 # revision on the vendor branch is to be treated as head of trunk at
291 # that point in time.
293 # But there's also a degenerate case. The RCS file does not currently
294 # have a default branch, yet we can deduce that for some period in the
295 # past it probably *did* have one. For example, the file has vendor
296 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
297 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
298 # case, we should record 1.1.1.96 as the last vendor revision to have
299 # been the head of the default branch.
300 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
302 # Records the author and log message for each changeset.
303 # The keys are author+log digests, the same kind used to identify
304 # unique revisions in the .revs, etc files. Each value is a tuple
305 # of two elements: '(author logmessage)'.
306 METADATA_DB = "cvs2svn-metadata.db"
308 # A temporary on-disk hash that maps CVSRevision unique keys to a new
309 # timestamp for that CVSRevision. These new timestamps are created in
310 # pass2, and this hash is used exclusively in pass2.
311 TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"
313 REVS_SUFFIX = '.revs'
314 CLEAN_REVS_SUFFIX = '.c-revs'
315 SORTED_REVS_SUFFIX = '.s-revs'
316 RESYNC_SUFFIX = '.resync'
318 SVN_INVALID_REVNUM = -1
320 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
322 # Things that can happen to a file.
323 OP_NOOP = '-'
324 OP_ADD = 'A'
325 OP_DELETE = 'D'
326 OP_CHANGE = 'C'
328 # A deltatext either does or doesn't represent some change.
329 DELTATEXT_NONEMPTY = 'N'
330 DELTATEXT_EMPTY = 'E'
332 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
334 # Constants used in SYMBOL_OPENINGS_CLOSINGS
335 OPENING = 'O'
336 CLOSING = 'C'
class FatalException(Exception):
  """Raised when an error occurs from which there is no recovery.

  When one of these propagates out of main(), the global layer of the
  program catches it, prints its string representation, and ends the
  program with an exit code of 1."""

  pass
class FatalError(FatalException):
  """A FatalException whose message starts with error_prefix."""

  def __init__(self, msg):
    """Store (error_prefix + ': ' + MSG + '\n') as the error message."""

    full_message = '%s: %s\n' % (error_prefix, msg,)
    FatalException.__init__(self, full_message)
def temp(basename):
  """Return the path of BASENAME inside the directory Ctx().tmpdir.
  This is purely a shorthand that saves horizontal space at call sites."""
  tmpdir = Ctx().tmpdir
  return os.path.join(tmpdir, basename)
362 # Since the unofficial set also includes [/\] we need to translate those
363 # into ones that don't conflict with Subversion limitations.
def _clean_symbolic_name(name):
  """Return symbolic name NAME with the characters that Subversion
  does not allow in a pathname translated: '/' becomes '++' and '\\'
  becomes '--'."""
  return name.replace('/', '++').replace('\\', '--')
def _path_join(*components):
  """Join two or more pathname COMPONENTS, inserting '/' as needed.
  Empty components are skipped."""
  # Use the string method rather than the deprecated string.join()
  # function; filter(None, ...) drops the empty components.
  return '/'.join(filter(None, components))
def _path_split(path):
  """Split the svn pathname PATH into a pair, (HEAD, TAIL).

  This works like os.path.split(), except that '/' is always the path
  separator. PATH is an svn path, which should not start with a '/'.
  HEAD is everything before the last slash and TAIL is everything
  after it. A PATH ending in a slash gives an empty TAIL; a PATH
  without any slash gives an empty HEAD; an empty PATH gives an empty
  HEAD and an empty TAIL."""

  slash = path.rfind('/')
  if slash < 0:
    # No separator at all: the whole of PATH is the tail.
    return ('', path,)
  head = path[:slash]
  tail = path[slash + 1:]
  return (head, tail,)
def to_utf8(value, mode='replace'):
  """Return VALUE re-encoded as UTF-8.

  Each encoding in Ctx().encoding is tried in turn as the source
  encoding of VALUE, with MODE passed to unicode() as the error
  handling mode. A failed attempt is logged at verbose level. Raise
  UnicodeError if every source encoding fails."""
  ### FIXME: The 'replace' default mode should be an option,
  ### like --encoding is.
  candidate_encodings = Ctx().encoding
  for encoding in candidate_encodings:
    try:
      decoded = unicode(value, encoding, mode)
      return decoded.encode('utf8')
    except UnicodeError:
      Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
                  % (encoding, value))
  raise UnicodeError
def run_command(command):
  """Run COMMAND with os.system() and raise FatalError if the status
  it returns is nonzero."""
  status = os.system(command)
  if status != 0:
    raise FatalError('Command failed: "%s"' % (command,))
class CommandFailedException(Exception):
  """Raised by check_command_runs() when the command it checks cannot
  be run cleanly."""

  pass
417 def check_command_runs(cmd, cmdname):
418 """Check whether the command CMD can be executed without errors.
420 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
421 name of the command as it should be included in exception error
422 messages.
424 This function checks three things: (1) the command can be run
425 without throwing an OSError; (2) it exits with status=0; (3) it
426 doesn't output anything to stderr. If any of these conditions is
427 not met, raise a CommandFailedException describing the problem."""
429 try:
430 pipe = SimplePopen(cmd, True)
431 except OSError, e:
432 raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
433 pipe.stdin.close()
434 pipe.stdout.read()
435 errmsg = pipe.stderr.read()
436 status = pipe.wait()
437 if status != 0 or errmsg:
438 msg = 'error executing %s: status %s' % (cmdname, status,)
439 if errmsg:
440 msg += ', error output:\n%s' % (errmsg,)
441 raise CommandFailedException(msg)
class CVSRepository:
  """A CVS repository from which data can be extracted."""

  def __init__(self, cvs_repos_path):
    """CVS_REPOS_PATH is the top of the CVS repository (at least as
    far as this run is concerned). It is stored, normalized with
    os.path.normpath(), as self.cvs_repos_path. Raise FatalError if
    CVS_REPOS_PATH is not an existing directory."""

    if not os.path.isdir(cvs_repos_path):
      raise FatalError("The specified CVS repository path '%s' is not an "
                       "existing directory." % cvs_repos_path)

    self.cvs_repos_path = os.path.normpath(cvs_repos_path)

  def get_cvs_path(self, fname):
    """Return the path to FNAME relative to cvs_repos_path, with ',v' removed.

    FNAME is a filesystem name that has to begin (textually) with
    self.cvs_repos_path and end with ',v'. Those parts will be
    stripped off and os.sep will be converted to '/'. Raise
    FatalError if FNAME lacks either the prefix or the suffix."""

    if not fname.startswith(self.cvs_repos_path):
      raise FatalError(
          "get_cvs_path: '%s' is not a sub-path of '%s'"
          % (fname, self.cvs_repos_path,))
    if not fname.endswith(',v'):
      raise FatalError("get_cvs_path: '%s' does not end with ',v'"
                       % (fname,))
    prefix_len = len(self.cvs_repos_path)
    # Skip the separator that follows the repository path, if any.
    # A slice is used instead of an index so that an FNAME with
    # nothing after the prefix cannot raise IndexError.
    if fname[prefix_len:prefix_len + 1] == os.sep:
      prefix_len = prefix_len + 1
    return fname[prefix_len:-2].replace(os.sep, '/')

  def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
    """Return a command string, and the pipe created using that
    string. C_REV is a CVSRevision. If SUPPRESS_KEYWORD_SUBSTITUTION
    is True, then suppress the substitution of RCS/CVS keywords in the
    output. The pipe returns the text of that CVS Revision.

    This base-class version only raises NotImplementedError."""
    raise NotImplementedError
484 class CVSRepositoryViaRCS(CVSRepository):
485 """A CVSRepository accessed via RCS."""
487 def __init__(self, cvs_repos_path):
488 CVSRepository.__init__(self, cvs_repos_path)
489 try:
490 check_command_runs([ 'co', '-V' ], 'co')
491 except CommandFailedException, e:
492 raise FatalError('%s\n'
493 'Please check that co is installed and in your PATH\n'
494 '(it is a part of the RCS software).' % (e,))
496 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
497 pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
498 if suppress_keyword_substitution:
499 pipe_cmd.append('-kk')
500 pipe_cmd.append(c_rev.rcs_path())
501 pipe = SimplePopen(pipe_cmd, True)
502 pipe.stdin.close()
503 return pipe_cmd, pipe
506 class CVSRepositoryViaCVS(CVSRepository):
507 """A CVSRepository accessed via CVS."""
509 def __init__(self, cvs_repos_path):
510 CVSRepository.__init__(self, cvs_repos_path)
511 # Ascend above the specified root if necessary, to find the
512 # cvs_repository_root (a directory containing a CVSROOT directory)
513 # and the cvs_module (the path of the conversion root within the
514 # cvs repository) NB: cvs_module must be seperated by '/' *not* by
515 # os.sep .
516 def is_cvs_repository_root(path):
517 return os.path.isdir(os.path.join(path, 'CVSROOT'))
519 self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
520 self.cvs_module = ""
521 while not is_cvs_repository_root(self.cvs_repository_root):
522 # Step up one directory:
523 prev_cvs_repository_root = self.cvs_repository_root
524 self.cvs_repository_root, module_component = \
525 os.path.split(self.cvs_repository_root)
526 if self.cvs_repository_root == prev_cvs_repository_root:
527 # Hit the root (of the drive, on Windows) without finding a
528 # CVSROOT dir.
529 raise FatalError(
530 "the path '%s' is not a CVS repository, nor a path "
531 "within a CVS repository. A CVS repository contains "
532 "a CVSROOT directory within its root directory."
533 % (self.cvs_repos_path,))
535 self.cvs_module = module_component + "/" + self.cvs_module
537 os.environ['CVSROOT'] = self.cvs_repository_root
539 def cvs_ok(global_arguments):
540 check_command_runs(
541 [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
543 self.global_arguments = [ "-q", "-R" ]
544 try:
545 cvs_ok(self.global_arguments)
546 except CommandFailedException, e:
547 self.global_arguments = [ "-q" ]
548 try:
549 cvs_ok(self.global_arguments)
550 except CommandFailedException, e:
551 raise FatalError(
552 '%s\n'
553 'Please check that cvs is installed and in your PATH.' % (e,))
555 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
556 pipe_cmd = [ 'cvs' ] + self.global_arguments + \
557 [ 'co', '-r' + c_rev.rev, '-p' ]
558 if suppress_keyword_substitution:
559 pipe_cmd.append('-kk')
560 pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
561 pipe = SimplePopen(pipe_cmd, True)
562 pipe.stdin.close()
563 return pipe_cmd, pipe
def generate_ignores(c_rev):
  """Return the list of ignore patterns found in the text of C_REV.

  The text is fetched through Ctx().cvs_repository.get_co_pipe() and
  split on whitespace. A lone '!' discards every pattern seen before
  it. Raise FatalError if the checkout command exits with a nonzero
  status."""
  # Read in props
  pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
  chunks = [ ]
  while 1:
    buf = pipe.stdout.read(PIPE_READ_SIZE)
    if not buf:
      break
    chunks.append(buf)
  raw_ignore_val = ''.join(chunks)
  pipe.stdout.close()
  error_output = pipe.stderr.read()
  exit_status = pipe.wait()
  if exit_status:
    raise FatalError("The command '%s' failed with exit status: %s\n"
                     "and the following output:\n"
                     "%s" % (pipe_cmd, exit_status, error_output))

  # Tweak props: every whitespace-separated word is one pattern.
  # split() never yields empty words, so there are no blanks to skip.
  ignore_vals = [ ]
  for ignore in raw_ignore_val.split():
    if ignore == '!':
      # Reset the list if we encounter a '!'
      # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
      ignore_vals = [ ]
    else:
      ignore_vals.append(ignore)
  return ignore_vals
598 # Return a string that has not been returned by gen_key() before.
599 gen_key_base = 0L
600 def gen_key():
601 global gen_key_base
602 key = '%x' % gen_key_base
603 gen_key_base = gen_key_base + 1
604 return key
606 # ============================================================================
607 # This code is copied with a few modifications from:
608 # subversion/subversion/bindings/swig/python/svn/core.py
if sys.platform == "win32":
  _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

  def escape_shell_arg(arg):
    """Return ARG quoted for use in a Windows command string."""
    # The (very strange) parsing rules used by the C runtime library are
    # described at:
    # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

    # double up slashes, but only if they are followed by a quote character
    arg = _escape_shell_arg_re.sub(r'\1\1\2', arg)

    # surround by quotes and escape quotes inside
    arg = '"' + arg.replace('"', '"^""') + '"'
    return arg


  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    # According to cmd's usage notes (cmd /?), it parses the command line
    # by "seeing if the first character is a quote character and if so,
    # stripping the leading character and removing the last quote
    # character." So to prevent the argument string from being changed we
    # add an extra set of quotes around it here.
    return '"' + " ".join(map(escape_shell_arg, argv)) + '"'

else:
  def escape_shell_arg(str):
    """Return STR wrapped in single quotes, with every embedded single
    quote rewritten as '\\'' (close quote, escaped quote, reopen)."""
    # The parameter keeps its historical name; it shadows the builtin
    # only inside this one-line body.
    return "'" + str.replace("'", "'\\''") + "'"

  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """

    return " ".join(map(escape_shell_arg, argv))
652 # ============================================================================
def format_date(date):
  """Return DATE (seconds since the epoch) as an svn-compatible date
  string, expressed in UTC."""
  # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
  utc_fields = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc_fields)
def sort_file(infile, outfile):
  """Sort the lines of INFILE into OUTFILE with the external 'sort'
  program, using Ctx().tmpdir as sort's temporary directory.

  LC_ALL is forced to 'C' while sort runs, and is put back to its
  previous state (set or unset) afterwards, even if the command
  fails. Raise FatalError (via run_command) if sort fails."""
  # sort the log files

  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so temporarily set LC_ALL to 'C',
  # remembering whatever value (if any) it had before.
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect. The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is
    # no risk that we use that sort by accident.
    run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
  finally:
    # Restore the caller's environment whether or not sort succeeded.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp
def match_regexp_list(regexp_list, string):
  """Return True if at least one compiled regexp in REGEXP_LIST
  matches STRING (using the regexp's match() method), else False."""
  found = False
  for candidate in regexp_list:
    if candidate.match(string) is not None:
      found = True
      break
  return found
class LF_EOL_Filter:
  """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
  into LFs only."""
  def __init__(self, stream):
    """STREAM is the object whose read(size) method supplies the
    unfiltered data."""
    self.stream = stream
    # True when the previous chunk ended in a CR that was held back
    # because it may be the first half of a CRLF pair.
    self.carry_cr = False
    # True once the underlying stream has returned an empty read.
    self.eof = False

  def read(self, size):
    """Read up to SIZE characters from the stream and return them with
    every CRLF and lone CR converted to LF.

    An empty string is returned only at end of input. Because a CR
    held back from the previous call is prepended, the result can be
    one character longer than SIZE."""
    while True:
      buf = self.stream.read(size)
      self.eof = len(buf) == 0
      if self.carry_cr:
        buf = '\r' + buf
        self.carry_cr = False
      if not self.eof and buf[-1] == '\r':
        # Hold the trailing CR back until we know whether an LF follows.
        self.carry_cr = True
        buf = buf[:-1]
      # String methods replace the deprecated string.replace() function.
      buf = buf.replace('\r\n', '\n').replace('\r', '\n')
      # A chunk consisting of a single CR leaves nothing to return yet;
      # loop and read more unless the input is exhausted.
      if len(buf) > 0 or self.eof:
        return buf
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
  """A Simple logging facility. Each line will be timestamped if
  self.use_timestamps is TRUE. This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
  __shared_state = {}
  def __init__(self):
    # Every instance shares one attribute dictionary; only the very
    # first instance finds it empty and fills in the defaults.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %M is minutes; the previous format used %m, which repeated the
    # month in the minutes position.
    self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file. Only
    messages whose LOG_LEVEL is <= self.log_level will be printed. If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str,args)) + "\n")
    # Ensure that log output doesn't get out-of-order with respect to
    # stderr output.
    self.logger.flush()
class Cleanup:
  """This singleton class keeps track of the files created by cvs2svn.
  Whenever you create a file, call Cleanup.register with the filename
  and the last pass in which the file is needed. Once that pass has
  ended, the file is removed, after an optional callback has been run.
  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}
  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    # Maps a pass to a dictionary whose keys are the files to remove
    # after that pass.
    self._log = {}
    # Maps a file to the callback to run just before it is removed.
    self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal. Registering a given FILE is
    idempotent; you may register as many times as you wish, but it
    will only be cleaned up once.

    Note that if a file is registered multiple times, only the first
    callback registered for that file will be called at cleanup
    time. Also note that if you register a database file you must
    close the database before cleanup, e.g. using a callback."""
    pass_files = self._log.setdefault(which_pass, {})
    pass_files[file] = 1
    if callback:
      # setdefault() keeps a callback that was registered earlier.
      self._callbacks.setdefault(file, callback)

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
    pass_files = self._log.get(which_pass)
    if pass_files is None:
      return
    for file in pass_files.keys():
      Log().write(LOG_VERBOSE, "Deleting", file)
      callback = self._callbacks.get(file)
      if callback is not None:
        callback()
      os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'


class AbstractDatabase:
  """An abstract base class for anydbm-based databases."""

  def __init__(self, filename, mode):
    """Open the anydbm database FILENAME in MODE and keep it as
    self.db. The database's has_key and __delitem__ methods are
    exposed directly on this object."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    using_dbhash = anydbm._defaultmod.__name__ == 'dbhash'
    if using_dbhash and mode == DB_OPEN_NEW:
      if os.path.isfile(filename):
        os.unlink(filename)
      mode = 'c'

    self.db = anydbm.open(filename, mode)
    self.has_key = self.db.has_key
    self.__delitem__ = self.db.__delitem__

  def get(self, key, default=None):
    """Return the value stored under KEY, or DEFAULT if there is none.
    (bsddb3 doesn't have a get() method, so one is defined here.)"""

    try:
      value = self[key]
    except KeyError:
      value = default
    return value
class SDatabase(AbstractDatabase):
  """A database that can only store strings: values are passed to and
  from the underlying database unchanged."""

  def __getitem__(self, key):
    stored = self.db[key]
    return stored

  def __setitem__(self, key, value):
    self.db[key] = value
class Database(AbstractDatabase):
  """A database that stores built-in types by serializing each value
  with the marshal module."""

  def __getitem__(self, key):
    serialized = self.db[key]
    return marshal.loads(serialized)

  def __setitem__(self, key, value):
    serialized = marshal.dumps(value)
    self.db[key] = serialized
class StatsKeeper:
  """Collect and report statistics about the conversion.

  All instances share one state dictionary (each instance's __dict__
  is rebound to a class-level dictionary), so every StatsKeeper()
  sees the same counters.  The counters live in self.data, which can
  be saved to and restored from self.filename with archive() and
  unarchive()."""

  __shared_state = { }

  def __init__(self):
    """Attach to the shared state, initializing it on first use.

    On first use the statistics are loaded from the statistics file if
    it already exists; otherwise they start from zeroed counters."""
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # The shared state was already set up by an earlier instance.
      return
    self.filename = temp(STATISTICS_FILE)
    Cleanup().register(self.filename, pass8)
    # This can get kinda large, so we don't store it in our data dict.
    self.repos_files = { }

    if os.path.exists(self.filename):
      self.unarchive()
    else:
      self.data = { 'cvs_revs_count' : 0,
                    'tags': { },
                    'branches' : { },
                    'repos_size' : 0,
                    'repos_file_count' : 0,
                    'svn_rev_count' : None,
                    # Start above any 32-bit timestamp so that the
                    # first recorded revision always lowers it.
                    'first_rev_date' : 2 ** 32,
                    'last_rev_date' : 0,
                    'pass_timings' : { },
                    'start_time' : 0,
                    'end_time' : 0,
                    }

  def log_duration_for_pass(self, duration, pass_num):
    """Record that pass PASS_NUM took DURATION seconds."""
    self.data['pass_timings'][pass_num] = duration

  def set_start_time(self, start):
    """Record START as the time the run began."""
    self.data['start_time'] = start

  def set_end_time(self, end):
    """Record END as the time the run finished."""
    self.data['end_time'] = end

  def _bump_item(self, key, amount=1):
    """Add AMOUNT to the counter stored under KEY."""
    self.data[key] = self.data[key] + amount

  def reset_c_rev_info(self):
    """Forget the revision count, tags and branches recorded so far."""
    self.data['cvs_revs_count'] = 0
    self.data['tags'] = { }
    self.data['branches'] = { }

  def record_c_rev(self, c_rev):
    """Fold the CVSRevision C_REV into the statistics."""
    self._bump_item('cvs_revs_count')

    # The tag and branch dictionaries are used as sets.
    for tag in c_rev.tags:
      self.data['tags'][tag] = None
    for branch in c_rev.branches:
      self.data['branches'][branch] = None

    if c_rev.timestamp < self.data['first_rev_date']:
      self.data['first_rev_date'] = c_rev.timestamp

    if c_rev.timestamp > self.data['last_rev_date']:
      self.data['last_rev_date'] = c_rev.timestamp

    # Only add the size if this is the first time we see the file.
    if c_rev.fname not in self.repos_files:
      self._bump_item('repos_size', c_rev.file_size)
    self.repos_files[c_rev.fname] = None

    self.data['repos_file_count'] = len(self.repos_files)

  def set_svn_rev_count(self, count):
    """Record COUNT as the number of SVN revisions."""
    self.data['svn_rev_count'] = count

  def svn_rev_count(self):
    """Return the recorded number of SVN revisions (None if unset)."""
    return self.data['svn_rev_count']

  def archive(self):
    """Write self.data to self.filename in marshal format."""
    # marshal output is binary, so the file must be opened in binary
    # mode; text mode would translate newline bytes on some platforms.
    # Close explicitly so the data is on disk when we return.
    f = open(self.filename, 'wb')
    try:
      f.write(marshal.dumps(self.data))
    finally:
      f.close()

  def unarchive(self):
    """Replace self.data with the contents of self.filename."""
    f = open(self.filename, 'rb')
    try:
      self.data = marshal.loads(f.read())
    finally:
      f.close()

  def __str__(self):
    """Return a multi-line summary of the collected statistics."""
    svn_revs_str = ""
    if self.data['svn_rev_count'] is not None:
      svn_revs_str = ('Total SVN Commits: %10s\n'
                      % self.data['svn_rev_count'])

    return ('\n'
            'cvs2svn Statistics:\n'
            '------------------\n'
            'Total CVS Files: %10i\n'
            'Total CVS Revisions: %10i\n'
            'Total Unique Tags: %10i\n'
            'Total Unique Branches: %10i\n'
            'CVS Repos Size in KB: %10i\n'
            '%s'
            'First Revision Date: %s\n'
            'Last Revision Date: %s\n'
            '------------------'
            % (self.data['repos_file_count'],
               self.data['cvs_revs_count'],
               len(self.data['tags']),
               len(self.data['branches']),
               (self.data['repos_size'] // 1024),
               svn_revs_str,
               time.ctime(self.data['first_rev_date']),
               time.ctime(self.data['last_rev_date']),
               ))

  def timings(self):
    """Return a multi-line report of the duration of every logged
    pass, followed by the total run time."""
    passes = list(self.data['pass_timings'].keys())
    passes.sort()
    report = 'Timings:\n------------------\n'

    def desc(val):
      if val == 1: return "second"
      return "seconds"

    for pass_num in passes:
      duration = int(self.data['pass_timings'][pass_num])
      p_str = ('pass %d:%6d %s\n'
               % (pass_num, duration, desc(duration)))
      report = report + p_str

    total = int(self.data['end_time'] - self.data['start_time'])
    report = report + ('total: %6d %s' % (total, desc(total)))
    return report
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolic name was
  seen in, and whose value is a list of all symbolic names that were
  last seen in that revision."""

  def __init__(self, mode):
    """Open the symbol database in MODE (as for Database)."""
    # Maps each symbolic name to the unique key of the last
    # CVSRevision (logged so far) that carried it.
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  def log_revision(self, c_rev):
    """Note C_REV as the latest revision for each symbol it carries.

    Once we've gone through all the revs, symbols.keys() will be a
    list of all tags and branches, and their corresponding values will
    be a key into the last CVS revision that they were used in."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare the operation by value: c_rev.op may be a string parsed
    # from a revs line, which is not guaranteed to be the very same
    # object as the OP_DELETE constant.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  def create_database(self):
    """Write the inversion of self.symbols to the database.

    The result maps each CVS rev unique_key to the list of symbols
    that close in that rev."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """Store CVSRevision objects and look them up again by their
  unique_key().

  Revisions are kept in their string form and turned back into
  CVSRevision objects on retrieval."""

  def __init__(self, mode):
    """Open the underlying database in MODE (like the MODE argument
    to Database or anydbm.open())."""
    self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
    Cleanup().register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Store the CVSRevision C_REV under its unique key."""
    serialized = str(c_rev)
    self.cvs_revs_db[c_rev.unique_key()] = serialized

  def get_revision(self, unique_key):
    """Rebuild and return the CVSRevision stored under UNIQUE_KEY."""
    ctx = Ctx()
    revs_line = self.cvs_revs_db[unique_key]
    return CVSRevision(ctx, revs_line)
def TagsDatabase(mode):
  """Open, in MODE, the database recording which symbolic names are
  tags, and return it.

  Each key is a tag name; the stored value has no meaning.
  NOTE(review): this comment used to say the value should be None,
  but the database returned is an SDatabase -- confirm what callers
  actually store."""
  tags_db = SDatabase(temp(TAGS_DB), mode)
  Cleanup().register(temp(TAGS_DB), pass8)
  return tags_db
class Project:
  """One project inside a CVS repository, together with the svn
  directories its trunk, branches and tags are converted to."""

  def __init__(self, project_cvs_repos_path,
               trunk_path, branches_path, tags_path):
    """Set up a Project record.

    PROJECT_CVS_REPOS_PATH is the project's main CVS directory in the
    filesystem; it has to start with the repository path known to
    Ctx(), otherwise FatalError is raised.  TRUNK_PATH, BRANCHES_PATH,
    and TAGS_PATH are the full, normalized svn directory names for the
    corresponding parts of the repository; they are checked with
    verify_paths_disjoint()."""

    self.project_cvs_repos_path = project_cvs_repos_path
    repos_root = Ctx().cvs_repository.cvs_repos_path
    if not project_cvs_repos_path.startswith(repos_root):
      raise FatalError("Project '%s' must start with '%s'"
                       % (project_cvs_repos_path, repos_root,))

    # The project's main directory as a cvs_path: whatever follows the
    # repository root, without a leading separator.
    cvs_path = project_cvs_repos_path[len(repos_root):]
    if cvs_path.startswith(os.sep):
      cvs_path = cvs_path[1:]
    self.project_cvs_path = cvs_path

    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH may serve as a source for this project.

    That is the case for self.trunk_path itself and for any directory
    located directly under self.branches_path."""

    return (svn_path == self.trunk_path
            or _path_split(svn_path)[0] == self.branches_path)

  def is_unremovable(self, svn_path):
    """Return True iff SVN_PATH is one of the project's three top-level
    directories, which must not be removed."""

    protected = (self.trunk_path, self.branches_path, self.tags_path)
    return svn_path in protected

  def relative_name(self, fname):
    """Return FNAME relative to project_cvs_repos_path, without its
    ',v' suffix and with os.sep converted to '/'.

    FNAME is a filesystem name that must begin (textually) with
    self.project_cvs_repos_path and end with ',v'; FatalError is
    raised if either condition does not hold."""

    root = self.project_cvs_repos_path
    if not fname.startswith(root):
      raise FatalError(
          "relative_name: '%s' is not a sub-path of '%s'"
          % (fname, root,))
    if not fname.endswith(',v'):
      raise FatalError("relative_name: '%s' does not end with ',v'"
                       % (fname,))
    # Skip the separator between the project directory and the rest.
    start = len(root)
    if fname[start] == os.sep:
      start += 1
    return fname[start:-2].replace(os.sep, '/')

  def get_branch_path(self, branch_name):
    """Return the svn path of the branch called BRANCH_NAME."""

    cleaned = _clean_symbolic_name(branch_name)
    return _path_join(self.branches_path, cleaned)

  def get_tag_path(self, tag_name):
    """Return the svn path of the tag called TAG_NAME."""

    cleaned = _clean_symbolic_name(tag_name)
    return _path_join(self.tags_path, cleaned)

  def make_trunk_path(self, path):
    """Return the svn path on trunk for PATH.

    PATH is a name relative to project_cvs_repos_path."""

    return _path_join(self.trunk_path, path)

  def make_branch_path(self, branch_name, path):
    """Return the svn path for PATH on the branch called BRANCH_NAME.

    PATH is a name relative to project_cvs_repos_path."""

    branch_root = self.get_branch_path(branch_name)
    return _path_join(branch_root, path)
class CVSRevision:
  """One revision of one RCS file, as recorded in a revs file line."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Raise TypeError for any other number of arguments.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      # The first 15 fields are single words; everything after them
      # (tags, branches, and the file name, which may itself contain
      # spaces) is picked apart below.
      data = args[0].split(' ', 15)
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.mode, self.branch_name, numtags, remainder) = data
      # Patch up data items which are not simple strings.  The
      # timestamp is stored in hex (see __str__); "*" marks an absent
      # value.
      self.timestamp = int(self.timestamp, 16)
      if self.prev_timestamp == "*":
        self.prev_timestamp = 0
      else:
        self.prev_timestamp = int(self.prev_timestamp)
      if self.next_timestamp == "*":
        self.next_timestamp = 0
      else:
        self.next_timestamp = int(self.next_timestamp)
      if self.prev_rev == "*":
        self.prev_rev = None
      if self.next_rev == "*":
        self.next_rev = None
      if self.file_in_attic == "*":
        self.file_in_attic = None
      if self.file_executable == "*":
        self.file_executable = None
      self.file_size = int(self.file_size)
      if self.mode == "*":
        self.mode = None
      if self.branch_name == "*":
        self.branch_name = None
      # REMAINDER is "<tags...> <numbranches> <branches...> <fname>".
      numtags = int(numtags)
      tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
      self.tags = tags_and_numbranches_and_remainder[:-2]
      numbranches = int(tags_and_numbranches_and_remainder[-2])
      remainder = tags_and_numbranches_and_remainder[-1]
      branches_and_fname = remainder.split(' ', numbranches)
      self.branches = branches_and_fname[:-1]
      self.fname = branches_and_fname[-1]
    else:
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.cvs_repository.get_cvs_path(self.fname)
      rel_name = ctx.project.relative_name(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(
            self.branch_name, rel_name)
      else:
        self.svn_path = ctx.project.make_trunk_path(rel_name)

  def unique_key(self, revnum="0"):
    """Return a key that identifies revision REVNUM of this file.

    The 'primary key' of a CVS Revision is the revision number + the
    filename.  To provide a unique key (say, for a dict), we just glom
    them together in a string.  By passing in self.prev_rev or
    self.next_rev, you can get the unique key for their respective
    CVSRevisions.  Without REVNUM the key for self.rev is returned; if
    REVNUM is None (no such revision), None is returned."""
    # "0" is the 'argument omitted' marker; compare it by value, since
    # string identity is an implementation detail.
    if revnum == "0":
      revnum = self.rev
    elif revnum is None:
      return None
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision as a revs file line (no trailing newline),
    in the format accepted by the one-argument form of __init__."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
               self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
               self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
               (self.file_executable or "*"),
               self.file_size,
               self.deltatext_code, (self.mode or "*"),
               (self.branch_name or "*"),
               len(self.tags), self.tags and " " or "", " ".join(self.tags),
               len(self.branches), self.branches and " " or "",
               " ".join(self.branches),
               self.fname, ))

  def opens_symbolic_name(self, name):
    """Return 1 if this CVSRevision is the opening CVSRevision for
    NAME (for this RCS file), else 0."""
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      # We are a default branch revision if we are on the same branch
      # as the recorded revision and do not come after it.
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision."""
    if self.file_in_attic is None:
      return self.fname
    else:
      # self.fname has the 'Attic' component left out; put it back.
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags.keys():
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches.keys():
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Find all blockers of BRANCH, excluding the ones in the hash
    EXCLUDES.  If BRANCH itself is not in EXCLUDES, the result is
    empty.  The returned hash is used as a set."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches.keys():
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches.keys():
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files."""
    # Each line of the tags file is "<tag> <count>".
    f = open(temp(TAGS_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    # Each line of the branches file is
    # "<branch> <create_count> <commit_count> [<blocker> ...]".
    f = open(temp(BRANCHES_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    # Close each file explicitly so that its contents are on disk by
    # the time this method returns.
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1422 class CollectData(cvs2svn_rcsparse.Sink):
def __init__(self):
  """Open the output files and databases that data collection writes
  to, and reset the bookkeeping that spans all files.

  Every temporary file is also registered with Cleanup() together
  with a pass (presumably the pass after which the file is no longer
  needed -- TODO confirm against Cleanup)."""
  # Plain-text output files for revision and resync records.
  self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
  self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
  # Freshly created databases (DB_OPEN_NEW).
  self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                       DB_OPEN_NEW)
  Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
  self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
  Cleanup().register(temp(METADATA_DB), pass8)
  # Error messages collected while parsing (see define_tag()).
  self.fatal_errors = []
  self.num_files = 0
  self.symbol_db = SymbolDatabase()

  # 1 if we've collected data for at least one file, None otherwise.
  self.found_valid_file = None

  # See set_fname() for initializations of other variables.
def set_fname(self, canonical_name, filename):
  """Get ready to collect data for FILENAME.

  FILENAME is the absolute filesystem path to the file in question.
  CANONICAL_NAME is the same path with the 'Attic' component removed
  (if the file does live in the Attic).  All per-file state is reset
  here."""
  self.fname = canonical_name

  # File metadata is worked out here, once per file, so that it does
  # not have to be recomputed later for every single CVS *revision*.

  self.cvs_path = Ctx().cvs_repository.get_cvs_path(self.fname)

  # The two names only differ when the 'Attic' component was stripped
  # out of the canonical name.
  if canonical_name != filename:
    self.file_in_attic = 1
  else:
    self.file_in_attic = None

  file_stat = os.stat(filename)
  # Size of the file, in bytes.
  self.file_size = file_stat[stat.ST_SIZE]

  # Is the owner's executable bit set?
  if file_stat[stat.ST_MODE] & stat.S_IXUSR:
    self.file_executable = 1
  else:
    self.file_executable = None

  # revision -> [timestamp, author, old-timestamp]
  self.rev_data = { }

  # revision number -> number of the previous revision along the same
  # line of development.
  #
  # The first revision R on a branch has as its 'previous' revision
  # the one from which R sprouted.
  #
  # This cannot be worked out arithmetically (because of cvsadmin -o),
  # hence the explicit map.
  #
  # A key without a previous revision has None as its value.
  self.prev_rev = { }

  # Essentially self.prev_rev in the opposite direction: following
  # key -> value leads to the next revision number.
  #
  # In contrast to self.prev_rev, a key without a next revision is
  # simply absent.
  self.next_rev = { }

  # The state of every revision.  set_revision_info needs it to tell
  # whether an op is an add, a change or a delete: by then all of the
  # file's revisions are known, and the state of the prev_rev is what
  # distinguishes an add from a change.
  self.rev_state = { }

  # Branch number (like '1.7.2') -> branch name (like
  # 'Release_1_0_dev').
  self.branch_names = { }

  # RCS flags (used for keyword expansion).
  self.mode = None

  # Revision number (like '1.7') -> list of the names of the branches
  # sprouting from that revision, like
  # ['Release_1_0_dev', 'experimental_driver', ...].
  self.branchlist = { }

  # Same shape as self.branchlist, but the lists hold the tag names
  # that apply to the key revision.
  self.taglist = { }

  # An RCS branch number, if set.  rcsparse calls this the "principal
  # branch" while CVS and RCS say "default branch"; we use the latter
  # term even though the rcsparse API setter is still named
  # 'set_principal_branch'.
  self.default_branch = None

  # When the RCS file has vendor revisions but no default branch any
  # more, we guess that those revisions *were* the head of the default
  # branch until 1.2 was committed, at which point trunk became the
  # file's default branch.  This is the commit date of 1.2.
  self.first_non_vendor_revision_date = None

  # All symbols defined so far for the current file (used as a set).
  # Guards against multiple definitions of one symbol, which can
  # easily happen when --symbol-transform is used.
  self.defined_symbols = { }
def set_principal_branch(self, branch):
  """Remember BRANCH as the default branch of the current file."""
  default_branch = branch
  self.default_branch = default_branch
def set_expansion(self, mode):
  """Remember MODE as the RCS flags (used for keyword expansion) of
  the current file."""
  expansion_mode = mode
  self.mode = expansion_mode
def set_branch_name(self, branch_number, name):
  """Note that branch NAME has the branch number BRANCH_NUMBER, and
  that it sprouts from the revision obtained by dropping the last
  component of BRANCH_NUMBER.

  BRANCH_NUMBER is an RCS branch number with an odd number of
  components, for example '1.7.2' (never '1.7.0.2').  If the branch
  number already carries a name, a warning is written to stderr and
  NAME is ignored."""
  if branch_number in self.branch_names:
    sys.stderr.write("%s: in '%s':\n"
                     " branch '%s' already has name '%s',\n"
                     " cannot also have name '%s', ignoring the latter\n"
                     % (warning_prefix, self.fname, branch_number,
                        self.branch_names[branch_number], name))
    return

  self.branch_names[branch_number] = name
  # self.branchlist is keyed on the revision the branch sprouts from,
  # i.e. the branch number without its odd final component.
  parent_rev = branch_number[:branch_number.rfind(".")]
  siblings = self.branchlist.setdefault(parent_rev, [])
  siblings.append(name)
  self.symbol_db.register_branch_creation(name)
def rev_to_branch_name(self, revision):
  """Return the name of the branch that REVISION lies on.

  REVISION is a non-branch revision number with an even number of
  components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
  As a convenience to callers, REVISION may also be a trunk revision
  such as '1.2'; None is returned in that case."""
  if not trunk_rev.match(revision):
    # The branch number is the revision minus its last component.
    branch_number = revision[:revision.rindex(".")]
    return self.branch_names.get(branch_number)
  return None
def add_cvs_branch(self, revision, branch_name):
  """Register the root revision and branch revision of BRANCH_NAME,
  as derived from REVISION.

  REVISION is a CVS branch number with an even number of components,
  the second-to-last of which is '0'.  Given '1.7.0.2', for example,
  BRANCH_NAME is recorded as sprouting from 1.7 with branch number
  1.7.2."""
  # Cut out the second-to-last component: keep everything before it
  # and re-attach the final ".N".
  tail_start = revision.rfind(".")
  stem = revision[:tail_start]
  sprout_rev = stem[:stem.rfind(".")]
  self.set_branch_name(sprout_rev + revision[tail_start:], branch_name)
def define_tag(self, name, revision):
  """Record a bidirectional mapping between symbolic NAME and
  REVISION.

  REVISION is an unprocessed revision number from the RCS file's
  header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
  The kind of symbolic name is determined by inspecting REVISION, and
  the symbol is recorded in the matching places.  NAME is first run
  through the symbol transforms of Ctx(); a symbol that ends up
  defined more than once for the current file is reported as a fatal
  error (and still recorded)."""
  for (pattern, replacement) in Ctx().symbol_transforms:
    transformed = pattern.sub(replacement, name)
    if transformed != name:
      Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                  % (name, transformed))
      name = transformed

  if name in self.defined_symbols:
    message = "%s: Multiple definitions of the symbol '%s' in '%s'" \
              % (error_prefix, name, self.fname)
    sys.stderr.write(message + "\n")
    self.fatal_errors.append(message)
  self.defined_symbols[name] = None

  # Dispatch on the shape of the revision number.
  if branch_tag.match(revision):
    self.add_cvs_branch(revision, name)
  elif vendor_tag.match(revision):
    self.set_branch_name(revision, name)
  else:
    tags_here = self.taglist.setdefault(revision, [])
    tags_here.append(name)
    self.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
  """Record the header data of one revision of the current file.

  REVISION is the revision number, TIMESTAMP its commit time, AUTHOR
  its author and STATE its RCS state.  BRANCHES lists the revisions
  that have REVISION as their previous revision, and NEXT is the RCS
  'next' revision number of REVISION (may be false if there is none).

  Besides storing this data, update the previous/next revision maps,
  the default branch database, and the branch bookkeeping (naming
  unlabeled branches and counting the commit on non-trunk
  revisions)."""

  # Record the state of our revision for later calculations
  self.rev_state[revision] = state

  # store the rev_data as a list in case we have to jigger the timestamp
  self.rev_data[revision] = [int(timestamp), author, None]

  # When on trunk, the RCS 'next' revision number points to what
  # humans might consider to be the 'previous' revision number.  For
  # example, 1.3's RCS 'next' is 1.2.
  #
  # However, on a branch, the RCS 'next' revision number really does
  # point to what humans would consider to be the 'next' revision
  # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
  #
  # In other words, in RCS, 'next' always means "where to find the next
  # deltatext that you need this revision to retrieve".
  #
  # That said, we don't *want* RCS's behavior here, so we determine
  # whether we're on trunk or a branch and set self.prev_rev
  # accordingly.
  #
  # One last thing.  Note that if REVISION is a branch revision,
  # instead of mapping REVISION to NEXT, we instead map NEXT to
  # REVISION.  Since we loop over all revisions in the file before
  # doing anything with the data we gather here, this 'reverse
  # assignment' effectively does the following:
  #
  # 1. Gives us no 'prev' value for REVISION (in this
  # iteration... it may have been set in a previous iteration)
  #
  # 2. Sets the 'prev' value for the revision with number NEXT to
  # REVISION.  So when we come around to the branch revision whose
  # revision value is NEXT, its 'prev' and 'prev_rev' are already
  # set.
  if trunk_rev.match(revision):
    self.prev_rev[revision] = next
    self.next_rev[next] = revision
  elif next:
    self.prev_rev[next] = revision
    self.next_rev[revision] = next

  for b in branches:
    self.prev_rev[b] = revision

  # Ratchet up the highest vendor head revision, if necessary.
  if self.default_branch:
    default_branch_root = self.default_branch + "."
    # REVISION is on the default branch if it starts with the branch
    # number and has exactly one more component than it.
    if ((revision.find(default_branch_root) == 0)
        and (default_branch_root.count('.') == revision.count('.'))):
      # This revision is on the default branch, so record that it is
      # the new highest default branch head revision.
      self.default_branches_db[self.cvs_path] = revision
  else:
    # No default branch, so make an educated guess.
    if revision == '1.2':
      # This is probably the time when the file stopped having a
      # default branch, so make a note of it.
      self.first_non_vendor_revision_date = timestamp
    else:
      m = vendor_revision.match(revision)
      if m and ((not self.first_non_vendor_revision_date)
                or (timestamp < self.first_non_vendor_revision_date)):
        # We're looking at a vendor revision, and it wasn't
        # committed after this file lost its default branch, so bump
        # the maximum trunk vendor revision in the permanent record.
        self.default_branches_db[self.cvs_path] = revision

  if not trunk_rev.match(revision):
    # Check for unlabeled branches, record them.  We tried to collect
    # all branch names when we parsed the symbolic name header
    # earlier, of course, but that didn't catch unlabeled branches.
    # If a branch is unlabeled, this is our first encounter with it,
    # so we have to record its data now.
    branch_number = revision[:revision.rindex(".")]
    if not self.branch_names.has_key(branch_number):
      branch_name = "unlabeled-" + branch_number
      self.set_branch_name(branch_number, branch_name)

    # Register the commit on this non-trunk branch
    branch_name = self.branch_names[branch_number]
    self.symbol_db.register_branch_commit(branch_name)
def tree_completed(self):
    """The revision tree has been parsed.  Analyze it for consistency.

    Walk every (revision, previous revision) pair recorded in
    self.prev_rev and make sure the previous revision's timestamp
    (slot 0 of its self.rev_data entry) is strictly earlier than the
    revision's own.  Where it is not, move the previous revision back
    to one second before its successor, remember the timestamp it had
    before the move in slot 2 of its self.rev_data entry, and keep
    walking back along the chain of previous revisions for as long as
    the same problem shows up.  Every adjustment is logged at
    LOG_VERBOSE; an adjustment of more than COMMIT_THRESHOLD seconds
    in either direction is also logged as a warning.

    Returns None, once a full scan of self.prev_rev needed no
    adjustment."""

    # Our algorithm depends upon the timestamps on the revisions
    # occurring monotonically over time.  That is, we want to see rev
    # 1.34 occur in time before rev 1.35.  If we inserted 1.35 *first*
    # (due to the time-sorting), and then tried to insert 1.34, we'd be
    # screwed.

    # To perform the analysis, we simply visit all of the 'previous'
    # links that we have recorded and validate that the timestamp on
    # the previous revision is before the specified revision.

    # If we have to resync some nodes, then we restart the scan.  Just
    # keep looping as long as we need to restart.
    while 1:
        for current, prev in self.prev_rev.items():
            if not prev:
                # No previous revision exists (i.e. the initial revision).
                continue
            t_c = self.rev_data[current][0]
            t_p = self.rev_data[prev][0]
            if t_p >= t_c:
                # The previous revision occurred later than the current
                # revision.  Shove the previous revision back in time
                # (and any before it that may need to shift).

                # We sync backwards and not forwards because any given
                # CVS Revision has only one previous revision.  However,
                # a CVS Revision can *be* a previous revision for many
                # other revisions (e.g., a revision that is the source
                # of multiple branches).  This becomes relevant when we
                # do the secondary synchronization in pass 2--we can
                # make certain that we don't resync a revision earlier
                # than its previous revision, but it would be
                # non-trivial to make sure that we don't resync revision
                # R *after* any revisions that have R as a previous
                # revision.
                while t_p >= t_c:
                    self.rev_data[prev][0] = t_c - 1  # new timestamp
                    self.rev_data[prev][2] = t_p  # old timestamp
                    delta = t_c - 1 - t_p
                    msg = ("PASS1 RESYNC: '%s' (%s): old time='%s' "
                           "delta=%ds"
                           % (self.cvs_path, prev, time.ctime(t_p), delta))
                    Log().write(LOG_VERBOSE, msg)
                    if (delta > COMMIT_THRESHOLD
                            or delta < (COMMIT_THRESHOLD * -1)):
                        # Named so that it does not shadow the builtin str.
                        warning_fmt = ("%s: Significant timestamp change "
                                       "for '%s' (%d seconds)")
                        Log().write(LOG_WARN,
                                    warning_fmt % (warning_prefix,
                                                   self.cvs_path, delta))
                    # Step one link back along the chain.
                    current = prev
                    prev = self.prev_rev[current]
                    if not prev:
                        break
                    # CURRENT was just moved to t_c - 1, so that is the
                    # value of self.rev_data[current][0] now.
                    t_c = t_c - 1
                    t_p = self.rev_data[prev][0]

                # Break from the for-loop so that the scan restarts.
                break
        else:
            # Finished the for-loop (no resyncing was performed).
            return
def set_revision_info(self, revision, log, text):
    """Write out the CVSRevision record for REVISION.

    LOG is the log message of REVISION and TEXT is its deltatext.  The
    timestamp, author and pre-resync timestamp of REVISION are taken
    from self.rev_data.  In order, this method:

      - computes a digest from LOG and the author;
      - if the revision carries a pre-resync timestamp, writes a line
        (old timestamp, digest, new timestamp) to self.resync;
      - drops this file's entry from self.default_branches_db when
        revision 1.1 does not carry the log message 'Initial
        revision\\n';
      - decides whether the revision is an add, a change or a delete;
      - writes the resulting CVSRevision to self.revs and hands it to
        the StatsKeeper;
      - stores (author, LOG) in self.metadata_db under the digest,
        unless that digest is already present."""

    def is_branch_revision(rev):
        """Return true if REV is not a trunk revision, else false."""
        return rev.count('.') >= 3

    def is_same_line_of_development(rev1, rev2):
        """Return True if REV1 and REV2 are on the same line of
        development (both on trunk, or both on the same branch), False
        otherwise.  If either one is None the answer is False."""
        if rev1 is None or rev2 is None:
            return False
        if rev1.count('.') == 1 and rev2.count('.') == 1:
            # Two trunk revisions.
            return True
        # Same branch number, i.e. everything before the last dot.
        return rev1[:rev1.rfind('.')] == rev2[:rev2.rfind('.')]

    timestamp, author, old_ts = self.rev_data[revision]
    digest = sha.new(log + '\0' + author).hexdigest()
    if old_ts:
        # The timestamp on this revision was changed.  Log it for later
        # resynchronization of other files' revisions that occurred for
        # this time and log message.
        self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

    # "...Give back one kadam to honor the Hebrew God whose Ark this is."
    #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
    #
    # If revision 1.1 appears to have been created via 'cvs add' instead
    # of 'cvs import', then this file probably never had a default
    # branch, so retroactively remove its record in the default
    # branches db.  The test is that the log message CVS uses for 1.1 in
    # imports is "Initial revision\n" with no period.
    if revision == '1.1' and log != 'Initial revision\n':
        try:
            del self.default_branches_db[self.cvs_path]
        except KeyError:
            pass

    # Timestamps of the neighbouring revisions; 0 stands in when there
    # is no neighbour on that side.
    prev_rev = self.prev_rev[revision]
    prev_timestamp = self.rev_data.get(prev_rev, [0, None, None])[0]

    next_rev = self.next_rev.get(revision)
    next_timestamp = self.rev_data.get(next_rev, [0, None, None])[0]

    # How to tell if a CVSRevision is an add, a change, or a deletion:
    #
    # It's a delete if the RCS state is 'dead'.
    #
    # It's an add if the RCS state is not 'dead' and
    #   - we either have no previous revision
    #     or
    #   - we have a previous revision whose state is 'dead'.
    #
    # Anything else is a change.
    if self.rev_state[revision] == 'dead':
        op = OP_DELETE
    elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
        op = OP_ADD
    else:
        op = OP_CHANGE

    # There can be an odd situation where the tip revision of a branch
    # is alive, but every predecessor on the branch is in state 'dead',
    # yet the revision from which the branch sprouts is alive.  (This is
    # sort of a mirror image of the more common case of adding a file on
    # a branch, in which the first revision on the branch is alive while
    # the revision from which it sprouts is dead.)
    #
    # In this odd situation, we must mark the first live revision on the
    # branch as an OP_CHANGE instead of an OP_ADD, because it reflects,
    # however indirectly, a change w.r.t. the source revision from which
    # the branch sprouts.
    #
    # This is issue #89.
    if is_branch_revision(revision) and self.rev_state[revision] != 'dead':
        # Walk the chain of predecessors all the way back, looking for
        # the step that leaves the branch.
        cur_num = revision
        prev_num = self.prev_rev.get(cur_num, None)
        while cur_num and prev_num:
            if (not is_same_line_of_development(cur_num, prev_num)
                    and self.rev_state[cur_num] == 'dead'
                    and self.rev_state[prev_num] != 'dead'):
                op = OP_CHANGE
            cur_num = prev_num
            prev_num = self.prev_rev.get(cur_num, None)

    deltatext_code = DELTATEXT_EMPTY
    if text:
        deltatext_code = DELTATEXT_NONEMPTY

    c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
                        next_timestamp, op,
                        prev_rev, revision, next_rev,
                        self.file_in_attic, self.file_executable,
                        self.file_size,
                        deltatext_code, self.fname,
                        self.mode, self.rev_to_branch_name(revision),
                        self.taglist.get(revision, []),
                        self.branchlist.get(revision, []))
    self.revs.write(str(c_rev) + "\n")
    StatsKeeper().record_c_rev(c_rev)

    if not self.metadata_db.has_key(digest):
        self.metadata_db[digest] = (author, log)
def parse_completed(self):
    """Register the symbols of the file just parsed and count the file.

    For every revision listed in self.taglist or self.branchlist, look
    up the branch name of that revision with self.rev_to_branch_name()
    and, unless the lookup yields None, register each symbol attached
    to the revision as a blocker of that branch in self.symbol_db.
    Finally add one to self.num_files."""
    tagged = list(self.taglist.items())
    branched = list(self.branchlist.items())
    for revision, symbols in tagged + branched:
        for symbol in symbols:
            name = self.rev_to_branch_name(revision)
            if name is None:
                continue
            self.symbol_db.register_branch_blocker(name, symbol)

    self.num_files += 1
def write_symbol_db(self):
    """Ask self.symbol_db to write itself out."""
    symbol_db = self.symbol_db
    symbol_db.write()
class SymbolingsLogger:
    """Write the file that lists the openings and closings of symbols.

    The file is used later to work out from which range of Subversion
    revisions a path can be copied when a branch or tag is created.

    The "opening" of a branch/tag on a path is the CVSRevision from
    which the branch/tag sprouts on that path.  The "closing" is the
    next CVSRevision on the same line of development as the opening.

    Example: on file 'foo.c', branch BEE has branch number 1.2.2, so it
    sprouts from revision 1.2.  Hence 1.2 is the opening for BEE on
    'foo.c' and 1.3 is the closing.  Any number of revisions may lie
    chronologically between 1.2 and 1.3 (on branches of 'foo.c', even
    on BEE itself), but 1.3 is the next revision *on the same line* as
    1.2, and that is what makes it the closing for every symbol that
    1.2 opens.

    The point of all this is to create branches and tags with as few
    copies and deletes as possible.  Say revisions 1.2 and 1.3 of foo.c
    correspond to Subversion revisions 17 and 30; then there is some
    motivation to copy BEE from one of 17-30.  If the opening and
    closing for BEE on another file, 'bar.c', correspond to revisions
    24 and 39, the ideal source for the whole branch lies somewhere
    between 24 and 29, inclusive."""

    def __init__(self):
        """Open both output files for writing and register each of them
        with Cleanup()."""
        # The final product: one line per opening or closing.
        self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
        Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)

        # Closings are parked here until close() can look up their
        # Subversion revision numbers.
        self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
        Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

        # Maps a *source* cvs_path to the list of (uncleaned) symbolic
        # names for which log_revision() has seen that path; entries are
        # removed again by log_default_branch_closing().
        self.open_paths_with_default_branches = { }

    def log_revision(self, c_rev, svn_revnum):
        """Log the openings found in C_REV and, if C_REV.next_rev is not
        None, the matching closings.

        Openings are written immediately using SVN_REVNUM (none are
        written when C_REV is a deletion).  Closings are only queued in
        the closings file; their revision numbers are filled in by
        close()."""
        symbols = c_rev.tags + c_rev.branches
        is_live = c_rev.op != OP_DELETE
        has_successor = c_rev.next_rev is not None
        for name in symbols:
            self._note_default_branch_opening(c_rev, name)
            if is_live:
                self._log(name, svn_revnum,
                          c_rev.cvs_path, c_rev.branch_name, OPENING)

            # The revision after C_REV is the closing for this source
            # revision.  Its svn_revnum is not known yet, so only note
            # its key for now.
            if has_successor:
                closing_key = c_rev.unique_key(c_rev.next_rev)
                self.closings.write('%s %s\n' % (name, closing_key))

    def _log(self, name, svn_revnum, cvs_path, branch_name, type):
        """Append one line to the symbolings file, saying that
        SVN_REVNUM of CVS_PATH on BRANCH_NAME is the opening or closing
        (TYPE) of the symbolic name NAME.

        A false BRANCH_NAME is written as '*'.  Callers in this class
        pass the global constants OPENING or CLOSING as TYPE."""
        # The revision number is zero-padded to eight digits, which
        # covers 99,999,999 revisions.  That *should* be enough.
        shown_branch = branch_name or '*'
        self.symbolings.write(
            '%s %.8d %s %s %s\n'
            % (name, svn_revnum, type, shown_branch, cvs_path))

    def close(self):
        """Turn every queued closing into a proper line of the
        symbolings file, then close that file.

        Each line of the closings file holds a symbolic name and the
        unique key of the closing CVSRevision; the Subversion revision
        number for that key comes from the persistence manager."""
        # Needed to get from a revision key to its CVSRevision.
        cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

        self.closings.close()
        queued = fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP))
        for line in queued:
            (name, rev_key) = line.rstrip().split(" ", 1)
            svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)

            c_rev = cvs_revs_db.get_revision(rev_key)
            self._log(name, svn_revnum,
                      c_rev.cvs_path, c_rev.branch_name, CLOSING)

        self.symbolings.close()

    def _note_default_branch_opening(self, c_rev, symbolic_name):
        """Remember SYMBOLIC_NAME as opened on C_REV.cvs_path.

        The name is appended to the list kept for that path in
        self.open_paths_with_default_branches.  No check is made here
        of whether C_REV is a default branch revision."""
        names = self.open_paths_with_default_branches.setdefault(
            c_rev.cvs_path, [])
        names.append(symbolic_name)

    def log_default_branch_closing(self, c_rev, svn_revnum):
        """If names are remembered for C_REV.cvs_path in
        self.open_paths_with_default_branches, log each of them as a
        closing at SVN_REVNUM and then forget the path.  Otherwise do
        nothing."""
        path = c_rev.cvs_path
        if not self.open_paths_with_default_branches.has_key(path):
            return
        for name in self.open_paths_with_default_branches[path]:
            self._log(name, svn_revnum, path, None, CLOSING)
        # These openings are dealt with; drop them.
        del self.open_paths_with_default_branches[path]
class PersistenceManager:
    """The PersistenceManager allows us to effectively store SVNCommits
    to disk and retrieve them later using only their subversion revision
    number as the key.  It also returns the subversion revision number
    for a given CVSRevision's unique key.

    All information pertinent to each SVNCommit is stored in a series of
    on-disk databases so that SVNCommits can be retrieved on-demand.

    MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
    In 'new' mode, PersistenceManager will initialize a new set of
    on-disk databases and be fully-featured.
    In 'read' mode, PersistenceManager will open existing on-disk
    databases and the set_* methods will raise RuntimeError."""

    def __init__(self, mode):
        """Open the databases in MODE (DB_OPEN_NEW or DB_OPEN_READ).

        The metadata database and the CVS revisions database are always
        opened read-only, whatever MODE is.  The tags database is only
        opened when the conversion is not trunk-only.

        Raises RuntimeError if MODE is neither of the two constants."""
        self.mode = mode
        if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
            raise RuntimeError(
                "Invalid 'mode' argument to PersistenceManager")
        self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
        Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
        self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
        Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
        self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES),
                                               mode)
        Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
        self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
        self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
        ###PERF kff Elsewhere there are comments about sucking the tags db
        ### into memory.  That seems like a good idea.
        if not Ctx().trunk_only:
            self.tags_db = TagsDatabase(DB_OPEN_READ)
        self.motivating_revnums = SDatabase(temp(MOTIVATING_REVNUMS), mode)
        Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

        # "branch_name" -> svn_revnum in which branch was last filled.
        # This is used by CVSCommit._pre_commit, to prevent creating a
        # fill revision which would have nothing to do.
        self.last_filled = {}

    def _check_writable(self):
        """Raise RuntimeError if this manager was opened read-only."""
        if self.mode == DB_OPEN_READ:
            raise RuntimeError(
                'Write operation attempted on read-only PersistenceManager')

    def get_svn_revnum(self, cvs_rev_unique_key):
        """Return the Subversion revision number in which
        CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
        is no mapping for CVS_REV_UNIQUE_KEY."""
        return int(self.cvs2svn_db.get(cvs_rev_unique_key,
                                       SVN_INVALID_REVNUM))

    def get_svn_commit(self, svn_revnum):
        """Return an SVNCommit that corresponds to SVN_REVNUM.

        If no SVNCommit exists for revnum SVN_REVNUM, then return None.

        The author and log message of the returned commit come from the
        metadata of the first CVSRevision stored for SVN_REVNUM.  In a
        trunk-only conversion nothing else is filled in; otherwise the
        symbolic name, date, tag flag and motivating revision number
        are restored too, where they were recorded.

        This method can throw SVNCommitInternalInconsistencyError, which
        happens when the stored data says that the commit has both
        CVSRevisions and a symbolic name to fill."""
        # Check for existence first: there is no point in building an
        # SVNCommit (which hands out a fresh revision number whenever
        # SVN_REVNUM is false) only to throw it away.
        c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
        if c_rev_keys is None:
            return None

        svn_commit = SVNCommit("Retrieved from disk", svn_revnum)

        digest = None
        for key in c_rev_keys:
            c_rev = self.cvs_revisions.get_revision(key)
            svn_commit.add_revision(c_rev)
            # Set the author and log message for this commit by using
            # CVSRevision metadata, but only if haven't done so already.
            if digest is None:
                digest = c_rev.digest
                author, log_msg = self.svn_commit_metadata[digest]
                svn_commit.set_author(author)
                svn_commit.set_log_msg(log_msg)

        # If we're doing a trunk-only conversion, we don't need to do
        # any more work.
        if Ctx().trunk_only:
            return svn_commit

        name, date = self._get_name_and_date(svn_revnum)
        if name:
            svn_commit.set_symbolic_name(name)
            svn_commit.set_date(date)
            if self.tags_db.has_key(name):
                svn_commit.is_tag = 1

        motivating_revnum = self.motivating_revnums.get(str(svn_revnum),
                                                        None)
        if motivating_revnum:
            svn_commit.set_motivating_revnum(int(motivating_revnum))
            svn_commit.set_date(date)

        if len(svn_commit.cvs_revs) and name:
            raise SVNCommit.SVNCommitInternalInconsistencyError(
                "An SVNCommit cannot have cvs_revisions *and* a "
                "corresponding\n"
                "symbolic name ('%s') to fill."
                % (_clean_symbolic_name(name),))

        return svn_commit

    def set_cvs_revs(self, svn_revnum, cvs_revs):
        """Record the bidirectional mapping between SVN_REVNUM and
        CVS_REVS.

        Raises RuntimeError if the manager was opened read-only."""
        self._check_writable()
        for c_rev in cvs_revs:
            Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
        self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
        for c_rev in cvs_revs:
            self.cvs2svn_db[c_rev.unique_key()] = svn_revnum

    def set_name_and_date(self, svn_revnum, name, date):
        """Associate symbolic name NAME and DATE with SVN_REVNUM, and
        note SVN_REVNUM as the revision in which NAME was last filled.

        NAME is allowed to be None.

        Raises RuntimeError if the manager was opened read-only."""
        self._check_writable()
        self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
        self.last_filled[name] = svn_revnum

    def _get_name_and_date(self, svn_revnum):
        """Return a tuple containing the symbolic name and date
        associated with SVN_REVNUM, or (None, None) if SVN_REVNUM has no
        such data associated with it."""
        return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))

    def set_motivating_revnum(self, svn_revnum, motivating_revnum):
        """Store MOTIVATING_REVNUM as the value of SVN_REVNUM.

        Raises RuntimeError if the manager was opened read-only."""
        self._check_writable()
        self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
2126 class CVSCommit:
2127 """Each instance of this class contains a number of CVS Revisions
2128 that correspond to one or more Subversion Commits. After all CVS
2129 Revisions are added to the grouping, calling process_revisions will
2130 generate a Subversion Commit (or Commits) for the set of CVS
2131 Revisions in the grouping."""
2133 def __init__(self, digest, author, log):
2134 self.digest = digest
2135 self.author = author
2136 self.log = log
2138 # Symbolic names for which the last source revision has already
2139 # been seen and for which the CVSRevisionAggregator has already
2140 # generated a fill SVNCommit. See self.process_revisions().
2141 self.done_symbols = [ ]
2143 self.files = { }
2144 # Lists of CVSRevisions
2145 self.changes = [ ]
2146 self.deletes = [ ]
2148 # Start out with a t_min higher than any incoming time T, and a
2149 # t_max lower than any incoming T. This way the first T will
2150 # push t_min down to T, and t_max up to T, naturally (without any
2151 # special-casing), and successive times will then ratchet them
2152 # outward as appropriate.
2153 self.t_min = 1L<<32
2154 self.t_max = 0
2156 # This will be set to the SVNCommit that occurs in self._commit.
2157 self.motivating_commit = None
2159 # This is a list of all non-primary commits motivated by the main
2160 # commit. We gather these so that we can set their dates to the
2161 # same date as the primary commit.
2162 self.secondary_commits = [ ]
2164 # State for handling default branches.
2166 # Here is a tempting, but ultimately nugatory, bit of logic, which
2167 # I share with you so you may appreciate the less attractive, but
2168 # refreshingly non-nugatory, logic which follows it:
2170 # If some of the commits in this txn happened on a non-trunk
2171 # default branch, then those files will have to be copied into
2172 # trunk manually after being changed on the branch (because the
2173 # RCS "default branch" appears as head, i.e., trunk, in practice).
2174 # As long as those copies don't overwrite any trunk paths that
2175 # were also changed in this commit, then we can do the copies in
2176 # the same revision, because they won't cover changes that don't
2177 # appear anywhere/anywhen else. However, if some of the trunk dst
2178 # paths *did* change in this commit, then immediately copying the
2179 # branch changes would lose those trunk mods forever. So in this
2180 # case, we need to do at least that copy in its own revision. And
2181 # for simplicity's sake, if we're creating the new revision for
2182 # even one file, then we just do all such copies together in the
2183 # new revision.
2185 # Doesn't that sound nice?
2187 # Unfortunately, Subversion doesn't support copies with sources
2188 # in the current txn. All copies must be based in committed
2189 # revisions. Therefore, we generate the above-described new
2190 # revision unconditionally.
2192 # This is a list of c_revs, and a c_rev is appended for each
2193 # default branch commit that will need to be copied to trunk (or
2194 # deleted from trunk) in some generated revision following the
2195 # "regular" revision.
2196 self.default_branch_cvs_revisions = [ ]
2198 def __cmp__(self, other):
2199 # Commits should be sorted by t_max. If both self and other have
2200 # the same t_max, break the tie using t_min, and lastly, digest
2201 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
2202 or cmp(self.digest, other.digest))
2204 def has_file(self, fname):
2205 return self.files.has_key(fname)
2207 def revisions(self):
2208 return self.changes + self.deletes
2210 def opens_symbolic_name(self, name):
2211 """Returns true if any CVSRevision in this commit is on a tag or a
2212 branch or is the origin of a tag or branch."""
2213 for c_rev in self.revisions():
2214 if c_rev.opens_symbolic_name(name):
2215 return 1
2216 return 0
2218 def add_revision(self, c_rev):
2219 # Record the time range of this commit.
2221 # ### ISSUE: It's possible, though unlikely, that the time range
2222 # of a commit could get gradually expanded to be arbitrarily
2223 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
2224 # problem, and anyway deciding where to break it up would be a
2225 # judgement call. For now, we just print a warning in commit() if
2226 # this happens.
2227 if c_rev.timestamp < self.t_min:
2228 self.t_min = c_rev.timestamp
2229 if c_rev.timestamp > self.t_max:
2230 self.t_max = c_rev.timestamp
2232 if c_rev.op == OP_DELETE:
2233 self.deletes.append(c_rev)
2234 else:
2235 # OP_CHANGE or OP_ADD
2236 self.changes.append(c_rev)
2238 self.files[c_rev.fname] = 1
2240 def _pre_commit(self):
2241 """Generates any SVNCommits that must exist before the main
2242 commit."""
2244 # There may be multiple c_revs in this commit that would cause
2245 # branch B to be filled, but we only want to fill B once. On the
2246 # other hand, there might be multiple branches committed on in
2247 # this commit. Whatever the case, we should count exactly one
2248 # commit per branch, because we only fill a branch once per
2249 # CVSCommit. This list tracks which branches we've already
2250 # counted.
2251 accounted_for_sym_names = [ ]
2253 def fill_needed(c_rev, pm):
2254 """Return 1 if this is the first commit on a new branch (for
2255 this file) and we need to fill the branch; else return 0
2256 (meaning that some other file's first commit on the branch has
2257 already done the fill for us).
2259 If C_REV.op is OP_ADD, only return 1 if the branch that this
2260 commit is on has no last filled revision.
2262 PM is a PersistenceManager to query.
2265 # Different '.' counts indicate that c_rev is now on a different
2266 # line of development (and may need a fill)
2267 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
2268 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
2269 # It should be the case that when we have a file F that
2270 # is added on branch B (thus, F on trunk is in state
2271 # 'dead'), we generate an SVNCommit to fill B iff the branch
2272 # has never been filled before.
2274 # If this c_rev.op == OP_ADD, *and* the branch has never
2275 # been filled before, then fill it now. Otherwise, no need to
2276 # fill it.
2277 if c_rev.op == OP_ADD:
2278 if pm.last_filled.get(c_rev.branch_name, None) is None:
2279 return 1
2280 elif c_rev.op == OP_CHANGE:
2281 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
2282 return 1
2283 elif c_rev.op == OP_DELETE:
2284 if pm.last_filled.get(c_rev.branch_name, None) is None:
2285 return 1
2286 return 0
2288 for c_rev in self.changes + self.deletes:
2289 # If a commit is on a branch, we must ensure that the branch
2290 # path being committed exists (in HEAD of the Subversion
2291 # repository). If it doesn't exist, we will need to fill the
2292 # branch. After the fill, the path on which we're committing
2293 # will exist.
2294 if c_rev.branch_name \
2295 and c_rev.branch_name not in accounted_for_sym_names \
2296 and c_rev.branch_name not in self.done_symbols \
2297 and fill_needed(c_rev, Ctx()._persistence_manager):
2298 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
2299 % c_rev.branch_name)
2300 svn_commit.set_symbolic_name(c_rev.branch_name)
2301 self.secondary_commits.append(svn_commit)
2302 accounted_for_sym_names.append(c_rev.branch_name)
2304 def _commit(self):
2305 """Generates the primary SVNCommit that corresponds to this
2306 CVSCommit."""
2307 # Generate an SVNCommit unconditionally. Even if the only change
2308 # in this CVSCommit is a deletion of an already-deleted file (that
2309 # is, a CVS revision in state 'dead' whose predecessor was also in
2310 # state 'dead'), the conversion will still generate a Subversion
2311 # revision containing the log message for the second dead
2312 # revision, because we don't want to lose that information.
2313 svn_commit = SVNCommit("commit")
2314 self.motivating_commit = svn_commit
2316 for c_rev in self.changes:
2317 svn_commit.add_revision(c_rev)
2318 # Only make a change if we need to. When 1.1.1.1 has an empty
2319 # deltatext, the explanation is almost always that we're looking
2320 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
2321 # such imports, CVS creates an RCS file where 1.1 has the
2322 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
2323 # content as 1.1. There's no reason to reflect this non-change
2324 # in the repository, so we want to do nothing in this case. (If
2325 # we were really paranoid, we could make sure 1.1's log message
2326 # is the CVS-generated "Initial revision\n", but I think the
2327 # conditions below are strict enough.)
2328 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
2329 and (c_rev.rev == "1.1.1.1")):
2330 if c_rev.is_default_branch_revision():
2331 self.default_branch_cvs_revisions.append(c_rev)
2333 for c_rev in self.deletes:
2334 # When a file is added on a branch, CVS not only adds the file
2335 # on the branch, but generates a trunk revision (typically
2336 # 1.1) for that file in state 'dead'. We only want to add
2337 # this revision if the log message is not the standard cvs
2338 # fabricated log message.
2339 if c_rev.prev_rev is None:
2340 # c_rev.branches may be empty if the originating branch
2341 # has been excluded.
2342 if not c_rev.branches:
2343 continue
2344 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2345 % (c_rev.filename(),
2346 c_rev.branches[0]))
2347 author, log_msg = \
2348 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2349 if log_msg == cvs_generated_msg:
2350 continue
2352 svn_commit.add_revision(c_rev)
2353 if c_rev.is_default_branch_revision():
2354 self.default_branch_cvs_revisions.append(c_rev)
2356 # There is a slight chance that we didn't actually register any
2357 # CVSRevisions with our SVNCommit (see loop over self.deletes
2358 # above), so if we have no CVSRevisions, we don't flush the
2359 # svn_commit to disk and roll back our revnum.
2360 if len(svn_commit.cvs_revs) > 0:
2361 svn_commit.flush()
2362 else:
2363 # We will not be flushing this SVNCommit, so rollback the
2364 # SVNCommit revision counter.
2365 SVNCommit.revnum = SVNCommit.revnum - 1
2367 if not Ctx().trunk_only:
2368 for c_rev in self.revisions():
2369 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2371 def _post_commit(self):
2372 """Generates any SVNCommits that we can perform now that _commit
2373 has happened. That is, handle non-trunk default branches.
2374 Sometimes an RCS file has a non-trunk default branch, so a commit
2375 on that default branch would be visible in a default CVS checkout
2376 of HEAD. If we don't copy that commit over to Subversion's trunk,
2377 then there will be no Subversion tree which corresponds to that
2378 CVS checkout. Of course, in order to copy the path over, we may
2379 first need to delete the existing trunk there. """
2381 # Only generate a commit if we have default branch revs
2382 if len(self.default_branch_cvs_revisions):
2383 # Generate an SVNCommit for all of our default branch c_revs.
2384 svn_commit = SVNCommit("post-commit default branch(es)")
2385 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2386 for c_rev in self.default_branch_cvs_revisions:
2387 svn_commit.add_revision(c_rev)
2388 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2389 svn_commit.revnum)
2390 self.secondary_commits.append(svn_commit)
2392 def process_revisions(self, done_symbols):
2393 """Process all the CVSRevisions that this instance has, creating
2394 one or more SVNCommits in the process. Generate fill SVNCommits
2395 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2396 fills).
2398 Return the primary SVNCommit that corresponds to this CVSCommit.
2399 The returned SVNCommit is the commit that motivated any other
2400 SVNCommits generated in this CVSCommit."""
2401 self.done_symbols = done_symbols
2402 seconds = self.t_max - self.t_min + 1
2404 Log().write(LOG_VERBOSE, '-' * 60)
2405 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2406 if seconds == 1:
2407 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2408 % time.ctime(self.t_max))
2409 else:
2410 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2411 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2412 % (time.ctime(self.t_max), seconds))
2414 if seconds > COMMIT_THRESHOLD + 1:
2415 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2416 % (warning_prefix, COMMIT_THRESHOLD))
2418 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2419 self._commit()
2420 return self.motivating_commit
2422 self._pre_commit()
2423 self._commit()
2424 self._post_commit()
2426 for svn_commit in self.secondary_commits:
2427 svn_commit.set_date(self.motivating_commit.get_date())
2428 svn_commit.flush()
2430 return self.motivating_commit
2433 class SVNCommit:
2434 """This represents one commit to the Subversion Repository. There
2435 are three types of SVNCommits:
2437 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2439 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2441 3. Updates trunk to reflect the contents of a particular branch
2442 (this is to handle RCS default branches)."""
2444 # The revision number to assign to the next new SVNCommit.
2445 # We start at 2 because SVNRepositoryMirror uses the first commit
2446 # to create trunk, tags, and branches.
2447 revnum = 2
2449 class SVNCommitInternalInconsistencyError(Exception):
2450 """Exception raised if we encounter an impossible state in the
2451 SVNCommit Databases."""
2452 pass
def __init__(self, description="", revnum=None, cvs_revs=None):
    """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.

    If REVNUM is given (and true), the SVNCommit corresponds to that
    revision number; otherwise it takes the next number from the
    class-wide counter SVNCommit.revnum, which is then advanced by one.
    If CVS_REVS is given, it must be the exact set of CVSRevisions for
    REVNUM.

    It is an error to pass CVS_REVS without REVNUM (this is not checked
    here), but you may pass REVNUM without CVS_REVS, and then add a
    revision at a time by invoking add_revision()."""
    self._description = description

    # Revprop metadata for this commit.
    #
    # The values assigned here are placeholders.  At least the log and
    # the date should be different by the time these are used.
    #
    # They are private because their values should be returned encoded
    # in UTF8, but callers aren't required to set them in UTF8.
    # Therefore, accessor methods are used to set them, and
    # self.get_revprops() is used to get them, in dictionary form.
    self._author = Ctx().username
    self._log_msg = "This log message means an SVNCommit was used too soon."
    self._max_date = 0  # Latest date seen so far.

    self.cvs_revs = cvs_revs or []

    if not revnum:
        # Take the next free revision number.
        self.revnum = SVNCommit.revnum
        SVNCommit.revnum = SVNCommit.revnum + 1
    else:
        self.revnum = revnum

    # The (uncleaned) symbolic name that is filled in this SVNCommit,
    # if any.
    self.symbolic_name = None

    # If this commit is a default branch synchronization, this is the
    # subversion revision number of the *primary* commit where the
    # default branch changes actually happened.  It is None otherwise.
    #
    # Several synchronization commits may refer to the same motivating
    # commit revision number, and a single synchronization commit may
    # contain CVSRevisions on several different default branches.
    self.motivating_revnum = None

    # True only if this commit is a fill of a symbolic name that is a
    # tag; None in all other cases.
    self.is_tag = None
2503 def set_symbolic_name(self, symbolic_name):
2504 "Set self.symbolic_name to SYMBOLIC_NAME."
2505 self.symbolic_name = symbolic_name
2507 def set_motivating_revnum(self, revnum):
2508 "Set self.motivating_revnum to REVNUM."
2509 self.motivating_revnum = revnum
2511 def set_author(self, author):
2512 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2513 This is the only way to set an SVNCommit's author."""
2514 self._author = author
2516 def set_log_msg(self, msg):
2517 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2518 This is the only way to set an SVNCommit's log message."""
2519 self._log_msg = msg
2521 def set_date(self, date):
2522 """Set this SVNCommit's date to DATE (an integer).
2523 Note that self.add_revision() updates this automatically based on
2524 a CVSRevision; so you may not need to call this at all, and even
2525 if you do, the value may be overwritten by a later call to
2526 self.add_revision()."""
2527 self._max_date = date
2529 def get_date(self):
2530 """Returns this SVNCommit's date as an integer."""
2531 return self._max_date
2533 def get_revprops(self):
2534 """Return the Subversion revprops for this SVNCommit."""
2535 date = format_date(self._max_date)
2536 try:
2537 utf8_author = None
2538 if self._author is not None:
2539 utf8_author = to_utf8(self._author)
2540 utf8_log = to_utf8(self.get_log_msg())
2541 return { 'svn:author' : utf8_author,
2542 'svn:log' : utf8_log,
2543 'svn:date' : date }
2544 except UnicodeError:
2545 Log().write(LOG_WARN, '%s: problem encoding author or log message:'
2546 % warning_prefix)
2547 Log().write(LOG_WARN, " author: '%s'" % self._author)
2548 Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
2549 Log().write(LOG_WARN, " date: '%s'" % date)
2550 Log().write(LOG_WARN,
2551 "(subversion rev %s) Related files:" % self.revnum)
2552 for c_rev in self.cvs_revs:
2553 Log().write(LOG_WARN, " ", c_rev.fname)
2555 Log().write(LOG_WARN, "Consider rerunning with (for example)",
2556 "'--encoding=latin1'.\n")
2557 # It's better to fall back to the original (unknown encoding) data
2558 # than to either 1) quit or 2) record nothing at all.
2559 return { 'svn:author' : self._author,
2560 'svn:log' : self.get_log_msg(),
2561 'svn:date' : date }
2563 def add_revision(self, cvs_rev):
2564 self.cvs_revs.append(cvs_rev)
2565 if cvs_rev.timestamp > self._max_date:
2566 self._max_date = cvs_rev.timestamp
2568 def _is_primary_commit(self):
2569 """Return true if this is a primary SVNCommit, false otherwise."""
2570 return not (self.symbolic_name or self.motivating_revnum)
2572 def flush(self):
2573 Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
2574 % (self.revnum, self._description))
2575 Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)
2577 if self.motivating_revnum is not None:
2578 Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
2579 self.motivating_revnum)
2581 # If we're not a primary commit, then store our date and/or our
2582 # symbolic_name
2583 if not self._is_primary_commit():
2584 Ctx()._persistence_manager.set_name_and_date(
2585 self.revnum, self.symbolic_name, self._max_date)
2587 def __str__(self):
2588 """ Print a human-readable description of this SVNCommit. This
2589 description is not intended to be machine-parseable (although
2590 we're not going to stop you if you try!)"""
2592 ret = "SVNCommit #: " + str(self.revnum) + "\n"
2593 if self.symbolic_name:
2594 ret += (" symbolic name: " + _clean_symbolic_name(self.symbolic_name)
2595 + "\n")
2596 else:
2597 ret += " NO symbolic name\n"
2598 ret += " debug description: " + self._description + "\n"
2599 ret += " cvs_revs:\n"
2600 for c_rev in self.cvs_revs:
2601 ret += " " + c_rev.unique_key() + "\n"
2602 return ret
2604 def get_log_msg(self):
2605 """Returns the actual log message for a primary commit, and the
2606 appropriate manufactured log message for a secondary commit."""
2607 if self.symbolic_name is not None:
2608 return self._log_msg_for_symbolic_name_commit()
2609 elif self.motivating_revnum is not None:
2610 return self._log_msg_for_default_branch_commit()
2611 else:
2612 return self._log_msg
2614 def _log_msg_for_symbolic_name_commit(self):
2615 """Creates a log message for a manufactured commit that fills
2616 self.symbolic_name. If self.is_tag is true, write the log message
2617 as though for a tag, else write it as though for a branch."""
2618 type = 'branch'
2619 if self.is_tag:
2620 type = 'tag'
2622 # In Python 2.2.3, we could use textwrap.fill(). Oh well :-).
2623 space_or_newline = ' '
2624 cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
2625 if len(cleaned_symbolic_name) >= 13:
2626 space_or_newline = '\n'
2628 return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
2629 % (type, space_or_newline, cleaned_symbolic_name)
2631 def _log_msg_for_default_branch_commit(self):
2632 """Creates a log message for a manufactured commit that
2633 synchronizes a non-trunk default branch with trunk."""
2634 msg = 'This commit was generated by cvs2svn to compensate for ' \
2635 'changes in r%d,\n' \
2636 'which included commits to RCS files with non-trunk default ' \
2637 'branches.\n' % self.motivating_revnum
2638 return msg
class CVSRevisionAggregator:
  """Groups CVSRevisions into CVSCommits, each of which represents at
  least one SVNCommit."""

  def __init__(self):
    """Open the databases that are read during aggregation and install
    the symbolings logger, the persistence manager and the default
    branches database on the shared context."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # Commits still open for additions, keyed by digest (or by a
    # digest with '-' characters appended once a commit was closed to
    # further changes).
    self.cvs_commits = {}

    # Symbols waiting to be filled; only the keys matter.
    self.pending_symbols = {}

    # Symbols for which the last CVSRevision that is a source for the
    # symbol has already been seen.  Their final fill is done and they
    # never need to be filled again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit.  It is kept only
    # for its date, which is given to the SVNCommits created in
    # self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add C_REV to the pending commit with the same digest (creating
    that commit if needed), and process every pending commit that
    C_REV can no longer be part of."""
    # Scan the accumulated commits for any that are ready now.
    ready_queue = [ ]
    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        # Too old for C_REV to join it: queue it for processing.
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
      elif cvs_commit.has_file(c_rev.fname):
        # The inbound revision touches a file of a pending commit, so
        # that commit is closed to further changes.  It is not flushed
        # yet, because other pending commits may be dated before it.
        # ### ISSUE: the has_file() check is not optimal.  It fixes
        # the dataloss bug where revisions got lost when checked in
        # too quickly, but it can also break commits apart.  A correct
        # fix would track the dependencies between change sets and
        # commit them in proper order.
        #
        # Re-key the commit under a string that is not yet a key of
        # self.cvs_commits.
        unused_id = digest_key + '-'
        while self.cvs_commits.has_key(unused_id):
          unused_id = unused_id + '-'
        self.cvs_commits[unused_id] = cvs_commit
        del self.cvs_commits[digest_key]

    # Put C_REV into the set of still-available commits.
    if not self.cvs_commits.has_key(c_rev.digest):
      author, log = self.metadata_db[c_rev.digest]
      self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest,
                                                 author, log)
    self.cvs_commits[c_rev.digest].add_revision(c_rev)

    # Whatever is in the ready_queue cannot contain C_REV, so it is
    # processed now, in sorted (time) order.
    ready_queue.sort()

    # attempt_to_commit_symbols must run for this c_rev even when no
    # commit is ready.
    if not ready_queue:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    for cvs_commit in ready_queue[:]:
      self.latest_primary_svn_commit \
          = cvs_commit.process_revisions(self.done_symbols)
      ready_queue.remove(cvs_commit)
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit everything left in self.cvs_commits, then tell the
    SymbolingsLogger that all commits are done (unless this is a
    trunk-only conversion)."""
    ready_queue = [ (cvs_commit, key)
                    for key, cvs_commit in self.cvs_commits.items() ]
    ready_queue.sort()

    for cvs_commit_tuple in ready_queue[:]:
      cvs_commit, key = cvs_commit_tuple
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      ready_queue.remove(cvs_commit_tuple)
      del self.cvs_commits[key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """Generate one SVNCommit for each symbol in self.pending_symbols
    that has no opening CVSRevision in either QUEUED_COMMITS or
    self.cvs_commits.values().

    If C_REV is not None, the symbols for which C_REV is the last
    CVSRevision are added to self.pending_symbols first."""
    # Unless this is a trunk-only conversion, collect the symbolic
    # names for which c_rev is the last *source* CVSRevision and add
    # them to the ones left over from earlier passes.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Find the symbols that still have *source* CVSRevisions among the
    # pending and queued commits.
    candidate_commits = self.cvs_commits.values() + queued_commits
    open_symbols = {}
    for sym in self.pending_symbols.keys():
      for cvs_commit in candidate_commits:
        if cvs_commit.opens_symbolic_name(sym):
          open_symbols[sym] = None
          break

    # The pending symbols are handled in sorted order, so that the
    # outcome does not depend on the order in which the dictionary
    # hands back its keys and tests behave the same on all platforms.
    symbols_in_order = self.pending_symbols.keys()
    symbols_in_order.sort()
    for sym in symbols_in_order:
      if not open_symbols.has_key(sym):
        # Nothing opens sym any more, so it can be closed.
        svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
        svn_commit.set_symbolic_name(sym)
        svn_commit.set_date(self.latest_primary_svn_commit.get_date())
        svn_commit.flush()
        self.done_symbols.append(sym)
        del self.pending_symbols[sym]
class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  It finds the opening and closing Subversion
  revision numbers that apply to a given symbolic name."""

  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading and load the
    whole offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets database is small and is both read and updated a
    # fair bit, so it is kept in a plain dictionary.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for key in offsets_db.db.keys():
      self.offsets[key] = offsets_db[key]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME, built
    from the openings and closings recorded up to and including
    SVN_REVNUM.

    If an opening is found in this fill but the matching closing
    happens later than SVN_REVNUM, the closing is not passed on in
    this fill (and is discarded when met in a later fill).  That is
    fine: a valid fill is possible without the closing, and the policy
    is to fill what can be filled as early as possible."""
    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # A branch may start with a file that was added on a branch, in
    # which case there is no offset for the symbol and nothing to read.
    if not self.offsets.has_key(symbolic_name):
      return SymbolicNameFillingGuide(openings_closings_map)

    # Continue reading where the previous fill of this symbol stopped.
    self.symbolings.seek(self.offsets[symbolic_name])

    while 1:
      fpos = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      fields = line.split(" ", 4)
      name, revnum, type, branch_name, cvs_path = fields
      if branch_name == '*':
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      else:
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      revnum = int(revnum)
      if not (revnum <= svn_revnum and name == symbolic_name):
        # Either too young for this fill or already another symbol.
        break
      openings_closings_map.register(svn_path, revnum, type)

    # If anything that was read got used, remember the start of the
    # last line read as the place to resume from next time.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = fpos

    return SymbolicNameFillingGuide(openings_closings_map)
class SvnRevisionRange:
  """The range of Subversion revision numbers from which a path can be
  copied.

  self.opening_revnum is the earliest such revision number, and
  self.closing_revnum is one higher than the last one.  A
  self.closing_revnum of None means that no closing was registered."""

  def __init__(self, opening_revnum):
    """Start a range at OPENING_REVNUM with no closing registered."""
    self.opening_revnum = opening_revnum
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing, unless a closing has
    been registered already."""
    # A non-trunk default branch can produce several closings; only
    # the first one encountered counts.
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:CLOSING]', or '[OPENING:]' when there is no
    closing."""
    if self.closing_revnum is not None:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
    return '[%d:]' % (self.opening_revnum,)
class OpeningsClosingsMap:
  """A dictionary of the openings and closings for one symbolic name
  in the current SVNCommit.

  Openings and closings are added with self.register(); the collected
  (svn_path, SvnRevisionRange) pairs are read back with
  self.get_things()."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""
    self.name = symbolic_name

    # Maps each svn_path to its SvnRevisionRange.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.

    SVN_PATH is the source path that needs to be copied into the
    symbolic name.  SVN_REVNUM is either the first svn revision number
    that can be copied from (an opening) or the last, not inclusive,
    svn revision number that can be copied from (a closing).  TYPE
    says whether this is an opening or a closing.

    An opening always starts a new range for SVN_PATH.  A closing has
    an effect only if the opening of the same SVN_PATH was registered
    before it; otherwise it is discarded.

    An opening does not need a corresponding closing."""
    if type == OPENING:
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return
    if type != CLOSING:
      return
    if self.things.has_key(svn_path):
      self.things[svn_path].add_closing(svn_revnum)

  def is_empty(self):
    """Return true if no path has been registered yet, false
    otherwise."""
    return len(self.things) == 0

  def get_things(self):
    """Return the (svn_path, SvnRevisionRange) pairs of all registered
    svn_paths."""
    return self.things.items()
class SymbolicNameFillingGuide:
  """A node tree representing the source paths to be copied to fill
  self.name in the current SVNCommit.

  self._node_tree is the root of the directory tree, in the form {
  path_component : subnode }.  Leaf nodes are instances of
  SvnRevisionRange.  Intermediate (directory) nodes are dictionaries
  mapping relative names to subnodes.

  By walking self._node_tree and calling self.get_best_revnum() on
  each node, the caller can determine what subversion revision number
  to copy the path corresponding to that node from.  self._node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, openings_closings_map):
    """Initialize a SymbolicNameFillingGuide for the symbolic name of
    OPENINGS_CLOSINGS_MAP and store into it the openings and closings
    from OPENINGS_CLOSINGS_MAP."""
    self.name = openings_closings_map.name

    # The root of the node tree, as a map { path_component : subnode }.
    self._node_tree = { }

    for svn_path, svn_revision_range in openings_closings_map.get_things():
      (head, tail) = _path_split(svn_path)
      self._get_node_for_path(head)[tail] = svn_revision_range

    #self.print_node_tree(self._node_tree)

  def _get_node_for_path(self, svn_path):
    """Return the directory node (a dictionary) for SVN_PATH, creating
    it and any missing intermediate nodes as needed."""
    # Walk down the path, one component at a time.
    node = self._node_tree
    for component in svn_path.split('/'):
      subnode = node.get(component)
      if subnode is None:
        # Nodes are dictionaries or SvnRevisionRanges, never None, so
        # None can only mean that the component is missing.
        subnode = {}
        node[component] = subnode
      node = subnode

    return node

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.

    PREFERRED_REVNUM is passed to best_rev and used to calculate the
    best revision number.

    Return a tuple (REVNUM, MAX_SCORE), where REVNUM is the chosen
    revision number and MAX_SCORE is the highest score found.  Raise
    FatalError if no revision to copy from could be found."""

    def score_revisions(svn_revision_ranges):
      """Return a list of revisions and scores based on
      SVN_REVISION_RANGES.  The returned list looks like:

         [(REV1 SCORE1), (REV2 SCORE2), ...]

      where the tuples are sorted by revision number.
      SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

      For each svn revision that appears as either an opening_revnum
      or closing_revnum for one of the svn_revision_ranges, output a
      tuple indicating how many of the SvnRevisionRanges include that
      svn_revision in its range.  A score thus indicates that copying
      the corresponding revision (or any following revision up to the
      next revision in the list) of the object in question would yield
      that many correct paths at or underneath the object.  There may
      be other paths underneath it which are not correct and would
      need to be deleted or recopied; those can only be detected by
      descending and examining their scores.

      If SVN_REVISION_RANGES is empty, return the empty list."""
      openings = [ x.opening_revnum
                   for x in svn_revision_ranges ]
      closings = [ x.closing_revnum
                   for x in svn_revision_ranges
                   if x.closing_revnum is not None ]

      # First look for easy out.
      if not openings:
        return []

      # Create a list with both openings (which increment the total)
      # and closings (which decrement the total):
      things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
      # Sort by revision number:
      things.sort()
      # Initialize output list with zeroth element of things.  This
      # element must exist, because it was already verified that
      # openings is not empty.
      scores = [ things[0] ]
      total = scores[-1][1]
      for (rev, change) in things[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      return scores

    def best_rev(scores, preferred_rev):
      """Return a tuple (REV, MAX_SCORE) for the revision with the
      highest score from SCORES, a list returned by score_revisions().
      When the maximum score is shared by multiple revisions, the
      oldest revision is selected, unless PREFERRED_REV is one of the
      possibilities, in which case, it is selected."""
      max_score = 0
      preferred_rev_score = -1
      rev = SVN_INVALID_REVNUM
      if preferred_rev is None:
        # Comparison order of different types is arbitrary.  Do not
        # expect None to compare less than int values below.
        # In Python 2.3 None compares with ints like negative infinity.
        # In Python 2.0 None compares with ints like positive infinity.
        preferred_rev = SVN_INVALID_REVNUM
      for revnum, count in scores:
        if count > max_score:
          max_score = count
          rev = revnum
        if revnum <= preferred_rev:
          # The score in effect at preferred_rev is the score of the
          # last listed revision that is not younger than it.
          preferred_rev_score = count
      if preferred_rev_score == max_score:
        rev = preferred_rev
      return rev, max_score

    # Aggregate openings and closings from the rev tree
    svn_revision_ranges = self._list_revnums(node)

    # Score the lists
    scores = score_revisions(svn_revision_ranges)

    revnum, max_score = best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # The symbol being filled is self.name; no local 'name' exists
      # in this method.
      raise FatalError("failed to find a revision "
                       "to copy from when copying %s" % self.name)
    return revnum, max_score

  def _list_revnums(self, node):
    """Return a list of all the SvnRevisionRanges (including
    duplicates) for all leaf nodes at and under NODE."""
    if isinstance(node, SvnRevisionRange):
      # It is a leaf node.
      return [ node ]
    else:
      # It is an intermediate node.
      revnums = []
      for key, subnode in node.items():
        revnums.extend(self._list_revnums(subnode))
      return revnums

  def get_sources(self):
    """Return the list of sources for this symbolic name.

    The Project instance defines what are legitimate sources.  Raise
    FatalError if a change occurred outside of the source
    directories."""
    return self._get_sub_sources('', self._node_tree)

  def _get_sub_sources(self, start_svn_path, start_node):
    """Return the list of sources for this symbolic name, starting the
    search at path START_SVN_PATH, which is node START_NODE.  This is
    a helper method, called by get_sources() (see).

    Raise FatalError if a leaf node is reached without having passed
    through a legitimate source."""
    project = Ctx().project
    if isinstance(start_node, SvnRevisionRange):
      # This implies that a change was found outside of the
      # legitimate sources.  This should never happen.  (A bare
      # 'raise' here would have no active exception to re-raise.)
      raise FatalError("a change to '%s' was found outside of the "
                       "legitimate sources while filling %s"
                       % (start_svn_path, self.name))
    elif project.is_source(start_svn_path):
      # This is a legitimate source.  Add it to list.
      return [ FillSource(start_svn_path, start_node) ]
    else:
      # This is a directory that is not a legitimate source.  (That's
      # OK because it hasn't changed directly.)  But directories
      # within it have been changed, so we need to search recursively
      # to find their enclosing sources.
      sources = []
      for entry, node in start_node.items():
        svn_path = _path_join(start_svn_path, entry)
        sources.extend(self._get_sub_sources(svn_path, node))

      return sources

  def print_node_tree(self, node, name='/', indent_depth=0):
    """For debugging purposes.  Prints all nodes in TREE that are
    rooted at NODE.  INDENT_DEPTH is used to indent the output of
    recursive calls."""
    # Each line is assembled into one string and printed with a
    # single-argument print, which gives the same output as printing
    # the space-separated items.
    if not indent_depth:
      print(" ".join(["TREE", "=" * 75]))
    indent = " " * (indent_depth * 2)
    if isinstance(node, SvnRevisionRange):
      print(" ".join(["TREE:", indent, str(name), str(node)]))
    else:
      print(" ".join(["TREE:", indent, str(name)]))
      for key, value in node.items():
        self.print_node_tree(value, key, (indent_depth + 1))
class FillSource:
  """A fill source, as used by the symbol filler in
  SVNRepositoryMirror."""

  def __init__(self, prefix, node):
    """Create a fill source for PREFIX and NODE.  It has neither a
    score nor a revision number until set_score() is called."""
    self.prefix = prefix
    self.node = node
    self.score = None
    self.revnum = None

  def set_score(self, score, revnum):
    """Store SCORE and REVNUM on this fill source."""
    self.score = score
    self.revnum = revnum

  def __cmp__(self, other):
    """Compare two FillSources so that sorting puts the highest score
    first.  Raise TypeError if either one is still unscored."""
    for source in (self, other):
      if source.score is None:
        raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose to get descending order.
    return cmp(other.score, self.score)
3153 class SVNRepositoryMirror:
3154 """Mirror a Subversion Repository as it is constructed, one
3155 SVNCommit at a time. The mirror is skeletal; it does not contain
3156 file contents. The creation of a dumpfile or Subversion repository
3157 is handled by delegates. See self.add_delegate method for how to
3158 set delegates.
3160 The structure of the repository is kept in two databases and one
3161 hash. The revs_db database maps revisions to root node keys, and
3162 the nodes_db database maps node keys to nodes. A node is a hash
3163 from directory names to keys. Both the revs_db and the nodes_db are
3164 stored on disk and each access is expensive.
3166 The nodes_db database only has the keys for old revisions. The
3167 revision that is being contructed is kept in memory in the new_nodes
3168 hash which is cheap to access.
3170 You must invoke _start_commit between SVNCommits.
3172 *** WARNING *** All path arguments to methods in this class CANNOT
3173 have leading or trailing slashes.
3176 class SVNRepositoryMirrorPathExistsError(Exception):
3177 """Exception raised if an attempt is made to add a path to the
3178 repository mirror and that path already exists in the youngest
3179 revision of the repository."""
3180 pass
3182 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
3183 """Exception raised if a CVSRevision is found to have an unexpected
3184 operation (OP) value."""
3185 pass
3187 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
3188 """Exception raised if an empty SymbolicNameFillingGuide is returned
3189 during a fill where the branch in question already exists."""
3190 pass
def __init__(self):
  """Create the mirror's databases and prepare it for SVNCommits."""
  self.delegates = [ ]

  # revs_db plays the role of the 'revisions' table of a Subversion
  # fs.
  self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

  # nodes_db plays the role of the 'nodes' table of a Subversion fs.
  # (No 'representations' or 'strings' table is needed, because only
  # metadata is tracked, not file contents.)
  self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

  # The mirror starts at revision 0 and has no root node yet; the
  # root node is created by _open_writable_root_node.
  self.youngest = 0
  self.new_root_key = None
  self.new_nodes = { }

  if not Ctx().trunk_only:
    ###PERF IMPT: Suck this into memory.
    self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
  """Create the trunk directory and, unless this is a trunk-only
  conversion, the branches and tags directories, in a commit dated
  DATE.  Call this only after all delegates have been added to the
  repository mirror."""
  # A 'fake' SVNCommit (revision 1) is used so that its revprops
  # handling can be reused.
  init_commit = SVNCommit("Initialization", 1)
  init_commit.set_date(date)
  init_commit.set_log_msg("New repository initialized by cvs2svn.")
  self._start_commit(init_commit)

  self._mkdir(Ctx().project.trunk_path)
  if Ctx().trunk_only:
    return
  self._mkdir(Ctx().project.branches_path)
  self._mkdir(Ctx().project.tags_path)
3233 def _start_commit(self, svn_commit):
3234 """Start a new commit."""
3235 if self.youngest > 0:
3236 self._end_commit()
3238 self.youngest = svn_commit.revnum
3239 self.new_root_key = None
3240 self.new_nodes = { }
3242 self._invoke_delegates('start_commit', svn_commit)
3244 def _end_commit(self):
3245 """Called at the end of each commit. This method copies the newly
3246 created nodes to the on-disk nodes db."""
3247 if self.new_root_key is None:
3248 # No changes were made in this revision, so we make the root node
3249 # of the new revision be the same as the last one.
3250 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3251 else:
3252 self.revs_db[str(self.youngest)] = self.new_root_key
3253 # Copy the new nodes to the nodes_db
3254 for key, value in self.new_nodes.items():
3255 self.nodes_db[key] = value
3257 def _get_node(self, key):
3258 """Returns the node contents for KEY which may refer to either
3259 self.nodes_db or self.new_nodes."""
3260 if self.new_nodes.has_key(key):
3261 return self.new_nodes[key]
3262 else:
3263 return self.nodes_db[key]
3265 def _open_readonly_node(self, path, revnum):
3266 """Open a readonly node for PATH at revision REVNUM. Returns the
3267 node key and node contents if the path exists, else (None, None)."""
3268 # Get the root key
3269 if revnum == self.youngest:
3270 if self.new_root_key is None:
3271 node_key = self.revs_db[str(self.youngest - 1)]
3272 else:
3273 node_key = self.new_root_key
3274 else:
3275 node_key = self.revs_db[str(revnum)]
3277 for component in path.split('/'):
3278 node_contents = self._get_node(node_key)
3279 node_key = node_contents.get(component, None)
3280 if node_key is None:
3281 return None
3283 return node_key
3285 def _open_writable_root_node(self):
3286 """Open a writable root node. The current root node is returned
3287 immeditely if it is already writable. If not, create a new one by
3288 copying the contents of the root node of the previous version."""
3289 if self.new_root_key is not None:
3290 return self.new_root_key, self.new_nodes[self.new_root_key]
3292 if self.youngest < 2:
3293 new_contents = { }
3294 else:
3295 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
3296 self.new_root_key = gen_key()
3297 self.new_nodes = { self.new_root_key: new_contents }
3299 return self.new_root_key, new_contents
def _open_writable_node(self, svn_path, create):
  """Return (key, contents) of a modifiable node for SVN_PATH.

  Every node along SVN_PATH that is not yet part of the current commit
  is copied from self.nodes_db under a freshly generated key.  If a
  component is missing, it is created when CREATE is true; otherwise
  (None, None) is returned.  The delegates' mkdir() method is invoked
  for each newly created component except the last one."""
  parent_contents = self._open_writable_root_node()[1]

  components = svn_path.split('/')
  last_index = len(components) - 1
  path_so_far = None
  index = 0
  for component in components:
    path_so_far = _path_join(path_so_far, component)
    this_key = parent_contents.get(component, None)

    if this_key is None:
      if not create:
        # The component is missing and we may not create it: give up.
        return None, None
      # The component is missing, so create an empty node for it.
      this_contents = {}
      this_key = gen_key()
      self.new_nodes[this_key] = this_contents
      parent_contents[component] = this_key
      if index < last_index:
        self._invoke_delegates('mkdir', path_so_far)
    else:
      # The component exists.  If it was not created or copied during
      # this commit, pull it from the nodes db under a new key so the
      # stored node stays untouched.
      this_contents = self.new_nodes.get(this_key, None)
      if this_contents is None:
        this_contents = self.nodes_db[this_key]
        this_key = gen_key()
        self.new_nodes[this_key] = this_contents
        parent_contents[component] = this_key

    parent_contents = this_contents
    index = index + 1

  return this_key, this_contents
3340 def _path_exists(self, path):
3341 """If PATH exists in self.youngest of the svn repository mirror,
3342 return true, else return None.
3344 PATH must not start with '/'."""
3345 return self._open_readonly_node(path, self.youngest) is not None
3347 def _fast_delete_path(self, parent_path, parent_contents, component):
3348 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3349 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3350 in PARENT_CONTENTS."""
3351 if parent_contents.has_key(component):
3352 del parent_contents[component]
3353 self._invoke_delegates('delete_path',
3354 _path_join(parent_path, component))
3356 def _delete_path(self, svn_path, should_prune=False):
3357 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
3358 all ancestor directories that are made empty when SVN_PATH is deleted.
3359 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
3361 NOTE: This function ignores requests to delete the root directory
3362 or any directory for which Ctx().project.is_unremovable() returns
3363 True, either directly or by pruning."""
3365 if svn_path == '' or Ctx().project.is_unremovable(svn_path):
3366 return
3368 (parent_path, entry,) = _path_split(svn_path)
3369 if parent_path:
3370 parent_key, parent_contents = \
3371 self._open_writable_node(parent_path, False)
3372 else:
3373 parent_key, parent_contents = self._open_writable_root_node()
3375 if parent_key is not None:
3376 self._fast_delete_path(parent_path, parent_contents, entry)
3377 # The following recursion makes pruning an O(n^2) operation in the
3378 # worst case (where n is the depth of SVN_PATH), but the worst case
3379 # is probably rare, and the constant cost is pretty low. Another
3380 # drawback is that we issue a delete for each path and not just
3381 # a single delete for the topmost directory pruned.
3382 if should_prune and len(parent_contents) == 0:
3383 self._delete_path(parent_path, True)
3385 def _mkdir(self, path):
3386 """Create PATH in the repository mirror at the youngest revision."""
3387 self._open_writable_node(path, True)
3388 self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
  """Report a content change of CVS_REV's svn_path in self.youngest
  to the delegates."""
  # The mirror itself only tracks which paths exist.  A content change
  # neither adds nor removes a path, so no node needs to be updated.
  s_item = SVNCommitItem(cvs_rev, False)
  self._invoke_delegates('change_path', s_item)
def _add_path(self, cvs_rev):
  """Add CVS_REV's svn_path to the repository mirror and report the
  addition to the delegates."""
  self._open_writable_node(cvs_rev.svn_path, True)
  s_item = SVNCommitItem(cvs_rev, True)
  self._invoke_delegates('add_path', s_item)
def _copy_path(self, src_path, dest_path, src_revnum):
  """Copy SRC_PATH as of Subversion revision SRC_REVNUM to DEST_PATH.

  In the youngest revision of the repository, the parent of DEST_PATH
  *must* exist and DEST_PATH itself *must not* exist; if it does,
  SVNRepositoryMirrorPathExistsError is raised.

  Return a tuple (key, contents) for the new node at DEST_PATH, where
  contents is a dictionary."""
  # Locate the node that is being copied.
  src_key = self._open_readonly_node(src_path, src_revnum)
  src_contents = self._get_node(src_key)

  # Open the directory that will receive the copy.
  dest_parent, dest_basename = _path_split(dest_path)
  dest_parent_contents = self._open_writable_node(dest_parent, False)[1]

  if dest_parent_contents.has_key(dest_basename):
    msg = ("Attempt to add path '%s' to repository mirror " % dest_path
           + "when it already exists in the mirror.")
    raise self.SVNRepositoryMirrorPathExistsError(msg)

  # The copy is cheap: the destination entry simply refers to the very
  # same node as the source, so the source key and contents are also
  # the key and contents of the destination.
  dest_parent_contents[dest_basename] = src_key
  self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

  return src_key, src_contents
def _fill_symbolic_name(self, svn_commit):
  """Perform all copies needed to create as much of the tag or branch
  SVN_COMMIT.symbolic_name as is possible in the current revision of
  the repository mirror.

  When this method returns, the symbolic name exists in the Subversion
  repository, even if there are no paths under it.  Raise
  SVNRepositoryMirrorInvalidFillOperationError if there are no sources
  to copy from and the branch already exists."""
  symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
      svn_commit.symbolic_name, self.youngest)
  sources = symbol_fill.get_sources()

  if sources:
    # There is something to copy: work out where it goes and fill it.
    if self.tags_db.has_key(svn_commit.symbolic_name):
      dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
    else:
      dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)

    dest_key = self._open_writable_node(dest_prefix, False)[0]
    self._fill(symbol_fill, dest_prefix, dest_key, sources)
    return

  # Without sources we must be dealing with a branch whose first commit
  # is an add (as opposed to a copy).
  dest_path = Ctx().project.get_branch_path(symbol_fill.name)
  if self._path_exists(dest_path):
    msg = ("Error filling branch '"
           + _clean_symbolic_name(symbol_fill.name) + "'.\n"
           + "Received an empty SymbolicNameFillingGuide and\n"
           + "attempted to create a branch that already exists.")
    raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)

  # The filling guide is empty and the branch does not exist yet: the
  # first commit on the branch added a file there, and this is the
  # first fill of the branch.  (This case is covered by test 16.)
  #
  # Create the branch by copying trunk as of the revision just before
  # this commit...
  source_path = Ctx().project.trunk_path
  entries = self._copy_path(source_path, dest_path,
                            svn_commit.revnum - 1)[1]
  # ...and, because the branch is *supposed* to start out empty,
  # delete every entry that came along with the copy.  These deletes
  # do not prune.
  for entry in entries.keys():
    self._delete_path(dest_path + '/' + entry)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
  """Fill the tag or branch at DEST_PREFIX + PATH from the best of
  SOURCES, then recurse into the child entries.

  DEST_PREFIX is the prefix of the destination directory, e.g.
  '/tags/my_tag' or '/branches/my_branch'.  SOURCES is a list of
  FillSource instances that are candidates to be copied to the
  destination.  DEST_KEY is the node key of the destination, or None
  if the destination does not exist yet.

  PATH is relative to DEST_PREFIX; None means the top level, e.g.
  '/tags/my_tag'.

  PARENT_SOURCE_PREFIX is the source prefix that was used to copy the
  parent directory.  PREFERRED_REVNUM is the source revision number
  (an int) that the caller used for its own copy; None means that no
  revision is preferable to any other, which probably means that no
  copy has happened yet.

  PRUNE_OK means that a copy has been made somewhere up this
  recursion, so destination entries that have no counterpart among the
  sources may safely be deleted.

  PATH, PARENT_SOURCE_PREFIX, PREFERRED_REVNUM and PRUNE_OK should
  only be passed in by recursive calls."""
  # Score every candidate source and remember its best revision.
  for source in sources:
    src_revnum, score = symbol_fill.get_best_revnum(source.node,
                                                    preferred_revnum)
    source.set_score(score, src_revnum)

  # After sorting, the source with the highest score comes first; that
  # is the one any copy will be made from.
  sources.sort()
  copy_source = sources[0]

  src_path = _path_join(copy_source.prefix, path)
  dest_path = _path_join(dest_prefix, path)

  # Decide whether a copy to this destination is needed, removing any
  # destination path that is in the way.
  if dest_key is None:
    needs_copy = True
  elif prune_ok and (parent_source_prefix != copy_source.prefix
                     or copy_source.revnum != preferred_revnum):
    # The destination is about to be replaced, so it has to go before
    # the copy is made.
    self._delete_path(dest_path)
    needs_copy = True
  else:
    needs_copy = False

  if needs_copy:
    dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                             copy_source.revnum)
    prune_ok = True
  else:
    dest_entries = self._get_node(dest_key)

  # Build SRC_ENTRIES: a map from each path element to the list of
  # FillSources in which that element exists.
  src_entries = {}
  for source in sources:
    if isinstance(source.node, SvnRevisionRange):
      continue
    for entry, node in source.node.items():
      child_source = FillSource(source.prefix, node)
      src_entries.setdefault(entry, []).append(child_source)

  if prune_ok:
    # Remove destination entries that do not occur in any source.
    delete_list = [entry for entry in dest_entries.keys()
                   if not src_entries.has_key(entry)]
    if delete_list:
      if not self.new_nodes.has_key(dest_key):
        dest_key, dest_entries = self._open_writable_node(dest_path, True)
      # Delete in sorted order to get "diffable" dumpfiles.
      delete_list.sort()
      for entry in delete_list:
        self._fast_delete_path(dest_path, dest_entries, entry)

  # Recurse into the source entries in alphabetical order.
  src_keys = src_entries.keys()
  src_keys.sort()
  for src_key in src_keys:
    next_dest_key = dest_entries.get(src_key, None)
    self._fill(symbol_fill, dest_prefix, next_dest_key,
               src_entries[src_key], _path_join(path, src_key),
               copy_source.prefix, copy_source.revnum, prune_ok)
def _synchronize_default_branch(self, svn_commit):
  """Propagate the changes that happened on a non-trunk default branch
  to the trunk of the repository.  See CVSCommit._post_commit() for
  details on why this is necessary.

  Raise SVNRepositoryMirrorUnexpectedOperationError if a CVSRevision
  has an operation other than OP_ADD, OP_CHANGE or OP_DELETE."""
  for cvs_rev in svn_commit.cvs_revs:
    relative_name = Ctx().project.relative_name(cvs_rev.fname)
    svn_trunk_path = Ctx().project.make_trunk_path(relative_name)

    if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
      # Replace whatever is on trunk by a copy from the branch.
      if self._path_exists(svn_trunk_path):
        self._delete_path(svn_trunk_path)
      self._copy_path(cvs_rev.svn_path, svn_trunk_path,
                      svn_commit.motivating_revnum)
    elif cvs_rev.op == OP_DELETE:
      # The file disappeared from the branch, so remove it from trunk.
      self._delete_path(svn_trunk_path)
    else:
      msg = ("Unknown CVSRevision operation '%s' in default branch sync."
             % cvs_rev.op)
      raise self.SVNRepositoryMirrorUnexpectedOperationError(msg)
def commit(self, svn_commit):
  """Apply the SVNCommit SVN_COMMIT to the repository mirror.

  This starts a new revision (which invokes the delegates'
  start_commit() method) and then, depending on the kind of commit,
  fills a symbolic name, synchronizes the default branch with trunk,
  or applies the adds, changes and deletes of the CVSRevisions in
  SVN_COMMIT."""
  if svn_commit.revnum == 2:
    self._initialize_repository(svn_commit.get_date())

  self._start_commit(svn_commit)

  if svn_commit.symbolic_name:
    Log().write(LOG_VERBOSE, "Filling symbolic name:",
                _clean_symbolic_name(svn_commit.symbolic_name))
    self._fill_symbolic_name(svn_commit)
    return

  if svn_commit.motivating_revnum:
    Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                % svn_commit.motivating_revnum)
    self._synchronize_default_branch(svn_commit)
    return

  # From here on we are actually committing CVSRevisions.
  rev_count = len(svn_commit.cvs_revs)
  if rev_count > 1:
    plural = "s"
  else:
    plural = ""
  Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
              % (rev_count, plural))

  for cvs_rev in svn_commit.cvs_revs:
    # See the comment in CVSCommit._commit() for what this condition
    # is all about.  self._path_exists() is somewhat expensive, so it
    # comes last and only runs when the two cheap tests succeed.
    skip_add_or_change = (cvs_rev.deltatext_code == DELTATEXT_EMPTY
                          and cvs_rev.rev == "1.1.1.1"
                          and self._path_exists(cvs_rev.svn_path))

    if not skip_add_or_change:
      if cvs_rev.op == OP_ADD:
        self._add_path(cvs_rev)
      elif cvs_rev.op == OP_CHANGE:
        # Fix for Issue #74:
        #
        # Suppose file FOO is imported on a non-trunk vendor branch,
        # so that it exists in r1.1 and r1.1.1.1.
        #
        # Later FOO is deleted on the default branch (r1.1.1.2).
        # cvs2svn determines that this delete must also happen on
        # trunk, so FOO is deleted there.
        #
        # Then r1.2 comes along.  Its op is OP_CHANGE (r1.1 is not
        # 'dead', so a change is assumed), but the trunk file is
        # gone, and svnadmin blows up on a change to a file that does
        # not exist.
        #
        # Hence the check: a change to a path that does not exist is
        # turned into an add; otherwise it is business as usual.
        if self._path_exists(cvs_rev.svn_path):
          self._change_path(cvs_rev)
        else:
          self._add_path(cvs_rev)

    if cvs_rev.op == OP_DELETE:
      self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Drop the references to the revisions and nodes databases.

  This is the callback for the Cleanup.register in self.__init__."""
  self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
  """Append DELEGATE to self.delegates.

  Whenever SVNRepositoryMirror performs a repository action method, it
  calls the corresponding repository action method of every delegate
  that has been added, in the order in which they were added.  See
  SVNRepositoryMirrorDelegate for more information."""
  delegates = self.delegates
  delegates.append(delegate)
3676 def _invoke_delegates(self, method, *args):
3677 """Iterate through each of our delegates, in the order that they
3678 were added, and call the delegate's method named METHOD with the
3679 arguments in ARGS."""
3680 for delegate in self.delegates:
3681 getattr(delegate, method)(*args)
def finish(self):
  """Finish the last commit, invoke the delegates' finish() method
  and release the databases."""
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """A wrapper around a CVSRevision object on which Subversion-related
  data (such as properties) may be hung."""

  def __init__(self, c_rev, svn_props_changed):
    """Wrap the CVSRevision C_REV and work out its properties.

    SVN_PROPS_CHANGED tells whether the svn: properties are known to
    have changed since the last revision.

    Each SVNPropertySetter in Ctx().svn_property_setters is given the
    chance to set properties on this item; afterwards a couple of the
    properties are read back for our own purposes."""
    self.c_rev = c_rev

    # Whether the svn properties changed for this file, i.e. whether
    # they have to be written to the dumpfile.
    self.svn_props_changed = svn_props_changed

    # The properties of this item as a map { key : value }.  A value
    # of None means that the property should not be set.
    self.svn_props = {}

    for svn_property_setter in Ctx().svn_property_setters:
      svn_property_setter.set_properties(self)

    # Remember whether the EOLs have to be filtered.  (self.svn_props
    # could be consulted directly, as it is initialized for each
    # revision.)
    eol_style = self.svn_props.get('svn:eol-style', None)
    self.needs_eol_filter = eol_style is not None

    keywords = self.svn_props.get('svn:keywords', None)
    self.has_keywords = keywords is not None
class SVNPropertySetter:
  """Abstract base class for objects that are able to set properties
  on an SVNCommitItem."""

  def set_properties(self, s_item):
    """Set whatever properties can be determined for S_ITEM.

    Subclasses must override this method; this implementation always
    raises NotImplementedError."""
    raise NotImplementedError()
class SVNRepositoryMirrorDelegate:
  """Abstract superclass for any delegate to SVNRepositoryMirror.
  Subclasses must implement all of the methods below.

  Each method stands for the Subversion operation implied by its name,
  and every subclass carries that operation out in its own way.  For
  the add_path method, for example, the DumpfileDelegate would write
  out a "Node-add:" command to a Subversion dumpfile, the
  StdoutDelegate would merely print that the path is being added to
  the repository, and the RepositoryDelegate would actually cause the
  path to be added to the Subversion repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to start the SVNCommit SVN_COMMIT; see
    the subclass implementations for details."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; see the subclass implementations for
    details."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementations
    for details."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementations
    for details."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; see the subclass implementations for
    details."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are both strings and SRC_REVNUM is a
    Subversion revision number (int); see the subclass implementations
    for details."""
    raise NotImplementedError()

  def finish(self):
    """Do any cleanup that is necessary after all revisions have been
    committed."""
    raise NotImplementedError()
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that records every repository action in a Subversion
  dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Open the dumpfile for writing and emit the dumpfile header.

    The dumpfile is created at DUMPFILE_PATH, or at Ctx().dumpfile if
    DUMPFILE_PATH is empty or None."""
    self.dumpfile_path = dumpfile_path or Ctx().dumpfile

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile header to the file-like object
    DUMPFILE."""
    # No UUID is written to the dumpfile: the CVS repository does not
    # have one, and the Subversion repository will be created with one
    # anyway.
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.

    Raise FatalError if some component of PATH cannot be converted."""
    # Each component is converted on its own, as the components may use
    # different encodings.  Log messages can be converted with the
    # 'replace' strategy, but no lossiness can be afforded for paths,
    # hence 'strict'.
    utf8_pieces = []
    for piece in path.split('/'):
      try:
        utf8_pieces.append(to_utf8(piece, 'strict'))
      except UnicodeError:
        raise FatalError(
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with (for example) '--encoding=latin1'."
            % (path,))
    return '/'.join(utf8_pieces)

  def _string_for_prop(self, name, value):
    """Return the property NAME with value VALUE in the form in which
    it has to appear in the dumpfile."""
    fields = (len(name), name, len(value), value)
    return 'K %d\n%s\nV %d\n%s\n' % fields

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""
    self.revision = svn_commit.revnum

    # A revision record in a dumpfile typically looks like this:
    #
    #     Revision-number: 1
    #     Prop-content-length: 129
    #     Content-length: 129
    #
    #     K 7
    #     svn:log
    #     V 27
    #     Log message for revision 1.
    #     K 10
    #     svn:author
    #     V 7
    #     jrandom
    #     K 8
    #     svn:date
    #     V 27
    #     2003-04-22T22:57:58.132837Z
    #     PROPS-END
    #
    # The length headers count everything: not only the data, but also
    # the lengths of the lengths, including the 'K ' or 'V ' prefixes.
    #
    # Prop-content-length covers just the props while Content-length
    # covers everything; that is the generic header form for any
    # entity in a dumpfile.  Revisions have nothing but props, so for
    # them the two lengths are always equal.

    # Serialize the revision properties, sorted by name, leaving out
    # the ones whose value is None.
    props = svn_commit.get_revprops()
    prop_names = props.keys()
    prop_names.sort()
    prop_strings = [self._string_for_prop(propname, props[propname])
                    for propname in prop_names
                    if props[propname] is not None]

    all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n'
    total_len = len(all_prop_strings)

    # Emit the revision header followed by the props.
    self.dumpfile.write('Revision-number: %d\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        % (self.revision, total_len, total_len))

    self.dumpfile.write(all_prop_strings)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of directory PATH."""
    utf8_path = self._utf8_path(path)
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "\n"
                        "\n" % utf8_path)

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.

    OP is either the constant OP_ADD or OP_CHANGE; any other value
    raises FatalError.  FatalError is also raised if the command that
    supplies the file contents exits with a non-zero status."""
    # Translate the operation into the dumpfile's node action.
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      raise FatalError("_add_or_change_path() called with bad op ('%s')"
                       % (op,))

    c_rev = s_item.c_rev

    # The property handling relies on an undocumented but consistent
    # feature of the Subversion dumpfile loader: if a node's properties
    # are not mentioned at all (no "Prop-content-length:" header, no
    # properties, no "PROPS-END\n" line), the node's properties are
    # left unchanged.
    #
    # Text content behaves the same way.  The dumpfile output for a
    # change to *just* the properties of a node that already has text
    # from an earlier revision carries neither "Text-content-length:"
    # nor "Text-content-md5:" nor the text itself, and yet the text is
    # not erased; it simply remains unchanged.
    #
    # This suits cvs2svn well, thanks to two lucky coincidences:
    #
    # For files, properties are only ever set in the first revision,
    # and all other revisions (including those on branches) inherit
    # them.  File properties never change afterwards, so the full set
    # of properties of a file need not be remembered once it is set.
    #
    # For directories, the only property set is "svn:ignore".  It may
    # change after the first revision, but always based on the contents
    # of a ".cvsignore" file.  CVS does the remembering, so the previous
    # value of the property need not be preserved here either.

    # Build the property block (sorted by name) and its header, if the
    # properties have to be written at all.
    prop_contents = ''
    props_header = ''
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      prop_names = svn_props.keys()
      prop_names.sort()
      for pname in prop_names:
        pvalue = svn_props[pname]
        if pvalue is not None:
          prop_contents += self._string_for_prop(pname, pvalue)
      prop_contents += 'PROPS-END\n'
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)

    # A .cvsignore file is additionally turned into the svn:ignore
    # property of the directory that contains it.
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_vals = generate_ignores(c_rev)
      ignore_text = '\n'.join(ignore_vals)
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n'
                         % (len(ignore_text), ignore_text))
      ignore_contents = ignore_contents + 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # Headers first, then the props.
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_contents))

    # If the file has keywords, CVS/RCS must be kept from expanding
    # them: they have to be unexpanded in the repository, or Subversion
    # will get confused.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
        c_rev, suppress_keyword_substitution=s_item.has_keywords)

    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # The text length and checksum are not known until the contents
    # have been read, so remember where the placeholders start; they
    # are patched further down.
    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Convert all EOLs to LFs on the fly if that is required.
    if s_item.needs_eol_filter:
      data_reader = LF_EOL_Filter(pipe.stdout)
    else:
      data_reader = pipe.stdout

    # Copy the revision contents into the dumpfile, keeping track of
    # the length and the checksum along the way.
    checksum = md5.new()
    length = 0
    while True:
      buf = data_reader.read(PIPE_READ_SIZE)
      if buf == '':
        break
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)

    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
      raise FatalError("The command '%s' failed with exit status: %s\n"
                       "and the following output:\n"
                       "%s" % (pipe_cmd, exit_status, error_output))

    # Go back and patch the placeholders.  The 16 zeros left for the
    # text length are replaced by the real length, padded on the left
    # with spaces.
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write('%16d' % length)
    # 16 + 1 newline + len('Text-content-md5: ') == 35
    self.dumpfile.seek(pos + 35, 0)
    self.dumpfile.write(checksum.hexdigest())
    # 35 + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
    self.dumpfile.seek(pos + 84, 0)
    # The content length covers the property data, the text data and
    # any metadata around or inside them.
    self.dumpfile.write('%16d' % (length + len(prop_contents)))
    # Return to the end of the stream.
    self.dumpfile.seek(0, 2)

    # Finish the record with two newlines: one to terminate contents
    # that were not themselves newline-terminated, and one to provide a
    # blank line for readability.
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    utf8_path = self._utf8_path(path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % utf8_path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
    # "Node-kind:" is left out for copies; the loader ignores it anyway
    # and uses the kind of the source instead.
    utf8_dest = self._utf8_path(dest_path)
    utf8_src = self._utf8_path(src_path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (utf8_dest, src_revnum, utf8_src))

  def finish(self):
    """Close the dumpfile after all revisions have been committed."""
    self.dumpfile.close()
4076 class RepositoryDelegate(DumpfileDelegate):
4077 """Creates a new Subversion Repository. DumpfileDelegate does all
4078 of the heavy lifting."""
def __init__(self):
  """Create the target repository if necessary and start the
  'svnadmin load' process that will receive the revisions.

  Unless Ctx().existing_svnrepos is set, a new repository is created
  at Ctx().target with 'svnadmin create', using the filesystem type
  Ctx().fs_type if one was given.  Revisions are staged one at a time
  in a temporary dumpfile and then fed to the loader pipe.

  Raise FatalError if the dumpfile header cannot be written to the
  loader pipe."""
  self.svnadmin = Ctx().svnadmin
  self.target = Ctx().target
  if not Ctx().existing_svnrepos:
    Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
    if not Ctx().fs_type:
      # User didn't say what kind repository (bdb, fsfs, etc).
      # We still pass --bdb-txn-nosync.  It's a no-op if the default
      # repository type doesn't support it, but we definitely want
      # it if BDB is the default.
      run_command('%s create %s "%s"' % (self.svnadmin,
                                         "--bdb-txn-nosync",
                                         self.target))
    elif Ctx().fs_type == 'bdb':
      # User explicitly specified bdb.
      #
      # Since this is a BDB repository, pass --bdb-txn-nosync,
      # because it gives us a 4-5x speed boost (if cvs2svn is
      # creating the repository, cvs2svn should be the only program
      # accessing the svn repository (until cvs is done, at least)).
      # But we'll turn no-sync off in self.finish(), unless
      # instructed otherwise.
      run_command('%s create %s %s "%s"' % (self.svnadmin,
                                            "--fs-type=bdb",
                                            "--bdb-txn-nosync",
                                            self.target))
    else:
      # User specified something other than bdb.
      run_command('%s create %s "%s"' % (self.svnadmin,
                                         "--fs-type=%s" % Ctx().fs_type,
                                         self.target))

  # Since the output of this run is a repository, not a dumpfile,
  # the temporary dumpfiles we create should go in the tmpdir.
  DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

  # This is 1 if a commit is in progress, otherwise None.
  self._commit_in_progress = None

  # DumpfileDelegate.__init__ left a write-only handle open with the
  # dumpfile header buffered in it.  Close it explicitly before
  # reopening the file: otherwise the old handle is only flushed and
  # closed whenever it happens to be garbage-collected, which is after
  # the truncating open below, so a stale header would end up in the
  # temporary file.  The header reaches the loader through the pipe
  # instead (see below).
  self.dumpfile.close()
  self.dumpfile = open(self.dumpfile_path, 'w+b')
  self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                   self.target ], True)
  self.loader_pipe.stdout.close()
  try:
    self._write_dumpfile_header(self.loader_pipe.stdin)
  except IOError:
    raise FatalError("svnadmin failed with the following output while "
                     "loading the dumpfile:\n"
                     + self.loader_pipe.stderr.read())
4129 def _feed_pipe(self):
4130 """Feed the revision stored in the dumpfile to the svnadmin
4131 load pipe."""
4132 self.dumpfile.seek(0)
4133 while 1:
4134 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
4135 if not len(data):
4136 break
4137 try:
4138 self.loader_pipe.stdin.write(data)
4139 except IOError:
4140 raise FatalError("svnadmin failed with the following output "
4141 "while loading the dumpfile:\n"
4142 + self.loader_pipe.stderr.read())
4144 def start_commit(self, svn_commit):
4145 """Start a new commit. If a commit is already in progress, close
4146 the dumpfile, load it into the svn repository, open a new
4147 dumpfile, and write the header into it."""
4148 if self._commit_in_progress:
4149 self._feed_pipe()
4150 self.dumpfile.seek(0)
4151 self.dumpfile.truncate()
4152 DumpfileDelegate.start_commit(self, svn_commit)
4153 self._commit_in_progress = 1
4155 def finish(self):
4156 """Loads the last commit into the repository."""
4157 self._feed_pipe()
4158 self.dumpfile.close()
4159 self.loader_pipe.stdin.close()
4160 error_output = self.loader_pipe.stderr.read()
4161 exit_status = self.loader_pipe.wait()
4162 if exit_status:
4163 raise FatalError('svnadmin load failed with exit status: %s\n'
4164 'and the following output:\n'
4165 '%s' % (exit_status, error_output,))
4166 os.remove(self.dumpfile_path)
4168 # If this is a BDB repository, and we created the repository, and
4169 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
4170 # line in the DB_CONFIG file, because txn syncing should be on by
4171 # default in BDB repositories.
4173 # We determine if this is a BDB repository by looking for the
4174 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
4175 # checking Ctx().fs_type. That way this code will Do The Right
4176 # Thing in all circumstances.
4177 db_config = os.path.join(self.target, "db/DB_CONFIG")
4178 if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
4179 and os.path.exists(db_config)):
4180 no_sync = 'set_flags DB_TXN_NOSYNC\n'
4182 contents = open(db_config, 'r').readlines()
4183 index = contents.index(no_sync)
4184 contents[index] = '# ' + no_sync
4185 contents = open(db_config, 'w').writelines(contents)
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """Reporting-only delegate: it never touches the disk, it merely
  logs what the SVNRepositoryMirror is doing.  Every message claims
  that something is being done, although the only thing actually
  done here is the logging itself."""

  def __init__(self, total_revs):
    """TOTAL_REVS is the revision count shown in progress messages."""
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Log the Subversion revision number of the commit being
    started, together with the total number of revisions."""
    log = Log()
    log.write(LOG_VERBOSE, "=" * 60)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    log.write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Log the creation of directory PATH."""
    Log().write(LOG_VERBOSE, " New Directory", path)

  def add_path(self, s_item):
    """Log the addition of s_item.c_rev.svn_path."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, s_item):
    """Log the modification of s_item.c_rev.svn_path."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Log the deletion of PATH."""
    Log().write(LOG_VERBOSE, " Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Log the copy of revision SRC_REVNUM of SRC_PATH to
    DEST_PATH (as two separate log lines)."""
    log = Log()
    log.write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
    log.write(LOG_VERBOSE, " to", dest_path)

  def finish(self):
    """Log that the repository creation is complete."""
    log = Log()
    log.write(LOG_VERBOSE, "Finished creating Subversion repository.")
    log.write(LOG_QUIET, "Done.")
# This should be a local to pass1,
# but Python 2.0 does not support nested scopes.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: walk the CVS repository and parse every ',v' file.

  Each RCS file found under Ctx().project.project_cvs_repos_path is
  parsed with a CollectData instance as the sink.  Problems with
  individual files are collected and reported together at the end.

  Raises FatalException if any file was invalid, if a file exists
  both inside and outside an Attic directory, or if no ',v' file was
  found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    # os.path.walk() callback; BATON is the CollectData instance.
    cd = baton
    attic_len = len(OS_SEP_PLUS_ATTIC)
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-attic_len:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-attic_len], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      # Close every RCS file explicitly instead of leaving thousands of
      # handles to the garbage collector.  open() stays inside the inner
      # try so that a failure to open is still logged and re-raised.
      rcs_file = None
      try:
        try:
          rcs_file = open(pathname, 'rb')
          cvs2svn_rcsparse.parse(rcs_file, cd)
        except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
                RuntimeError):
          err = "%s: '%s' is not a valid ,v file" \
                % (error_prefix, pathname)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        except:
          Log().write(LOG_WARN,
                      "Exception occurred while parsing %s" % pathname)
          raise
      finally:
        if rcs_file is not None:
          rcs_file.close()

  os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if cd.fatal_errors:
    raise FatalException("Pass 1 complete.\n"
                         + "=" * 75 + "\n"
                         + "Error summary:\n"
                         + "\n".join(cd.fatal_errors) + "\n"
                         + "Exited due to fatal error(s).\n")

  if cd.found_valid_file is None:
    raise FatalException(
      "\n"
      "No RCS files found in your CVS Repository!\n"
      "Are you absolutely certain you are pointing cvs2svn\n"
      "at a CVS repository?\n"
      "\n"
      "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  First validate the symbol settings (blocked exclusions, forced tags
  that have commits, tag/branch mismatches) and exit with status 1 if
  any check fails.  Then create the tags database and rewrite the
  revisions file: revisions on excluded branches are dropped,
  excluded/forced symbols are applied, and timestamps are
  resynchronized according to the resync records."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Convert the list of regexps to a list of strings
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
      sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags.keys():
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      resync.setdefault(digest, []).append([t1_l, t1_u, t2])

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    for val in resync.values():
      val.sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)

  # Predicate used to remove all references to excluded tags and
  # branches.  EXCLUDES is bound as a default argument because
  # Python 2.0 has no nested scopes; defined once, outside the loop.
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Pick up timestamps of neighbouring revisions that were already
    # tweaked earlier in this loop.
    new_prev_ts = None
    if c_rev.prev_rev is not None:
      new_prev_ts = tweaked_timestamps_db.get(
        c_rev.unique_key(c_rev.prev_rev), None)
    if new_prev_ts:
      c_rev.prev_timestamp = new_prev_ts

    new_next_ts = None
    if c_rev.next_rev is not None:
      new_next_ts = tweaked_timestamps_db.get(
        c_rev.unique_key(c_rev.next_rev), None)
    if new_next_ts:
      c_rev.next_timestamp = new_next_ts

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[2] == c_rev.timestamp:
        # This means that either c_rev is the same revision that
        # caused the resync record to exist, or c_rev is a different
        # CVS revision that happens to have the same timestamp.  In
        # either case, we don't have to do anything, so we...
        continue

      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  We probably want to remap the time on this c_rev,
        # unless the remapping would be useless because the new time
        # would fall outside the COMMIT_THRESHOLD window for this
        # commit group.
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if new_timestamp < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # If the new timestamp is later than that of our next revision
        elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is after time of next"
                  + " revision %s (%s):")
          # Report the *next* revision here: the message talks about it
          # and the timestamp printed alongside is next_timestamp.
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.next_rev, c_rev.next_timestamp))
          # If resyncing our rev to c_rev.next_timestamp - 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync forward to c_rev.next_timestamp
          # - 1...
          if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.next_timestamp - 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # Fix for Issue #71: Avoid resyncing two consecutive revisions
        # to the same timestamp.
        elif (new_timestamp == c_rev.prev_timestamp
              or new_timestamp == c_rev.next_timestamp):
          continue

        # adjust the time range.  we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 new_timestamp - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp
        tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # Flush the cleaned revisions to disk before pass3 sorts the file.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: sort the cleaned revisions file into the sorted
  revisions file and register the result for cleanup after pass5."""
  log = Log()
  log.write(LOG_QUIET, "Sorting CVS revisions...")
  clean_revs = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(clean_revs, sorted_revs)
  Cleanup().register(sorted_revs, pass5)
  log.write(LOG_QUIET, "Done")
def pass4():
  """Pass 4: copy the sorted revisions into the CVSRevisionDatabase.

  Unless this is a trunk-only conversion, the
  LastSymbolicNameDatabase is generated as well; it contains the last
  CVSRevision that is a source for each tag or branch."""

  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A do-nothing stand-in, so that Ctx().trunk_only does not have to
    # be tested for every revision in the loop below.
    class DummyLSNDB:
      def noop(*args): pass
      log_revision = noop
      create_database = noop
    last_sym_name_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "Finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = temp(DATAFILE + SORTED_REVS_SUFFIX)
  for line in fileinput.FileInput(sorted_revs):
    # line[:-1] drops the last character of the line (the newline).
    c_rev = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Pass 5: generate the SVNCommit <-> CVSRevision mapping
  databases.

  Every sorted CVSRevision is handed to a CVSRevisionAggregator; in a
  trunk-only conversion, revisions that have a branch name are
  skipped.  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details."""
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])
    if Ctx().trunk_only and c_rev.branch_name is not None:
      continue
    aggregator.process_revision(c_rev)
  aggregator.flush()

  stats = StatsKeeper()
  stats.set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Pass 6: sort the symbol openings/closings file.  Nothing is
  sorted in a trunk-only conversion."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  trunk_only = Ctx().trunk_only
  if not trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(sorted_path, pass8)
  Log().write(LOG_QUIET, "Done")
def pass7():
  """Pass 7: record, for each symbolic name, the file offset of its
  first line in the sorted openings/closings file.  Nothing is
  generated in a trunk-only conversion."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    # Named 'symbolings' rather than 'file' so the builtin is not
    # shadowed; closed explicitly once the scan is over.
    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      while 1:
        # tell() must be taken before readline() so that it is the
        # offset of the line about to be read.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only SYM is used; the full unpacking is kept so that a line
        # without three fields still fails loudly.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Pass 8: replay every stored SVNCommit, starting with revision 2,
  through an SVNRepositoryMirror.

  Output goes to a RepositoryDelegate when Ctx().target is set and to
  a DumpfileDelegate otherwise; neither is attached in a dry run.  A
  StdoutDelegate is always attached."""
  svncounter = 2 # Repository initialization is 1.
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  writes_output = not Ctx().dry_run
  if Ctx().target:
    if writes_output:
      repos.add_delegate(RepositoryDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Repository.")
  else:
    if writes_output:
      repos.add_delegate(DumpfileDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Commit revisions in order until the persistence manager has no
  # commit for the next revision number.
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes, in execution order.  convert() calls
# _passes[i] for i in range(start_pass - 1, end_pass), so pass number
# N is stored at index N - 1; usage() reports len(_passes) as the
# number of passes.
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  All instances share one attribute dictionary, so an option set
  through any instance is visible through every other one.
  """

  # NOTE: convert() deletes every attribute reported by dir(Ctx())
  # that has exactly one leading underscore, so do not add
  # single-underscore class attributes or methods here.
  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if not self.__dict__:
      # First instantiation: the shared state is still empty, so
      # initialize to defaults.
      self.target = None
      self.dumpfile = DUMPFILE
      self.tmpdir = '.'
      self.verbose = 0
      self.quiet = 0
      self.prune = 1
      self.existing_svnrepos = 0
      self.dump_only = 0
      self.dry_run = 0
      self.trunk_only = 0
      self.trunk_base = "trunk"
      self.tags_base = "tags"
      self.branches_base = "branches"
      self.encoding = ["ascii"]
      self.mime_types_file = None
      self.no_default_eol = 0
      self.eol_from_mime_type = 0
      self.keywords_off = 0
      self.use_cvs = None
      self.svnadmin = "svnadmin"
      self.username = None
      self.print_help = 0
      self.skip_cleanup = 0
      self.bdb_txn_nosync = 0
      self.fs_type = None
      self.forced_branches = []
      self.forced_tags = []
      self.excludes = []
      self.symbol_transforms = []
      self.svn_property_setters = []
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev
  property."""

  def set_properties(self, s_item):
    """Store s_item.c_rev.rev in S_ITEM's properties and flag the
    properties as changed."""
    cvs_revnum = s_item.c_rev.rev
    s_item.svn_props['cvs2svn:cvs-rev'] = cvs_revnum
    s_item.svn_props_changed = True
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types."""

  def __init__(self, mime_types_file):
    """Read the mappings from MIME_TYPES_FILE.

    If an extension is listed with more than one MIME type, a warning
    is written to stderr and the last type listed wins."""
    self.mappings = { }

    # Use a private FileInput instance, as the rest of this file does,
    # rather than the module-global state behind fileinput.input().
    for line in fileinput.FileInput(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      extensions = line.split()
      if len(extensions) < 2:
        continue
      mime_type = extensions.pop(0)
      for ext in extensions:
        if self.mappings.has_key(ext) and self.mappings[ext] != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext],
                              mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM if the name of its CVS file maps
    to a known MIME type; otherwise leave the properties alone."""
    basename, extension = os.path.splitext(
      os.path.basename(s_item.c_rev.cvs_path)
      )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files a fallback mime type when none has been set
  yet."""

  def set_properties(self, s_item):
    """Set svn:mime-type to application/octet-stream for files in
    CVS mode 'b' that have no svn:mime-type entry."""
    if s_item.svn_props.has_key('svn:mime-type'):
      return
    if s_item.c_rev.mode == 'b':
      s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force the eol-style of binary files to None."""

  def set_properties(self, s_item):
    """Set svn:eol-style to None when the CVS mode is 'b'."""
    is_binary = (s_item.c_rev.mode == 'b')
    if is_binary:
      s_item.svn_props['svn:eol-style'] = None
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive the eol-style from the mime type, unless an eol-style
  has already been decided.

  The mime type must have been set before this setter runs.  See
  also issue #39."""

  def set_properties(self, s_item):
    """Set svn:eol-style to 'native' for text/* mime types and to
    None for any other known mime type."""
    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    if props.get('svn:mime-type', None) is None:
      return
    if props['svn:mime-type'].startswith("text/"):
      eol_style = 'native'
    else:
      eol_style = None
    props['svn:eol-style'] = eol_style
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Apply a default eol-style to items that do not have one yet."""

  def __init__(self, value):
    """VALUE is the eol-style to use as the default."""

    self.value = value

  def set_properties(self, s_item):
    """Set svn:eol-style to the default value unless the property
    already has an entry."""
    if s_item.svn_props.has_key('svn:eol-style'):
      return
    s_item.svn_props['svn:eol-style'] = self.value
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords depending on the CVS mode of the file.  See
  issue #2."""

  def __init__(self, value):
    """VALUE is what svn:keywords is set to whenever it gets set."""

    self.value = value

  def set_properties(self, s_item):
    """Set svn:keywords for files whose CVS mode is None, 'kv' or
    'kvl', unless the property already has an entry."""
    if s_item.svn_props.has_key('svn:keywords'):
      return
    if s_item.c_rev.mode in [None, 'kv', 'kvl']:
      s_item.svn_props['svn:keywords'] = self.value
class ExecutablePropertySetter(SVNPropertySetter):
  """Mirror c_rev.file_executable into the svn:executable
  property."""

  def set_properties(self, s_item):
    """Set svn:executable to '*' when the CVS file is executable."""
    if not s_item.c_rev.file_executable:
      return
    s_item.svn_props['svn:executable'] = '*'
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Runs passes START_PASS through END_PASS (1-based, inclusive),
  records how long each pass took, and logs the statistics at the
  end."""

  cleanup = Cleanup()
  pass_started = time.time()
  StatsKeeper().set_start_time(time.time())
  for pass_num in range(start_pass, end_pass + 1):
    run_pass = _passes[pass_num - 1]
    Log().write(LOG_QUIET, '----- pass %d -----' % pass_num)
    run_pass()
    pass_finished = time.time()
    StatsKeeper().log_duration_for_pass(pass_finished - pass_started,
                                        pass_num)
    pass_started = pass_finished
    # Dispose of items in Ctx() not intended to live past the end of the pass
    # (Identified by exactly one leading underscore)
    ctx = Ctx()
    for attr in dir(ctx):
      if len(attr) <= 2 or attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        # Name-mangled private attribute of Ctx itself; keep it.
        continue
      delattr(ctx, attr)
    if not Ctx().skip_cleanup:
      cleanup.cleanup(run_pass)
  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())
def normalize_ttb_path(opt, path):
  """Return PATH normalized for use with --trunk, --tags, or
  --branches.

  PATH is split on '/' and its components are re-joined with
  _path_join(), which is what takes care of leading, trailing, and
  duplicated '/'.  OPT is the option name, used in the error message.

  Raise FatalError if the normalized path is empty."""

  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))
def verify_paths_disjoint(*paths):
  """Check that no two of PATHS overlap.

  Two paths overlap if they are identical or if one is nested in the
  other (i.e., in the sense that 'a/b/c/d' is nested in 'a/b').
  Raise FatalError for the first overlapping pair found; return None
  otherwise."""

  decorated = []
  for path in paths:
    decorated.append((path.split('/'), path))
  # When all overlapping elements are equal, a shorter list sorts
  # before a longer one.  So if any paths are nested, sorting leaves
  # at least one such pair adjacent, in the order [nest,nestling], and
  # only neighbours need to be compared.
  decorated.sort()
  previous = None
  for current in decorated:
    if previous is not None:
      outer_parts, outer = previous
      inner_parts, inner = current
      depth = len(outer_parts)
      if depth <= len(inner_parts) and inner_parts[:depth] == outer_parts:
        raise FatalError("paths %s and %s are not disjoint." % (outer, inner,))
    previous = current
def usage():
  """Print the command-line usage summary to standard output.

  The text interpolates the program's base name (from sys.argv[0]),
  the number of passes (len(_passes)), the default trunk/branches/tags
  paths taken from Ctx(), and SVN_KEYWORDS_VALUE."""
  print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
        % os.path.basename(sys.argv[0])
  print ' --help, -h print this usage message and exit with success'
  print ' --version print the version number'
  print ' -q quiet'
  print ' -v verbose'
  print ' -s PATH path for SVN repos'
  print ' -p START[:END] start at pass START, end at pass END of %d' \
        % len(_passes)
  print ' If only START is given, run only pass START'
  print ' (implicitly enables --skip-cleanup)'
  print ' --existing-svnrepos load into existing SVN repository'
  print ' --dumpfile=PATH name of intermediate svn dumpfile'
  print ' --tmpdir=PATH directory to use for tmp data (default to cwd)'
  print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
  print ' --dry-run do not create a repository or a dumpfile;'
  print ' just print what would happen.'
  print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
  print ' (only use this if having problems with RCS)'
  print ' --svnadmin=PATH path to the svnadmin program'
  print ' --trunk-only convert only trunk commits, not tags nor branches'
  print ' --trunk=PATH path for trunk (default: %s)' \
        % Ctx().trunk_base
  print ' --branches=PATH path for branches (default: %s)' \
        % Ctx().branches_base
  print ' --tags=PATH path for tags (default: %s)' \
        % Ctx().tags_base
  print ' --no-prune don\'t prune empty directories'
  print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
  print ' --encoding=ENC encoding of paths and log messages in CVS repos'
  print ' Multiple of these options may be passed, where they'
  print ' will be treated as an ordered list of encodings to'
  print ' attempt (with "ascii" as a hardcoded last resort)'
  print ' --force-branch=NAME force NAME to be a branch'
  print ' --force-tag=NAME force NAME to be a tag'
  print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
  print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
  print ' use Python regexp and reference syntax respectively'
  print ' --username=NAME username for cvs2svn-synthesized commits'
  print ' --skip-cleanup prevent the deletion of intermediate files'
  print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
  print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
  print ' --cvs-revnums record CVS revision numbers as file properties'
  print ' --mime-types=FILE specify an apache-style mime.types file for'
  print ' setting svn:mime-type'
  print ' --eol-from-mime-type set svn:eol-style from mime type if known'
  print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
  print ' non-binary files with undetermined mime types'
  print ' --keywords-off don\'t set svn:keywords on any files (by default,'
  print ' cvs2svn sets svn:keywords on non-binary files to'
  print ' "%s")' % SVN_KEYWORDS_VALUE
4957 def main():
4958 # Convenience var, so we don't have to keep instantiating this Borg.
4959 ctx = Ctx()
4961 profiling = None
4962 start_pass = 1
4963 end_pass = len(_passes)
4965 try:
4966 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
4967 [ "help", "create", "trunk=",
4968 "username=", "existing-svnrepos",
4969 "branches=", "tags=", "encoding=",
4970 "force-branch=", "force-tag=", "exclude=",
4971 "use-cvs", "mime-types=",
4972 "eol-from-mime-type", "no-default-eol",
4973 "trunk-only", "no-prune", "dry-run",
4974 "dump-only", "dumpfile=", "tmpdir=",
4975 "svnadmin=", "skip-cleanup", "cvs-revnums",
4976 "bdb-txn-nosync", "fs-type=",
4977 "version", "profile",
4978 "keywords-off", "symbol-transform="])
4979 except getopt.GetoptError, e:
4980 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
4981 usage()
4982 sys.exit(1)
4984 for opt, value in opts:
4985 if opt == '--version':
4986 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
4987 sys.exit(0)
4988 elif opt == '-p':
4989 # Don't cleanup if we're doing incrementals.
4990 ctx.skip_cleanup = 1
4991 if value.find(':') > 0:
4992 start_pass, end_pass = map(int, value.split(':'))
4993 else:
4994 end_pass = start_pass = int(value)
4995 if start_pass > len(_passes) or start_pass < 1:
4996 raise FatalError(
4997 'illegal value (%d) for starting pass. Must be 1 through %d.'
4998 % (int(start_pass), len(_passes),))
4999 if end_pass < start_pass or end_pass > len(_passes):
5000 raise FatalError(
5001 'illegal value (%d) for ending pass. Must be %d through %d.'
5002 % (int(end_pass), int(start_pass), len(_passes),))
5003 elif (opt == '--help') or (opt == '-h'):
5004 ctx.print_help = 1
5005 elif opt == '-v':
5006 Log().log_level = LOG_VERBOSE
5007 ctx.verbose = 1
5008 elif opt == '-q':
5009 Log().log_level = LOG_QUIET
5010 ctx.quiet = 1
5011 elif opt == '-s':
5012 ctx.target = value
5013 elif opt == '--existing-svnrepos':
5014 ctx.existing_svnrepos = 1
5015 elif opt == '--dumpfile':
5016 ctx.dumpfile = value
5017 elif opt == '--tmpdir':
5018 ctx.tmpdir = value
5019 elif opt == '--use-cvs':
5020 ctx.use_cvs = 1
5021 elif opt == '--svnadmin':
5022 ctx.svnadmin = value
5023 elif opt == '--trunk-only':
5024 ctx.trunk_only = 1
5025 elif opt == '--trunk':
5026 ctx.trunk_base = normalize_ttb_path(opt, value)
5027 elif opt == '--branches':
5028 ctx.branches_base = normalize_ttb_path(opt, value)
5029 elif opt == '--tags':
5030 ctx.tags_base = normalize_ttb_path(opt, value)
5031 elif opt == '--no-prune':
5032 ctx.prune = None
5033 elif opt == '--dump-only':
5034 ctx.dump_only = 1
5035 elif opt == '--dry-run':
5036 ctx.dry_run = 1
5037 elif opt == '--encoding':
5038 ctx.encoding.insert(-1, value)
5039 elif opt == '--force-branch':
5040 ctx.forced_branches.append(value)
5041 elif opt == '--force-tag':
5042 ctx.forced_tags.append(value)
5043 elif opt == '--exclude':
5044 try:
5045 ctx.excludes.append(re.compile('^' + value + '$'))
5046 except re.error, e:
5047 raise FatalError("'%s' is not a valid regexp." % (value,))
5048 elif opt == '--mime-types':
5049 ctx.mime_types_file = value
5050 elif opt == '--eol-from-mime-type':
5051 ctx.eol_from_mime_type = 1
5052 elif opt == '--no-default-eol':
5053 ctx.no_default_eol = 1
5054 elif opt == '--keywords-off':
5055 ctx.keywords_off = 1
5056 elif opt == '--username':
5057 ctx.username = value
5058 elif opt == '--skip-cleanup':
5059 ctx.skip_cleanup = 1
5060 elif opt == '--cvs-revnums':
5061 ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5062 elif opt == '--bdb-txn-nosync':
5063 ctx.bdb_txn_nosync = 1
5064 elif opt == '--fs-type':
5065 ctx.fs_type = value
5066 elif opt == '--create':
5067 sys.stderr.write(warning_prefix +
5068 ': The behaviour produced by the --create option is now the '
5069 'default,\nand passing the option is deprecated.\n')
5070 elif opt == '--profile':
5071 profiling = 1
5072 elif opt == '--symbol-transform':
5073 [pattern, replacement] = value.split(":")
5074 try:
5075 pattern = re.compile(pattern)
5076 except re.error, e:
5077 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5078 ctx.symbol_transforms.append((pattern, replacement,))
5080 if ctx.print_help:
5081 usage()
5082 sys.exit(0)
5084 # Consistency check for options and arguments.
5085 if len(args) == 0:
5086 usage()
5087 sys.exit(1)
5089 if len(args) > 1:
5090 sys.stderr.write(error_prefix +
5091 ": must pass only one CVS repository.\n")
5092 usage()
5093 sys.exit(1)
5095 cvsroot = args[0]
5097 if ctx.use_cvs:
5098 ctx.cvs_repository = CVSRepositoryViaCVS(cvsroot)
5099 else:
5100 ctx.cvs_repository = CVSRepositoryViaRCS(cvsroot)
5102 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5103 raise FatalError("must pass one of '-s' or '--dump-only'.")
5105 def not_both(opt1val, opt1name, opt2val, opt2name):
5106 if opt1val and opt2val:
5107 raise FatalError("cannot pass both '%s' and '%s'."
5108 % (opt1name, opt2name,))
5110 not_both(ctx.target, '-s',
5111 ctx.dump_only, '--dump-only')
5113 not_both(ctx.dump_only, '--dump-only',
5114 ctx.existing_svnrepos, '--existing-svnrepos')
5116 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5117 ctx.existing_svnrepos, '--existing-svnrepos')
5119 not_both(ctx.dump_only, '--dump-only',
5120 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5122 not_both(ctx.quiet, '-q',
5123 ctx.verbose, '-v')
5125 not_both(ctx.fs_type, '--fs-type',
5126 ctx.existing_svnrepos, '--existing-svnrepos')
5128 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5129 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5130 % ctx.fs_type)
5132 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5133 ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
5134 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5136 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5137 raise FatalError("the svn-repos-path '%s' is not an "
5138 "existing directory." % ctx.target)
5140 if not ctx.dump_only and not ctx.existing_svnrepos \
5141 and (not ctx.dry_run) and os.path.exists(ctx.target):
5142 raise FatalError("the svn-repos-path '%s' exists.\n"
5143 "Remove it, or pass '--existing-svnrepos'."
5144 % ctx.target)
5146 if ctx.target and not ctx.dry_run:
5147 # Verify that svnadmin can be executed. The 'help' subcommand
5148 # should be harmless.
5149 try:
5150 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5151 except CommandFailedException, e:
5152 raise FatalError(
5153 '%s\n'
5154 'svnadmin could not be executed. Please ensure that it is\n'
5155 'installed and/or use the --svnadmin option.' % (e,))
5157 if ctx.mime_types_file:
5158 ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5160 ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5161 ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5163 if ctx.eol_from_mime_type:
5164 ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5166 if ctx.no_default_eol:
5167 ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5168 else:
5169 ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5171 if not ctx.keywords_off:
5172 ctx.svn_property_setters.append(
5173 KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5175 ctx.svn_property_setters.append(ExecutablePropertySetter())
5177 # Make sure the tmp directory exists. Note that we don't check if
5178 # it's empty -- we want to be able to use, for example, "." to hold
5179 # tempfiles. But if we *did* want check if it were empty, we'd do
5180 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5181 if not os.path.exists(ctx.tmpdir):
5182 os.mkdir(ctx.tmpdir)
5183 elif not os.path.isdir(ctx.tmpdir):
5184 raise FatalError(
5185 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5186 " exists and is not a directory. Please make it be a directory,\n"
5187 " or specify some other directory for temporary files."
5188 % (ctx.tmpdir,))
5190 # But do lock the tmpdir, to avoid process clash.
5191 try:
5192 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5193 except OSError, e:
5194 if e.errno == errno.EACCES:
5195 raise FatalError("Permission denied:"
5196 + " No write access to directory '%s'." % ctx.tmpdir)
5197 if e.errno == errno.EEXIST:
5198 raise FatalError(
5199 "cvs2svn is using directory '%s' for temporary files, but\n"
5200 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5201 " cvs2svn process is currently using '%s' as its temporary\n"
5202 " workspace. If you are certain that is not the case,\n"
5203 " then remove the '%s/cvs2svn.lock' subdirectory."
5204 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5205 raise
5206 try:
5207 if profiling:
5208 import hotshot
5209 prof = hotshot.Profile('cvs2svn.hotshot')
5210 prof.runcall(convert, start_pass, end_pass)
5211 prof.close()
5212 else:
5213 convert(start_pass, end_pass)
5214 finally:
5215 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5216 except: pass
5219 if __name__ == '__main__':
5220 try:
5221 main()
5222 except FatalException, e:
5223 sys.stderr.write(str(e))
5224 sys.exit(1)