* cvs2svn (SVNRepository.__init__): Normalize SVNRepository.cvs_repos_path.
[cvs2svn.git] / cvs2svn
blob23040d9d038265fd69fd469e1c651edf8db1b7b1
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import md5
33 import marshal
34 import errno
35 import popen2
36 import types
37 try:
38 # Try to get access to a bunch of encodings for use with --encoding.
39 # See http://cjkpython.i18n.org/ for details.
40 import iconv_codec
41 except ImportError:
42 pass
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough.  sys.hexversion packs the
# interpreter version into one integer; 0x2000000 is Python 2.0.
if sys.hexversion < 0x2000000:
  # The message used to begin with a stray, never-closed "'"; it now
  # starts directly with the error prefix like every other message.
  sys.stderr.write("%s: Python 2.0 or higher required, "
                   "see www.python.org.\n" % error_prefix)
  sys.exit(1)
55 # Pretend we have true booleans on older python versions
56 try:
57 True
58 except:
59 True = 1
60 False = 0
# Opening pipes was a mess before Python 2.4, because some methods did
# not exist on some platforms, and some behaved differently on others.
# Python 2.4 solved this by adding the subprocess module, but since we
# cannot require such a new version, we cannot use it directly, but
# must implement a simplified Popen using the best means necessary.
#
# The SimplePopen class only has the following members and methods, all
# behaving as documented in the subprocess.Popen class:
# - stdin
# - stdout
# - stderr
# - wait
#
# In particular, stderr is always present as an attribute; it is None
# when the caller did not ask for stderr to be captured.
try:
  # First try subprocess.Popen...
  import subprocess
  class SimplePopen:
    """Minimal pipe wrapper built on subprocess.Popen."""

    def __init__(self, cmd, capture_stderr):
      """Start CMD with stdin and stdout piped.

      If CAPTURE_STDERR is true, stderr is piped as well; otherwise
      the child's stderr is left alone and self.stderr is None."""
      if capture_stderr:
        stderr = subprocess.PIPE
      else:
        stderr = None
      self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE, stderr=stderr)
      self.stdin = self._popen.stdin
      self.stdout = self._popen.stdout
      # Popen.stderr is already None when stderr was not piped.
      self.stderr = self._popen.stderr
      self.wait = self._popen.wait
except ImportError:
  if hasattr(popen2, 'Popen3'):
    # ...then try popen2.Popen3...
    class SimplePopen:
      """Minimal pipe wrapper built on popen2.Popen3."""

      def __init__(self, cmd, capture_stderr):
        """Start CMD; capture stderr only if CAPTURE_STDERR is true."""
        self._popen3 = popen2.Popen3(cmd, capture_stderr)
        self.stdin = self._popen3.tochild
        self.stdout = self._popen3.fromchild
        # Popen3.childerr is None when stderr was not captured.
        self.stderr = self._popen3.childerr
        self.wait = self._popen3.wait
  else:
    # ...and if all fails, use popen2.popen3...
    class SimplePopen:
      """Minimal pipe wrapper built on popen2.popen3().

      This variant always captures stderr, whatever CAPTURE_STDERR
      says, because popen3() offers no way to leave it alone."""

      def __init__(self, cmd, capture_stderr):
        """Start CMD, flattening an argument list to a command string."""
        if type(cmd) != types.StringType:
          cmd = argv_to_command_string(cmd)
        self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')

      def wait(self):
        """Close all three pipes; return the first true close() result."""
        return self.stdout.close() or self.stdin.close() or \
               self.stderr.close()
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
  import bsddb3
  sys.modules['bsddb'] = sys.modules['bsddb3']
except ImportError:
  pass

# 2. These DBM modules are not good for cvs2svn; refuse to run with them.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
  sys.stderr.write(
    error_prefix
    + (': your installation of Python does not contain a suitable\n'
       'DBM module -- cvs2svn cannot continue.\n'
       'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n'))
  sys.exit(1)

# 3. If we are using the old bsddb185 module, then prefer gdbm instead.
#    Unfortunately, gdbm appears not to be trouble free, either.
#    The old module is recognised by its bsddb attribute lacking a
#    __version__.
if (hasattr(anydbm._defaultmod, 'bsddb')
    and not hasattr(anydbm._defaultmod.bsddb, '__version__')):
  try:
    gdbm = __import__('gdbm')
  except ImportError:
    # No gdbm to fall back on: keep the old bsddb, but warn about it.
    sys.stderr.write(warning_prefix +
      ': The version of the bsddb module found '
      'on your computer has been reported to malfunction on some datasets, '
      'causing KeyError exceptions. You may wish to upgrade your Python to '
      'version 2.3 or later.\n')
  else:
    anydbm._defaultmod = gdbm
# Patterns classifying CVS revision numbers and symbol numbers.
trunk_rev = re.compile(r'^[0-9]+\.[0-9]+$')
branch_tag = re.compile(r'^[0-9.]+\.0\.[0-9]+$')
vendor_tag = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+$')

SVN_KEYWORDS_VALUE = 'Author Date Id Revision'

# This really only matches standard '1.1.1.*'-style vendor revisions.
# One could conceivably have a file whose default branch is 1.1.3 or
# whatever, or was that at some point in time, with vendor revisions
# 1.1.3.1, 1.1.3.2, etc.  But with the default branch gone now (which
# is the only time this regexp gets used), we'd have no basis for
# assuming that the non-standard vendor branch had ever been the
# default branch anyway, so we don't want this to match them anyway.
#
# NOTE(review): the repetition sits outside the second group, so
# group(2) holds only the LAST digit of the final component -- confirm
# that no caller relies on group(2) before tightening to ([0-9]+).
vendor_revision = re.compile(r'^(1\.1\.1)\.([0-9])+$')
# Name of the dumpfile.
#
# If this run's output is a repository, a dumpfile of this name is
# used (in the tmpdir) for repository loads.  If this run's output is
# a dumpfile, this is its default name, in the current directory --
# unless the user specified a dumpfile path, in which case it will be
# wherever the user said.
DUMPFILE = 'cvs2svn-dump'

# This file appears with different suffixes at different stages of
# processing.  CVS revisions are cleaned and sorted here, for commit
# grouping.  See design-notes.txt for details.
DATAFILE = 'cvs2svn-data'

# A marshalled dictionary holding all the statistics that we gather
# throughout the various runs of cvs2svn.
STATISTICS_FILE = 'cvs2svn-statistics'

# A text file of records (one per line) describing the svn filesystem
# paths that are the opening and closing source revisions for copies
# to tags and branches.  Each record looks like:
#
#     SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
#
# where TYPE is either OPENING or CLOSING.  SYMBOL_NAME and SVN_REVNUM
# are the primary and secondary sorting criteria for creating
# SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
# A sorted version of the above file.
SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'

# Temporary storage for symbolic_name -> closing CVSRevision until the
# end of our pass, where we can look up the corresponding SVNRevNum
# for the closing revs and write these out to the
# SYMBOL_OPENINGS_CLOSINGS.
SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'

# Skeleton version of an svn filesystem.
# (These supersede and will eventually replace the two above.)
# See class SVNRepositoryMirror for how these work.
SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'

# Offsets pointing to the beginning of each SYMBOLIC_NAME in
# SYMBOL_OPENINGS_CLOSINGS_SORTED.
SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'

# Maps CVSRevision.unique_key()s to lists of symbolic names, where
# the CVSRevision is the last such that is a source for those symbolic
# names.  For example, if branch B's number is 1.3.0.2 in this CVS
# file, and this file's 1.3 is the latest (by date) revision among
# *all* CVS files that is a source for branch B, then the
# CVSRevision.unique_key() corresponding to this file at 1.3 would
# list at least B in its list.
SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'

# Maps CVSRevision.unique_key() to corresponding line in s-revs.
###PERF Or, we could map to an offset into s-revs, instead of dup'ing
### the s-revs data in this database.
CVS_REVS_DB = 'cvs2svn-cvs-revs.db'

# Lists all symbolic names that are tags.  Keys are strings (symbolic
# names), values are ignorable.
TAGS_DB = 'cvs2svn-tags.db'

# A list of all tags.  Each line consists of the tag name and the
# number of files in which it exists, separated by a space.
TAGS_LIST = 'cvs2svn-tags.txt'

# A list of all branches.  The file is stored as a plain text file
# to make it easy to look at in an editor.  Each line contains the
# branch name, the number of files where the branch is created, the
# commit count, and a list of tags and branches that are defined on
# revisions in the branch.
BRANCHES_LIST = 'cvs2svn-branches.txt'

# These two databases provide a bidirectional mapping between
# CVSRevision.unique_key()s and Subversion revision numbers.
#
# The first maps CVSRevision.unique_key() to a number; the values are
# not unique.
#
# The second maps a number to a list of CVSRevision.unique_key()s.
CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'

# This database maps svn_revnums to tuples of (symbolic_name, date).
#
# The svn_revnums are the revision numbers of all non-primary
# SVNCommits.  No primary SVNCommit has a key in this database.
#
# The date is stored for all commits in this database.
#
# For commits that fill symbolic names, the symbolic_name is stored.
# For commits that are default branch syncs, the symbolic_name is None.
SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'

# This database maps svn_revnums of a default branch synchronization
# commit to the svn_revnum of the primary SVNCommit that motivated it.
#
# (NOTE: Secondary commits that fill branches and tags also have a
# motivating commit, but we do not record it because it is (currently)
# not needed for anything.)
#
# This mapping is used when generating the log message for the commit
# that synchronizes the default branch with trunk.
MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'

# How many bytes to read at a time from a pipe.  128 kiB should be
# large enough to be efficient without wasting too much memory.
PIPE_READ_SIZE = 128 * 1024

# Record the default RCS branches, if any, for CVS filepaths.
#
# The keys are CVS filepaths, relative to the top of the repository
# and with the ",v" stripped off, so they match the cvs paths used in
# Commit.commit().  The values are vendor branch revisions, such as
# '1.1.1.1', or '1.1.1.2', or '1.1.1.96'.  The vendor branch revision
# represents the highest vendor branch revision thought to have ever
# been head of the default branch.
#
# The reason we record a specific vendor revision, rather than a
# default branch number, is that there are two cases to handle:
#
# One case is simple.  The RCS file lists a default branch explicitly
# in its header, such as '1.1.1'.  In this case, we know that every
# revision on the vendor branch is to be treated as head of trunk at
# that point in time.
#
# But there's also a degenerate case.  The RCS file does not currently
# have a default branch, yet we can deduce that for some period in the
# past it probably *did* have one.  For example, the file has vendor
# revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
# and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2.  In this
# case, we should record 1.1.1.96 as the last vendor revision to have
# been the head of the default branch.
DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'

# Records the author and log message for each changeset.
# The keys are author+log digests, the same kind used to identify
# unique revisions in the .revs, etc files.  Each value is a tuple
# of two elements: '(author logmessage)'.
METADATA_DB = 'cvs2svn-metadata.db'

# A temporary on-disk hash that maps CVSRevision unique keys to a new
# timestamp for that CVSRevision.  These new timestamps are created in
# pass2, and this hash is used exclusively in pass2.
TWEAKED_TIMESTAMPS_DB = 'cvs2svn-fixed-timestamps.db'

# Suffixes that DATAFILE takes on at the different processing stages.
REVS_SUFFIX = '.revs'
CLEAN_REVS_SUFFIX = '.c-revs'
SORTED_REVS_SUFFIX = '.s-revs'
RESYNC_SUFFIX = '.resync'

SVN_INVALID_REVNUM = -1

# Flush a commit if a 5 minute gap occurs (value is in seconds).
COMMIT_THRESHOLD = 5 * 60

# Things that can happen to a file.
OP_NOOP = '-'
OP_ADD = 'A'
OP_DELETE = 'D'
OP_CHANGE = 'C'

# A deltatext either does or doesn't represent some change.
DELTATEXT_NONEMPTY = 'N'
DELTATEXT_EMPTY = 'E'
332 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
334 # Constants used in SYMBOL_OPENINGS_CLOSINGS
335 OPENING = 'O'
336 CLOSING = 'C'
class FatalException(Exception):
  """Raised when cvs2svn hits an error it cannot recover from.

  When main() lets this exception escape, the outermost layer of the
  program catches it, prints its string representation, and exits
  with a status of 1."""
class FatalError(FatalException):
  """A FatalException whose message is prefixed with error_prefix."""

  def __init__(self, msg):
    """Build the message as error_prefix, ': ', MSG and a newline."""
    text = '%s: %s\n' % (error_prefix, msg)
    FatalException.__init__(self, text)
def temp(basename):
  """Return the path of BASENAME inside the Ctx().tmpdir directory.

  This exists only to keep call sites short."""
  tmpdir = Ctx().tmpdir
  return os.path.join(tmpdir, basename)
362 # Since the unofficial set also includes [/\] we need to translate those
363 # into ones that don't conflict with Subversion limitations.
364 def _clean_symbolic_name(name):
365 """Return symbolic name NAME, translating characters that Subversion
366 does not allow in a pathname."""
367 name = name.replace('/','++')
368 name = name.replace('\\','--')
369 return name
371 def _path_join(*components):
372 """Join two or more pathname COMPONENTS, inserting '/' as needed.
373 Empty component are skipped."""
374 return string.join(filter(None, components), '/')
376 def _path_split(path):
377 """Split the svn pathname PATH into a pair, (HEAD, TAIL).
379 This is similar to os.path.split(), but always uses '/' as path
380 separator. PATH is an svn path, which should not start with a '/'.
381 HEAD is everything before the last slash, and TAIL is everything
382 after. If PATH ends in a slash, TAIL will be empty. If there is no
383 slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
384 TAIL are empty."""
386 pos = path.rfind('/')
387 if pos == -1:
388 return ('', path,)
389 else:
390 return (path[:pos], path[pos+1:],)
def to_utf8(value, mode='replace'):
  """Return VALUE re-encoded as UTF-8.

  The encodings listed in Ctx().encoding are tried in order as the
  source encoding of VALUE, and the first one that works is used.
  MODE is the error-handling mode handed to unicode().  A failed
  attempt is logged at LOG_VERBOSE level.  Raise UnicodeError if every
  source encoding fails."""
  ### FIXME: The 'replace' default mode should be an option,
  ### like --encoding is.
  candidates = Ctx().encoding
  for candidate in candidates:
    try:
      decoded = unicode(value, candidate, mode)
      return decoded.encode('utf8')
    except UnicodeError:
      Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
                  % (candidate, value))
  raise UnicodeError
def run_command(command):
  """Run COMMAND through os.system().

  Raise FatalError naming the command if os.system() returns a
  nonzero value."""
  status = os.system(command)
  if status:
    raise FatalError('Command failed: "%s"' % (command,))
class CommandFailedException(Exception):
  """Raised by check_command_runs() when the checked command fails."""
417 def check_command_runs(cmd, cmdname):
418 """Check whether the command CMD can be executed without errors.
420 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
421 name of the command as it should be included in exception error
422 messages.
424 This function checks three things: (1) the command can be run
425 without throwing an OSError; (2) it exits with status=0; (3) it
426 doesn't output anything to stderr. If any of these conditions is
427 not met, raise a CommandFailedException describing the problem."""
429 try:
430 pipe = SimplePopen(cmd, True)
431 except OSError, e:
432 raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
433 pipe.stdin.close()
434 pipe.stdout.read()
435 errmsg = pipe.stderr.read()
436 status = pipe.wait()
437 if status != 0 or errmsg:
438 msg = 'error executing %s: status %s' % (cmdname, status,)
439 if errmsg:
440 msg += ', error output:\n%s' % (errmsg,)
441 raise CommandFailedException(msg)
class CVSRepository:
  """Base class for a CVS repository that data can be extracted from."""

  def __init__(self, cvs_repos_path):
    """Remember the normalized CVS_REPOS_PATH, the top of the CVS
    repository (at least as far as this run is concerned).

    Raise FatalError if CVS_REPOS_PATH is not an existing directory."""
    if os.path.isdir(cvs_repos_path):
      self.cvs_repos_path = os.path.normpath(cvs_repos_path)
    else:
      raise FatalError("The specified CVS repository path '%s' is not an "
                       "existing directory." % cvs_repos_path)

  def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
    """Return a (command, pipe) pair for checking out C_REV.

    C_REV is a CVSRevision, and the pipe yields the text of that
    revision.  If SUPPRESS_KEYWORD_SUBSTITUTION is True, RCS/CVS
    keywords are not substituted in the output.  This base class
    provides no implementation and raises NotImplementedError."""
    raise NotImplementedError
465 class CVSRepositoryViaRCS(CVSRepository):
466 """A CVSRepository accessed via RCS."""
468 def __init__(self, cvs_repos_path):
469 CVSRepository.__init__(self, cvs_repos_path)
470 try:
471 check_command_runs([ 'co', '-V' ], 'co')
472 except CommandFailedException, e:
473 raise FatalError('%s\n'
474 'Please check that co is installed and in your PATH\n'
475 '(it is a part of the RCS software).' % (e,))
477 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
478 pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
479 if suppress_keyword_substitution:
480 pipe_cmd.append('-kk')
481 pipe_cmd.append(c_rev.rcs_path())
482 pipe = SimplePopen(pipe_cmd, True)
483 pipe.stdin.close()
484 return pipe_cmd, pipe
487 class CVSRepositoryViaCVS(CVSRepository):
488 """A CVSRepository accessed via CVS."""
490 def __init__(self, cvs_repos_path):
491 CVSRepository.__init__(self, cvs_repos_path)
492 # Ascend above the specified root if necessary, to find the
493 # cvs_repository_root (a directory containing a CVSROOT directory)
494 # and the cvs_module (the path of the conversion root within the
495 # cvs repository) NB: cvs_module must be seperated by '/' *not* by
496 # os.sep .
497 self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
498 prev_cvs_repository_root = None
499 self.cvs_module = ""
500 while prev_cvs_repository_root != self.cvs_repository_root:
501 if os.path.isdir(os.path.join(self.cvs_repository_root, 'CVSROOT')):
502 break
503 prev_cvs_repository_root = self.cvs_repository_root
504 self.cvs_repository_root, module_component = \
505 os.path.split(self.cvs_repository_root)
506 self.cvs_module = module_component + "/" + self.cvs_module
507 else:
508 # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
509 raise FatalError("the path '%s' is not a CVS repository, nor a path "
510 "within a CVS repository. A CVS repository contains "
511 "a CVSROOT directory within its root directory."
512 % (self.cvs_repos_path,))
513 os.environ['CVSROOT'] = self.cvs_repository_root
515 def cvs_ok(global_arguments):
516 check_command_runs(
517 [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
519 self.global_arguments = [ "-q", "-R" ]
520 try:
521 cvs_ok(self.global_arguments)
522 except CommandFailedException, e:
523 self.global_arguments = [ "-q" ]
524 try:
525 cvs_ok(self.global_arguments)
526 except CommandFailedException, e:
527 raise FatalError(
528 '%s\n'
529 'Please check that cvs is installed and in your PATH.' % (e,))
531 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
532 pipe_cmd = [ 'cvs' ] + self.global_arguments + \
533 [ 'co', '-r' + c_rev.rev, '-p' ]
534 if suppress_keyword_substitution:
535 pipe_cmd.append('-kk')
536 pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
537 pipe = SimplePopen(pipe_cmd, True)
538 pipe.stdin.close()
539 return pipe_cmd, pipe
def generate_ignores(c_rev):
  """Return the list of ignore patterns held in the CVSRevision C_REV.

  The text of C_REV is checked out through Ctx().cvs_repository and
  split on whitespace.  A lone '!' discards every pattern collected
  before it.  Raise FatalError if the checkout command exits with a
  nonzero status."""
  # Read in props
  pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
  chunks = [ ]
  chunk = pipe.stdout.read(PIPE_READ_SIZE)
  while chunk:
    chunks.append(chunk)
    chunk = pipe.stdout.read(PIPE_READ_SIZE)
  raw_ignore_val = "".join(chunks)
  pipe.stdout.close()
  error_output = pipe.stderr.read()
  exit_status = pipe.wait()
  if exit_status:
    raise FatalError("The command '%s' failed with exit status: %s\n"
                     "and the following output:\n"
                     "%s" % (pipe_cmd, exit_status, error_output))

  # Tweak props: any run of whitespace separates two patterns, so
  # split() yields exactly the non-empty patterns, in order.
  ignore_vals = [ ]
  for pattern in raw_ignore_val.split():
    if pattern == '!':
      # Reset the list if we encounter a '!'
      # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
      ignore_vals = [ ]
    else:
      ignore_vals.append(pattern)
  return ignore_vals
574 # Return a string that has not been returned by gen_key() before.
575 gen_key_base = 0L
576 def gen_key():
577 global gen_key_base
578 key = '%x' % gen_key_base
579 gen_key_base = gen_key_base + 1
580 return key
# ============================================================================
# This code is copied with a few modifications from:
# subversion/subversion/bindings/swig/python/svn/core.py

if sys.platform == "win32":
  # One or more backslashes directly followed by a double quote or by
  # the end of the argument.
  _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

  def escape_shell_arg(arg):
    """Return ARG quoted as a single Windows command-line argument."""
    # The (very strange) parsing rules used by the C runtime library are
    # described at:
    # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

    # double up slashes, but only if they are followed by a quote
    # character (or end the argument, where our closing quote follows)
    doubled = _escape_shell_arg_re.sub(r'\1\1\2', arg)

    # surround by quotes and escape quotes inside
    return '"' + doubled.replace('"', '"^""') + '"'

  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """
    # According cmd's usage notes (cmd /?), it parses the command line by
    # "seeing if the first character is a quote character and if so, stripping
    # the leading character and removing the last quote character."
    # So to prevent the argument string from being changed we add an extra set
    # of quotes around it here.
    quoted = [escape_shell_arg(arg) for arg in argv]
    return '"' + " ".join(quoted) + '"'

else:
  def escape_shell_arg(str):
    """Return STR single-quoted as one argument for the system shell."""
    # NB: 'str' is the argument here, not the builtin type.  Each
    # embedded single quote closes the quoting, adds an escaped quote
    # and reopens the quoting.
    return "'" + str.replace("'", "'\\''") + "'"

  def argv_to_command_string(argv):
    """Flatten a list of command line arguments into a command string.

    The resulting command string is expected to be passed to the system
    shell which os functions like popen() and system() invoke internally.
    """
    quoted = [escape_shell_arg(arg) for arg in argv]
    return " ".join(quoted)
# ============================================================================
def format_date(date):
  """Return DATE (seconds since epoch) as an svn-compatible date string.

  The result is in UTC and looks like "2002-09-29T14:44:59.000000Z";
  the fractional part is always zero."""
  utc = time.gmtime(date)
  return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", utc)
def sort_file(infile, outfile):
  """Sort the lines of INFILE into OUTFILE using the external 'sort'.

  The command runs with LC_ALL forced to 'C' and with Ctx().tmpdir as
  sort's temporary directory.  The caller's LC_ALL setting (or its
  absence) is restored afterwards, even if the command fails and
  run_command() raises FatalError."""
  # GNU sort will sort our dates differently (incorrectly!) if our
  # LC_ALL is anything but 'C', so temporarily set LC_ALL to 'C'.
  lc_all_tmp = os.environ.get('LC_ALL', None)
  os.environ['LC_ALL'] = 'C'
  try:
    # The -T option to sort has a nice side effect.  The Win32 sort is
    # case insensitive and cannot be used, and since it does not
    # understand the -T option and dies if we try to use it, there is
    # no risk that we use that sort by accident.
    run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
  finally:
    # Put the environment back exactly as we found it.
    if lc_all_tmp is None:
      del os.environ['LC_ALL']
    else:
      os.environ['LC_ALL'] = lc_all_tmp
def match_regexp_list(regexp_list, string):
  """Return True if STRING matches at least one of the compiled
  regexps in REGEXP_LIST, else False.

  The regexps are tried in order with match(), stopping at the first
  one that succeeds."""
  matched = False
  for pattern in regexp_list:
    if pattern.match(string):
      matched = True
      break
  return matched
class LF_EOL_Filter:
  """Wrap a stream so that every end-of-line marker read from it
  (CRLF, CR or LF) comes out as a single LF."""

  def __init__(self, stream):
    """Wrap STREAM, an object with a read(size) method."""
    self.stream = stream
    # True while a CR that ended the previous chunk is being held back,
    # because the LF that completes a CRLF may open the next chunk.
    self.carry_cr = False
    self.eof = False

  def read(self, size):
    """Read up to SIZE units from the stream; return them converted.

    An empty string is returned only once the underlying stream has
    reported end of file."""
    while True:
      data = self.stream.read(size)
      self.eof = (len(data) == 0)
      if self.carry_cr:
        data = '\r' + data
        self.carry_cr = False
      if not self.eof and data.endswith('\r'):
        # Hold the trailing CR back until the next chunk is known.
        self.carry_cr = True
        data = data[:-1]
      data = data.replace('\r\n', '\n').replace('\r', '\n')
      # A chunk that consisted of a lone CR is empty by now; read on
      # rather than signal a premature end of file.
      if self.eof or len(data) > 0:
        return data
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2

class Log:
  """A simple logging facility.  Each line will be timestamped if
  self.use_timestamps is TRUE.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}

  def __init__(self):
    # All instances share one attribute dictionary; only the first
    # instance ever created fills in the defaults.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    self.log_level = LOG_NORMAL
    # Set this to true if you want to see timestamps on each line output.
    self.use_timestamps = None
    self.logger = sys.stdout

  def _timestamp(self):
    """Output a detailed timestamp at the beginning of each line output."""
    # %M is minutes; the format used to say %m, which printed the month
    # a second time where the minutes belong.  %H (24-hour clock)
    # replaces %I, which is ambiguous without an AM/PM marker.
    self.logger.write(time.strftime('[%Y-%m-%d %H:%M:%S %Z] - '))

  def write(self, log_level, *args):
    """This is the public method to use for writing to a file.  Only
    messages whose LOG_LEVEL is <= self.log_level will be printed.  If
    there are multiple ARGS, they will be separated by a space."""
    if log_level > self.log_level:
      return
    if self.use_timestamps:
      self._timestamp()
    self.logger.write(' '.join(map(str, args)) + "\n")
    # Ensure that log output doesn't get out-of-order with respect to
    # stderr output.
    self.logger.flush()
class Cleanup:
  """This singleton class manages any files created by cvs2svn.  When
  you first create a file, call Cleanup.register, passing the
  filename, and the last pass that you need the file.  After the end
  of that pass, your file will be cleaned up after running an optional
  callback.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

  __shared_state = {}

  def __init__(self):
    # All instances share one attribute dictionary; only the first
    # instance ever created sets up the (empty) tables.
    self.__dict__ = self.__shared_state
    if self.__dict__:
      return
    # Maps a pass to a dictionary whose keys are the files to delete
    # once that pass is over.
    self._log = {}
    # Maps a file to the callback to run just before deleting it.
    self._callbacks = {}

  def register(self, file, which_pass, callback=None):
    """Register FILE for cleanup at the end of WHICH_PASS, running
    function CALLBACK prior to removal.  Registering a given FILE is
    idempotent; you may register as many times as you wish, but it
    will only be cleaned up once.

    Note that if a file is registered multiple times, only the first
    callback registered for that file will be called at cleanup
    time.  Also note that if you register a database file you must
    close the database before cleanup, e.g. using a callback."""
    files = self._log.setdefault(which_pass, {})
    files[file] = 1
    if callback:
      # setdefault() leaves an earlier callback for FILE in place.
      self._callbacks.setdefault(file, callback)

  def cleanup(self, which_pass):
    """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
    for file in self._log.get(which_pass, {}).keys():
      Log().write(LOG_VERBOSE, "Deleting", file)
      callback = self._callbacks.get(file)
      if callback is not None:
        callback()
      os.unlink(file)
# Always use these constants for opening databases.
DB_OPEN_READ = 'r'
DB_OPEN_NEW = 'n'


class AbstractDatabase:
  """Common base of the anydbm-backed databases.

  Subclasses supply __getitem__ and __setitem__."""

  # NB: this must stay an old-style class: __init__ installs
  # __delitem__ on the instance, which only old-style classes consult
  # for 'del db[key]'.

  def __init__(self, filename, mode):
    """Open the anydbm database FILENAME with the anydbm flag MODE."""
    # pybsddb3 has a bug which prevents it from working with
    # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
    # causes the DB_TRUNCATE flag to be passed, which is disallowed
    # for databases protected by lock and transaction support
    # (bsddb databases use locking from bsddb version 4.2.4 onwards).
    #
    # Therefore, manually perform the removal (we can do this, because
    # we know that for bsddb - but *not* anydbm in general - the database
    # consists of one file with the name we specify, rather than several
    # based on that name).
    if mode == 'n':
      if anydbm._defaultmod.__name__ == 'dbhash':
        if os.path.isfile(filename):
          os.unlink(filename)
        mode = 'c'

    self.db = anydbm.open(filename, mode)
    # Hand these two operations straight to the underlying database.
    self.has_key = self.db.has_key
    self.__delitem__ = self.db.__delitem__

  def get(self, key, default=None):
    """Return self[KEY], or DEFAULT if KEY is missing.

    bsddb3 doesn't have a get() method, so define one here."""
    try:
      value = self[key]
    except KeyError:
      value = default
    return value
class SDatabase(AbstractDatabase):
  """A database that can only store strings."""

  def __getitem__(self, key):
    """Return the string stored under KEY, exactly as stored."""
    stored = self.db[key]
    return stored

  def __setitem__(self, key, value):
    """Store the string VALUE under KEY, without any conversion."""
    self.db[key] = value
class Database(AbstractDatabase):
  """A database that uses the marshal module to store built-in types."""

  def __getitem__(self, key):
    """Return the value stored under KEY, unmarshalled."""
    serialized = self.db[key]
    return marshal.loads(serialized)

  def __setitem__(self, key, value):
    """Store VALUE under KEY in marshalled form."""
    serialized = marshal.dumps(value)
    self.db[key] = serialized
class StatsKeeper:
  """Gather statistics about the conversion and report them.

  Every instance binds its __dict__ to one class-level dictionary, so
  all instances share the same state and only the first construction
  performs the initialization below."""

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # An earlier instance already populated the shared state.
      return
    self.filename = temp(STATISTICS_FILE)
    Cleanup().register(self.filename, pass8)
    # This can get kinda large, so we don't store it in our data dict.
    self.repos_files = { }

    if os.path.exists(self.filename):
      self.unarchive()
    else:
      self.data = { 'cvs_revs_count' : 0,
                    'tags': { },
                    'branches' : { },
                    'repos_size' : 0,
                    'repos_file_count' : 0,
                    'svn_rev_count' : None,
                    # Starts high so that record_c_rev() replaces it with
                    # the first smaller timestamp it is given.
                    'first_rev_date' : 2 ** 32,
                    'last_rev_date' : 0,
                    'pass_timings' : { },
                    'start_time' : 0,
                    'end_time' : 0,
                    }

  def log_duration_for_pass(self, duration, pass_num):
    """Record that pass PASS_NUM took DURATION seconds."""
    self.data['pass_timings'][pass_num] = duration

  def set_start_time(self, start):
    """Record START as the start time used by timings()."""
    self.data['start_time'] = start

  def set_end_time(self, end):
    """Record END as the end time used by timings()."""
    self.data['end_time'] = end

  def _bump_item(self, key, amount=1):
    """Add AMOUNT to the counter stored under KEY."""
    self.data[key] = self.data[key] + amount

  def reset_c_rev_info(self):
    """Forget the revision count, tags and branches seen so far."""
    self.data['cvs_revs_count'] = 0
    self.data['tags'] = { }
    self.data['branches'] = { }

  def record_c_rev(self, c_rev):
    """Fold the CVSRevision C_REV into the statistics."""
    self._bump_item('cvs_revs_count')

    # The dictionaries are used as sets; the values are not used.
    for tag in c_rev.tags:
      self.data['tags'][tag] = None
    for branch in c_rev.branches:
      self.data['branches'][branch] = None

    if c_rev.timestamp < self.data['first_rev_date']:
      self.data['first_rev_date'] = c_rev.timestamp

    if c_rev.timestamp > self.data['last_rev_date']:
      self.data['last_rev_date'] = c_rev.timestamp

    # Only add the size if this is the first time we see the file.
    if c_rev.fname not in self.repos_files:
      self._bump_item('repos_size', c_rev.file_size)
      self.repos_files[c_rev.fname] = None

    self.data['repos_file_count'] = len(self.repos_files)

  def set_svn_rev_count(self, count):
    """Record COUNT as the number of SVN revisions."""
    self.data['svn_rev_count'] = count

  def svn_rev_count(self):
    """Return the recorded number of SVN revisions (None if unset)."""
    return self.data['svn_rev_count']

  def archive(self):
    """Write self.data to self.filename in marshal format.

    self.repos_files is deliberately not part of self.data and is
    therefore not saved."""
    # marshal output is binary, so the file must not be opened in text
    # mode; close it explicitly so the data is on disk when we return.
    f = open(self.filename, 'wb')
    try:
      f.write(marshal.dumps(self.data))
    finally:
      f.close()

  def unarchive(self):
    """Replace self.data with the contents of self.filename."""
    f = open(self.filename, 'rb')
    try:
      self.data = marshal.loads(f.read())
    finally:
      f.close()

  def __str__(self):
    svn_revs_str = ""
    if self.data['svn_rev_count'] is not None:
      svn_revs_str = ('Total SVN Commits: %10s\n'
                      % self.data['svn_rev_count'])

    return ('\n'
            'cvs2svn Statistics:\n'
            '------------------\n'
            'Total CVS Files: %10i\n'
            'Total CVS Revisions: %10i\n'
            'Total Unique Tags: %10i\n'
            'Total Unique Branches: %10i\n'
            'CVS Repos Size in KB: %10i\n'
            '%s'
            'First Revision Date: %s\n'
            'Last Revision Date: %s\n'
            '------------------'
            % (self.data['repos_file_count'],
               self.data['cvs_revs_count'],
               len(self.data['tags']),
               len(self.data['branches']),
               (self.data['repos_size'] // 1024),
               svn_revs_str,
               time.ctime(self.data['first_rev_date']),
               time.ctime(self.data['last_rev_date']),
               ))

  def timings(self):
    """Return a report of the per-pass durations and the total time."""
    passes = list(self.data['pass_timings'].keys())
    passes.sort()
    report = 'Timings:\n------------------\n'

    def desc(val):
      if val == 1: return "second"
      return "seconds"

    for pass_num in passes:
      duration = int(self.data['pass_timings'][pass_num])
      p_str = ('pass %d:%6d %s\n'
               % (pass_num, duration, desc(duration)))
      report = report + p_str

    total = int(self.data['end_time'] - self.data['start_time'])
    report = report + ('total: %6d %s' % (total, desc(total)))
    return report
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolic name was
  seen in, and whose value is a list of all symbolic names that were
  last seen in that revision."""

  def __init__(self, mode):
    """Open the symbol database in MODE (passed through to Database)."""
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  # Once we've gone through all the revs,
  # symbols.keys() will be a list of all tags and branches, and
  # their corresponding values will be a key into the last CVS revision
  # that they were used in.
  def log_revision(self, c_rev):
    """Note C_REV as the latest revision for each symbol it carries."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare by value: the op may be an equal but distinct string
    # object, which an identity test would wrongly treat as different.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  # Creates an inversion of symbols above--a dictionary of lists (key
  # = CVS rev unique_key: val = list of symbols that close in that
  # rev.
  def create_database(self):
    """Write the inverted symbol map into self.symbol_revs_db."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """Store CVSRevision objects, keyed by their unique_key(), and hand
  them back on request."""

  def __init__(self, mode):
    """Open the underlying string database in MODE (the same kind of
    MODE argument that Database or anydbm.open() accept)."""
    revs_db = SDatabase(temp(CVS_REVS_DB), mode)
    self.cvs_revs_db = revs_db
    Cleanup().register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Record the CVSRevision C_REV in the database."""
    key = c_rev.unique_key()
    self.cvs_revs_db[key] = str(c_rev)

  def get_revision(self, unique_key):
    """Look up UNIQUE_KEY and return the CVSRevision rebuilt from the
    string stored there."""
    ctx = Ctx()
    serialized = self.cvs_revs_db[unique_key]
    return CVSRevision(ctx, serialized)
def TagsDatabase(mode):
  """Open, in MODE, the database recording which symbolic names are tags.

  Each key is a tag name.  The value has no meaning, and should be set
  to None.  The database file is registered with Cleanup before the
  database is returned."""
  tags_db = SDatabase(temp(TAGS_DB), mode)
  Cleanup().register(temp(TAGS_DB), pass8)
  return tags_db
class Project:
  """A project within a CVS repository."""

  def __init__(self, cvs_root, trunk_path, branches_path, tags_path):
    """Create a new Project record.

    CVS_ROOT is the main CVS directory for this project (within the
    filesystem); it is stored normalized.  TRUNK_PATH, BRANCHES_PATH,
    and TAGS_PATH are the full, normalized directory names in svn for
    the corresponding part of the repository.  The three svn paths are
    passed to verify_paths_disjoint()."""

    self.cvs_root = os.path.normpath(cvs_root)
    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH is a legitimate source for this project.

    Legitimate paths are self.trunk_path or any directory directly
    under self.branches_path."""

    if svn_path == self.trunk_path:
      return True

    parent_dir, _basename = _path_split(svn_path)
    if parent_dir != self.branches_path:
      return False

    return True

  def is_unremovable(self, svn_path):
    """Return True iff the specified path must not be removed."""

    protected = (self.trunk_path, self.branches_path, self.tags_path)
    return svn_path in protected

  def relative_name(self, fname):
    """Return the path to FNAME relative to cvs_root, with ',v' removed.

    FNAME is a filesystem name that has to begin (textually) with
    self.cvs_root and end with ',v'; FatalError is raised otherwise.
    The result uses '/' as its separator."""

    if not fname.startswith(self.cvs_root):
      raise FatalError(
          "relative_name: '%s' is not a sub-path of '%s'"
          % (fname, self.cvs_root,))
    if not fname.endswith(',v'):
      raise FatalError("relative_name: '%s' does not end with ',v'"
                       % (fname,))
    start = len(self.cvs_root)
    # Skip the separator between the root and the relative part, if any.
    if fname[start] == os.sep:
      start = start + 1
    relative = fname[start:-2]
    return relative.replace(os.sep, '/')

  def get_branch_path(self, branch_name):
    """Return the svnpath for the branch named BRANCH_NAME."""

    cleaned = _clean_symbolic_name(branch_name)
    return _path_join(self.branches_path, cleaned)

  def get_tag_path(self, tag_name):
    """Return the svnpath for the tag named TAG_NAME."""

    cleaned = _clean_symbolic_name(tag_name)
    return _path_join(self.tags_path, cleaned)

  def make_trunk_path(self, path):
    """Return the trunk path for PATH.

    PATH is a filesystem path relative to cvs_root.  Return the svn
    path for this file on trunk."""

    return _path_join(self.trunk_path, path)

  def make_branch_path(self, branch_name, path):
    """Return the branch path for PATH on the branch with name BRANCH_NAME.

    PATH is a filesystem path relative to cvs_root.  Return the svn
    path for this file on the specified branch."""

    branch_root = self.get_branch_path(branch_name)
    return _path_join(branch_root, path)
class CVSRevision:
  """One revision of one RCS file, as recorded in a revs file."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Any other number of ARGS raises TypeError.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      # The first 15 fields are single words; everything after them
      # (tags, branches and the file name) is picked apart below.
      data = args[0].split(' ', 15)
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.mode, self.branch_name, numtags, remainder) = data
      # Patch up data items which are not simple strings
      self.timestamp = int(self.timestamp, 16)
      if self.prev_timestamp == "*":
        self.prev_timestamp = 0
      else:
        self.prev_timestamp = int(self.prev_timestamp)
      if self.next_timestamp == "*":
        self.next_timestamp = 0
      else:
        self.next_timestamp = int(self.next_timestamp)
      if self.prev_rev == "*":
        self.prev_rev = None
      if self.next_rev == "*":
        self.next_rev = None
      if self.file_in_attic == "*":
        self.file_in_attic = None
      if self.file_executable == "*":
        self.file_executable = None
      self.file_size = int(self.file_size)
      if self.mode == "*":
        self.mode = None
      if self.branch_name == "*":
        self.branch_name = None
      numtags = int(numtags)
      # NUMTAGS tag names come first, then the branch count, then the
      # branch names followed by the file name.
      tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
      self.tags = tags_and_numbranches_and_remainder[:-2]
      numbranches = int(tags_and_numbranches_and_remainder[-2])
      remainder = tags_and_numbranches_and_remainder[-1]
      # The file name is last so that it may itself contain spaces.
      branches_and_fname = remainder.split(' ', numbranches)
      self.branches = branches_and_fname[:-1]
      self.fname = branches_and_fname[-1]
    else:
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.project.relative_name(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(
            self.branch_name, self.cvs_path)
      else:
        self.svn_path = ctx.project.make_trunk_path(self.cvs_path)

  # The 'primary key' of a CVS Revision is the revision number + the
  # filename.  To provide a unique key (say, for a dict), we just glom
  # them together in a string.  By passing in self.prev_rev or
  # self.next_rev, you can get the unique key for their respective
  # CVSRevisions.
  def unique_key(self, revnum="0"):
    """Return the key for revision REVNUM of this file.

    With REVNUM left at its default of "0" the key is for self.rev.
    If REVNUM is None, return None."""
    # "0" is the 'not given' marker; compare by value rather than by
    # object identity, which only works when the string is shared.
    if revnum == "0":
      revnum = self.rev
    elif revnum is None:
      return None
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision as a revs-file line (no trailing newline),
    in the format that the one-argument constructor parses."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
               self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
               self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
               (self.file_executable or "*"),
               self.file_size,
               self.deltatext_code, (self.mode or "*"),
               (self.branch_name or "*"),
               len(self.tags), self.tags and " " or "", " ".join(self.tags),
               len(self.branches), self.branches and " " or "",
               " ".join(self.branches),
               self.fname, ))

  # Returns true if this CVSRevision is the opening CVSRevision for
  # NAME (for this RCS file).
  def opens_symbolic_name(self, name):
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # the branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags.keys():
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches.keys():
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Return a hash of the blockers of BRANCH that are not themselves
    in the hash EXCLUDES.  The result is empty unless BRANCH is in
    EXCLUDES.  The hash is used as a set so the values are not used."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so the
    values are not used."""
    blocked_branches = { }
    for branch in self.branches.keys():
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches.keys():
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files."""
    # Close each file explicitly instead of leaving it to the garbage
    # collector.
    f = open(temp(TAGS_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        # Line format: name, create count, commit count, blockers...
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    # Close each file explicitly so that its contents are flushed to
    # disk by the time this method returns.
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1386 class CollectData(cvs2svn_rcsparse.Sink):
def __init__(self):
  """Open the data files and databases this sink writes to, register
  them with Cleanup, and reset the bookkeeping that spans all files."""
  # In-memory bookkeeping; none of it depends on the files below.
  self.fatal_errors = []
  self.num_files = 0
  self.symbol_db = SymbolDatabase()

  # 1 if we've collected data for at least one file, None otherwise.
  self.found_valid_file = None

  # Output files, each registered for removal as soon as it is opened.
  self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
  self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)

  # Databases, handled the same way.
  self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                       DB_OPEN_NEW)
  Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
  self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
  Cleanup().register(temp(METADATA_DB), pass8)

  # See set_fname() for initializations of other variables.
def set_fname(self, canonical_name, filename):
  """Prepare to receive data for FILENAME.  FILENAME is the absolute
  filesystem path to the file in question, and CANONICAL_NAME is
  FILENAME with the 'Attic' component removed (if the file is indeed
  in the Attic).  All per-file state is reset here."""
  self.fname = canonical_name

  # The file metadata is computed here, once per file, rather than
  # later where the same work would be repeated once per CVS
  # *revision*.

  self.rel_name = Ctx().project.relative_name(self.fname)

  # The two names differ only when the 'Attic' component has been
  # stripped out of canonical_name.
  if canonical_name == filename:
    self.file_in_attic = None
  else:
    self.file_in_attic = 1

  file_stat = os.stat(filename)
  # The size of our file in bytes
  self.file_size = file_stat[stat.ST_SIZE]

  # Whether or not the executable bit is set.
  if file_stat[stat.ST_MODE] & stat.S_IXUSR:
    self.file_executable = 1
  else:
    self.file_executable = None

  # revision -> [timestamp, author, old-timestamp]
  self.rev_data = { }

  # Maps revision number (key) to the revision number of the
  # previous revision along this line of development.
  #
  # For the first revision R on a branch, we consider the revision
  # from which R sprouted to be the 'previous'.
  #
  # Note that this revision can't be determined arithmetically (due
  # to cvsadmin -o, which is why this is necessary).
  #
  # If the key has no previous revision, then store None as key's
  # value.
  self.prev_rev = { }

  # This dict is essentially self.prev_rev with the values mapped in
  # the other direction, so following key -> value will yield you
  # the next revision number.
  #
  # Unlike self.prev_rev, if the key has no next revision, then the
  # key is not present.
  self.next_rev = { }

  # Track the state of each revision so that in set_revision_info,
  # we can determine if our op is an add/change/delete.  We can do
  # this because in set_revision_info, we'll have all of the
  # revisions for a file at our fingertips, and we need to examine
  # the state of our prev_rev to determine if we're an add or a
  # change--without the state of the prev_rev, we are unable to
  # distinguish between an add and a change.
  self.rev_state = { }

  # Hash mapping branch numbers, like '1.7.2', to branch names,
  # like 'Release_1_0_dev'.
  self.branch_names = { }

  # RCS flags (used for keyword expansion).
  self.mode = None

  # Hash mapping revision numbers, like '1.7', to lists of names
  # indicating which branches sprout from that revision, like
  # ['Release_1_0_dev', 'experimental_driver', ...].
  self.branchlist = { }

  # Like self.branchlist, but the values are lists of tag names that
  # apply to the key revision.
  self.taglist = { }

  # If set, this is an RCS branch number -- rcsparse calls this the
  # "principal branch", but CVS and RCS refer to it as the "default
  # branch", so that's what we call it, even though the rcsparse API
  # setter method is still 'set_principal_branch'.
  self.default_branch = None

  # If the RCS file doesn't have a default branch anymore, but does
  # have vendor revisions, then we make an educated guess that those
  # revisions *were* the head of the default branch up until the
  # commit of 1.2, at which point the file's default branch became
  # trunk.  This records the date at which 1.2 was committed.
  self.first_non_vendor_revision_date = None

  # A list of all symbols defined for the current file.  Used to
  # prevent multiple definitions of a symbol, something which can
  # easily happen when --symbol-transform is used.
  self.defined_symbols = { }
def set_principal_branch(self, branch):
  """Remember BRANCH as the default branch of the current file."""
  self.default_branch = branch
def set_expansion(self, mode):
  """Remember MODE as the RCS keyword expansion mode of the current
  file."""
  self.mode = mode
def set_branch_name(self, branch_number, name):
  """Record that BRANCH_NUMBER is the branch number for branch NAME,
  and that NAME sprouts from the revision BRANCH_NUMBER is rooted in.
  BRANCH_NUMBER is an RCS branch number with an odd number of components,
  for example '1.7.2' (never '1.7.0.2').

  If BRANCH_NUMBER already has a name, write a warning to stderr and
  ignore NAME."""
  if branch_number in self.branch_names:
    sys.stderr.write("%s: in '%s':\n"
                     " branch '%s' already has name '%s',\n"
                     " cannot also have name '%s', ignoring the latter\n"
                     % (warning_prefix, self.fname, branch_number,
                        self.branch_names[branch_number], name))
    return

  self.branch_names[branch_number] = name
  # The branchlist is keyed on the revision number from which the
  # branch sprouts, so strip off the odd final component.
  last_dot = branch_number.rfind(".")
  sprout_rev = branch_number[:last_dot]
  sprouting = self.branchlist.setdefault(sprout_rev, [])
  sprouting.append(name)
  self.symbol_db.register_branch_creation(name)
def rev_to_branch_name(self, revision):
  """Return the name of the branch on which REVISION lies.
  REVISION is a non-branch revision number with an even number of
  components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
  For the convenience of callers, REVISION can also be a trunk
  revision such as '1.2', in which case just return None.  None is
  also returned when no name is recorded for the branch."""
  if not trunk_rev.match(revision):
    # Dropping the last component leaves the branch number.
    branch_number = revision[:revision.rindex(".")]
    return self.branch_names.get(branch_number)
  return None
def add_cvs_branch(self, revision, branch_name):
  """Record the root revision and branch revision for BRANCH_NAME,
  based on REVISION.  REVISION is a CVS branch number having an even
  number of components where the second-to-last is '0'.  For
  example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
  from 1.7 and has branch number 1.7.2."""
  # Split off the final '.N', drop the component before it, and glue
  # the final '.N' back on.
  tail_start = revision.rfind(".")
  stem = revision[:tail_start]
  sprout_rev = stem[:stem.rfind(".")]
  branch_number = sprout_rev + revision[tail_start:]
  self.set_branch_name(branch_number, branch_name)
def define_tag(self, name, revision):
  """Record a bidirectional mapping between symbolic NAME and REVISION.
  REVISION is an unprocessed revision number from the RCS file's
  header, for example: '1.7', '1.7.0.2', or '1.1.1' or '1.1.1.1'.
  This function will determine what kind of symbolic name it is by
  inspection, and record it in the right places.

  NAME is first run through every symbol transform in the Ctx.  A
  second definition of the same (transformed) name in one file is
  reported on stderr and added to self.fatal_errors."""
  # Apply the transforms in order, each one to the previous result.
  for (pattern, replacement) in Ctx().symbol_transforms:
    transformed = pattern.sub(replacement, name)
    if transformed != name:
      Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                  % (name, transformed))
      name = transformed

  if name in self.defined_symbols:
    err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
          % (error_prefix, name, self.fname)
    sys.stderr.write(err + "\n")
    self.fatal_errors.append(err)
  self.defined_symbols[name] = None

  # Dispatch on the shape of the revision number.
  if branch_tag.match(revision):
    self.add_cvs_branch(revision, name)
    return
  if vendor_tag.match(revision):
    self.set_branch_name(revision, name)
    return
  self.taglist.setdefault(revision, []).append(name)
  self.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
  """Record the data for REVISION of the current file: its STATE, its
  TIMESTAMP and AUTHOR, its links to the neighbouring revisions (NEXT
  and BRANCHES), default-branch bookkeeping, and, for a non-trunk
  revision, a commit on its branch."""

  on_trunk = trunk_rev.match(revision)

  # Record the state of our revision for later calculations
  self.rev_state[revision] = state

  # store the rev_data as a list in case we have to jigger the timestamp
  self.rev_data[revision] = [int(timestamp), author, None]

  # When on trunk, the RCS 'next' revision number points to what
  # humans might consider to be the 'previous' revision number.  For
  # example, 1.3's RCS 'next' is 1.2.
  #
  # However, on a branch, the RCS 'next' revision number really does
  # point to what humans would consider to be the 'next' revision
  # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
  #
  # In other words, in RCS, 'next' always means "where to find the next
  # deltatext that you need this revision to retrieve".
  #
  # That said, we don't *want* RCS's behavior here, so we determine
  # whether we're on trunk or a branch and set self.prev_rev
  # accordingly.
  #
  # One last thing.  Note that if REVISION is a branch revision,
  # instead of mapping REVISION to NEXT, we instead map NEXT to
  # REVISION.  Since we loop over all revisions in the file before
  # doing anything with the data we gather here, this 'reverse
  # assignment' effectively does the following:
  #
  # 1. Gives us no 'prev' value for REVISION (in this
  # iteration... it may have been set in a previous iteration)
  #
  # 2. Sets the 'prev' value for the revision with number NEXT to
  # REVISION.  So when we come around to the branch revision whose
  # revision value is NEXT, its 'prev' and 'prev_rev' are already
  # set.
  if on_trunk:
    self.prev_rev[revision] = next
    self.next_rev[next] = revision
  elif next:
    self.prev_rev[next] = revision
    self.next_rev[revision] = next

  for sprouted in branches:
    self.prev_rev[sprouted] = revision

  # Ratchet up the highest vendor head revision, if necessary.
  if self.default_branch:
    default_branch_root = self.default_branch + "."
    same_depth = default_branch_root.count('.') == revision.count('.')
    if revision.startswith(default_branch_root) and same_depth:
      # This revision is on the default branch, so record that it is
      # the new highest default branch head revision.
      self.default_branches_db[self.rel_name] = revision
  elif revision == '1.2':
    # No default branch, so make an educated guess.
    # This is probably the time when the file stopped having a
    # default branch, so make a note of it.
    self.first_non_vendor_revision_date = timestamp
  else:
    # No default branch either; see whether this is a vendor revision.
    m = vendor_revision.match(revision)
    if m and ((not self.first_non_vendor_revision_date)
              or (timestamp < self.first_non_vendor_revision_date)):
      # We're looking at a vendor revision, and it wasn't
      # committed after this file lost its default branch, so bump
      # the maximum trunk vendor revision in the permanent record.
      self.default_branches_db[self.rel_name] = revision

  if on_trunk:
    return

  # Check for unlabeled branches, record them.  We tried to collect
  # all branch names when we parsed the symbolic name header
  # earlier, of course, but that didn't catch unlabeled branches.
  # If a branch is unlabeled, this is our first encounter with it,
  # so we have to record its data now.
  branch_number = revision[:revision.rindex(".")]
  if branch_number not in self.branch_names:
    self.set_branch_name(branch_number, "unlabeled-" + branch_number)

  # Register the commit on this non-trunk branch
  branch_name = self.branch_names[branch_number]
  self.symbol_db.register_branch_commit(branch_name)
def tree_completed(self):
  "The revision tree has been parsed. Analyze it for consistency."

  # Our algorithm depends upon the timestamps on the revisions occurring
  # monotonically over time.  That is, we want to see rev 1.34 occur in
  # time before rev 1.35.  If we inserted 1.35 *first* (due to the time-
  # sorting), and then tried to insert 1.34, we'd be screwed.

  # To perform the analysis, we simply visit all of the 'previous'
  # links that we have recorded and validate that the timestamp on the
  # previous revision is before the specified revision.

  # Whenever some nodes had to be resynced the scan is restarted; we
  # are done once a complete scan finds nothing to fix.
  rescan = 1
  while rescan:
    rescan = 0
    for current, prev in self.prev_rev.items():
      if not prev:
        # no previous revision exists (i.e. the initial revision)
        continue
      t_c = self.rev_data[current][0]
      t_p = self.rev_data[prev][0]
      if t_p < t_c:
        # this link is already in order
        continue

      # the previous revision occurred later than the current revision.
      # shove the previous revision back in time (and any before it that
      # may need to shift).
      #
      # We sync backwards and not forwards because any given CVS
      # Revision has only one previous revision.  However, a CVS
      # Revision can *be* a previous revision for many other
      # revisions (e.g., a revision that is the source of multiple
      # branches).  This becomes relevant when we do the secondary
      # synchronization in pass 2--we can make certain that we
      # don't resync a revision earlier than its previous
      # revision, but it would be non-trivial to make sure that we
      # don't resync revision R *after* any revisions that have R
      # as a previous revision.
      while t_p >= t_c:
        new_time = t_c - 1
        self.rev_data[prev][0] = new_time  # new timestamp
        self.rev_data[prev][2] = t_p       # old timestamp
        delta = new_time - t_p
        msg = "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (self.rel_name, prev, time.ctime(t_p), delta)
        Log().write(LOG_VERBOSE, msg)
        if (delta > COMMIT_THRESHOLD
            or delta < (COMMIT_THRESHOLD * -1)):
          template = "%s: Significant timestamp change for '%s' (%d seconds)"
          Log().write(LOG_WARN,
                      template % (warning_prefix, self.rel_name, delta))
        # Step one link further back along the chain.
        current = prev
        prev = self.prev_rev[current]
        if not prev:
          break
        t_c = new_time   # self.rev_data[current][0]
        t_p = self.rev_data[prev][0]

      # timestamps changed, so abandon this scan and start over
      rescan = 1
      break
def set_revision_info(self, revision, log, text):
  """Record REVISION of the file being parsed as a CVSRevision.

  LOG is the revision's log message.  TEXT is its delta text; only its
  truthiness is used, to choose between DELTATEXT_NONEMPTY and
  DELTATEXT_EMPTY.

  This method:
    - writes a line to self.resync if the revision's timestamp was
      changed earlier (its old timestamp in self.rev_data is set);
    - removes the file from self.default_branches_db if revision 1.1
      does not carry the log message CVS uses for imports;
    - classifies the revision as OP_ADD, OP_CHANGE or OP_DELETE;
    - writes the resulting CVSRevision to self.revs and reports it to
      StatsKeeper;
    - stores (author, LOG) in self.metadata_db under the digest of the
      log message and author, unless that digest is already present."""
  timestamp, author, old_ts = self.rev_data[revision]
  digest = sha.new(log + '\0' + author).hexdigest()
  if old_ts:
    # The timestamp on this revision was changed.  Log it for later
    # resynchronization of other files' revisions that occurred for
    # this time and log message.
    self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

  # "...Give back one kadam to honor the Hebrew God whose Ark this is."
  #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
  #
  # If revision 1.1 appears to have been created via 'cvs add' instead
  # of 'cvs import', then this file probably never had a default
  # branch, so retroactively remove its record from the default
  # branches db.  The test is that the log message CVS uses for 1.1 in
  # imports is "Initial revision\n" with no period.
  if revision == '1.1' and log != 'Initial revision\n':
    try:
      del self.default_branches_db[self.rel_name]
    except KeyError:
      pass

  # Timestamps of the neighbouring revisions; 0 when there is no such
  # neighbour (or no data recorded for it).
  no_data = [0, None, None]
  prev_rev = self.prev_rev[revision]
  prev_timestamp = self.rev_data.get(prev_rev, no_data)[0]
  next_rev = self.next_rev.get(revision)
  next_timestamp = self.rev_data.get(next_rev, no_data)[0]

  # How to tell if a CVSRevision is an add, a change, or a deletion:
  #
  #   - It is a delete if its RCS state is 'dead'.
  #   - It is an add if it is alive and either has no previous
  #     revision or has a previous revision whose state is 'dead'.
  #   - Anything else is a change.
  if self.rev_state[revision] == 'dead':
    op = OP_DELETE
  elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
    op = OP_ADD
  else:
    op = OP_CHANGE

  def is_branch_revision(rev):
    """Return true if REV is not a trunk revision, i.e. if its
    revision number contains at least three dots."""
    return rev.count('.') >= 3

  def is_same_line_of_development(rev1, rev2):
    """Return true if REV1 and REV2 are both on trunk or both on the
    same branch.  If either is None, return False."""
    if rev1 is None or rev2 is None:
      return False
    both_on_trunk = rev1.count('.') == 1 and rev2.count('.') == 1
    return both_on_trunk or rev1[:rev1.rfind('.')] == rev2[:rev2.rfind('.')]

  # There can be an odd situation where the tip revision of a branch
  # is alive, but every predecessor on the branch is in state 'dead',
  # yet the revision from which the branch sprouts is alive.  (This is
  # sort of a mirror image of the more common case of adding a file on
  # a branch, in which the first revision on the branch is alive while
  # the revision from which it sprouts is dead.)
  #
  # In this odd situation, the first live revision on the branch must
  # be marked as an OP_CHANGE instead of an OP_ADD, because it
  # reflects, however indirectly, a change w.r.t. the source revision
  # from which the branch sprouts.
  #
  # This is issue #89.
  if is_branch_revision(revision) and self.rev_state[revision] != 'dead':
    # Walk back through the predecessors, looking for a dead revision
    # whose own predecessor is alive and on another line of
    # development.
    cur_num = revision
    prev_num = self.prev_rev.get(cur_num, None)
    while cur_num and prev_num:
      if (not is_same_line_of_development(cur_num, prev_num)
          and self.rev_state[cur_num] == 'dead'
          and self.rev_state[prev_num] != 'dead'):
        op = OP_CHANGE
      cur_num = prev_num
      prev_num = self.prev_rev.get(cur_num, None)

  deltatext_code = DELTATEXT_EMPTY
  if text:
    deltatext_code = DELTATEXT_NONEMPTY

  c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
                      next_timestamp, op,
                      prev_rev, revision, next_rev,
                      self.file_in_attic, self.file_executable,
                      self.file_size,
                      deltatext_code, self.fname,
                      self.mode, self.rev_to_branch_name(revision),
                      self.taglist.get(revision, []),
                      self.branchlist.get(revision, []))
  self.revs.write(str(c_rev) + "\n")
  StatsKeeper().record_c_rev(c_rev)

  # Only the first (author, log) pair seen for a digest is stored.
  if not self.metadata_db.has_key(digest):
    self.metadata_db[digest] = (author, log)
def parse_completed(self):
  """Register branch blockers for the parsed file and count the file.

  For every symbol recorded in self.taglist and self.branchlist, look
  up the branch name of the revision the symbol is attached to (via
  self.rev_to_branch_name).  If there is one, register the symbol with
  self.symbol_db as a blocker of that branch.  Finally increment
  self.num_files."""
  # Tags are handled first, then branches.
  for symbol_table in (self.taglist, self.branchlist):
    for revision, symbols in symbol_table.items():
      for symbol in symbols:
        parent_branch = self.rev_to_branch_name(revision)
        if parent_branch is None:
          continue
        self.symbol_db.register_branch_blocker(parent_branch, symbol)

  self.num_files += 1
def write_symbol_db(self):
  """Ask self.symbol_db to write itself out, by calling its write()
  method."""
  symbol_db = self.symbol_db
  symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  This data is used later to determine the valid SVNRevision ranges
  from which a file can be copied when creating a branch or tag in
  Subversion.  That is done by finding the "Opening" and the "Closing"
  for each file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  therefore sprouts from revision 1.2.  So 1.2 is the opening for BEE
  on path 'foo.c', and 1.3 is the closing for BEE on path 'foo.c'.
  There may be many revisions chronologically between 1.2 and 1.3, for
  example revisions on branches of 'foo.c', perhaps even on branch BEE
  itself.  But 1.3 is the next revision *on the same line* as 1.2, and
  that is why it is the closing revision for those symbolic names of
  which 1.2 is the opening.

  The reason for all this is to make branch and tag creation as
  efficient as possible by minimizing the number of copies and deletes
  per creation.  For example, revisions 1.2 and 1.3 of foo.c might
  correspond to revisions 17 and 30 in Subversion.  That means that
  when creating branch BEE, there is some motivation to do the copy
  from one of 17-30.  Now if there were another file, 'bar.c', whose
  opening and closing CVSRevisions for BEE corresponded to revisions
  24 and 39 in Subversion, we would know that the ideal thing would be
  to copy the branch from somewhere between 24 and 29, inclusive.
  """

  def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and register both with Cleanup."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # Maps each *source* cvs_path for which an opening has been noted
    # (see _note_default_branch_opening) to the list of (uncleaned)
    # symbolic names that this path has opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log the openings found in C_REV and, if C_REV.next_rev is not
    None, the matching closings.

    Each opening is written with SVN_REVNUM right away (deleted
    revisions open nothing).  Closings are only queued in the closings
    file; their revnums are determined later, in close()."""
    for name in c_rev.tags + c_rev.branches:
      self._note_default_branch_opening(c_rev, name)
      if c_rev.op != OP_DELETE:
        self._log(name, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      if c_rev.next_rev is None:
        continue
      # The next revision on this line of development closes NAME for
      # this source revision.  Its svn_revnum is not known yet, so
      # just remember its key for later processing.
      closing_key = c_rev.unique_key(c_rev.next_rev)
      self.closings.write('%s %s\n' % (name, closing_key))

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Write a single line to the symbol_openings_closings file,
    stating that SVN_REVNUM of CVS_PATH on BRANCH_NAME is either the
    opening or the closing (TYPE) of NAME (a symbolic name).

    A false BRANCH_NAME is written as '*'.  TYPE should only be one of
    the global constants OPENING or CLOSING."""
    # The revnum is zero-padded to 8 digits, which leaves room for
    # 99,999,999 SVN revs.  That *should* be enough.
    fields = (name, svn_revnum, type, branch_name or '*', cvs_path)
    self.symbolings.write('%s %.8d %s %s %s\n' % fields)

  def close(self):
    """Turn the queued closings into proper symbolings lines and close
    both files.

    For every line of the closings file, look up the svn_revnum and the
    CVSRevision of the closing revision's key and log a CLOSING line to
    the symbolings file."""
    # Needed to get from a rev_key to its c_rev.
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      name, rev_key = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)
      c_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum, c_rev.cvs_path, c_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Remember that C_REV.cvs_path has opened SYMBOLIC_NAME, by
    appending the name to that path's entry in
    self.open_paths_with_default_branches.

    NOTE(review): this is done for every C_REV passed in; nothing here
    checks whether C_REV actually is a default branch revision."""
    opened = self.open_paths_with_default_branches
    try:
      opened[c_rev.cvs_path].append(symbolic_name)
    except KeyError:
      opened[c_rev.cvs_path] = [symbolic_name]

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.cvs_path, log every name recorded for that path as a closing
    with SVN_REVNUM as the closing revision number, then forget the
    path.  Otherwise do nothing."""
    path = c_rev.cvs_path
    try:
      names = self.open_paths_with_default_branches[path]
    except KeyError:
      return
    for name in names:
      self._log(name, svn_revnum, path, None, CLOSING)
    # These openings are done with, so drop them from the table.
    del self.open_paths_with_default_branches[path]
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.
  In 'read' mode, PersistenceManager will open existing on-disk databases
  and the set_* methods will be unavailable."""

  def __init__(self, mode):
    """Open the databases in MODE and register the temporary ones with
    Cleanup.

    Raise RuntimeError if MODE is neither DB_OPEN_NEW nor
    DB_OPEN_READ."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")
    self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
    Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
    self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
    Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
    self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
    Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
    # The metadata and CVS revision databases are only ever read here,
    # whatever MODE is.
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      # NOTE: self.tags_db does not exist in a trunk-only conversion.
      self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.motivating_revnums = SDatabase(temp(MOTIVATING_REVNUMS), mode)
    Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

    # "branch_name" -> svn_revnum in which branch was last filled.
    # This is used by CVSCommit._pre_commit, to prevent creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def _require_write_mode(self):
    """Raise RuntimeError if this PersistenceManager is read-only."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    # Look the revnum up *before* building an SVNCommit: constructing
    # one for an unknown revnum would be wasted work, and for a false
    # SVN_REVNUM it would even consume a number from the SVNCommit
    # revision counter.
    c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
    if c_rev_keys is None:
      return None

    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      # The author and log message of the commit are taken from the
      # metadata of the first CVSRevision only.
      if digest is None:
        digest = c_rev.digest
        author, log_msg = self.svn_commit_metadata[digest]
        svn_commit.set_author(author)
        svn_commit.set_log_msg(log_msg)

    # A trunk-only conversion has no symbolic names to look at, so
    # there is nothing more to do.
    if Ctx().trunk_only:
      return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
      svn_commit.set_symbolic_name(name)
      svn_commit.set_date(date)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
    if motivating_revnum:
      svn_commit.set_motivating_revnum(int(motivating_revnum))
      svn_commit.set_date(date)

    if len(svn_commit.cvs_revs) and name:
      raise SVNCommit.SVNCommitInternalInconsistencyError(
          "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
          "symbolic name ('%s') to fill."
          % (_clean_symbolic_name(name),))

    return svn_commit

  def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS.

    Raise RuntimeError if this PersistenceManager is read-only."""
    self._require_write_mode()
    unique_keys = [c_rev.unique_key() for c_rev in cvs_revs]
    for key in unique_keys:
      Log().write(LOG_VERBOSE, " ", key)
    self.svn2cvs_db[str(svn_revnum)] = unique_keys
    for key in unique_keys:
      self.cvs2svn_db[key] = svn_revnum

  def set_name_and_date(self, svn_revnum, name, date):
    """Associate symbolic name NAME and DATE with SVN_REVNUM, and
    remember SVN_REVNUM as the revision in which NAME was last filled.

    NAME is allowed to be None.

    Raise RuntimeError if this PersistenceManager is read-only."""
    self._require_write_mode()
    self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
    self.last_filled[name] = svn_revnum

  def _get_name_and_date(self, svn_revnum):
    """Return a tuple containing the symbolic name and date associated
    with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
    associated with it."""
    return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))

  def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM as the value of SVN_REVNUM.

    Raise RuntimeError if this PersistenceManager is read-only."""
    self._require_write_mode()
    self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
class CVSCommit:
  """Each instance of this class contains a number of CVS Revisions
  that correspond to one or more Subversion Commits.  After all CVS
  Revisions are added to the grouping, calling process_revisions will
  generate a Subversion Commit (or Commits) for the set of CVS
  Revisions in the grouping."""

  def __init__(self, digest, author, log):
    """Create an empty grouping for the commit identified by DIGEST,
    made by AUTHOR with log message LOG."""
    self.digest = digest
    self.author = author
    self.log = log

    # Symbolic names for which the last source revision has already
    # been seen and for which the CVSRevisionAggregator has already
    # generated a fill SVNCommit.  See self.process_revisions().
    self.done_symbols = [ ]

    # Keys are the fnames of the CVSRevisions added so far.
    self.files = { }

    # Lists of CVSRevisions: adds and changes in one, deletes in the
    # other.
    self.changes = [ ]
    self.deletes = [ ]

    # Start out with a t_min higher than any incoming time T, and a
    # t_max lower than any incoming T.  This way the first T will push
    # t_min down to T, and t_max up to T, naturally (without any
    # special-casing), and successive times will then ratchet them
    # outward as appropriate.  (The shift is done on a long so that it
    # cannot overflow a plain int.)
    self.t_min = long(1) << 32
    self.t_max = 0

    # This will be set to the SVNCommit that occurs in self._commit.
    self.motivating_commit = None

    # This is a list of all non-primary commits motivated by the main
    # commit.  We gather these so that we can set their dates to the
    # same date as the primary commit.
    self.secondary_commits = [ ]

    # State for handling default branches.
    #
    # Here is a tempting, but ultimately nugatory, bit of logic, which
    # I share with you so you may appreciate the less attractive, but
    # refreshingly non-nugatory, logic which follows it:
    #
    # If some of the commits in this txn happened on a non-trunk
    # default branch, then those files will have to be copied into
    # trunk manually after being changed on the branch (because the
    # RCS "default branch" appears as head, i.e., trunk, in practice).
    # As long as those copies don't overwrite any trunk paths that
    # were also changed in this commit, then we can do the copies in
    # the same revision, because they won't cover changes that don't
    # appear anywhere/anywhen else.  However, if some of the trunk dst
    # paths *did* change in this commit, then immediately copying the
    # branch changes would lose those trunk mods forever.  So in this
    # case, we need to do at least that copy in its own revision.  And
    # for simplicity's sake, if we're creating the new revision for
    # even one file, then we just do all such copies together in the
    # new revision.
    #
    # Doesn't that sound nice?
    #
    # Unfortunately, Subversion doesn't support copies with sources
    # in the current txn.  All copies must be based in committed
    # revisions.  Therefore, we generate the above-described new
    # revision unconditionally.
    #
    # This is a list of c_revs, and a c_rev is appended for each
    # default branch commit that will need to be copied to trunk (or
    # deleted from trunk) in some generated revision following the
    # "regular" revision.
    self.default_branch_cvs_revisions = [ ]

  def __cmp__(self, other):
    """Order commits by t_max; break ties by t_min and then by
    digest."""
    return cmp((self.t_max, self.t_min, self.digest),
               (other.t_max, other.t_min, other.digest))

  def has_file(self, fname):
    """Return true if a revision of FNAME has been added to this
    commit."""
    return self.files.has_key(fname)

  def revisions(self):
    """Return a new list of all CVSRevisions in this commit: the
    changes followed by the deletes."""
    return self.changes + self.deletes

  def opens_symbolic_name(self, name):
    """Return 1 if any CVSRevision in this commit reports (through its
    own opens_symbolic_name method) that it opens NAME; else return
    0."""
    for c_rev in self.revisions():
      if c_rev.opens_symbolic_name(name):
        return 1
    return 0

  def add_revision(self, c_rev):
    """Add C_REV to this commit, widening the commit's time range to
    include C_REV's timestamp."""
    # ### ISSUE: It's possible, though unlikely, that the time range
    # of a commit could get gradually expanded to be arbitrarily
    # longer than COMMIT_THRESHOLD.  I'm not sure this is a huge
    # problem, and anyway deciding where to break it up would be a
    # judgement call.  For now, process_revisions() just logs a
    # warning if this happens.
    self.t_min = min(self.t_min, c_rev.timestamp)
    self.t_max = max(self.t_max, c_rev.timestamp)

    if c_rev.op == OP_DELETE:
      bucket = self.deletes
    else:
      # OP_CHANGE or OP_ADD
      bucket = self.changes
    bucket.append(c_rev)

    self.files[c_rev.fname] = 1

  def _pre_commit(self):
    """Generate any SVNCommits that must exist before the main commit,
    i.e. the fills of branches that are committed on here.  They are
    appended to self.secondary_commits."""

    def fill_needed(c_rev, pm):
      """Return 1 if this is the first commit on a new branch (for
      this file) and we need to fill the branch; else return 0
      (meaning that some other file's first commit on the branch has
      already done the fill for us).

      For an OP_ADD or OP_DELETE, return 1 only if the branch has no
      last filled revision at all.  For an OP_CHANGE, return 1 only if
      the previous revision was committed in a later Subversion
      revision than the one in which the branch was last filled.

      PM is a PersistenceManager to query.
      """
      # Equal '.' counts mean that c_rev is still on the same line of
      # development as its predecessor, so no fill can be needed.
      if c_rev.rev.count('.') == c_rev.prev_rev.count('.'):
        return 0

      svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
      # It should be the case that when we have a file F that is added
      # on branch B (thus, F on trunk is in state 'dead'), we generate
      # an SVNCommit to fill B iff the branch has never been filled
      # before.
      if c_rev.op in (OP_ADD, OP_DELETE):
        if pm.last_filled.get(c_rev.branch_name, None) is None:
          return 1
      elif c_rev.op == OP_CHANGE:
        if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
          return 1
      return 0

    # There may be multiple c_revs in this commit that would cause
    # branch B to be filled, but we only want to fill B once.  On the
    # other hand, there might be multiple branches committed on in
    # this commit.  Whatever the case, we should count exactly one
    # commit per branch, because we only fill a branch once per
    # CVSCommit.  This list tracks which branches we've already
    # counted.
    accounted_for_sym_names = [ ]

    for c_rev in self.changes + self.deletes:
      # If a commit is on a branch, we must ensure that the branch
      # path being committed exists (in HEAD of the Subversion
      # repository).  If it doesn't exist, we will need to fill the
      # branch.  After the fill, the path on which we're committing
      # will exist.
      if not c_rev.branch_name:
        continue
      if c_rev.branch_name in accounted_for_sym_names:
        continue
      if c_rev.branch_name in self.done_symbols:
        continue
      if not fill_needed(c_rev, Ctx()._persistence_manager):
        continue

      fill_commit = SVNCommit("pre-commit symbolic name '%s'"
                              % c_rev.branch_name)
      fill_commit.set_symbolic_name(c_rev.branch_name)
      self.secondary_commits.append(fill_commit)
      accounted_for_sym_names.append(c_rev.branch_name)

  def _commit(self):
    """Generate the primary SVNCommit that corresponds to this
    CVSCommit and store it in self.motivating_commit."""
    # Generate an SVNCommit unconditionally.  Even if the only change
    # in this CVSCommit is a deletion of an already-deleted file (that
    # is, a CVS revision in state 'dead' whose predecessor was also in
    # state 'dead'), the conversion will still generate a Subversion
    # revision containing the log message for the second dead
    # revision, because we don't want to lose that information.
    primary = SVNCommit("commit")
    self.motivating_commit = primary

    for c_rev in self.changes:
      primary.add_revision(c_rev)
      # Only make a change if we need to.  When 1.1.1.1 has an empty
      # deltatext, the explanation is almost always that we're looking
      # at an imported file whose 1.1 and 1.1.1.1 are identical.  On
      # such imports, CVS creates an RCS file where 1.1 has the
      # content, and 1.1.1.1 has an empty deltatext, i.e, the same
      # content as 1.1.  There's no reason to reflect this non-change
      # in the repository, so we want to do nothing in this case.  (If
      # we were really paranoid, we could make sure 1.1's log message
      # is the CVS-generated "Initial revision\n", but I think the
      # conditions below are strict enough.)
      if ((c_rev.deltatext_code != DELTATEXT_EMPTY
           or c_rev.rev != "1.1.1.1")
          and c_rev.is_default_branch_revision()):
        self.default_branch_cvs_revisions.append(c_rev)

    for c_rev in self.deletes:
      # When a file is added on a branch, CVS not only adds the file
      # on the branch, but generates a trunk revision (typically 1.1)
      # for that file in state 'dead'.  We only want to add this
      # revision if the log message is not the standard cvs fabricated
      # log message.
      if c_rev.prev_rev is None:
        # c_rev.branches may be empty if the originating branch has
        # been excluded.
        if not c_rev.branches:
          continue
        cvs_generated_msg = ('file %s was initially added on branch %s.\n'
                             % (c_rev.filename(),
                                c_rev.branches[0]))
        metadata = Ctx()._persistence_manager.svn_commit_metadata
        unused_author, log_msg = metadata[c_rev.digest]
        if log_msg == cvs_generated_msg:
          continue

      primary.add_revision(c_rev)
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    # There is a slight chance that we didn't actually register any
    # CVSRevisions with our SVNCommit (see the loop over self.deletes
    # above).  In that case the SVNCommit is not flushed to disk and
    # the revision counter it advanced is rolled back.
    if primary.cvs_revs:
      primary.flush()
    else:
      SVNCommit.revnum -= 1

    if not Ctx().trunk_only:
      for c_rev in self.revisions():
        Ctx()._symbolings_logger.log_revision(c_rev, primary.revnum)

  def _post_commit(self):
    """Generate any SVNCommits that we can perform now that _commit
    has happened.  That is, handle non-trunk default branches.

    Sometimes an RCS file has a non-trunk default branch, so a commit
    on that default branch would be visible in a default CVS checkout
    of HEAD.  If we don't copy that commit over to Subversion's trunk,
    then there will be no Subversion tree which corresponds to that
    CVS checkout.  Of course, in order to copy the path over, we may
    first need to delete the existing trunk there."""
    # Without default branch revs there is nothing to generate.
    if not len(self.default_branch_cvs_revisions):
      return

    # One SVNCommit carries all of our default branch c_revs.
    sync_commit = SVNCommit("post-commit default branch(es)")
    sync_commit.set_motivating_revnum(self.motivating_commit.revnum)
    for c_rev in self.default_branch_cvs_revisions:
      sync_commit.add_revision(c_rev)
      Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
                                                          sync_commit.revnum)
    self.secondary_commits.append(sync_commit)

  def process_revisions(self, done_symbols):
    """Process all the CVSRevisions that this instance has, creating
    one or more SVNCommits in the process.  Generate fill SVNCommits
    only for symbols not in DONE_SYMBOLS (avoids unnecessary
    fills).

    Return the primary SVNCommit that corresponds to this CVSCommit.
    The returned SVNCommit is the commit that motivated any other
    SVNCommits generated in this CVSCommit."""
    self.done_symbols = done_symbols
    seconds = self.t_max - self.t_min + 1

    Log().write(LOG_VERBOSE, '-' * 60)
    Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
    if seconds != 1:
      Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
      Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
                  % (time.ctime(self.t_max), seconds))
    else:
      Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
                  % time.ctime(self.t_max))

    if seconds > COMMIT_THRESHOLD + 1:
      Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
                  % (warning_prefix, COMMIT_THRESHOLD))

    # A trunk-only conversion consists of the primary commit alone.
    trunk_only = Ctx().trunk_only
    if not trunk_only:
      self._pre_commit()
    self._commit()
    if not trunk_only:
      self._post_commit()
      # All secondary commits get the date of the primary commit.
      for svn_commit in self.secondary_commits:
        svn_commit.set_date(self.motivating_commit.get_date())
        svn_commit.flush()

    return self.motivating_commit
2397 class SVNCommit:
2398 """This represents one commit to the Subversion Repository. There
2399 are three types of SVNCommits:
2401 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2403 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2405 3. Updates trunk to reflect the contents of a particular branch
2406 (this is to handle RCS default branches)."""
2408 # The revision number to assign to the next new SVNCommit.
2409 # We start at 2 because SVNRepositoryMirror uses the first commit
2410 # to create trunk, tags, and branches.
2411 revnum = 2
class SVNCommitInternalInconsistencyError(Exception):
  """Raised when the SVNCommit databases are found to be in a state
  that should be impossible."""
def __init__(self, description="", revnum=None, cvs_revs=None):
  """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.

  If REVNUM is given (and true), the SVNCommit corresponds to that
  revision number; otherwise it takes the next number from the
  class-wide SVNCommit.revnum counter and advances the counter.

  If CVS_REVS is given, it must be the exact set of CVSRevisions for
  REVNUM.  It is an error to pass CVS_REVS without REVNUM, but you may
  pass REVNUM without CVS_REVS, and then add a revision at a time by
  invoking add_revision()."""
  self._description = description

  if revnum:
    self.revnum = revnum
  else:
    self.revnum = SVNCommit.revnum
    SVNCommit.revnum += 1
  self.cvs_revs = cvs_revs or []

  # Revprop metadata for this commit.
  #
  # These initial values are placeholders.  At least the log and the
  # date should be different by the time these are used.
  #
  # They are private because their values should be returned encoded
  # in UTF8, but callers aren't required to set them in UTF8.
  # Therefore, accessor methods are used to set them, and
  # self.get_revprops() is used to get them, in dictionary form.
  self._author = Ctx().username
  self._log_msg = "This log message means an SVNCommit was used too soon."
  self._max_date = 0    # Latest date seen so far.

  # The (uncleaned) symbolic name that is filled in this SVNCommit, if
  # any.
  self.symbolic_name = None

  # If this commit is a default branch synchronization, this is the
  # subversion revision number of the *primary* commit where the
  # default branch changes actually happened.  It is None otherwise.
  #
  # It is possible for multiple synchronization commits to refer to
  # the same motivating commit revision number, and it is possible for
  # a single synchronization commit to contain CVSRevisions on
  # multiple different default branches.
  self.motivating_revnum = None

  # True only if this commit is a fill of a symbolic name that is a
  # tag; None in all other cases.
  self.is_tag = None
def set_symbolic_name(self, symbolic_name):
  """Record SYMBOLIC_NAME as the symbolic name filled by this
  SVNCommit (stored in self.symbolic_name)."""
  self.symbolic_name = symbolic_name
def set_motivating_revnum(self, revnum):
  """Record REVNUM as this SVNCommit's motivating revision number
  (stored in self.motivating_revnum)."""
  self.motivating_revnum = revnum
def set_author(self, author):
  """Store AUTHOR (a locally-encoded string) as the author of this
  SVNCommit.  No other method changes the author once the SVNCommit
  has been constructed."""
  self._author = author
def set_log_msg(self, msg):
  """Store MSG (a locally-encoded string) as the log message of this
  SVNCommit.  No other method changes the stored log message once the
  SVNCommit has been constructed."""
  self._log_msg = msg
def set_date(self, date):
  """Set the date of this SVNCommit to DATE (an integer).

  self.add_revision() also maintains the date, raising it whenever a
  CVSRevision with a later timestamp is added.  Calling this method is
  therefore often unnecessary, and a value set here can still be
  replaced by a subsequent add_revision() call."""
  self._max_date = date
def get_date(self):
  """Return the date of this SVNCommit as an integer, i.e. the latest
  date recorded so far."""
  latest_date = self._max_date
  return latest_date
def get_revprops(self):
  """Return the Subversion revprops for this SVNCommit as a dictionary
  with the keys 'svn:author', 'svn:log' and 'svn:date'.

  The author and the log message are converted with to_utf8().  If
  that conversion raises UnicodeError, warnings describing the commit
  are logged and the unconverted values are returned instead."""
  date = format_date(self._max_date)
  try:
    if self._author is None:
      utf8_author = None
    else:
      utf8_author = to_utf8(self._author)
    utf8_log = to_utf8(self.get_log_msg())
    revprops = { 'svn:author' : utf8_author,
                 'svn:log' : utf8_log,
                 'svn:date' : date }
    return revprops
  except UnicodeError:
    log = Log()
    log.write(LOG_WARN, '%s: problem encoding author or log message:'
              % warning_prefix)
    log.write(LOG_WARN, " author: '%s'" % self._author)
    log.write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
    log.write(LOG_WARN, " date: '%s'" % date)
    log.write(LOG_WARN,
              "(subversion rev %s) Related files:" % self.revnum)
    for c_rev in self.cvs_revs:
      log.write(LOG_WARN, " ", c_rev.fname)

    log.write(LOG_WARN, "Consider rerunning with (for example)",
              "'--encoding=latin1'.\n")
    # Handing back the original data (whose encoding is unknown) beats
    # both alternatives: aborting, or recording nothing at all.
    return { 'svn:author' : self._author,
             'svn:log' : self.get_log_msg(),
             'svn:date' : date }
def add_revision(self, cvs_rev):
  """Append CVS_REV to self.cvs_revs, and raise this SVNCommit's date
  to CVS_REV's timestamp if that timestamp is later than the latest
  date seen so far."""
  self.cvs_revs.append(cvs_rev)
  timestamp = cvs_rev.timestamp
  if timestamp > self._max_date:
    self._max_date = timestamp
def _is_primary_commit(self):
  """Return true if this is a primary SVNCommit, false otherwise.

  A commit is primary when it has neither a (true) symbolic_name nor
  a (true) motivating_revnum."""
  return not self.symbolic_name and not self.motivating_revnum
def flush(self):
  """Log the creation of this SVNCommit and hand its data to the
  persistence manager found in Ctx().

  The CVSRevisions are always stored.  The motivating revision number
  is stored only when it is not None, and the symbolic name and date
  are stored only for commits that are not primary commits."""
  Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
              % (self.revnum, self._description))

  persistence_manager = Ctx()._persistence_manager
  persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)

  if self.motivating_revnum is not None:
    persistence_manager.set_motivating_revnum(self.revnum,
                                              self.motivating_revnum)

  # Only non-primary commits need their date and/or symbolic name
  # stored.
  if not self._is_primary_commit():
    persistence_manager.set_name_and_date(
        self.revnum, self.symbolic_name, self._max_date)
def __str__(self):
  """Return a human-readable, multi-line description of this
  SVNCommit.  The text is meant for people; no promise is made that
  it stays machine-parseable."""
  pieces = [ "SVNCommit #: " + str(self.revnum) + "\n" ]
  if self.symbolic_name:
    pieces.append(" symbolic name: "
                  + _clean_symbolic_name(self.symbolic_name)
                  + "\n")
  else:
    pieces.append(" NO symbolic name\n")
  pieces.append(" debug description: " + self._description + "\n")
  pieces.append(" cvs_revs:\n")
  for c_rev in self.cvs_revs:
    pieces.append(" " + c_rev.unique_key() + "\n")
  return "".join(pieces)
def get_log_msg(self):
  """Return the log message for this SVNCommit: the stored message for
  a primary commit, or a manufactured message for a secondary commit.

  A commit with a symbolic name gets the symbolic-name message; failing
  that, a commit with a motivating revision number gets the default
  branch message."""
  if self.symbolic_name is not None:
    return self._log_msg_for_symbolic_name_commit()
  if self.motivating_revnum is not None:
    return self._log_msg_for_default_branch_commit()
  return self._log_msg
def _log_msg_for_symbolic_name_commit(self):
  """Return the log message for a manufactured commit that fills
  self.symbolic_name.  The message talks about a tag if self.is_tag is
  true, and about a branch otherwise."""
  if self.is_tag:
    symbol_kind = 'tag'
  else:
    symbol_kind = 'branch'

  # In Python 2.2.3, we could use textwrap.fill().  Oh well :-).
  # Instead, names of 13 or more characters are moved onto their own
  # line.
  clean_name = _clean_symbolic_name(self.symbolic_name)
  if len(clean_name) >= 13:
    separator = '\n'
  else:
    separator = ' '

  return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
         % (symbol_kind, separator, clean_name)
def _log_msg_for_default_branch_commit(self):
  """Return the log message for a manufactured commit that
  synchronizes a non-trunk default branch with trunk.  The message
  names self.motivating_revnum as the revision being compensated
  for."""
  template = ('This commit was generated by cvs2svn to compensate for '
              'changes in r%d,\n'
              'which included commits to RCS files with non-trunk default '
              'branches.\n')
  return template % self.motivating_revnum
class CVSRevisionAggregator:
  """Groups incoming CVSRevisions into CVSCommits, each of which
  represents at least one SVNCommit."""

  def __init__(self):
    """Open the databases read by the aggregator and install the
    symbolings logger, the persistence manager and the default
    branches database in Ctx()."""
    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    # Commits that are still accumulating revisions, keyed by digest
    # (or by a digest with '-' appended once a commit has been closed
    # to further changes; see process_revision()).
    self.cvs_commits = {}

    # Symbols waiting to be filled; only the keys are meaningful.
    self.pending_symbols = {}

    # Symbols for which the last CVSRevision that is a source for the
    # symbol has already been seen.  Their final fill has been done,
    # so they never need to be filled again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit object.  It is kept
    # only for its date, which is copied onto the SVNCommits created in
    # self.attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def process_revision(self, c_rev):
    """Add C_REV to the pending commit with its digest (creating that
    commit if necessary), after first retiring every pending commit
    that C_REV is too late to join.  Retired commits are processed in
    sorted order, and symbols are committed as they become closable."""
    # Scan the commits accumulated so far to see whether any of them
    # is ready for processing now.
    ready_queue = [ ]
    for digest_key, cvs_commit in self.cvs_commits.items():
      if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
        ready_queue.append(cvs_commit)
        del self.cvs_commits[digest_key]
      elif cvs_commit.has_file(c_rev.fname):
        # The inbound revision touches a file that is already part of
        # this pending commit, so close the pending commit to further
        # changes.  It is not flushed yet, because other pending
        # commits may be dated before it.
        # ### ISSUE: the has_file() check is not optimal.  It fixes
        # the dataloss bug where revisions would get lost if checked
        # in too quickly, but it can also break commits apart.  The
        # correct fix would require tracking the dependencies between
        # change sets and committing them in proper order.
        #
        # Closing is done by re-filing the commit under a key that is
        # not yet in use in self.cvs_commits.
        closed_key = digest_key + '-'
        while self.cvs_commits.has_key(closed_key):
          closed_key = closed_key + '-'
        self.cvs_commits[closed_key] = cvs_commit
        del self.cvs_commits[digest_key]

    # File the inbound revision under its digest among the commits that
    # are still open.
    if not self.cvs_commits.has_key(c_rev.digest):
      author, log = self.metadata_db[c_rev.digest]
      self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest,
                                                 author, log)
    self.cvs_commits[c_rev.digest].add_revision(c_rev)

    # Whatever is in ready_queue now must be processed, because C_REV
    # could not possibly belong to any of it.  Process in sorted
    # (time) order.
    ready_queue.sort()

    # Symbols must be attempted for this c_rev even when no commit is
    # ready.
    if len(ready_queue) == 0:
      self.attempt_to_commit_symbols(ready_queue, c_rev)

    while ready_queue:
      cvs_commit = ready_queue.pop(0)
      self.latest_primary_svn_commit \
          = cvs_commit.process_revisions(self.done_symbols)
      # ready_queue now holds only the commits still waiting.
      self.attempt_to_commit_symbols(ready_queue, c_rev)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done (unless this is a
    trunk-only conversion)."""
    remaining = [ ]
    for key, cvs_commit in self.cvs_commits.items():
      remaining.append((cvs_commit, key))

    remaining.sort()
    for cvs_commit, key in remaining:
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      del self.cvs_commits[key]
      self.attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
    """Generate one SVNCommit for each symbol in self.pending_symbols
    that no commit in QUEUED_COMMITS or self.cvs_commits.values()
    still opens.

    If C_REV is not None, first add to self.pending_symbols any
    symbols for which C_REV is the last CVSRevision."""
    # Unless this is a trunk-only conversion, look up the symbolic
    # names for which this c_rev is the last *source* CVSRevision and
    # add them to the ones left over from earlier passes.
    if c_rev and not Ctx().trunk_only:
      for sym in self.last_revs_db.get(c_rev.unique_key(), []):
        self.pending_symbols[sym] = None

    # Collect the symbols that still have *source* CVSRevisions in a
    # pending or queued commit.
    candidate_commits = self.cvs_commits.values() + queued_commits
    open_symbols = {}
    for sym in self.pending_symbols.keys():
      for cvs_commit in candidate_commits:
        if cvs_commit.opens_symbolic_name(sym):
          open_symbols[sym] = None
          break

    # Work through the pending symbols in sorted order so that the
    # outcome does not depend on the order in which the dict hands its
    # keys back; this keeps test results the same on all platforms.
    closable_symbols = self.pending_symbols.keys()
    closable_symbols.sort()
    for sym in closable_symbols:
      if open_symbols.has_key(sym):
        # sym is still open--don't close it.
        continue
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
  """Interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file and the
  SYMBOL_OFFSETS_DB.  Finds and returns the opening and closing
  Subversion revision numbers that apply to a given symbolic name."""

  def __init__(self):
    """Open SYMBOL_OPENINGS_CLOSINGS_SORTED for reading and copy the
    whole offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    # The offsets database is really small and is read and updated a
    # fair bit, so it is kept in a plain dictionary.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for symbol in offsets_db.db.keys():
      self.offsets[symbol] = offsets_db[symbol]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Return a new SymbolicNameFillingGuide for SYMBOLIC_NAME built
    from the openings and closings recorded up to and including
    SVN_REVNUM.

    If an opening is encountered in this fill but its closing happens
    later than SVN_REVNUM, the closing is not part of this fill (and is
    discarded when it is encountered in a later fill).  That is fine:
    a valid fill is still possible without the closing, and the policy
    is to fill what we can as soon as we can."""
    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # A symbol may have no recorded offset: it's possible to have a
    # branch start with a file that was added on a branch.
    if not self.offsets.has_key(symbolic_name):
      return SymbolicNameFillingGuide(openings_closings_map)

    # Resume reading where the previous fill of this symbol stopped.
    self.symbolings.seek(self.offsets[symbolic_name])

    while 1:
      # Remember where this line starts, so that the line on which the
      # loop stops can be re-read by the next fill.
      line_start = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      name, revnum, kind, branch_name, cvs_path = line.split(" ", 4)
      if branch_name == '*':
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      else:
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      revnum = int(revnum)
      if revnum > svn_revnum or name != symbolic_name:
        break
      openings_closings_map.register(svn_path, revnum, kind)

    # If anything was registered, move the stored offset to the start
    # of the last line that was read.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = line_start

    return SymbolicNameFillingGuide(openings_closings_map)
class SvnRevisionRange:
  """The range of subversion revision numbers from which a path can be
  copied.

  self.opening_revnum is the number of the earliest such revision.
  self.closing_revnum is one higher than the number of the last such
  revision, or None if no closing has been registered."""

  def __init__(self, opening_revnum):
    """Create a range that opens at OPENING_REVNUM and has no closing
    yet."""
    self.opening_revnum = opening_revnum
    self.closing_revnum = None

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing of this range, unless a
    closing has been registered already."""
    # A non-trunk default branch can produce several closings; only
    # the first one encountered counts.
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:CLOSING]', or '[OPENING:]' when there is no
    closing."""
    if self.closing_revnum is not None:
      return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
    return '[%d:]' % (self.opening_revnum,)
class OpeningsClosingsMap:
  """A dictionary of openings and closings for a symbolic name in the
  current SVNCommit.

  The user should call self.register() for the openings and closings,
  and can then read the accumulated ranges back with
  self.get_things()."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""
    self.name = symbolic_name

    # Maps each SVN_PATH to its SvnRevisionRange object.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.

    SVN_PATH is the source path that needs to be copied into the
    symbolic name.  SVN_REVNUM is either the first svn revision number
    that can be copied from (an opening) or the last, not inclusive,
    svn revision number that can be copied from (a closing).  TYPE
    says which of the two it is (OPENING or CLOSING).

    An opening always starts a fresh range for SVN_PATH.  A closing
    only has an effect if the opening for the same SVN_PATH was
    registered before it; otherwise it is discarded.

    It is not necessary to pass a corresponding closing for every
    opening."""
    if type == OPENING:
      # Openings are always recorded.
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return

    if type != CLOSING:
      return

    # A closing counts only when the opening for that path is already
    # on record.
    revision_range = self.things.get(svn_path)
    if revision_range is not None:
      revision_range.add_closing(svn_revnum)

  def is_empty(self):
    """Return true if no opening has been recorded so far, false
    otherwise."""
    return len(self.things) == 0

  def get_things(self):
    """Return the (svn_path, SvnRevisionRange) pairs for all svn_paths
    that have a recorded range, as given by the dictionary's
    items()."""
    return self.things.items()
2886 class SymbolicNameFillingGuide:
2887 """A node tree representing the source paths to be copied to fill
2888 self.symbolic_name in the current SVNCommit.
2890 self._node_tree is the root of the directory tree, in the form {
2891 path_component : subnode }. Leaf nodes are instances of
2892 SvnRevisionRange. Intermediate (directory) nodes are dictionaries
2893 mapping relative names to subnodes.
2895 By walking self._node_tree and calling self.get_best_revnum() on
2896 each node, the caller can determine what subversion revision number
2897 to copy the path corresponding to that node from. self._node_tree
2898 should be treated as read-only.
2900 The caller can then descend to sub-nodes to see if their "best
2901 revnum" differs from their parents' and if it does, take appropriate
2902 actions to "patch up" the subtrees."""
2904 def __init__(self, openings_closings_map):
2905 """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
2906 store into it the openings and closings from
2907 OPENINGS_CLOSINGS_MAP."""
2909 self.name = openings_closings_map.name
2911 # The dictionary that holds our node tree as a map { node_key :
2912 # node }.
2913 self._node_tree = { }
2915 for svn_path, svn_revision_range in openings_closings_map.get_things():
2916 (head, tail) = _path_split(svn_path)
2917 self._get_node_for_path(head)[tail] = svn_revision_range
2919 #self.print_node_tree(self._node_tree)
2921 def _get_node_for_path(self, svn_path):
2922 """Return the node key for svn_path, creating new nodes as needed."""
2923 # Walk down the path, one node at a time.
2924 node = self._node_tree
2925 for component in svn_path.split('/'):
2926 if node.has_key(component):
2927 node = node[component]
2928 else:
2929 old_node = node
2930 node = {}
2931 old_node[component] = node
2933 return node
2935 def get_best_revnum(self, node, preferred_revnum):
2936 """Determine the best subversion revision number to use when
2937 copying the source tree beginning at NODE. Returns a
2938 subversion revision number.
2940 PREFERRED_REVNUM is passed to best_rev and used to calculate the
2941 best_revnum."""
2943 def score_revisions(svn_revision_ranges):
2944 """Return a list of revisions and scores based on
2945 SVN_REVISION_RANGES. The returned list looks like:
2947 [(REV1 SCORE1), (REV2 SCORE2), ...]
2949 where the tuples are sorted by revision number.
2950 SVN_REVISION_RANGES is a list of SvnRevisionRange objects.
2952 For each svn revision that appears as either an opening_revnum
2953 or closing_revnum for one of the svn_revision_ranges, output a
2954 tuple indicating how many of the SvnRevisionRanges include that
2955 svn_revision in its range. A score thus indicates that copying
2956 the corresponding revision (or any following revision up to the
2957 next revision in the list) of the object in question would yield
2958 that many correct paths at or underneath the object. There may
2959 be other paths underneath it which are not correct and would
2960 need to be deleted or recopied; those can only be detected by
2961 descending and examining their scores.
2963 If OPENINGS is empty, return the empty list."""
2964 openings = [ x.opening_revnum
2965 for x in svn_revision_ranges ]
2966 closings = [ x.closing_revnum
2967 for x in svn_revision_ranges
2968 if x.closing_revnum is not None ]
2970 # First look for easy out.
2971 if not openings:
2972 return []
2974 # Create a list with both openings (which increment the total)
2975 # and closings (which decrement the total):
2976 things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
2977 # Sort by revision number:
2978 things.sort()
2979 # Initialize output list with zeroth element of things. This
2980 # element must exist, because it was already verified that
2981 # openings is not empty.
2982 scores = [ things[0] ]
2983 total = scores[-1][1]
2984 for (rev, change) in things[1:]:
2985 total += change
2986 if rev == scores[-1][0]:
2987 # Same revision as last entry; modify last entry:
2988 scores[-1] = (rev, total)
2989 else:
2990 # Previously-unseen revision; create new entry:
2991 scores.append((rev, total))
2992 return scores
2994 def best_rev(scores, preferred_rev):
2995 """Return the revision with the highest score from SCORES, a list
2996 returned by score_revisions(). When the maximum score is shared
2997 by multiple revisions, the oldest revision is selected, unless
2998 PREFERRED_REV is one of the possibilities, in which case, it is
2999 selected."""
3000 max_score = 0
3001 preferred_rev_score = -1
3002 rev = SVN_INVALID_REVNUM
3003 if preferred_rev is None:
3004 # Comparison order of different types is arbitrary. Do not
3005 # expect None to compare less than int values below.
3006 # In Python 2.3 None compares with ints like negative infinity.
3007 # In Python 2.0 None compares with ints like positive infinity.
3008 preferred_rev = SVN_INVALID_REVNUM
3009 for revnum, count in scores:
3010 if count > max_score:
3011 max_score = count
3012 rev = revnum
3013 if revnum <= preferred_rev:
3014 preferred_rev_score = count
3015 if preferred_rev_score == max_score:
3016 rev = preferred_rev
3017 return rev, max_score
3019 # Aggregate openings and closings from the rev tree
3020 svn_revision_ranges = self._list_revnums(node)
3022 # Score the lists
3023 scores = score_revisions(svn_revision_ranges)
3025 revnum, max_score = best_rev(scores, preferred_revnum)
3027 if revnum == SVN_INVALID_REVNUM:
3028 raise FatalError("failed to find a revision "
3029 + "to copy from when copying %s" % name)
3030 return revnum, max_score
3032 def _list_revnums(self, node):
3033 """Return a list of all the SvnRevisionRanges (including
3034 duplicates) for all leaf nodes at and under NODE."""
3036 if isinstance(node, SvnRevisionRange):
3037 # It is a leaf node.
3038 return [ node ]
3039 else:
3040 # It is an intermediate node.
3041 revnums = []
3042 for key, subnode in node.items():
3043 revnums.extend(self._list_revnums(subnode))
3044 return revnums
3046 def get_sources(self):
3047 """Return the list of sources for this symbolic name.
3049 The Project instance defines what are legitimate sources. Raise
3050 an exception if a change occurred outside of the source
3051 directories."""
3053 return self._get_sub_sources('', self._node_tree)
3055 def _get_sub_sources(self, start_svn_path, start_node):
3056 """Return the list of sources for this symbolic name, starting the
3057 search at path START_SVN_PATH, which is node START_NODE. This is
3058 a helper method, called by get_sources() (see)."""
3060 project = Ctx().project
3061 if isinstance(start_node, SvnRevisionRange):
3062 # This implies that a change was found outside of the
3063 # legitimate sources. This should never happen.
3064 raise
3065 elif project.is_source(start_svn_path):
3066 # This is a legitimate source. Add it to list.
3067 return [ FillSource(start_svn_path, start_node) ]
3068 else:
3069 # This is a directory that is not a legitimate source. (That's
3070 # OK because it hasn't changed directly.) But directories
3071 # within it have been changed, so we need to search recursively
3072 # to find their enclosing sources.
3073 sources = []
3074 for entry, node in start_node.items():
3075 svn_path = _path_join(start_svn_path, entry)
3076 sources.extend(self._get_sub_sources(svn_path, node))
3078 return sources
3080 def print_node_tree(self, node, name='/', indent_depth=0):
3081 """For debugging purposes. Prints all nodes in TREE that are
3082 rooted at NODE. INDENT_DEPTH is used to indent the output of
3083 recursive calls."""
3084 if not indent_depth:
3085 print "TREE", "=" * 75
3086 if isinstance(node, SvnRevisionRange):
3087 print "TREE:", " " * (indent_depth * 2), name, node
3088 else:
3089 print "TREE:", " " * (indent_depth * 2), name
3090 for key, value in node.items():
3091 self.print_node_tree(value, key, (indent_depth + 1))
class FillSource:
  """Representation of a fill source used by the symbol filler in
  SVNRepositoryMirror."""

  def __init__(self, prefix, node):
    """Create a fill source for PREFIX and NODE that has no score and
    no revision number yet."""
    self.score = None
    self.revnum = None
    self.prefix = prefix
    self.node = node

  def set_score(self, score, revnum):
    """Store SCORE and REVNUM on this fill source."""
    self.revnum = revnum
    self.score = score

  def __cmp__(self, other):
    """Compare so that sorting FillSources puts higher scores first.
    Raise TypeError if either side has no score yet."""
    for source in (self, other):
      if source.score is None:
        raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose to get descending order.
    return cmp(other.score, self.score)
3117 class SVNRepositoryMirror:
3118 """Mirror a Subversion Repository as it is constructed, one
3119 SVNCommit at a time. The mirror is skeletal; it does not contain
3120 file contents. The creation of a dumpfile or Subversion repository
3121 is handled by delegates. See self.add_delegate method for how to
3122 set delegates.
3124 The structure of the repository is kept in two databases and one
3125 hash. The revs_db database maps revisions to root node keys, and
3126 the nodes_db database maps node keys to nodes. A node is a hash
3127 from directory names to keys. Both the revs_db and the nodes_db are
3128 stored on disk and each access is expensive.
3130 The nodes_db database only has the keys for old revisions. The
3131 revision that is being contructed is kept in memory in the new_nodes
3132 hash which is cheap to access.
3134 You must invoke _start_commit between SVNCommits.
3136 *** WARNING *** All path arguments to methods in this class CANNOT
3137 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
  """Raised on an attempt to add a path to the repository mirror when
  that path already exists in the youngest revision of the
  repository."""
class SVNRepositoryMirrorUnexpectedOperationError(Exception):
  """Raised when a CVSRevision turns out to have an unexpected
  operation (OP) value."""
class SVNRepositoryMirrorInvalidFillOperationError(Exception):
  """Raised when an empty SymbolicNameFillingGuide is returned during
  a fill where the branch in question already exists."""
def __init__(self):
  """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
  self.delegates = [ ]

  # Start at revision 0 without a root node.  The root node is created
  # by _open_writable_root_node.
  self.youngest = 0
  self.new_root_key = None
  self.new_nodes = { }

  # The counterpart of the 'revisions' table in a Subversion fs.
  self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

  # The counterpart of the 'nodes' table in a Subversion fs.  (There
  # is no 'representations' or 'strings' table, because only metadata
  # is tracked, not file contents.)
  self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

  if not Ctx().trunk_only:
    ###PERF IMPT: Suck this into memory.
    self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
  """Initialize the repository by creating the trunk directory and,
  unless the conversion is trunk-only, the branches and tags
  directories.  DATE becomes the date of the initial commit.

  Call this only after all delegates have been added to the
  repository mirror."""
  # A 'fake' SVNCommit (revision 1) is used so that the revprops
  # handling of SVNCommit can be reused.
  initial_commit = SVNCommit("Initialization", 1)
  initial_commit.set_date(date)
  initial_commit.set_log_msg("New repository initialized by cvs2svn.")

  self._start_commit(initial_commit)
  self._mkdir(Ctx().project.trunk_path)
  if Ctx().trunk_only:
    return
  self._mkdir(Ctx().project.branches_path)
  self._mkdir(Ctx().project.tags_path)
def _start_commit(self, svn_commit):
  """Start a new commit for SVN_COMMIT.

  Any commit already in progress (self.youngest > 0) is ended first.
  Then the in-memory state is reset for SVN_COMMIT's revision number
  and the delegates are told about 'start_commit'."""
  commit_in_progress = self.youngest > 0
  if commit_in_progress:
    self._end_commit()

  self.new_nodes = { }
  self.new_root_key = None
  self.youngest = svn_commit.revnum

  self._invoke_delegates('start_commit', svn_commit)
def _end_commit(self):
  """Called at the end of each commit.  Records the root node key of
  the youngest revision in revs_db and copies the newly created nodes
  to the on-disk nodes db."""
  revision_key = str(self.youngest)
  if self.new_root_key is not None:
    self.revs_db[revision_key] = self.new_root_key
    # Move the nodes created in this revision into the nodes_db.
    for node_key, node_contents in self.new_nodes.items():
      self.nodes_db[node_key] = node_contents
  else:
    # Nothing changed in this revision, so it shares its root node
    # with the previous revision.
    self.revs_db[revision_key] = self.revs_db[str(self.youngest - 1)]
def _get_node(self, key):
  """Return the node contents for KEY.  The node is taken from
  self.new_nodes if KEY is present there, and from self.nodes_db
  otherwise."""
  try:
    return self.new_nodes[key]
  except KeyError:
    # Not created in the revision under construction; it must be an
    # older node.
    return self.nodes_db[key]
def _open_readonly_node(self, path, revnum):
  """Look up PATH in revision REVNUM without modifying anything.
  Return the node key of PATH if the path exists, else None."""
  # Find the key of the root node to start from.
  if revnum != self.youngest:
    node_key = self.revs_db[str(revnum)]
  elif self.new_root_key is not None:
    node_key = self.new_root_key
  else:
    # The youngest revision has no root of its own yet, so it looks
    # the same as the previous revision.
    node_key = self.revs_db[str(self.youngest - 1)]

  # Descend one path component at a time.
  for component in path.split('/'):
    node_key = self._get_node(node_key).get(component, None)
    if node_key is None:
      return None

  return node_key
def _open_writable_root_node(self):
  """Open a writable root node and return (key, contents).

  If the current root node is already writable it is returned
  immediately.  Otherwise a new root node is created under a fresh
  key; its contents are those of the root node of the previous
  revision, or empty when self.youngest is less than 2."""
  if self.new_root_key is None:
    if self.youngest >= 2:
      root_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
    else:
      root_contents = { }
    self.new_root_key = gen_key()
    self.new_nodes = { self.new_root_key: root_contents }
    return self.new_root_key, root_contents

  return self.new_root_key, self.new_nodes[self.new_root_key]
def _open_writable_node(self, svn_path, create):
  """Open a writable node for the path SVN_PATH and return
  (key, contents).

  If CREATE is true, SVN_PATH and any missing directories leading to
  it are created, and the delegates receive 'mkdir' for every created
  component except the last one.  If CREATE is false and some
  component does not exist, return (None, None)."""
  node_key, node_contents = self._open_writable_root_node()

  # Descend along the path, one component at a time.
  walked_path = None
  parts = svn_path.split('/')
  last_index = len(parts) - 1
  for index in range(len(parts)):
    part = parts[index]
    walked_path = _path_join(walked_path, part)
    child_key = node_contents.get(part, None)
    if child_key is None:
      if not create:
        # The component is missing and we were not asked to create
        # it, so give up.
        return None, None
      # The component is missing, so create it.
      child_contents = { }
      child_key = gen_key()
      self.new_nodes[child_key] = child_contents
      node_contents[part] = child_key
      if index < last_index:
        self._invoke_delegates('mkdir', walked_path)
    else:
      # The component exists.
      child_contents = self.new_nodes.get(child_key, None)
      if child_contents is None:
        # The node is not writable yet: take its contents from the
        # nodes_db and file them under a fresh key.
        child_contents = self.nodes_db[child_key]
        child_key = gen_key()
        self.new_nodes[child_key] = child_contents
        node_contents[part] = child_key

    node_key = child_key
    node_contents = child_contents

  return child_key, child_contents
3304 def _path_exists(self, path):
3305 """If PATH exists in self.youngest of the svn repository mirror,
3306 return true, else return None.
3308 PATH must not start with '/'."""
3309 return self._open_readonly_node(path, self.youngest) is not None
3311 def _fast_delete_path(self, parent_path, parent_contents, component):
3312 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3313 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3314 in PARENT_CONTENTS."""
3315 if parent_contents.has_key(component):
3316 del parent_contents[component]
3317 self._invoke_delegates('delete_path',
3318 _path_join(parent_path, component))
3320 def _delete_path(self, svn_path, should_prune=False):
3321 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
3322 all ancestor directories that are made empty when SVN_PATH is deleted.
3323 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
3325 NOTE: This function ignores requests to delete the root directory
3326 or any directory for which Ctx().project.is_unremovable() returns
3327 True, either directly or by pruning."""
3329 if svn_path == '' or Ctx().project.is_unremovable(svn_path):
3330 return
3332 (parent_path, entry,) = _path_split(svn_path)
3333 if parent_path:
3334 parent_key, parent_contents = \
3335 self._open_writable_node(parent_path, False)
3336 else:
3337 parent_key, parent_contents = self._open_writable_root_node()
3339 if parent_key is not None:
3340 self._fast_delete_path(parent_path, parent_contents, entry)
3341 # The following recursion makes pruning an O(n^2) operation in the
3342 # worst case (where n is the depth of SVN_PATH), but the worst case
3343 # is probably rare, and the constant cost is pretty low. Another
3344 # drawback is that we issue a delete for each path and not just
3345 # a single delete for the topmost directory pruned.
3346 if should_prune and len(parent_contents) == 0:
3347 self._delete_path(parent_path, True)
3349 def _mkdir(self, path):
3350 """Create PATH in the repository mirror at the youngest revision."""
3351 self._open_writable_node(path, True)
3352 self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
  """Announce to the delegates that the content of CVS_REV's svn_path
  changed in the revision being built."""
  # The mirror's nodes are left alone: the mirror only tracks which
  # paths exist, and a file content change neither adds nor removes
  # a path.
  commit_item = SVNCommitItem(cvs_rev, False)
  self._invoke_delegates('change_path', commit_item)
def _add_path(self, cvs_rev):
  """Add CVS_REV's svn_path to the repository mirror (creating any
  missing parent directories) and invoke the 'add_path' delegate
  method for it."""
  self._open_writable_node(cvs_rev.svn_path, True)
  commit_item = SVNCommitItem(cvs_rev, True)
  self._invoke_delegates('add_path', commit_item)
def _copy_path(self, src_path, dest_path, src_revnum):
  """Copy SRC_PATH at subversion revision number SRC_REVNUM to
  DEST_PATH.  In the youngest revision of the repository, DEST_PATH's
  parent *must* exist, but DEST_PATH *cannot* exist.

  Return the node key and the contents of the new node at DEST_PATH
  as a dictionary.

  Raise self.SVNRepositoryMirrorPathExistsError if DEST_PATH already
  exists in the mirror."""
  # Get the key and contents of the node at our src_path.
  src_key = self._open_readonly_node(src_path, src_revnum)
  src_contents = self._get_node(src_key)

  # Get the parent path and the base name of the dest_path.
  (dest_parent, dest_basename,) = _path_split(dest_path)
  if dest_parent:
    dest_parent_key, dest_parent_contents = \
        self._open_writable_node(dest_parent, False)
  else:
    # DEST_PATH sits directly under the root.  _open_writable_node()
    # cannot resolve an empty parent path, so open the root node
    # itself, exactly as _delete_path() does.
    dest_parent_key, dest_parent_contents = \
        self._open_writable_root_node()

  if dest_parent_contents.has_key(dest_basename):
    msg = "Attempt to add path '%s' to repository mirror " % dest_path
    msg = msg + "when it already exists in the mirror."
    raise self.SVNRepositoryMirrorPathExistsError(msg)

  dest_parent_contents[dest_basename] = src_key
  self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

  # Yes sir, src_key and src_contents are also the contents of the
  # destination.  This is a cheap copy, remember!  :-)
  return src_key, src_contents
def _fill_symbolic_name(self, svn_commit):
  """Perform all copies necessary to create as much of the tag or
  branch SVN_COMMIT.symbolic_name as possible given the current
  revision of the repository mirror.

  The symbolic name is guaranteed to exist in the Subversion
  repository by the end of this call, even if there are no paths
  under it.

  Raise self.SVNRepositoryMirrorInvalidFillOperationError if there
  are no sources for the symbol but its branch path already exists."""
  symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
      svn_commit.symbolic_name, self.youngest)
  # Get the list of sources for the symbolic name.
  sources = symbol_fill.get_sources()

  if sources:
    symbol = svn_commit.symbolic_name
    if self.tags_db.has_key(symbol):
      dest_prefix = Ctx().project.get_tag_path(symbol)
    else:
      dest_prefix = Ctx().project.get_branch_path(symbol)

    dest_key = self._open_writable_node(dest_prefix, False)[0]
    self._fill(symbol_fill, dest_prefix, dest_key, sources)
    return

  # We can only get here for a branch whose first commit is an add
  # (as opposed to a copy).
  dest_path = Ctx().project.get_branch_path(symbol_fill.name)
  if self._path_exists(dest_path):
    msg = "Error filling branch '" \
          + _clean_symbolic_name(symbol_fill.name) + "'.\n"
    msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
    msg = msg + "attempted to create a branch that already exists."
    raise self.SVNRepositoryMirrorInvalidFillOperationError(msg)

  # If our symbol_fill was empty, that means that our first commit
  # on the branch was to a file added on the branch, and that this
  # is our first fill of that branch.
  #
  # This case is covered by test 16.
  #
  # ...we create the branch by copying trunk from our current
  # revision number minus 1.
  source_path = Ctx().project.trunk_path
  entries = self._copy_path(source_path, dest_path,
                            svn_commit.revnum - 1)[1]
  # Now since we've just copied trunk to a branch that's *supposed*
  # to be empty, we delete any entries in the copied directory.
  # Iterate over a snapshot of the names.
  for entry in list(entries.keys()):
    # Delete but don't prune.
    self._delete_path(dest_path + '/' + entry)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
  """Fill the tag or branch at DEST_PREFIX + PATH with items from
  SOURCES, and recurse into the child items.

  DEST_PREFIX is the prefix of the destination directory, e.g.
  '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
  FillSource instances that are candidates to be copied to the
  destination.  DEST_KEY is the node key of the destination, or
  None if the destination does not yet exist.

  PATH is the path relative to DEST_PREFIX.  If PATH is None, we
  are at the top level, e.g. '/tags/my_tag'.

  PARENT_SOURCE_PREFIX is the source prefix that was used to copy
  the parent directory, and PREFERRED_REVNUM is an int which is the
  source revision number that the caller (who may have copied the
  destination's parent) used to perform its copy.  If
  PREFERRED_REVNUM is None, then no revision is preferable to any
  other (which probably means that no copies have happened yet).

  PRUNE_OK means that a copy has been made in this recursion, and
  it's safe to prune directories that are not in
  SYMBOL_FILL._node_tree, provided that said directory has a source
  prefix of one of the PARENT_SOURCE_PREFIX.

  PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
  should only be passed in by recursive calls."""
  # Calculate scores and revnums for all sources.
  for candidate in sources:
    best_revnum, best_score = symbol_fill.get_best_revnum(
        candidate.node, preferred_revnum)
    candidate.set_score(best_score, best_revnum)

  # Sort the sources in descending score order so that we will make
  # an eventual copy from the source with the highest score.
  sources.sort()
  copy_source = sources[0]

  src_path = _path_join(copy_source.prefix, path)
  dest_path = _path_join(dest_prefix, path)

  # Figure out if we shall copy to this destination and delete any
  # destination path that is in the way.
  if dest_key is None:
    must_copy = 1
  elif prune_ok and (parent_source_prefix != copy_source.prefix
                     or copy_source.revnum != preferred_revnum):
    # We are about to replace the destination, so we need to remove
    # it before we perform the copy.
    self._delete_path(dest_path)
    must_copy = 1
  else:
    must_copy = 0

  if must_copy:
    dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                             copy_source.revnum)
    prune_ok = 1
  else:
    dest_entries = self._get_node(dest_key)

  # Create the SRC_ENTRIES hash from SOURCES.  The keys are path
  # elements and the values are lists of FillSource instances where
  # this path element exists.
  src_entries = {}
  for candidate in sources:
    if not isinstance(candidate.node, SvnRevisionRange):
      for name, child_node in candidate.node.items():
        child_sources = src_entries.setdefault(name, [])
        child_sources.append(FillSource(candidate.prefix, child_node))

  if prune_ok:
    # Collect the entries in DEST_ENTRIES that are not in SRC_ENTRIES.
    stale_names = [name for name in dest_entries.keys()
                   if not src_entries.has_key(name)]
    if stale_names:
      if not self.new_nodes.has_key(dest_key):
        # Make the destination writable before modifying its entries.
        dest_key, dest_entries = self._open_writable_node(dest_path, True)
      # Sort the delete list to get "diffable" dumpfiles.
      stale_names.sort()
      for name in stale_names:
        self._fast_delete_path(dest_path, dest_entries, name)

  # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
  child_names = list(src_entries.keys())
  child_names.sort()
  for name in child_names:
    self._fill(symbol_fill, dest_prefix, dest_entries.get(name, None),
               src_entries[name], _path_join(path, name),
               copy_source.prefix, copy_source.revnum, prune_ok)
3542 def _synchronize_default_branch(self, svn_commit):
3543 """Propagate any changes that happened on a non-trunk default
3544 branch to the trunk of the repository. See
3545 CVSCommit._post_commit() for details on why this is necessary."""
3546 for cvs_rev in svn_commit.cvs_revs:
3547 svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
3548 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3549 if self._path_exists(svn_trunk_path):
3550 # Delete the path on trunk...
3551 self._delete_path(svn_trunk_path)
3552 # ...and copy over from branch
3553 self._copy_path(cvs_rev.svn_path, svn_trunk_path,
3554 svn_commit.motivating_revnum)
3555 elif cvs_rev.op == OP_DELETE:
3556 # delete trunk path
3557 self._delete_path(svn_trunk_path)
3558 else:
3559 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3560 % cvs_rev.op)
3561 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
def commit(self, svn_commit):
  """Apply the SVNCommit SVN_COMMIT to the repository mirror.

  The repository is initialized first if SVN_COMMIT.revnum is 2.
  After self._start_commit(), exactly one of three things happens:
  a symbolic name is filled, a default branch is synchronized to
  trunk, or the CVSRevisions of SVN_COMMIT are added, changed or
  deleted one by one."""
  if svn_commit.revnum == 2:
    self._initialize_repository(svn_commit.get_date())

  self._start_commit(svn_commit)

  if svn_commit.symbolic_name:
    Log().write(LOG_VERBOSE, "Filling symbolic name:",
                _clean_symbolic_name(svn_commit.symbolic_name))
    self._fill_symbolic_name(svn_commit)
    return

  if svn_commit.motivating_revnum:
    Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                % svn_commit.motivating_revnum)
    self._synchronize_default_branch(svn_commit)
    return

  # This actually commits CVSRevisions.
  rev_count = len(svn_commit.cvs_revs)
  plural = ""
  if rev_count > 1:
    plural = "s"
  Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
              % (rev_count, plural))

  for cvs_rev in svn_commit.cvs_revs:
    # See comment in CVSCommit._commit() for what this is all
    # about.  Note that although asking self._path_exists() is
    # somewhat expensive, we only do it if the first two (cheap)
    # tests succeed first.
    skip_add_or_change = (cvs_rev.deltatext_code == DELTATEXT_EMPTY
                          and cvs_rev.rev == "1.1.1.1"
                          and self._path_exists(cvs_rev.svn_path))
    if not skip_add_or_change:
      if cvs_rev.op == OP_ADD:
        self._add_path(cvs_rev)
      elif cvs_rev.op == OP_CHANGE:
        # Fix for Issue #74:
        #
        # Here's the scenario.  You have file FOO that is imported
        # on a non-trunk vendor branch.  So in r1.1 and r1.1.1.1,
        # the file exists.
        #
        # Moving forward in time, FOO is deleted on the default
        # branch (r1.1.1.2).  cvs2svn determines that this delete
        # also needs to happen on trunk, so FOO is deleted on
        # trunk.
        #
        # Along comes r1.2, whose op is OP_CHANGE (because r1.1 is
        # not 'dead', we assume it's a change).  However, since
        # our trunk file has been deleted, svnadmin blows up--you
        # can't change a file that doesn't exist!
        #
        # Soooo... we just check the path, and if it doesn't
        # exist, we do an add... if the path does exist, it's
        # business as usual.
        if self._path_exists(cvs_rev.svn_path):
          self._change_path(cvs_rev)
        else:
          self._add_path(cvs_rev)

    if cvs_rev.op == OP_DELETE:
      self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Callback for the Cleanup.register in self.__init__.

  Drop this object's references to the revs and nodes databases."""
  self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
  """Append DELEGATE to self.delegates.

  Whenever SVNRepositoryMirror performs a repository action method,
  it calls the corresponding repository action method of every
  delegate that has been added.  Multiple delegates are called in
  the order in which they were added.  See
  SVNRepositoryMirrorDelegate for more information."""
  registered = self.delegates
  registered.append(delegate)
3639 def _invoke_delegates(self, method, *args):
3640 """Iterate through each of our delegates, in the order that they
3641 were added, and call the delegate's method named METHOD with the
3642 arguments in ARGS."""
3643 for delegate in self.delegates:
3644 getattr(delegate, method)(*args)
def finish(self):
  """End the commit in progress, call the 'finish' method of every
  delegate, and then clean up."""
  self._end_commit()
  self._invoke_delegates('finish')
  self.cleanup()
class SVNCommitItem:
  """A wrapper class for CVSRevision objects upon which
  Subversion-related data (such as properties) may be hung."""

  def __init__(self, c_rev, svn_props_changed):
    """Wrap C_REV and compute the Subversion properties for it.

    SVN_PROPS_CHANGED indicates whether the svn: properties are known
    to have changed since the last revision.

    The properties are set by the SVNPropertySetters in
    Ctx().svn_property_setters; afterwards a couple of them are read
    back out for our own purposes."""
    self.c_rev = c_rev

    # Did the svn properties change for this file (i.e., do they have
    # to be written to the dumpfile?)
    self.svn_props_changed = svn_props_changed

    # The properties for this item as a map { key : value }.  If VALUE
    # is None, no property should be set.
    self.svn_props = { }

    for setter in Ctx().svn_property_setters:
      setter.set_properties(self)

    # Remember if we need to filter the EOLs.  We could actually use
    # self.svn_props now, since it is initialized for each revision.
    eol_style = self.svn_props.get('svn:eol-style', None)
    self.needs_eol_filter = eol_style is not None

    keywords = self.svn_props.get('svn:keywords', None)
    self.has_keywords = keywords is not None
class SVNPropertySetter:
  """Abstract base class for objects that can set properties on an
  SVNCommitItem."""

  def set_properties(self, s_item):
    """Set any properties that can be determined for S_ITEM.

    This base implementation always raises NotImplementedError."""
    raise NotImplementedError
class SVNRepositoryMirrorDelegate:
  """Abstract superclass for any delegate to SVNRepositoryMirror.
  Subclasses must implement all of the methods below.

  For each method, a subclass implements, in its own way, the
  Subversion operation implied by the method's name.  For example, for
  the add_path method, the DumpfileDelegate would write out a
  "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
  would merely print that the path is being added to the repository,
  and the RepositoryDelegate would actually cause the path to be added
  to the Subversion repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Perform any actions needed to start SVNCommit SVN_COMMIT;
    see subclass implementation for details."""
    raise NotImplementedError

  def mkdir(self, path):
    """PATH is a string; see subclass implementation for details."""
    raise NotImplementedError

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see subclass implementation for
    details."""
    raise NotImplementedError

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see subclass implementation for
    details."""
    raise NotImplementedError

  def delete_path(self, path):
    """PATH is a string; see subclass implementation for
    details."""
    raise NotImplementedError

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is a
    subversion revision number (int); see subclass implementation for
    details."""
    raise NotImplementedError

  def finish(self):
    """Perform any cleanup necessary after all revisions have been
    committed."""
    raise NotImplementedError
class DumpfileDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that records every repository action in a Subversion
  dumpfile."""

  def __init__(self, dumpfile_path=None):
    """Open the dumpfile DUMPFILE_PATH (Ctx().dumpfile, if
    DUMPFILE_PATH is false) for binary writing and write the dumpfile
    header to it."""
    if not dumpfile_path:
      dumpfile_path = Ctx().dumpfile
    self.dumpfile_path = dumpfile_path

    self.dumpfile = open(self.dumpfile_path, 'wb')
    self._write_dumpfile_header(self.dumpfile)

  def _write_dumpfile_header(self, dumpfile):
    """Write the standard dumpfile header to the file-like object
    DUMPFILE."""
    # Since the CVS repository doesn't have a UUID, and the Subversion
    # repository will be created with one anyway, we don't specify a
    # UUID in the dumpfile.
    dumpfile.write('SVN-fs-dump-format-version: 2\n\n')

  def _utf8_path(self, path):
    """Return a copy of PATH encoded in UTF-8.

    Raise FatalError if any component of PATH cannot be converted."""
    encoded = []
    # Convert each path component separately (as they may each use
    # different encodings).
    for piece in path.split('/'):
      try:
        # Log messages can be converted with the 'replace' strategy,
        # but we can't afford any lossiness here.
        encoded.append(to_utf8(piece, 'strict'))
      except UnicodeError:
        raise FatalError(
            "Unable to convert a path '%s' to internal encoding.\n"
            "Consider rerunning with (for example) '--encoding=latin1'."
            % (path,))
    return '/'.join(encoded)

  def _string_for_prop(self, name, value):
    """Return the property NAME with value VALUE in the form needed
    for the dumpfile."""
    key_part = 'K %d\n%s\n' % (len(name), name)
    value_part = 'V %d\n%s\n' % (len(value), value)
    return key_part + value_part

  def start_commit(self, svn_commit):
    """Emit the start of SVN_COMMIT (an SVNCommit)."""
    self.revision = svn_commit.revnum

    # The start of a new commit typically looks like this:
    #
    #   Revision-number: 1
    #   Prop-content-length: 129
    #   Content-length: 129
    #
    #   K 7
    #   svn:log
    #   V 27
    #   Log message for revision 1.
    #   K 10
    #   svn:author
    #   V 7
    #   jrandom
    #   K 8
    #   svn:date
    #   V 27
    #   2003-04-22T22:57:58.132837Z
    #   PROPS-END
    #
    # Notice that the length headers count everything -- not just the
    # length of the data but also the lengths of the lengths, including
    # the 'K ' or 'V ' prefixes.
    #
    # The reason there are both Prop-content-length and Content-length
    # is that the former includes just props, while the latter includes
    # everything.  That's the generic header form for any entity in a
    # dumpfile.  But since revisions only have props, the two lengths
    # are always the same for revisions.

    # Build the property section, sorted by property name; properties
    # whose value is None are left out.
    revprops = svn_commit.get_revprops()
    names = list(revprops.keys())
    names.sort()
    chunks = []
    for name in names:
      value = revprops[name]
      if value is not None:
        chunks.append(self._string_for_prop(name, value))

    prop_section = ''.join(chunks) + 'PROPS-END\n'
    section_len = len(prop_section)

    # Print the revision header and props.
    self.dumpfile.write('Revision-number: %d\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        % (self.revision, section_len, section_len))

    self.dumpfile.write(prop_section)
    self.dumpfile.write('\n')

  def mkdir(self, path):
    """Emit the creation of directory PATH."""
    node_path = self._utf8_path(path)
    self.dumpfile.write("Node-path: %s\n"
                        "Node-kind: dir\n"
                        "Node-action: add\n"
                        "\n"
                        "\n" % node_path)

  def _add_or_change_path(self, s_item, op):
    """Emit the addition or change corresponding to S_ITEM.
    OP is either the constant OP_ADD or OP_CHANGE.

    Raise FatalError if OP is anything else, or if the command that
    produces the file contents exits with a non-zero status."""

    # Validate OP and translate it into a dumpfile node action.
    if op == OP_ADD:
      action = 'add'
    elif op == OP_CHANGE:
      action = 'change'
    else:
      raise FatalError("_add_or_change_path() called with bad op ('%s')"
                       % (op,))

    c_rev = s_item.c_rev

    # The property handling here takes advantage of an undocumented
    # but IMHO consistent feature of the Subversion dumpfile-loading
    # code.  When a node's properties aren't mentioned (that is, the
    # "Prop-content-length:" header is absent, no properties are
    # listed at all, and there is no "PROPS-END\n" line) then no
    # change is made to the node's properties.
    #
    # This is consistent with the way dumpfiles behave w.r.t. text
    # content changes, so I'm comfortable relying on it.  If you
    # commit a change to *just* the properties of some node that
    # already has text contents from a previous revision, then in the
    # dumpfile output for the prop change, no "Text-content-length:"
    # nor "Text-content-md5:" header will be present, and the text of
    # the file will not be given.  But this does not cause the file's
    # text to be erased!  It simply remains unchanged.
    #
    # This works out great for cvs2svn, due to lucky coincidences:
    #
    # For files, the only properties we ever set are set in the first
    # revision; all other revisions (including on branches) inherit
    # from that.  After the first revision, we never change file
    # properties, therefore, there is no need to remember the full set
    # of properties on a given file once we've set it.
    #
    # For directories, the only property we set is "svn:ignore", and
    # while we may change it after the first revision, we always do so
    # based on the contents of a ".cvsignore" file -- in other words,
    # CVS is doing the remembering for us, so we still don't have to
    # preserve the previous value of the property ourselves.

    # Calculate the (sorted-by-name) property string and length, if any.
    prop_contents = ''
    props_header = ''
    if s_item.svn_props_changed:
      svn_props = s_item.svn_props
      names = list(svn_props.keys())
      names.sort()
      chunks = []
      for name in names:
        value = svn_props[name]
        if value is not None:
          chunks.append(self._string_for_prop(name, value))
      chunks.append('PROPS-END\n')
      prop_contents = ''.join(chunks)
      props_header = 'Prop-content-length: %d\n' % len(prop_contents)

    # Treat .cvsignore as a directory property.
    dir_path, basename = os.path.split(c_rev.svn_path)
    if basename == ".cvsignore":
      ignore_text = '\n'.join(generate_ignores(c_rev))
      ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n'
                         % (len(ignore_text), ignore_text)) + 'PROPS-END\n'
      ignore_len = len(ignore_contents)

      # Write headers, then props.
      self.dumpfile.write('Node-path: %s\n'
                          'Node-kind: dir\n'
                          'Node-action: change\n'
                          'Prop-content-length: %d\n'
                          'Content-length: %d\n'
                          '\n'
                          '%s'
                          % (self._utf8_path(dir_path), ignore_len,
                             ignore_len, ignore_contents))

    # If the file has keywords, we must prevent CVS/RCS from expanding
    # the keywords because they must be unexpanded in the repository,
    # or Subversion will get confused.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
        c_rev, suppress_keyword_substitution=s_item.has_keywords)

    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: file\n'
                        'Node-action: %s\n'
                        '%s'  # no property header if no props
                        'Text-content-length: '
                        % (self._utf8_path(c_rev.svn_path),
                           action, props_header))

    # Remember where the placeholders start; the real text length,
    # checksum and content length are only known once the contents
    # have been streamed, and are patched in afterwards.
    pos = self.dumpfile.tell()

    self.dumpfile.write('0000000000000000\n'
                        'Text-content-md5: 00000000000000000000000000000000\n'
                        'Content-length: 0000000000000000\n'
                        '\n')

    if prop_contents:
      self.dumpfile.write(prop_contents)

    # Insert a filter to convert all EOLs to LFs if necessary.
    if s_item.needs_eol_filter:
      data_reader = LF_EOL_Filter(pipe.stdout)
    else:
      data_reader = pipe.stdout

    # Insert the rev contents, calculating length and checksum as we go.
    checksum = md5.new()
    length = 0
    buf = data_reader.read(PIPE_READ_SIZE)
    while buf != '':
      checksum.update(buf)
      length = length + len(buf)
      self.dumpfile.write(buf)
      buf = data_reader.read(PIPE_READ_SIZE)

    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
      raise FatalError("The command '%s' failed with exit status: %s\n"
                       "and the following output:\n"
                       "%s" % (pipe_cmd, exit_status, error_output))

    # Offsets of the placeholders relative to POS:
    # 16 zeros + 1 newline + len('Text-content-md5: ') == 35
    md5_offset = 16 + 1 + len('Text-content-md5: ')
    # 35... + 32 bytes of checksum + 1 newline
    # + len('Content-length: ') == 84
    content_length_offset = md5_offset + 32 + 1 + len('Content-length: ')

    # Go back to patch up the length and checksum headers.  We left
    # 16 zeros for the text length; replace them with the real
    # length, padded on the left with spaces:
    self.dumpfile.seek(pos, 0)
    self.dumpfile.write('%16d' % length)
    self.dumpfile.seek(pos + md5_offset, 0)
    self.dumpfile.write(checksum.hexdigest())
    self.dumpfile.seek(pos + content_length_offset, 0)
    # The content length is the length of property data, text data,
    # and any metadata around/inside around them.
    self.dumpfile.write('%16d' % (length + len(prop_contents)))
    # Jump back to the end of the stream.
    self.dumpfile.seek(0, 2)

    # This record is done (write two newlines -- one to terminate
    # contents that weren't themselves newline-terminated, one to
    # provide a blank line for readability).
    self.dumpfile.write('\n\n')

  def add_path(self, s_item):
    """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_ADD)

  def change_path(self, s_item):
    """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
    self._add_or_change_path(s_item, OP_CHANGE)

  def delete_path(self, path):
    """Emit the deletion of PATH."""
    node_path = self._utf8_path(path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: delete\n'
                        '\n' % node_path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Emit the copying of SRC_PATH at SRC_REVNUM to DEST_PATH."""
    # We don't need to include "Node-kind:" for copies; the loader
    # ignores it anyway and just uses the source kind instead.
    node_path = self._utf8_path(dest_path)
    copyfrom_path = self._utf8_path(src_path)
    self.dumpfile.write('Node-path: %s\n'
                        'Node-action: add\n'
                        'Node-copyfrom-rev: %d\n'
                        'Node-copyfrom-path: /%s\n'
                        '\n'
                        % (node_path, src_revnum, copyfrom_path))

  def finish(self):
    """Close the dumpfile after all revisions have been committed."""
    self.dumpfile.close()
class RepositoryDelegate(DumpfileDelegate):
  """Creates a new Subversion Repository.  DumpfileDelegate does all
  of the heavy lifting."""

  def __init__(self):
    """Create the target repository with 'svnadmin create' (unless
    Ctx().existing_svnrepos is set), open a temporary dumpfile, and
    start an 'svnadmin load' process to which the dumpfile header is
    written.

    Raise FatalError if writing the header to the loader fails."""
    self.svnadmin = Ctx().svnadmin
    self.target = Ctx().target
    if not Ctx().existing_svnrepos:
      Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
      fs_type = Ctx().fs_type
      if not fs_type:
        # User didn't say what kind repository (bdb, fsfs, etc).
        # We still pass --bdb-txn-nosync.  It's a no-op if the default
        # repository type doesn't support it, but we definitely want
        # it if BDB is the default.
        options = "--bdb-txn-nosync"
      elif fs_type == 'bdb':
        # User explicitly specified bdb.
        #
        # Since this is a BDB repository, pass --bdb-txn-nosync,
        # because it gives us a 4-5x speed boost (if cvs2svn is
        # creating the repository, cvs2svn should be the only program
        # accessing the svn repository (until cvs is done, at least)).
        # But we'll turn no-sync off in self.finish(), unless
        # instructed otherwise.
        options = "--fs-type=bdb --bdb-txn-nosync"
      else:
        # User specified something other than bdb.
        options = "--fs-type=%s" % fs_type
      run_command('%s create %s "%s"'
                  % (self.svnadmin, options, self.target))

    # Since the output of this run is a repository, not a dumpfile,
    # the temporary dumpfiles we create should go in the tmpdir.
    DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

    # This is 1 if a commit is in progress, otherwise None.
    self._commit_in_progress = None

    # The base class opened the dumpfile write-only.  We need to read
    # it back in _feed_pipe(), so reopen it for update -- but close
    # the first handle explicitly rather than leaving it to be
    # flushed into the reopened file whenever it gets collected.
    self.dumpfile.close()
    self.dumpfile = open(self.dumpfile_path, 'w+b')
    self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                     self.target ], True)
    self.loader_pipe.stdout.close()
    try:
      self._write_dumpfile_header(self.loader_pipe.stdin)
    except IOError:
      raise FatalError("svnadmin failed with the following output while "
                       "loading the dumpfile:\n"
                       + self.loader_pipe.stderr.read())

  def _feed_pipe(self):
    """Feed the revision stored in the dumpfile to the svnadmin
    load pipe.

    Raise FatalError if writing to the pipe fails."""
    self.dumpfile.seek(0)
    while 1:
      data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
      if not len(data):
        break
      try:
        self.loader_pipe.stdin.write(data)
      except IOError:
        raise FatalError("svnadmin failed with the following output "
                         "while loading the dumpfile:\n"
                         + self.loader_pipe.stderr.read())

  def start_commit(self, svn_commit):
    """Start a new commit.  If a commit is already in progress, load
    the dumpfile into the svn repository and empty the dumpfile
    before emitting the start of SVN_COMMIT into it."""
    if self._commit_in_progress:
      self._feed_pipe()
    # Rewind and empty the dumpfile so that it only ever holds the
    # revision currently being written.
    self.dumpfile.seek(0)
    self.dumpfile.truncate()
    DumpfileDelegate.start_commit(self, svn_commit)
    self._commit_in_progress = 1

  def finish(self):
    """Load the last commit into the repository, wait for the loader
    to exit, and remove the temporary dumpfile.

    Raise FatalError if 'svnadmin load' exits with a non-zero
    status."""
    self._feed_pipe()
    self.dumpfile.close()
    self.loader_pipe.stdin.close()
    error_output = self.loader_pipe.stderr.read()
    exit_status = self.loader_pipe.wait()
    if exit_status:
      raise FatalError('svnadmin load failed with exit status: %s\n'
                       'and the following output:\n'
                       '%s' % (exit_status, error_output,))
    os.remove(self.dumpfile_path)

    # If this is a BDB repository, and we created the repository, and
    # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
    # line in the DB_CONFIG file, because txn syncing should be on by
    # default in BDB repositories.
    #
    # We determine if this is a BDB repository by looking for the
    # DB_CONFIG file, which doesn't exist in FSFS, rather than by
    # checking Ctx().fs_type.  That way this code will Do The Right
    # Thing in all circumstances.
    db_config = os.path.join(self.target, "db/DB_CONFIG")
    if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
        and os.path.exists(db_config)):
      no_sync = 'set_flags DB_TXN_NOSYNC\n'

      config_file = open(db_config, 'r')
      try:
        contents = config_file.readlines()
      finally:
        config_file.close()

      # Only rewrite the file if the line is actually there; a
      # DB_CONFIG without it needs no change and must not abort an
      # otherwise successful conversion.
      if no_sync in contents:
        contents[contents.index(no_sync)] = '# ' + no_sync
        config_file = open(db_config, 'w')
        try:
          config_file.writelines(contents)
        finally:
          config_file.close()
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """Delegate that only reports, through Log(), what the
  SVNRepositoryMirror is doing.  Nothing on disk is changed: each
  callback merely logs a line saying which operation is taking place."""

  def __init__(self, total_revs):
    """Remember TOTAL_REVS, which is shown next to the current revision
    number when a commit starts."""
    self.total_revs = total_revs

  def _verbose(self, *words):
    """Log WORDS at LOG_VERBOSE level."""
    Log().write(LOG_VERBOSE, *words)

  def start_commit(self, svn_commit):
    """Log the Subversion revision number of the commit that is being
    started, together with the total number of revisions."""
    self._verbose("=" * 60)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    Log().write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Log that directory PATH is being created."""
    self._verbose(" New Directory", path)

  def add_path(self, s_item):
    """Log that s_item.c_rev.svn_path is being added."""
    self._verbose(" Adding", s_item.c_rev.svn_path)

  def change_path(self, s_item):
    """Log that s_item.c_rev.svn_path is being changed."""
    self._verbose(" Changing", s_item.c_rev.svn_path)

  def delete_path(self, path):
    """Log that PATH is being deleted."""
    self._verbose(" Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Log that revision SRC_REVNUM of SRC_PATH is being copied to
    DEST_PATH."""
    self._verbose(" Copying revision", src_revnum, "of", src_path)
    self._verbose(" to", dest_path)

  def finish(self):
    """Log that the repository has been created."""
    self._verbose("Finished creating Subversion repository.")
    Log().write(LOG_QUIET, "Done.")
# Kept at module level rather than inside pass1 because Python 2.0
# does not support nested scopes, and visit_file() needs to see it.
OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
def pass1():
  """Pass 1: walk the CVS repository and parse every ',v' file.

  Results are collected in a CollectData instance.  Raise
  FatalException if any file could not be handled or if no ',v' file
  was found at all."""
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    """os.path.walk() callback: parse the ',v' files among FILES in
    DIRNAME.  BATON is the CollectData instance receiving the data."""
    collector = baton
    attic_len = len(OS_SEP_PLUS_ATTIC)
    for fname in files:
      if not fname.endswith(',v'):
        continue
      collector.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname.endswith(OS_SEP_PLUS_ATTIC):
        # The canonical name leaves out the 'Attic' directory.
        canonical = os.path.join(dirname[:-attic_len], fname)
        collector.set_fname(canonical, pathname)
      else:
        # The same file must not exist both here and in the Attic.
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          collector.fatal_errors.append(err)
        collector.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        cvs2svn_rcsparse.parse(open(pathname, 'rb'), collector)
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
              RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        collector.fatal_errors.append(err)
      except:
        # Say which file was being parsed, then let the error propagate.
        Log().write(LOG_WARN,
                    "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().cvsroot, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if cd.fatal_errors:
    summary = ("Pass 1 complete.\n"
               + "=" * 75 + "\n"
               + "Error summary:\n"
               + "\n".join(cd.fatal_errors) + "\n"
               + "Exited due to fatal error(s).\n")
    raise FatalException(summary)

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  First validate the symbol-related options (exclusions, forced tags
  and tag/branch mismatches), writing diagnostics to stderr and exiting
  with status 1 if any check fails.  Then create the tags database and
  rewrite the revisions file into the cleaned revisions file: revisions
  on excluded branches are dropped, excluded symbols are removed,
  forced symbols are converted, and timestamps are resynchronized
  according to the resync data file."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Expand the --exclude regexps into the symbols they match.  (The
  # result is queried with has_key() below.)
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
      sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags.keys():
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp.  We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory.  really large repositories with whacky timestamps could
    ### bust this assumption.  should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit.  We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped.  We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      resync.setdefault(digest, []).append([t1_l, t1_u, t2])

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    for val in resync.values():
      val.sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)

  # Predicate used to strip excluded tags and branches from a revision.
  # It does not depend on the revision, so it is defined once here.
  # EXCLUDES is bound as a default argument so that no nested scope is
  # needed.
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Pick up timestamps of neighbouring revisions that were already
    # tweaked earlier in this pass.
    new_prev_ts = None
    if c_rev.prev_rev is not None:
      new_prev_ts = tweaked_timestamps_db.get(
          c_rev.unique_key(c_rev.prev_rev), None)
    if new_prev_ts:
      c_rev.prev_timestamp = new_prev_ts

    new_next_ts = None
    if c_rev.next_rev is not None:
      new_next_ts = tweaked_timestamps_db.get(
          c_rev.unique_key(c_rev.next_rev), None)
    if new_next_ts:
      c_rev.next_timestamp = new_next_ts

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[2] == c_rev.timestamp:
        # This means that either c_rev is the same revision that
        # caused the resync record to exist, or c_rev is a different
        # CVS revision that happens to have the same timestamp.  In
        # either case, we don't have to do anything, so we...
        continue

      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo!  We probably want to remap the time on this c_rev,
        # unless the remapping would be useless because the new time
        # would fall outside the COMMIT_THRESHOLD window for this
        # commit group.
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if new_timestamp < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before the time of previous"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # If the new timestamp is later than that of our next revision
        elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is after time of next"
                  + " revision %s (%s):")
          # Report the NEXT revision here; it is the one whose timestamp
          # is being compared against.
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.next_rev, c_rev.next_timestamp))
          # If resyncing our rev to c_rev.next_timestamp - 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync forward to c_rev.next_timestamp
          # - 1...
          if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.next_timestamp - 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # Fix for Issue #71: Avoid resyncing two consecutive revisions
        # to the same timestamp.
        elif (new_timestamp == c_rev.prev_timestamp
              or new_timestamp == c_rev.next_timestamp):
          continue

        # adjust the time range.  we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 new_timestamp - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp
        tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # Close explicitly so that the data is on disk before the next pass
  # reads this file.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: sort the cleaned revisions file into the sorted revisions
  file, and register the sorted file with Cleanup() against pass5."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  clean_revs = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(clean_revs, sorted_revs)
  Cleanup().register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Iterate through sorted revs, storing them in a database.
  If we're not doing a trunk-only conversion, generate the
  LastSymbolicNameDatabase, which contains the last CVSRevision
  that is a source for each tag or branch.

  Each revision is also recorded with StatsKeeper(), which is archived
  at the end of the pass."""
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if not Ctx().trunk_only:
    Log().write(LOG_QUIET,
                "Finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)
  else:
    # This is to avoid testing Ctx().trunk_only every time around the loop
    class DummyLSNDB:
      "Stand-in whose log_revision() and create_database() do nothing."
      def noop(*args): pass
      log_revision = noop
      create_database = noop
    last_sym_name_db = DummyLSNDB()

  for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
    # line[:-1] drops the trailing newline.
    c_rev = CVSRevision(Ctx(), line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """
  Generate the SVNCommit <-> CVSRevision mapping
  databases.  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag.  See SymbolingsLogger for more details.
  """
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
    # line[:-1] drops the trailing newline.
    c_rev = CVSRevision(Ctx(), line[:-1])
    # In a trunk-only conversion, revisions that have a branch name are
    # skipped.
    if not (Ctx().trunk_only and c_rev.branch_name is not None):
      aggregator.process_revision(c_rev)
  aggregator.flush()

  StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Pass 6: unless this is a trunk-only conversion, sort the symbol
  openings/closings file and register the sorted file with Cleanup()
  against pass8."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  trunk_only = Ctx().trunk_only
  if not trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
  Log().write(LOG_QUIET, "Done")
def pass7():
  """Pass 7: unless this is a trunk-only conversion, record the file
  offset at which each symbolic name first appears in the sorted
  openings/closings file."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered.  This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done.  Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      while 1:
        # The offset must be taken before the line is read, so
        # readline() is used instead of iterating over the file.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Only the symbol name is needed here; the three-way unpacking
        # is kept so that a line with fewer than three fields still
        # raises ValueError.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Pass 8: replay the stored SVN commits through an
  SVNRepositoryMirror.

  Unless this is a dry run, a RepositoryDelegate (when Ctx().target is
  set) or a DumpfileDelegate (otherwise) is attached to the mirror.  A
  StdoutDelegate is always attached."""
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  if Ctx().target:
    if not Ctx().dry_run:
      repos.add_delegate(RepositoryDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Repository.")
  else:
    if not Ctx().dry_run:
      repos.add_delegate(DumpfileDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Repository initialization is 1, so the stored commits start at 2.
  svncounter = 2
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes in execution order: _passes[i] implements
# pass number i + 1.
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn.  For example, run-time
  options are stored here.  This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  Every instance shares one attribute dictionary, so an attribute set
  through any Ctx() is visible through all the others."""

  __shared_state = { }
  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # The shared state already holds values; keep them.
      return
    # Else, initialize to defaults.
    self.cvsroot = None
    self.target = None
    self.dumpfile = DUMPFILE
    self.tmpdir = '.'
    self.verbose = 0
    self.quiet = 0
    self.prune = 1
    self.existing_svnrepos = 0
    self.dump_only = 0
    self.dry_run = 0
    self.trunk_only = 0
    self.trunk_base = "trunk"
    self.tags_base = "tags"
    self.branches_base = "branches"
    self.encoding = ["ascii"]
    self.mime_types_file = None
    self.no_default_eol = 0
    self.eol_from_mime_type = 0
    self.keywords_off = 0
    self.use_cvs = None
    self.svnadmin = "svnadmin"
    self.username = None
    self.print_help = 0
    self.skip_cleanup = 0
    self.bdb_txn_nosync = 0
    self.fs_type = None
    self.forced_branches = []
    self.forced_tags = []
    self.excludes = []
    self.symbol_transforms = []
    self.svn_property_setters = []
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number of an item in its
  cvs2svn:cvs-rev property."""

  def set_properties(self, s_item):
    """Store s_item.c_rev.rev as 'cvs2svn:cvs-rev' on S_ITEM and flag
    its properties as changed."""
    revision = s_item.c_rev.rev
    s_item.svn_props['cvs2svn:cvs-rev'] = revision
    s_item.svn_props_changed = True
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types."""

  def __init__(self, mime_types_file):
    """Build the extension -> MIME type table from MIME_TYPES_FILE.

    Lines starting with '#' and lines with fewer than two
    whitespace-separated fields are ignored.  If an extension is listed
    under two different types, a warning is written to stderr and the
    later type wins."""
    self.mappings = { }

    # FileInput is instantiated directly, as elsewhere in this file,
    # rather than going through the module-global fileinput.input().
    for line in fileinput.FileInput(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      extensions = line.split()
      if len(extensions) < 2:
        continue
      mime_type = extensions.pop(0)
      for ext in extensions:
        if self.mappings.has_key(ext) and self.mappings[ext] != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext],
                              mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM if its file name has a known
    mapping; otherwise leave the properties untouched."""
    basename, extension = os.path.splitext(
        os.path.basename(s_item.c_rev.cvs_path)
        )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping.  This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files a fallback mime type when none has been set."""

  def set_properties(self, s_item):
    """Set svn:mime-type to 'application/octet-stream' if S_ITEM has
    no svn:mime-type yet and its CVS mode is 'b'."""
    if s_item.svn_props.has_key('svn:mime-type'):
      return
    if s_item.c_rev.mode == 'b':
      s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Make sure binary files get an svn:eol-style of None."""

  def set_properties(self, s_item):
    """Set svn:eol-style to None if the CVS mode of S_ITEM is 'b'."""
    is_binary = (s_item.c_rev.mode == 'b')
    if is_binary:
      s_item.svn_props['svn:eol-style'] = None
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive svn:eol-style from svn:mime-type when no eol-style is set.

  The mime type must already have been set for this setter to have
  any effect.  See also issue #39."""

  def set_properties(self, s_item):
    """Set svn:eol-style to 'native' for 'text/' mime types and to
    None for any other mime type.  Do nothing if svn:eol-style is
    already present or no mime type is known."""
    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    if props.get('svn:mime-type', None) is None:
      return
    if props['svn:mime-type'].startswith("text/"):
      props['svn:eol-style'] = 'native'
    else:
      props['svn:eol-style'] = None
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Supply a default svn:eol-style for items that have none yet."""

  def __init__(self, value):
    """Use VALUE as the default svn:eol-style."""
    self.value = value

  def set_properties(self, s_item):
    """Set svn:eol-style to the default unless S_ITEM already has one."""
    if s_item.svn_props.has_key('svn:eol-style'):
      return
    s_item.svn_props['svn:eol-style'] = self.value
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords depending on the CVS mode of the file.  See
  issue #2."""

  def __init__(self, value):
    """VALUE is what svn:keywords is set to, when it is set at all."""
    self.value = value

  def set_properties(self, s_item):
    """Set svn:keywords if it is not already present and the CVS mode
    of S_ITEM is None, 'kv' or 'kvl'."""
    if s_item.svn_props.has_key('svn:keywords'):
      return
    if s_item.c_rev.mode in [None, 'kv', 'kvl']:
      s_item.svn_props['svn:keywords'] = self.value
class ExecutablePropertySetter(SVNPropertySetter):
  """Mirror c_rev.file_executable into the svn:executable property."""

  def set_properties(self, s_item):
    """Set svn:executable to '*' if s_item.c_rev.file_executable is
    true."""
    executable = s_item.c_rev.file_executable
    if executable:
      s_item.svn_props['svn:executable'] = '*'
def convert(start_pass, end_pass):
  """Convert a CVS repository to an SVN repository.

  Run passes START_PASS through END_PASS (1-based, inclusive) from
  _passes and record each pass's duration with StatsKeeper().  After
  every pass, attributes of Ctx() with exactly one leading underscore
  are deleted and, unless Ctx().skip_cleanup is set, the Cleanup()
  instance is asked to clean up for that pass.  Statistics and timings
  are logged at the end."""
  cleanup = Cleanup()
  times = [ None ] * (end_pass + 1)
  times[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())
  for index in range(start_pass - 1, end_pass):
    pass_num = index + 1
    Log().write(LOG_QUIET, '----- pass %d -----' % (pass_num))
    _passes[index]()
    times[pass_num] = time.time()
    elapsed = times[pass_num] - times[index]
    StatsKeeper().log_duration_for_pass(elapsed, pass_num)
    # Dispose of items in Ctx() not intended to live past the end of the
    # pass (identified by exactly one leading underscore).  Names starting
    # with "_Ctx__" are also left alone.
    for attr in dir(Ctx()):
      if len(attr) <= 2 or attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        continue
      delattr(Ctx(), attr)
    if not Ctx().skip_cleanup:
      cleanup.cleanup(_passes[index])
  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())
def normalize_ttb_path(opt, path):
  """Return PATH normalized for use with --trunk, --tags, or --branches.

  PATH is split on '/' and its components are recombined with
  _path_join().  OPT is the option name; it is used only in the error
  message.

  Raise FatalError if the normalized path is empty."""
  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))
def verify_paths_disjoint(*paths):
  """Check that no path in PATHS equals or is nested in another one.

  Nesting is meant in the sense that 'a/b/c/d' is nested in 'a/b'.
  Raise FatalError naming the first offending pair found; return None
  if all of the paths are disjoint."""
  decorated = []
  for path in paths:
    decorated.append((path.split('/'), path))
  # If all overlapping elements are equal, a shorter list sorts before
  # a longer one.  So if any paths are nested, sorting leaves at least
  # one such pair adjacent, in the order [nest, nestling].
  decorated.sort()
  for i in range(len(decorated) - 1):
    outer_parts, outer = decorated[i]
    inner_parts, inner = decorated[i + 1]
    depth = len(outer_parts)
    if depth <= len(inner_parts) and inner_parts[:depth] == outer_parts:
      raise FatalError("paths %s and %s are not disjoint." % (outer, inner,))
def usage():
  """Print the command-line usage summary, one line per print.

  The text includes the program name taken from sys.argv[0], the
  number of passes, the default trunk/branches/tags paths from Ctx()
  and the default svn:keywords value."""
  progname = os.path.basename(sys.argv[0])
  print('USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path'
        % progname)
  print(' --help, -h print this usage message and exit with success')
  print(' --version print the version number')
  print(' -q quiet')
  print(' -v verbose')
  print(' -s PATH path for SVN repos')
  print(' -p START[:END] start at pass START, end at pass END of %d'
        % len(_passes))
  print(' If only START is given, run only pass START')
  print(' (implicitly enables --skip-cleanup)')
  print(' --existing-svnrepos load into existing SVN repository')
  print(' --dumpfile=PATH name of intermediate svn dumpfile')
  print(' --tmpdir=PATH directory to use for tmp data (default to cwd)')
  print(' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)')
  print(' --dry-run do not create a repository or a dumpfile;')
  print(' just print what would happen.')
  print(' --use-cvs use CVS instead of RCS \'co\' to extract data')
  print(' (only use this if having problems with RCS)')
  print(' --svnadmin=PATH path to the svnadmin program')
  print(' --trunk-only convert only trunk commits, not tags nor branches')
  print(' --trunk=PATH path for trunk (default: %s)'
        % Ctx().trunk_base)
  print(' --branches=PATH path for branches (default: %s)'
        % Ctx().branches_base)
  print(' --tags=PATH path for tags (default: %s)'
        % Ctx().tags_base)
  print(' --no-prune don\'t prune empty directories')
  print(' --dump-only just produce a dumpfile, don\'t commit to a repos')
  print(' --encoding=ENC encoding of paths and log messages in CVS repos')
  print(' Multiple of these options may be passed, where they')
  print(' will be treated as an ordered list of encodings to')
  print(' attempt (with "ascii" as a hardcoded last resort)')
  print(' --force-branch=NAME force NAME to be a branch')
  print(' --force-tag=NAME force NAME to be a tag')
  print(' --exclude=REGEXP exclude branches and tags matching REGEXP')
  print(' --symbol-transform=P:S transform symbol names from P to S where P and S')
  print(' use Python regexp and reference syntax respectively')
  print(' --username=NAME username for cvs2svn-synthesized commits')
  print(' --skip-cleanup prevent the deletion of intermediate files')
  print(' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"')
  print(' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"')
  print(' --cvs-revnums record CVS revision numbers as file properties')
  print(' --mime-types=FILE specify an apache-style mime.types file for')
  print(' setting svn:mime-type')
  print(' --eol-from-mime-type set svn:eol-style from mime type if known')
  print(' --no-default-eol don\'t set svn:eol-style to \'native\' for')
  print(' non-binary files with undetermined mime types')
  print(' --keywords-off don\'t set svn:keywords on any files (by default,')
  print(' cvs2svn sets svn:keywords on non-binary files to')
  print(' "%s")' % SVN_KEYWORDS_VALUE)
4921 def main():
4922 # Convenience var, so we don't have to keep instantiating this Borg.
4923 ctx = Ctx()
4925 profiling = None
4926 start_pass = 1
4927 end_pass = len(_passes)
4929 try:
4930 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
4931 [ "help", "create", "trunk=",
4932 "username=", "existing-svnrepos",
4933 "branches=", "tags=", "encoding=",
4934 "force-branch=", "force-tag=", "exclude=",
4935 "use-cvs", "mime-types=",
4936 "eol-from-mime-type", "no-default-eol",
4937 "trunk-only", "no-prune", "dry-run",
4938 "dump-only", "dumpfile=", "tmpdir=",
4939 "svnadmin=", "skip-cleanup", "cvs-revnums",
4940 "bdb-txn-nosync", "fs-type=",
4941 "version", "profile",
4942 "keywords-off", "symbol-transform="])
4943 except getopt.GetoptError, e:
4944 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
4945 usage()
4946 sys.exit(1)
4948 for opt, value in opts:
4949 if opt == '--version':
4950 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
4951 sys.exit(0)
4952 elif opt == '-p':
4953 # Don't cleanup if we're doing incrementals.
4954 ctx.skip_cleanup = 1
4955 if value.find(':') > 0:
4956 start_pass, end_pass = map(int, value.split(':'))
4957 else:
4958 end_pass = start_pass = int(value)
4959 if start_pass > len(_passes) or start_pass < 1:
4960 raise FatalError(
4961 'illegal value (%d) for starting pass. Must be 1 through %d.'
4962 % (int(start_pass), len(_passes),))
4963 if end_pass < start_pass or end_pass > len(_passes):
4964 raise FatalError(
4965 'illegal value (%d) for ending pass. Must be %d through %d.'
4966 % (int(end_pass), int(start_pass), len(_passes),))
4967 elif (opt == '--help') or (opt == '-h'):
4968 ctx.print_help = 1
4969 elif opt == '-v':
4970 Log().log_level = LOG_VERBOSE
4971 ctx.verbose = 1
4972 elif opt == '-q':
4973 Log().log_level = LOG_QUIET
4974 ctx.quiet = 1
4975 elif opt == '-s':
4976 ctx.target = value
4977 elif opt == '--existing-svnrepos':
4978 ctx.existing_svnrepos = 1
4979 elif opt == '--dumpfile':
4980 ctx.dumpfile = value
4981 elif opt == '--tmpdir':
4982 ctx.tmpdir = value
4983 elif opt == '--use-cvs':
4984 ctx.use_cvs = 1
4985 elif opt == '--svnadmin':
4986 ctx.svnadmin = value
4987 elif opt == '--trunk-only':
4988 ctx.trunk_only = 1
4989 elif opt == '--trunk':
4990 ctx.trunk_base = normalize_ttb_path(opt, value)
4991 elif opt == '--branches':
4992 ctx.branches_base = normalize_ttb_path(opt, value)
4993 elif opt == '--tags':
4994 ctx.tags_base = normalize_ttb_path(opt, value)
4995 elif opt == '--no-prune':
4996 ctx.prune = None
4997 elif opt == '--dump-only':
4998 ctx.dump_only = 1
4999 elif opt == '--dry-run':
5000 ctx.dry_run = 1
5001 elif opt == '--encoding':
5002 ctx.encoding.insert(-1, value)
5003 elif opt == '--force-branch':
5004 ctx.forced_branches.append(value)
5005 elif opt == '--force-tag':
5006 ctx.forced_tags.append(value)
5007 elif opt == '--exclude':
5008 try:
5009 ctx.excludes.append(re.compile('^' + value + '$'))
5010 except re.error, e:
5011 raise FatalError("'%s' is not a valid regexp." % (value,))
5012 elif opt == '--mime-types':
5013 ctx.mime_types_file = value
5014 elif opt == '--eol-from-mime-type':
5015 ctx.eol_from_mime_type = 1
5016 elif opt == '--no-default-eol':
5017 ctx.no_default_eol = 1
5018 elif opt == '--keywords-off':
5019 ctx.keywords_off = 1
5020 elif opt == '--username':
5021 ctx.username = value
5022 elif opt == '--skip-cleanup':
5023 ctx.skip_cleanup = 1
5024 elif opt == '--cvs-revnums':
5025 ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5026 elif opt == '--bdb-txn-nosync':
5027 ctx.bdb_txn_nosync = 1
5028 elif opt == '--fs-type':
5029 ctx.fs_type = value
5030 elif opt == '--create':
5031 sys.stderr.write(warning_prefix +
5032 ': The behaviour produced by the --create option is now the '
5033 'default,\nand passing the option is deprecated.\n')
5034 elif opt == '--profile':
5035 profiling = 1
5036 elif opt == '--symbol-transform':
5037 [pattern, replacement] = value.split(":")
5038 try:
5039 pattern = re.compile(pattern)
5040 except re.error, e:
5041 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5042 ctx.symbol_transforms.append((pattern, replacement,))
5044 if ctx.print_help:
5045 usage()
5046 sys.exit(0)
5048 # Consistency check for options and arguments.
5049 if len(args) == 0:
5050 usage()
5051 sys.exit(1)
5053 if len(args) > 1:
5054 sys.stderr.write(error_prefix +
5055 ": must pass only one CVS repository.\n")
5056 usage()
5057 sys.exit(1)
5059 ctx.cvsroot = args[0]
5061 if ctx.use_cvs:
5062 ctx.cvs_repository = CVSRepositoryViaCVS(ctx.cvsroot)
5063 else:
5064 ctx.cvs_repository = CVSRepositoryViaRCS(ctx.cvsroot)
5066 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5067 raise FatalError("must pass one of '-s' or '--dump-only'.")
5069 def not_both(opt1val, opt1name, opt2val, opt2name):
5070 if opt1val and opt2val:
5071 raise FatalError("cannot pass both '%s' and '%s'."
5072 % (opt1name, opt2name,))
5074 not_both(ctx.target, '-s',
5075 ctx.dump_only, '--dump-only')
5077 not_both(ctx.dump_only, '--dump-only',
5078 ctx.existing_svnrepos, '--existing-svnrepos')
5080 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5081 ctx.existing_svnrepos, '--existing-svnrepos')
5083 not_both(ctx.dump_only, '--dump-only',
5084 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5086 not_both(ctx.quiet, '-q',
5087 ctx.verbose, '-v')
5089 not_both(ctx.fs_type, '--fs-type',
5090 ctx.existing_svnrepos, '--existing-svnrepos')
5092 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5093 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5094 % ctx.fs_type)
5096 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5097 ctx.project = Project(ctx.cvsroot,
5098 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5100 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5101 raise FatalError("the svn-repos-path '%s' is not an "
5102 "existing directory." % ctx.target)
5104 if not ctx.dump_only and not ctx.existing_svnrepos \
5105 and (not ctx.dry_run) and os.path.exists(ctx.target):
5106 raise FatalError("the svn-repos-path '%s' exists.\n"
5107 "Remove it, or pass '--existing-svnrepos'."
5108 % ctx.target)
5110 if ctx.target and not ctx.dry_run:
5111 # Verify that svnadmin can be executed. The 'help' subcommand
5112 # should be harmless.
5113 try:
5114 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5115 except CommandFailedException, e:
5116 raise FatalError(
5117 '%s\n'
5118 'svnadmin could not be executed. Please ensure that it is\n'
5119 'installed and/or use the --svnadmin option.' % (e,))
5121 if ctx.mime_types_file:
5122 ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5124 ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5125 ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5127 if ctx.eol_from_mime_type:
5128 ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5130 if ctx.no_default_eol:
5131 ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5132 else:
5133 ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5135 if not ctx.keywords_off:
5136 ctx.svn_property_setters.append(
5137 KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5139 ctx.svn_property_setters.append(ExecutablePropertySetter())
5141 # Make sure the tmp directory exists. Note that we don't check if
5142 # it's empty -- we want to be able to use, for example, "." to hold
5143 # tempfiles. But if we *did* want check if it were empty, we'd do
5144 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5145 if not os.path.exists(ctx.tmpdir):
5146 os.mkdir(ctx.tmpdir)
5147 elif not os.path.isdir(ctx.tmpdir):
5148 raise FatalError(
5149 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5150 " exists and is not a directory. Please make it be a directory,\n"
5151 " or specify some other directory for temporary files."
5152 % (ctx.tmpdir,))
5154 # But do lock the tmpdir, to avoid process clash.
5155 try:
5156 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5157 except OSError, e:
5158 if e.errno == errno.EACCES:
5159 raise FatalError("Permission denied:"
5160 + " No write access to directory '%s'." % ctx.tmpdir)
5161 if e.errno == errno.EEXIST:
5162 raise FatalError(
5163 "cvs2svn is using directory '%s' for temporary files, but\n"
5164 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5165 " cvs2svn process is currently using '%s' as its temporary\n"
5166 " workspace. If you are certain that is not the case,\n"
5167 " then remove the '%s/cvs2svn.lock' subdirectory."
5168 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5169 raise
5170 try:
5171 if profiling:
5172 import hotshot
5173 prof = hotshot.Profile('cvs2svn.hotshot')
5174 prof.runcall(convert, start_pass, end_pass)
5175 prof.close()
5176 else:
5177 convert(start_pass, end_pass)
5178 finally:
5179 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5180 except: pass
5183 if __name__ == '__main__':
5184 try:
5185 main()
5186 except FatalException, e:
5187 sys.stderr.write(str(e))
5188 sys.exit(1)