1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import string
30 import getopt
31 import stat
32 import md5
33 import marshal
34 import errno
35 import popen2
36 import types
37 try:
38 # Try to get access to a bunch of encodings for use with --encoding.
39 # See http://cjkpython.i18n.org/ for details.
40 import iconv_codec
41 except ImportError:
42 pass
44 # Warnings and errors start with these strings. They are typically
45 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
46 warning_prefix = "WARNING"
47 error_prefix = "ERROR"
49 # Make sure this Python is recent enough.
50 if sys.hexversion < 0x2000000:
51 sys.stderr.write("%s: Python 2.0 or higher required, "
52 "see www.python.org.\n" % error_prefix)
53 sys.exit(1)
55 # Pretend we have true booleans on older python versions
56 try:
57 True
58 except NameError:
59 True = 1
60 False = 0
62 # Opening pipes was a mess before Python 2.4, because some methods did
63 # not exist on some platforms, and some behaved differently on others.
64 # Python 2.4 solved this by adding the subprocess module, but since we
65 # cannot require such a new version, we cannot use it directly, but
66 # must implement a simplified Popen using the best means necessary.
68 # The SimplePopen class only has the following members and methods, all
69 # behaving as documented in the subprocess.Popen class:
70 # - stdin
71 # - stdout
72 # - stderr
73 # - wait
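#
# As an illustration only (not part of the original source; the command
# shown is arbitrary), typical use of SimplePopen looks roughly like this:
#
#   pipe = SimplePopen(['cat', '/etc/hosts'], capture_stderr=True)
#   pipe.stdin.close()
#   output = pipe.stdout.read()
#   errors = pipe.stderr.read()
#   status = pipe.wait()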
74 try:
75 # First try subprocess.Popen...
76 import subprocess
77 class SimplePopen:
78 def __init__(self, cmd, capture_stderr):
79 if capture_stderr:
80 stderr = subprocess.PIPE
81 else:
82 stderr = None
83 self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
84 stdout=subprocess.PIPE, stderr=stderr)
85 self.stdin = self._popen.stdin
86 self.stdout = self._popen.stdout
87 if capture_stderr:
88 self.stderr = self._popen.stderr
89 self.wait = self._popen.wait
90 except ImportError:
91 if hasattr(popen2, 'Popen3'):
92 # ...then try popen2.Popen3...
93 class SimplePopen:
94 def __init__(self, cmd, capture_stderr):
95 self._popen3 = popen2.Popen3(cmd, capture_stderr)
96 self.stdin = self._popen3.tochild
97 self.stdout = self._popen3.fromchild
98 if capture_stderr:
99 self.stderr = self._popen3.childerr
100 self.wait = self._popen3.wait
101 else:
102 # ...and if all fails, use popen2.popen3...
103 class SimplePopen:
104 def __init__(self, cmd, capture_stderr):
105 if type(cmd) != types.StringType:
106 cmd = argv_to_command_string(cmd)
107 self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')
108 def wait(self):
109 return self.stdout.close() or self.stdin.close() or \
110 self.stderr.close()
112 # DBM module selection
114 # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
115 # so that the dbhash module used by anydbm will use bsddb3.
116 try:
117 import bsddb3
118 sys.modules['bsddb'] = sys.modules['bsddb3']
119 except ImportError:
120 pass
122 # 2. These DBM modules are not good for cvs2svn.
123 import anydbm
124 if (anydbm._defaultmod.__name__ == 'dumbdbm'
125 or anydbm._defaultmod.__name__ == 'dbm'):
126 sys.stderr.write(
127 error_prefix
128 + ': your installation of Python does not contain a suitable\n'
129 + 'DBM module -- cvs2svn cannot continue.\n'
130 + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
131 sys.exit(1)
133 # 3. If we are using the old bsddb185 module, then prefer gdbm instead.
134 # Unfortunately, gdbm appears not to be trouble free, either.
135 if hasattr(anydbm._defaultmod, 'bsddb') \
136 and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
137 try:
138 gdbm = __import__('gdbm')
139 except ImportError:
140 sys.stderr.write(warning_prefix +
141 ': The version of the bsddb module found '
142 'on your computer has been reported to malfunction on some datasets, '
143 'causing KeyError exceptions. You may wish to upgrade your Python to '
144 'version 2.3 or later.\n')
145 else:
146 anydbm._defaultmod = gdbm
148 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
149 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
150 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
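# For illustration (example numbers invented for this comment): trunk_rev
# matches plain trunk revisions such as '1.7', branch_tag matches CVS branch
# numbers containing the magic '0' such as '1.7.0.2', and vendor_tag matches
# vendor branch numbers such as '1.1.1'.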
152 SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
154 # This really only matches standard '1.1.1.*'-style vendor revisions.
155 # One could conceivably have a file whose default branch is 1.1.3 or
156 # whatever, or was that at some point in time, with vendor revisions
157 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
158 # is the only time this regexp gets used), we'd have no basis for
159 # assuming that the non-standard vendor branch had ever been the
160 # default branch anyway, so we don't want this to match them.
161 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
163 # If this run's output is a repository, then (in the tmpdir) we use
164 # a dumpfile of this name for repository loads.
166 # If this run's output is a dumpfile, then this is default name of
167 # that dumpfile, but in the current directory (unless the user has
168 # specified a dumpfile path, of course, in which case it will be
169 # wherever the user said).
170 DUMPFILE = 'cvs2svn-dump'
172 # This file appears with different suffixes at different stages of
173 # processing. CVS revisions are cleaned and sorted here, for commit
174 # grouping. See design-notes.txt for details.
175 DATAFILE = 'cvs2svn-data'
177 # This file contains a marshalled copy of all the statistics that we
178 # gather throughout the various runs of cvs2svn. The data is stored as a
179 # marshalled dictionary.
180 STATISTICS_FILE = 'cvs2svn-statistics'
182 # This text file contains records (1 per line) that describe svn
183 # filesystem paths that are the opening and closing source revisions
184 # for copies to tags and branches. The format is as follows:
186 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
188 # where TYPE is either OPENING or CLOSING. The SYMBOL_NAME and
189 # SVN_REVNUM are the primary and secondary sorting criteria for
190 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
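#
# For example (illustrative values only), a symbol opened in r45 and closed
# in r47 of a hypothetical file might produce the records:
#
#   MY_TAG 45 O trunk/src/foo.c
#   MY_TAG 47 C trunk/src/foo.c
#
# (using the OPENING and CLOSING markers defined further below).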
191 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
192 # A sorted version of the above file.
193 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
195 # This file is a temporary file for storing symbolic_name -> closing
196 # CVSRevision until the end of our pass where we can look up the
197 # corresponding SVNRevNum for the closing revs and write these out to
198 # the SYMBOL_OPENINGS_CLOSINGS.
199 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
201 # Skeleton version of an svn filesystem.
202 # (These supersede and will eventually replace the two above.)
203 # See class SVNRepositoryMirror for how these work.
204 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
205 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
207 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
208 # SYMBOL_OPENINGS_CLOSINGS_SORTED
209 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
211 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
212 # the CVSRevision is the last such that is a source for those symbolic
213 # names. For example, if branch B's number is 1.3.0.2 in this CVS
214 # file, and this file's 1.3 is the latest (by date) revision among
215 # *all* CVS files that is a source for branch B, then the
216 # CVSRevision.unique_key() corresponding to this file at 1.3 would
217 # list at least B in its list.
218 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
220 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
221 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
222 ### the s-revs data in this database.
223 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
225 # Lists all symbolic names that are tags. Keys are strings (symbolic
226 # names), values are ignorable.
227 TAGS_DB = 'cvs2svn-tags.db'
229 # A list of all tags. Each line consists of the tag name and the number
230 # of files in which it exists, separated by a space.
231 TAGS_LIST = 'cvs2svn-tags.txt'
233 # A list of all branches. The file is stored as a plain text file
234 # to make it easy to look at in an editor. Each line contains the
235 # branch name, the number of files where the branch is created, the
236 # commit count, and a list of tags and branches that are defined on
237 # revisions in the branch.
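#
# For example (values invented for this comment), a line might read:
#
#   RELENG_1_0 12 34 RELENG_1_0_FIX TAG_1_0_FINAL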
238 BRANCHES_LIST = 'cvs2svn-branches.txt'
240 # These two databases provide a bidirectional mapping between
241 # CVSRevision.unique_key()s and Subversion revision numbers.
243 # The first maps CVSRevision.unique_key() to a number; the values are
244 # not unique.
246 # The second maps a number to a list of CVSRevision.unique_key()s.
247 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
248 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
250 # This database maps svn_revnums to tuples of (symbolic_name, date).
252 # The svn_revnums are the revision numbers of all non-primary
253 # SVNCommits. No primary SVNCommit has a key in this database.
255 # The date is stored for all commits in this database.
257 # For commits that fill symbolic names, the symbolic_name is stored.
258 # For commits that are default branch syncs, the symbolic_name is None.
259 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
261 # This database maps svn_revnums of a default branch synchronization
262 # commit to the svn_revnum of the primary SVNCommit that motivated it.
264 # (NOTE: Secondary commits that fill branches and tags also have a
265 # motivating commit, but we do not record it because it is (currently)
266 # not needed for anything.)
268 # This mapping is used when generating the log message for the commit
269 # that synchronizes the default branch with trunk.
270 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
272 # How many bytes to read at a time from a pipe. 128 kiB should be
273 # large enough to be efficient without wasting too much memory.
274 PIPE_READ_SIZE = 128 * 1024
276 # Record the default RCS branches, if any, for CVS filepaths.
278 # The keys are CVS filepaths, relative to the top of the repository
279 # and with the ",v" stripped off, so they match the cvs paths used in
280 # Commit.commit(). The values are vendor branch revisions, such as
281 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
282 # represents the highest vendor branch revision thought to have ever
283 # been head of the default branch.
285 # The reason we record a specific vendor revision, rather than a
286 # default branch number, is that there are two cases to handle:
288 # One case is simple. The RCS file lists a default branch explicitly
289 # in its header, such as '1.1.1'. In this case, we know that every
290 # revision on the vendor branch is to be treated as head of trunk at
291 # that point in time.
293 # But there's also a degenerate case. The RCS file does not currently
294 # have a default branch, yet we can deduce that for some period in the
295 # past it probably *did* have one. For example, the file has vendor
296 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
297 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
298 # case, we should record 1.1.1.96 as the last vendor revision to have
299 # been the head of the default branch.
300 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
302 # Records the author and log message for each changeset.
303 # The keys are author+log digests, the same kind used to identify
304 # unique revisions in the .revs, etc files. Each value is a tuple
305 # of two elements: '(author logmessage)'.
306 METADATA_DB = "cvs2svn-metadata.db"
308 # A temporary on-disk hash that maps CVSRevision unique keys to a new
309 # timestamp for that CVSRevision. These new timestamps are created in
310 # pass2, and this hash is used exclusively in pass2.
311 TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"
313 REVS_SUFFIX = '.revs'
314 CLEAN_REVS_SUFFIX = '.c-revs'
315 SORTED_REVS_SUFFIX = '.s-revs'
316 RESYNC_SUFFIX = '.resync'
318 SVN_INVALID_REVNUM = -1
320 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
322 # Things that can happen to a file.
323 OP_NOOP = '-'
324 OP_ADD = 'A'
325 OP_DELETE = 'D'
326 OP_CHANGE = 'C'
328 # A deltatext either does or doesn't represent some change.
329 DELTATEXT_NONEMPTY = 'N'
330 DELTATEXT_EMPTY = 'E'
332 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
334 # Constants used in SYMBOL_OPENINGS_CLOSINGS
335 OPENING = 'O'
336 CLOSING = 'C'
338 class FatalException(Exception):
339 """Exception thrown on a non-recoverable error.
341 If this exception is thrown by main(), it is caught by the global
342 layer of the program, its string representation is printed, and the
343 program is ended with an exit code of 1."""
345 pass
348 class FatalError(FatalException):
349 """A FatalException that prepends error_prefix to the message."""
351 def __init__(self, msg):
352 """Use (error_prefix + ': ' + MSG + '\n') as the error message."""
354 FatalException.__init__(self, '%s: %s\n' % (error_prefix, msg,))
357 def temp(basename):
358 """Return a path to BASENAME in Ctx().tmpdir.
359 This is a convenience function to save horizontal space in source."""
360 return os.path.join(Ctx().tmpdir, basename)
362 # Since the unofficial set also includes [/\] we need to translate those
363 # into ones that don't conflict with Subversion limitations.
364 def _clean_symbolic_name(name):
365 """Return symbolic name NAME, translating characters that Subversion
366 does not allow in a pathname."""
367 name = name.replace('/','++')
368 name = name.replace('\\','--')
369 return name
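# For example (hypothetical symbol name):
#   _clean_symbolic_name('rel/1-0\\fix')  ==>  'rel++1-0--fix'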
371 def _path_join(*components):
372 """Join two or more pathname COMPONENTS, inserting '/' as needed.
373 Empty components are skipped."""
374 return string.join(filter(None, components), '/')
376 def _path_split(path):
377 """Split the svn pathname PATH into a pair, (HEAD, TAIL).
379 This is similar to os.path.split(), but always uses '/' as path
380 separator. PATH is an svn path, which should not start with a '/'.
381 HEAD is everything before the last slash, and TAIL is everything
382 after. If PATH ends in a slash, TAIL will be empty. If there is no
383 slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
384 TAIL are empty."""
386 pos = path.rfind('/')
387 if pos == -1:
388 return ('', path,)
389 else:
390 return (path[:pos], path[pos+1:],)
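# Illustrative behaviour of _path_split (paths invented for this comment):
#   _path_split('trunk/foo/bar.c')  ==>  ('trunk/foo', 'bar.c')
#   _path_split('bar.c')            ==>  ('', 'bar.c')
#   _path_split('')                 ==>  ('', '')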
392 def to_utf8(value, mode='replace'):
393 """Encode (as Unicode) VALUE, trying the encodings in Ctx.encoding
394 as valid source encodings. Raise UnicodeError on failure of all
395 source encodings."""
396 ### FIXME: The 'replace' default mode should be an option,
397 ### like --encoding is.
398 for encoding in Ctx().encoding:
399 try:
400 return unicode(value, encoding, mode).encode('utf8')
401 except UnicodeError:
402 Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
403 % (encoding, value))
404 raise UnicodeError
406 def run_command(command):
407 if os.system(command):
408 raise FatalError('Command failed: "%s"' % (command,))
411 class CommandFailedException(Exception):
412 """Exception raised if check_command_runs() fails."""
414 pass
417 def check_command_runs(cmd, cmdname):
418 """Check whether the command CMD can be executed without errors.
420 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
421 name of the command as it should be included in exception error
422 messages.
424 This function checks three things: (1) the command can be run
425 without throwing an OSError; (2) it exits with status=0; (3) it
426 doesn't output anything to stderr. If any of these conditions is
427 not met, raise a CommandFailedException describing the problem."""
429 try:
430 pipe = SimplePopen(cmd, True)
431 except OSError, e:
432 raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
433 pipe.stdin.close()
434 pipe.stdout.read()
435 errmsg = pipe.stderr.read()
436 status = pipe.wait()
437 if status != 0 or errmsg:
438 msg = 'error executing %s: status %s' % (cmdname, status,)
439 if errmsg:
440 msg += ', error output:\n%s' % (errmsg,)
441 raise CommandFailedException(msg)
444 class CVSRepository:
445 """A CVS repository from which data can be extracted."""
446 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
447 """Return a command string, and the pipe created using that
448 string. C_REV is a CVSRevision. If SUPPRESS_KEYWORD_SUBSTITUTION
449 is True, then suppress the substitution of RCS/CVS keywords in the
450 output. The pipe returns the text of that CVS Revision."""
451 raise NotImplementedError
454 class CVSRepositoryViaRCS(CVSRepository):
455 """A CVSRepository accessed via RCS."""
457 def __init__(self):
458 try:
459 check_command_runs([ 'co', '-V' ], 'co')
460 except CommandFailedException, e:
461 raise FatalError('%s\n'
462 'Please check that co is installed and in your PATH\n'
463 '(it is a part of the RCS software).' % (e,))
465 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
466 pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
467 if suppress_keyword_substitution:
468 pipe_cmd.append('-kk')
469 pipe_cmd.append(c_rev.rcs_path())
470 pipe = SimplePopen(pipe_cmd, True)
471 pipe.stdin.close()
472 return pipe_cmd, pipe
475 class CVSRepositoryViaCVS(CVSRepository):
476 """A CVSRepository accessed via CVS."""
478 def __init__(self):
479 ctx = Ctx()
480 # Ascend above the specified root if necessary, to find the
481 # cvs_repository_root (a directory containing a CVSROOT directory)
482 # and the cvs_module (the path of the conversion root within the
483 # cvs repository). NB: cvs_module must be separated by '/', *not* by
484 # os.sep.
485 self.cvs_repository_root = os.path.abspath(ctx.cvsroot)
486 prev_cvs_repository_root = None
487 self.cvs_module = ""
488 while prev_cvs_repository_root != self.cvs_repository_root:
489 if os.path.isdir(os.path.join(self.cvs_repository_root, 'CVSROOT')):
490 break
491 prev_cvs_repository_root = self.cvs_repository_root
492 self.cvs_repository_root, module_component = \
493 os.path.split(self.cvs_repository_root)
494 self.cvs_module = module_component + "/" + self.cvs_module
495 else:
496 # Hit the root (of the drive, on Windows) without finding a CVSROOT dir.
497 raise FatalError("the path '%s' is not a CVS repository, nor a path "
498 "within a CVS repository. A CVS repository contains "
499 "a CVSROOT directory within its root directory."
500 % (ctx.cvsroot,))
501 os.environ['CVSROOT'] = self.cvs_repository_root
503 def cvs_ok(global_arguments):
504 check_command_runs(
505 [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
507 self.global_arguments = [ "-q", "-R" ]
508 try:
509 cvs_ok(self.global_arguments)
510 except CommandFailedException, e:
511 self.global_arguments = [ "-q" ]
512 try:
513 cvs_ok(self.global_arguments)
514 except CommandFailedException, e:
515 raise FatalError(
516 '%s\n'
517 'Please check that cvs is installed and in your PATH.' % (e,))
519 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
520 pipe_cmd = [ 'cvs' ] + self.global_arguments + \
521 [ 'co', '-r' + c_rev.rev, '-p' ]
522 if suppress_keyword_substitution:
523 pipe_cmd.append('-kk')
524 pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
525 pipe = SimplePopen(pipe_cmd, True)
526 pipe.stdin.close()
527 return pipe_cmd, pipe
530 def generate_ignores(c_rev):
531 # Read in props
532 pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
533 buf = pipe.stdout.read(PIPE_READ_SIZE)
534 raw_ignore_val = ""
535 while buf:
536 raw_ignore_val = raw_ignore_val + buf
537 buf = pipe.stdout.read(PIPE_READ_SIZE)
538 pipe.stdout.close()
539 error_output = pipe.stderr.read()
540 exit_status = pipe.wait()
541 if exit_status:
542 raise FatalError("The command '%s' failed with exit status: %s\n"
543 "and the following output:\n"
544 "%s" % (pipe_cmd, exit_status, error_output))
546 # Tweak props: First, convert any spaces to newlines...
547 raw_ignore_val = '\n'.join(raw_ignore_val.split())
548 raw_ignores = raw_ignore_val.split('\n')
549 ignore_vals = [ ]
550 for ignore in raw_ignores:
551 # Reset the list if we encounter a '!'
552 # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
553 if ignore == '!':
554 ignore_vals = [ ]
555 continue
556 # Skip empty lines
557 if len(ignore) == 0:
558 continue
559 ignore_vals.append(ignore)
560 return ignore_vals
562 # Return a string that has not been returned by gen_key() before.
563 gen_key_base = 0L
564 def gen_key():
565 global gen_key_base
566 key = '%x' % gen_key_base
567 gen_key_base = gen_key_base + 1
568 return key
570 # ============================================================================
571 # This code is copied with a few modifications from:
572 # subversion/subversion/bindings/swig/python/svn/core.py
574 if sys.platform == "win32":
575 _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')
577 def escape_shell_arg(arg):
578 # The (very strange) parsing rules used by the C runtime library are
579 # described at:
580 # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp
582 # double up slashes, but only if they are followed by a quote character
583 arg = re.sub(_escape_shell_arg_re, r'\1\1\2', arg)
585 # surround by quotes and escape quotes inside
586 arg = '"' + string.replace(arg, '"', '"^""') + '"'
587 return arg
590 def argv_to_command_string(argv):
591 """Flatten a list of command line arguments into a command string.
593 The resulting command string is expected to be passed to the system
594 shell which os functions like popen() and system() invoke internally."""
597 # According to cmd's usage notes (cmd /?), it parses the command line by
598 # "seeing if the first character is a quote character and if so, stripping
599 # the leading character and removing the last quote character."
600 # So to prevent the argument string from being changed we add an extra set
601 # of quotes around it here.
602 return '"' + string.join(map(escape_shell_arg, argv), " ") + '"'
604 else:
605 def escape_shell_arg(str):
606 return "'" + string.replace(str, "'", "'\\''") + "'"
608 def argv_to_command_string(argv):
609 """Flatten a list of command line arguments into a command string.
611 The resulting command string is expected to be passed to the system
612 shell which os functions like popen() and system() invoke internally."""
615 return string.join(map(escape_shell_arg, argv), " ")
616 # ============================================================================
618 def format_date(date):
619 """Return an svn-compatible date string for DATE (seconds since epoch)."""
620 # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
621 return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
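# For instance (worked example added for this comment),
# format_date(1033310699) returns '2002-09-29T14:44:59.000000Z', the sample
# date shown above.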
623 def sort_file(infile, outfile):
624 # sort the log files
626 # GNU sort will sort our dates differently (incorrectly!) if our
627 # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
628 # it to 'C'
629 if os.environ.has_key('LC_ALL'):
630 lc_all_tmp = os.environ['LC_ALL']
631 else:
632 lc_all_tmp = None
633 os.environ['LC_ALL'] = 'C'
634 # The -T option to sort has a nice side effect. The Win32 sort is
635 # case insensitive and cannot be used, and since it does not
636 # understand the -T option and dies if we try to use it, there is
637 # no risk that we use that sort by accident.
638 run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
639 if lc_all_tmp is None:
640 del os.environ['LC_ALL']
641 else:
642 os.environ['LC_ALL'] = lc_all_tmp
644 def match_regexp_list(regexp_list, string):
645 """Test whether STRING matches any of the compiled regexps in
646 REGEXP_LIST."""
647 for regexp in regexp_list:
648 if regexp.match(string):
649 return True
650 return False
652 class LF_EOL_Filter:
653 """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
654 into LFs only."""
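# For example (illustrative input): a stream containing 'a\r\nb\rc\n' reads
# back as 'a\nb\nc\n' through this filter, even when a CR and the LF that
# follows it are split across two read() calls (handled via self.carry_cr).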
655 def __init__(self, stream):
656 self.stream = stream
657 self.carry_cr = False
658 self.eof = False
660 def read(self, size):
661 while True:
662 buf = self.stream.read(size)
663 self.eof = len(buf) == 0
664 if self.carry_cr:
665 buf = '\r' + buf
666 self.carry_cr = False
667 if not self.eof and buf[-1] == '\r':
668 self.carry_cr = True
669 buf = buf[:-1]
670 buf = string.replace(buf, '\r\n', '\n')
671 buf = string.replace(buf, '\r', '\n')
672 if len(buf) > 0 or self.eof:
673 return buf
676 # These constants represent the log levels that this script supports
677 LOG_WARN = -1
678 LOG_QUIET = 0
679 LOG_NORMAL = 1
680 LOG_VERBOSE = 2
681 class Log:
682 """A Simple logging facility. Each line will be timestamped is
683 self.use_timestamps is TRUE. This class is a Borg, see
684 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
685 __shared_state = {}
686 def __init__(self):
687 self.__dict__ = self.__shared_state
688 if self.__dict__:
689 return
690 self.log_level = LOG_NORMAL
691 # Set this to true if you want to see timestamps on each line output.
692 self.use_timestamps = None
693 self.logger = sys.stdout
695 def _timestamp(self):
696 """Output a detailed timestamp at the beginning of each line output."""
697 self.logger.write(time.strftime('[%Y-%m-%d %H:%M:%S %Z] - '))
699 def write(self, log_level, *args):
700 """This is the public method to use for writing to a file. Only
701 messages whose LOG_LEVEL is <= self.log_level will be printed. If
702 there are multiple ARGS, they will be separated by a space."""
703 if log_level > self.log_level:
704 return
705 if self.use_timestamps:
706 self._timestamp()
707 self.logger.write(' '.join(map(str,args)) + "\n")
708 # Ensure that log output doesn't get out-of-order with respect to
709 # stderr output.
710 self.logger.flush()
713 class Cleanup:
714 """This singleton class manages any files created by cvs2svn. When
715 you first create a file, call Cleanup.register, passing the
716 filename and the last pass in which you need the file. After the end
717 of that pass, your file will be cleaned up after running an optional
718 callback. This class is a Borg, see
719 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
721 __shared_state = {}
722 def __init__(self):
723 self.__dict__ = self.__shared_state
724 if self.__dict__:
725 return
726 self._log = {}
727 self._callbacks = {}
729 def register(self, file, which_pass, callback=None):
730 """Register FILE for cleanup at the end of WHICH_PASS, running
731 function CALLBACK prior to removal. Registering a given FILE is
732 idempotent; you may register as many times as you wish, but it
733 will only be cleaned up once.
735 Note that if a file is registered multiple times, only the first
736 callback registered for that file will be called at cleanup
737 time. Also note that if you register a database file you must
738 close the database before cleanup, e.g. using a callback."""
739 if not self._log.has_key(which_pass):
740 self._log[which_pass] = {}
741 self._log[which_pass][file] = 1
742 if callback and not self._callbacks.has_key(file):
743 self._callbacks[file] = callback
745 def cleanup(self, which_pass):
746 """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
747 if not self._log.has_key(which_pass):
748 return
749 for file in self._log[which_pass].keys():
750 Log().write(LOG_VERBOSE, "Deleting", file)
751 if self._callbacks.has_key(file):
752 self._callbacks[file]()
753 os.unlink(file)
756 # Always use these constants for opening databases.
757 DB_OPEN_READ = 'r'
758 DB_OPEN_NEW = 'n'
760 # A wrapper for anydbm that uses the marshal module to store items as
761 # strings.
762 class Database:
763 def __init__(self, filename, mode):
764 # pybsddb3 has a bug which prevents it from working with
765 # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
766 # causes the DB_TRUNCATE flag to be passed, which is disallowed
767 # for databases protected by lock and transaction support
768 # (bsddb databases use locking from bsddb version 4.2.4 onwards).
770 # Therefore, manually perform the removal (we can do this, because
771 # we know that for bsddb - but *not* anydbm in general - the database
772 # consists of one file with the name we specify, rather than several
773 # based on that name).
774 if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
775 if os.path.isfile(filename):
776 os.unlink(filename)
777 mode = 'c'
779 self.db = anydbm.open(filename, mode)
781 def has_key(self, key):
782 return self.db.has_key(key)
784 def __getitem__(self, key):
785 return marshal.loads(self.db[key])
787 def __setitem__(self, key, value):
788 self.db[key] = marshal.dumps(value)
790 def __delitem__(self, key):
791 del self.db[key]
793 def get(self, key, default):
794 if self.has_key(key):
795 return self.__getitem__(key)
796 return default
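# An illustrative sketch of how the Database wrapper above is used (file
# name and values invented for this comment):
#
#   db = Database(temp('example.db'), DB_OPEN_NEW)
#   db['some-key'] = {'nested': [1, 2, 3]}   # marshalled to a string on store
#   value = db['some-key']                   # unmarshalled on retrieval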
799 class StatsKeeper:
800 __shared_state = { }
801 def __init__(self):
802 self.__dict__ = self.__shared_state
803 if self.__dict__:
804 return
805 self.filename = temp(STATISTICS_FILE)
806 Cleanup().register(self.filename, pass8)
807 # This can get kinda large, so we don't store it in our data dict.
808 self.repos_files = { }
810 if os.path.exists(self.filename):
811 self.unarchive()
812 else:
813 self.data = { 'cvs_revs_count' : 0,
814 'tags': { },
815 'branches' : { },
816 'repos_size' : 0,
817 'repos_file_count' : 0,
818 'svn_rev_count' : None,
819 'first_rev_date' : 1L<<32,
820 'last_rev_date' : 0,
821 'pass_timings' : { },
822 'start_time' : 0,
823 'end_time' : 0 }
826 def log_duration_for_pass(self, duration, pass_num):
827 self.data['pass_timings'][pass_num] = duration
829 def set_start_time(self, start):
830 self.data['start_time'] = start
832 def set_end_time(self, end):
833 self.data['end_time'] = end
835 def _bump_item(self, key, amount=1):
836 self.data[key] = self.data[key] + amount
838 def reset_c_rev_info(self):
839 self.data['cvs_revs_count'] = 0
840 self.data['tags'] = { }
841 self.data['branches'] = { }
843 def record_c_rev(self, c_rev):
844 self._bump_item('cvs_revs_count')
846 for tag in c_rev.tags:
847 self.data['tags'][tag] = None
848 for branch in c_rev.branches:
849 self.data['branches'][branch] = None
851 if c_rev.timestamp < self.data['first_rev_date']:
852 self.data['first_rev_date'] = c_rev.timestamp
854 if c_rev.timestamp > self.data['last_rev_date']:
855 self.data['last_rev_date'] = c_rev.timestamp
857 # Only add the size if this is the first time we see the file.
858 if not self.repos_files.has_key(c_rev.fname):
859 self._bump_item('repos_size', c_rev.file_size)
860 self.repos_files[c_rev.fname] = None
862 self.data['repos_file_count'] = len(self.repos_files)
864 def set_svn_rev_count(self, count):
865 self.data['svn_rev_count'] = count
867 def svn_rev_count(self):
868 return self.data['svn_rev_count']
870 def archive(self):
871 open(self.filename, 'w').write(marshal.dumps(self.data))
873 def unarchive(self):
874 self.data = marshal.loads(open(self.filename, 'r').read())
876 def __str__(self):
877 svn_revs_str = ""
878 if self.data['svn_rev_count'] is not None:
879 svn_revs_str = ('Total SVN Commits: %10s\n'
880 % self.data['svn_rev_count'])
882 return ('\n' \
883 'cvs2svn Statistics:\n' \
884 '------------------\n' \
885 'Total CVS Files: %10i\n' \
886 'Total CVS Revisions: %10i\n' \
887 'Total Unique Tags: %10i\n' \
888 'Total Unique Branches: %10i\n' \
889 'CVS Repos Size in KB: %10i\n' \
890 '%s' \
891 'First Revision Date: %s\n' \
892 'Last Revision Date: %s\n' \
893 '------------------' \
894 % (self.data['repos_file_count'],
895 self.data['cvs_revs_count'],
896 len(self.data['tags']),
897 len(self.data['branches']),
898 (self.data['repos_size'] / 1024),
899 svn_revs_str,
900 time.ctime(self.data['first_rev_date']),
901 time.ctime(self.data['last_rev_date']), ))
904 def timings(self):
905 passes = self.data['pass_timings'].keys()
906 passes.sort()
907 str = 'Timings:\n------------------\n'
909 def desc(val):
910 if val == 1: return "second"
911 return "seconds"
913 for pass_num in passes:
914 duration = int(self.data['pass_timings'][pass_num])
915 p_str = ('pass %d:%6d %s\n'
916 % (pass_num, duration, desc(duration)))
917 str = str + p_str
919 total = int(self.data['end_time'] - self.data['start_time'])
920 str = str + ('total: %6d %s' % (total, desc(total)))
921 return str
924 class LastSymbolicNameDatabase:
925 """ Passing every CVSRevision in s-revs to this class will result in
926 a Database whose key is the last CVS Revision a symbolic name was
927 seen in, and whose value is a list of all symbolic names that were
928 last seen in that revision."""
929 def __init__(self, mode):
930 self.symbols = {}
931 self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
932 Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)
934 # Once we've gone through all the revs,
935 # symbols.keys() will be a list of all tags and branches, and
936 # their corresponding values will be a key into the last CVS revision
937 # that they were used in.
938 def log_revision(self, c_rev):
939 # Gather last CVS Revision for symbolic name info and tag info
940 for tag in c_rev.tags:
941 self.symbols[tag] = c_rev.unique_key()
942 if c_rev.op != OP_DELETE:
943 for branch in c_rev.branches:
944 self.symbols[branch] = c_rev.unique_key()
946 # Creates an inversion of symbols above--a dictionary of lists (key
947 # = CVS rev unique_key: val = list of symbols that close in that
948 # rev).
949 def create_database(self):
950 for sym, rev_unique_key in self.symbols.items():
951 if self.symbol_revs_db.has_key(rev_unique_key):
952 ary = self.symbol_revs_db[rev_unique_key]
953 ary.append(sym)
954 self.symbol_revs_db[rev_unique_key] = ary
955 else:
956 self.symbol_revs_db[rev_unique_key] = [sym]
959 class CVSRevisionDatabase:
960 """A Database to store CVSRevision objects and retrieve them by their
961 unique_key()."""
963 def __init__(self, mode):
964 """Initialize an instance, opening database in MODE (like the MODE
965 argument to Database or anydbm.open())."""
966 self.cvs_revs_db = Database(temp(CVS_REVS_DB), mode)
967 Cleanup().register(temp(CVS_REVS_DB), pass8)
969 def log_revision(self, c_rev):
970 """Add C_REV, a CVSRevision, to the database."""
971 self.cvs_revs_db[c_rev.unique_key()] = str(c_rev)
973 def get_revision(self, unique_key):
974 """Return the CVSRevision stored under UNIQUE_KEY."""
975 return CVSRevision(Ctx(), self.cvs_revs_db[unique_key])
978 class TagsDatabase(Database):
979 """A Database to store which symbolic names are tags.
980 Each key is a tag name.
981 The value has no meaning, and should be set to None."""
982 def __init__(self, mode):
983 Database.__init__(self, temp(TAGS_DB), mode)
984 Cleanup().register(temp(TAGS_DB), pass8)
987 class Project:
988 """A project within a CVS repository."""
990 def __init__(self, cvs_root, trunk_path, branches_path, tags_path):
991 """Create a new Project record.
993 CVS_ROOT is the main CVS directory for this project (within the
994 filesystem). TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH are the
995 full, normalized directory names in svn for the corresponding part
996 of the repository."""
998 self.cvs_root = os.path.normpath(cvs_root)
999 self.trunk_path = trunk_path
1000 self.branches_path = branches_path
1001 self.tags_path = tags_path
1002 verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)
1004 def is_source(self, svn_path):
1005 """Return True iff SVN_PATH is a legitimate source for this project.
1007 Legitimate paths are self.trunk_path or any directory directly
1008 under self.branches_path."""
1010 if svn_path == self.trunk_path:
1011 return True
1013 (head, tail,) = _path_split(svn_path)
1014 if head == self.branches_path:
1015 return True
1017 return False
1019 def is_unremovable(self, svn_path):
1020 """Return True iff the specified path must not be removed."""
1022 return svn_path in [self.trunk_path, self.branches_path, self.tags_path]
1024 def relative_name(self, fname):
1025 """Return the path to FNAME relative to cvs_root, with ',v' removed.
1027 FNAME is a filesystem name that has to begin (textually) with
1028 self.cvs_root and end with ',v'."""
1030 if not fname.startswith(self.cvs_root):
1031 raise FatalError(
1032 "relative_name: '%s' is not a sub-path of '%s'"
1033 % (fname, self.cvs_root,))
1034 if not fname.endswith(',v'):
1035 raise FatalError("relative_name: '%s' does not end with ',v'"
1036 % (fname,))
1037 l = len(self.cvs_root)
1038 if fname[l] == os.sep:
1039 l += 1
1040 return string.replace(fname[l:-2], os.sep, '/')
1042 def get_branch_path(self, branch_name):
1043 """Return the svnpath for the branch named BRANCH_NAME."""
1045 return _path_join(self.branches_path, _clean_symbolic_name(branch_name))
1047 def get_tag_path(self, tag_name):
1048 """Return the svnpath for the tag named TAG_NAME."""
1050 return _path_join(self.tags_path, _clean_symbolic_name(tag_name))
1052 def make_path(self, path, branch_name=None):
1053 """Return the trunk path or branch path for PATH.
1055 PATH is a filesystem path relative to cvs_root. If BRANCH_NAME is
1056 None, then return the svn path for this file on trunk; otherwise,
1057 return the svn path for this file on the specified branch."""
1059 # For a while, we treated each top-level subdir of the CVS
1060 # repository as a "project root" and interpolated the appropriate
1061 # genealogy (trunk|tag|branch) in according to the official
1062 # recommended layout. For example, the path '/foo/bar/baz.c' on
1063 # branch 'Rel2' would become
1065 # /foo/branches/Rel2/bar/baz.c
1067 # and on trunk it would become
1069 # /foo/trunk/bar/baz.c
1071 # However, we went back to the older and simpler method of just
1072 # prepending the genealogy to the front, instead of interpolating.
1073 # So now we produce:
1075 # /branches/Rel2/foo/bar/baz.c
1076 # /trunk/foo/bar/baz.c
1078 # Why? Well, Jack Repenning pointed out that this way is much
1079 # friendlier to "anonymously rooted subtrees" (that's a tree where
1080 # the name of the top level dir doesn't matter, the point is that if
1081 # you cd into it and, say, run 'make', something good will happen).
1082 # By interpolating, we made it impossible to point cvs2svn at some
1083 # subdir in the CVS repository and convert it as a project, because
1084 # we'd treat every subdir underneath it as an independent project
1085 # root, which is probably not what the user wanted.
1087 # Also, see Blair Zajac's post
1089 # http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=38965
1091 # and the surrounding thread, for why what people really want is a
1092 # way of specifying an in-repository prefix path, not interpolation.
1094 if branch_name:
1095 return _path_join(self.get_branch_path(branch_name), path)
1096 else:
1097 return _path_join(self.trunk_path, path)
1100 class CVSRevision:
1101 def __init__(self, ctx, *args):
1102 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
1104 If CTX is None, the following members and methods of the
1105 instantiated CVSRevision class object will be unavailable (or
1106 simply will not work correctly, if at all):
1107 cvs_path
1108 svn_path
1109 is_default_branch_revision()
1111 (Note that this class treats CTX as const, because the caller
1112 likely passed in a Borg instance of a Ctx. The reason this class
1113 takes CTX as a parameter, instead of just instantiating a Ctx
1114 itself, is that this class should be usable outside cvs2svn.)
1116 If there is one argument in ARGS, it is a string, in the format of
1117 a line from a revs file. Do *not* include a trailing newline.
1119 If there are multiple ARGS, there must be 17 of them,
1120 comprising a parsed revs line:
1121 timestamp --> (int) date stamp for this cvs revision
1122 digest --> (string) digest of author+logmsg
1123 prev_timestamp --> (int) date stamp for the previous cvs revision
1124 next_timestamp --> (int) date stamp for the next cvs revision
1125 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
1126 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
1127 rev --> (string) this CVS rev, e.g., "1.3"
1128 next_rev --> (string or None) next CVS rev, e.g., "1.4"
1129 file_in_attic --> (char or None) true if RCS file is in Attic
1130 file_executable --> (char or None) true if RCS file has exec bit set.
1131 file_size --> (int) size of the RCS file
1132 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
1133 fname --> (string) relative path of file in CVS repos
1134 mode --> (string or None) "kkv", "kb", etc.
1135 branch_name --> (string or None) branch on which this rev occurred
1136 tags --> (list of strings) all tags on this revision
1137 branches --> (list of strings) all branches rooted in this rev
1139 The two forms of initialization are equivalent.
1141 WARNING: Due to the resync process in pass2, prev_timestamp or
1142 next_timestamp may be incorrect in the c-revs or s-revs files."""
1144 self._ctx = ctx
1145 if len(args) == 17:
1146 (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
1147 self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
1148 self.file_executable, self.file_size, self.deltatext_code,
1149 self.fname,
1150 self.mode, self.branch_name, self.tags, self.branches) = args
1151 elif len(args) == 1:
1152 data = args[0].split(' ', 15)
1153 (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
1154 self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
1155 self.file_executable, self.file_size, self.deltatext_code,
1156 self.mode, self.branch_name, numtags, remainder) = data
1157 # Patch up data items which are not simple strings
1158 self.timestamp = int(self.timestamp, 16)
1159 if self.prev_timestamp == "*":
1160 self.prev_timestamp = 0
1161 else:
1162 self.prev_timestamp = int(self.prev_timestamp)
1163 if self.next_timestamp == "*":
1164 self.next_timestamp = 0
1165 else:
1166 self.next_timestamp = int(self.next_timestamp)
1167 if self.prev_rev == "*":
1168 self.prev_rev = None
1169 if self.next_rev == "*":
1170 self.next_rev = None
1171 if self.file_in_attic == "*":
1172 self.file_in_attic = None
1173 if self.file_executable == "*":
1174 self.file_executable = None
1175 self.file_size = int(self.file_size)
1176 if self.mode == "*":
1177 self.mode = None
1178 if self.branch_name == "*":
1179 self.branch_name = None
1180 numtags = int(numtags)
1181 tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
1182 self.tags = tags_and_numbranches_and_remainder[:-2]
1183 numbranches = int(tags_and_numbranches_and_remainder[-2])
1184 remainder = tags_and_numbranches_and_remainder[-1]
1185 branches_and_fname = remainder.split(' ', numbranches)
1186 self.branches = branches_and_fname[:-1]
1187 self.fname = branches_and_fname[-1]
1188 else:
1189 raise TypeError, 'CVSRevision() takes 2 or 18 arguments (%d given)' % \
1190 (len(args) + 1)
1191 if ctx is not None:
1192 self.cvs_path = ctx.project.relative_name(self.fname)
1193 self.svn_path = ctx.project.make_path(self.cvs_path, self.branch_name)
1195 # The 'primary key' of a CVS Revision is the revision number + the
1196 # filename. To provide a unique key (say, for a dict), we just glom
1197 # them together in a string. By passing in self.prev_rev or
1198 # self.next_rev, you can get the unique key for their respective
1199 # CVSRevisions.
1200 def unique_key(self, revnum="0"):
1201 if revnum is "0":
1202 revnum = self.rev
1203 elif revnum is None:
1204 return None
1205 return revnum + "/" + self.fname
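# For example (hypothetical file), calling unique_key() for revision '1.3'
# of 'proj/foo.c,v' yields '1.3/proj/foo.c,v'.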
1207 def __str__(self):
1208 return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
1209 % (self.timestamp, self.digest, self.prev_timestamp or "*",
1210 self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
1211 self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
1212 (self.file_executable or "*"),
1213 self.file_size,
1214 self.deltatext_code, (self.mode or "*"),
1215 (self.branch_name or "*"),
1216 len(self.tags), self.tags and " " or "", " ".join(self.tags),
1217 len(self.branches), self.branches and " " or "",
1218 " ".join(self.branches),
1219 self.fname, ))
1221 # Returns true if this CVSRevision is the opening CVSRevision for
1222 # NAME (for this RCS file).
1223 def opens_symbolic_name(self, name):
1224 if name in self.tags:
1225 return 1
1226 if name in self.branches:
1227 # If this c_rev opens a branch and our op is OP_DELETE, then
1228 # that means that the file that this c_rev belongs to was
1229 # created on the branch, so for all intents and purposes, this
1230 # c_rev is *technically* not an opening. See Issue #62 for more
1231 # information.
1232 if self.op != OP_DELETE:
1233 return 1
1234 return 0
1236 def is_default_branch_revision(self):
1237 """Return 1 if SELF.rev of SELF.cvs_path is a default branch
1238 revision according to DEFAULT_BRANCHES_DB (see the conditions
1239 documented there), else return None."""
1240 if self._ctx._default_branches_db.has_key(self.cvs_path):
1241 val = self._ctx._default_branches_db[self.cvs_path]
1242 val_last_dot = val.rindex(".")
1243 our_last_dot = self.rev.rindex(".")
1244 default_branch = val[:val_last_dot]
1245 our_branch = self.rev[:our_last_dot]
1246 default_rev_component = int(val[val_last_dot + 1:])
1247 our_rev_component = int(self.rev[our_last_dot + 1:])
1248 if (default_branch == our_branch
1249 and our_rev_component <= default_rev_component):
1250 return 1
1251 # else
1252 return None
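# For example (hypothetical data): if DEFAULT_BRANCHES_DB maps this file's
# cvs_path to '1.1.1.96', then is_default_branch_revision() returns 1 for
# revision 1.1.1.50 (same branch, 50 <= 96) and None for 1.1.1.97 or 1.2.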
1254 def rcs_path(self):
1255 """Returns the actual filesystem path to the RCS file of this
1256 CVSRevision."""
1257 if self.file_in_attic is None:
1258 return self.fname
1259 else:
1260 basepath, filename = os.path.split(self.fname)
1261 return os.path.join(basepath, 'Attic', filename)
1263 def filename(self):
1264 "Return the last path component of self.fname, minus the ',v'"
1265 return os.path.split(self.fname)[-1][:-2]
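# For example (hypothetical file), with self.fname == 'proj/foo.c,v',
# rcs_path() returns 'proj/foo.c,v' (or 'proj/Attic/foo.c,v' if the file is
# in the Attic, assuming '/' as os.sep) and filename() returns 'foo.c'.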
1267 class SymbolDatabase:
1268 """This database records information on all symbols in the RCS
1269 files. It is created in pass 1 and it is used in pass 2."""
1270 def __init__(self):
1271 # A hash that maps tag names to commit counts
1272 self.tags = { }
1273 # A hash that maps branch names to lists of the format
1274 # [ create_count, commit_count, blockers ], where blockers
1275 # is a hash that lists the symbols that depend on the
1276 # branch. The blockers hash is used as a set, so the
1277 # values are not used.
1278 self.branches = { }
1280 def register_tag_creation(self, name):
1281 """Register the creation of the tag NAME."""
1282 if not self.tags.has_key(name):
1283 self.tags[name] = 0
1284 self.tags[name] += 1
1286 def _branch(self, name):
1287 """Helper function to get a branch node that will create and
1288 initialize the node if it does not exist."""
1289 if not self.branches.has_key(name):
1290 self.branches[name] = [ 0, 0, { } ]
1291 return self.branches[name]
1293 def register_branch_creation(self, name):
1294 """Register the creation of the branch NAME."""
1295 self._branch(name)[0] += 1
1297 def register_branch_commit(self, name):
1298 """Register a commit on the branch NAME."""
1299 self._branch(name)[1] += 1
1301 def register_branch_blocker(self, name, blocker):
1302 """Register BLOCKER as a blocker on the branch NAME."""
1303 self._branch(name)[2][blocker] = None
1305 def branch_has_commit(self, name):
1306 """Return non-zero if NAME has commits. Returns 0 if name
1307 is not a branch or if it has no commits."""
1308 return self.branches.has_key(name) and self.branches[name][1]
1310 def find_excluded_symbols(self, regexp_list):
1311 """Returns a hash of all symbols thaht match the regexps in
1312 REGEXP_LISTE. The hash is used as a set so the values are
1313 not used."""
1314 excludes = { }
1315 for tag in self.tags.keys():
1316 if match_regexp_list(regexp_list, tag):
1317 excludes[tag] = None
1318 for branch in self.branches.keys():
1319 if match_regexp_list(regexp_list, branch):
1320 excludes[branch] = None
1321 return excludes
1323 def find_branch_exclude_blockers(self, branch, excludes):
1324 """Find all blockers of BRANCH, excluding the ones in the hash
1325 EXCLUDES."""
1326 blockers = { }
1327 if excludes.has_key(branch):
1328 for blocker in self.branches[branch][2]:
1329 if not excludes.has_key(blocker):
1330 blockers[blocker] = None
1331 return blockers
1333 def find_blocked_excludes(self, excludes):
1334 """Find all branches not in EXCLUDES that have blocking symbols that
1335 are not themselves excluded. Return a hash that maps branch names
1336 to a hash of blockers. The hash of blockers is used as a set so the
1337 values are not used."""
1338 blocked_branches = { }
1339 for branch in self.branches.keys():
1340 blockers = self.find_branch_exclude_blockers(branch, excludes)
1341 if blockers:
1342 blocked_branches[branch] = blockers
1343 return blocked_branches
1345 def find_mismatches(self, excludes=None):
1346 """Find all symbols that are defined as both tags and branches,
1347 excluding the ones in EXCLUDES. Returns a list of 4-tuples with
1348 the symbol name, tag count, branch count and commit count."""
1349 if excludes is None:
1350 excludes = { }
1351 mismatches = [ ]
1352 for branch in self.branches.keys():
1353 if not excludes.has_key(branch) and self.tags.has_key(branch):
1354 mismatches.append((branch, # name
1355 self.tags[branch], # tag count
1356 self.branches[branch][0], # branch count
1357 self.branches[branch][1])) # commit count
1358 return mismatches
1360 def read(self):
1361 """Read the symbol database from files."""
1362 f = open(temp(TAGS_LIST))
1363 while 1:
1364 line = f.readline()
1365 if not line:
1366 break
1367 tag, count = line.split()
1368 self.tags[tag] = int(count)
1370 f = open(temp(BRANCHES_LIST))
1371 while 1:
1372 line = f.readline()
1373 if not line:
1374 break
1375 words = line.split()
1376 self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
1377 for blocker in words[3:]:
1378 self.branches[words[0]][2][blocker] = None
1380 def write(self):
1381 """Store the symbol database to files."""
1382 f = open(temp(TAGS_LIST), "w")
1383 Cleanup().register(temp(TAGS_LIST), pass2)
1384 for tag, count in self.tags.items():
1385 f.write("%s %d\n" % (tag, count))
1387 f = open(temp(BRANCHES_LIST), "w")
1388 Cleanup().register(temp(BRANCHES_LIST), pass2)
1389 for branch, info in self.branches.items():
1390 f.write("%s %d %d" % (branch, info[0], info[1]))
1391 if info[2]:
1392 f.write(" ")
1393 f.write(" ".join(info[2].keys()))
1394 f.write("\n")
1396 class CollectData(cvs2svn_rcsparse.Sink):
1397 def __init__(self):
1398 self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
1399 Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
1400 self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
1401 Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
1402 self.default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
1403 DB_OPEN_NEW)
1404 Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
1405 self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
1406 Cleanup().register(temp(METADATA_DB), pass8)
1407 self.fatal_errors = []
1408 self.num_files = 0
1409 self.symbol_db = SymbolDatabase()
1411 # 1 if we've collected data for at least one file, None otherwise.
1412 self.found_valid_file = None
1414 # See set_fname() for initializations of other variables.
1416 def set_fname(self, canonical_name, filename):
1417 """Prepare to receive data for FILENAME. FILENAME is the absolute
1418 filesystem path to the file in question, and CANONICAL_NAME is
1419 FILENAME with the 'Attic' component removed (if the file is indeed
1420 in the Attic)."""
1421 self.fname = canonical_name
1423 # We calculate and save some file metadata here, where we can do
1424 # it only once per file, instead of waiting until later where we
1425 # would have to do the same calculations once per CVS *revision*.
1427 self.rel_name = Ctx().project.relative_name(self.fname)
1429 # If the paths are not the same, then that means that the
1430 # canonical_name has had the 'Attic' component stripped out.
1431 self.file_in_attic = None
1432 if not canonical_name == filename:
1433 self.file_in_attic = 1
1435 file_stat = os.stat(filename)
1436 # The size of our file in bytes
1437 self.file_size = file_stat[stat.ST_SIZE]
1439 # Whether or not the executable bit is set.
1440 self.file_executable = None
1441 if file_stat[0] & stat.S_IXUSR:
1442 self.file_executable = 1
1444 # revision -> [timestamp, author, old-timestamp]
1445 self.rev_data = { }
1447 # Maps revision number (key) to the revision number of the
1448 # previous revision along this line of development.
1450 # For the first revision R on a branch, we consider the revision
1451 # from which R sprouted to be the 'previous'.
1453 # Note that this revision can't be determined arithmetically (due
1454 # to cvsadmin -o, which is why this is necessary).
1456 # If the key has no previous revision, then store None as key's
1457 # value.
1458 self.prev_rev = { }
1460 # This dict is essentially self.prev_rev with the values mapped in
1461 # the other direction, so following key -> value will yield you
1462 # the next revision number.
1464 # Unlike self.prev_rev, if the key has no next revision, then the
1465 # key is not present.
1466 self.next_rev = { }
1468 # Track the state of each revision so that in set_revision_info,
1469 # we can determine if our op is an add/change/delete. We can do
1470 # this because in set_revision_info, we'll have all of the
1471 # revisions for a file at our fingertips, and we need to examine
1472 # the state of our prev_rev to determine if we're an add or a
1473 # change--without the state of the prev_rev, we are unable to
1474 # distinguish between an add and a change.
1475 self.rev_state = { }
1477 # Hash mapping branch numbers, like '1.7.2', to branch names,
1478 # like 'Release_1_0_dev'.
1479 self.branch_names = { }
1481 # RCS flags (used for keyword expansion).
1482 self.mode = None
1484 # Hash mapping revision numbers, like '1.7', to lists of names
1485 # indicating which branches sprout from that revision, like
1486 # ['Release_1_0_dev', 'experimental_driver', ...].
1487 self.branchlist = { }
1489 # Like self.branchlist, but the values are lists of tag names that
1490 # apply to the key revision.
1491 self.taglist = { }
1493 # If set, this is an RCS branch number -- rcsparse calls this the
1494 # "principal branch", but CVS and RCS refer to it as the "default
1495 # branch", so that's what we call it, even though the rcsparse API
1496 # setter method is still 'set_principal_branch'.
1497 self.default_branch = None
1499 # If the RCS file doesn't have a default branch anymore, but does
1500 # have vendor revisions, then we make an educated guess that those
1501 # revisions *were* the head of the default branch up until the
1502 # commit of 1.2, at which point the file's default branch became
1503 # trunk. This records the date at which 1.2 was committed.
1504 self.first_non_vendor_revision_date = None
1506 # A list of all symbols defined for the current file. Used to
1507 # prevent multiple definitions of a symbol, something which can
1508 # easily happen when --symbol-transform is used.
1509 self.defined_symbols = { }
1511 def set_principal_branch(self, branch):
1512 self.default_branch = branch
1514 def set_expansion(self, mode):
1515 self.mode = mode
1517 def set_branch_name(self, branch_number, name):
1518 """Record that BRANCH_NUMBER is the branch number for branch NAME,
1519 and that NAME sprouts from BRANCH_NUMBER.
1520 BRANCH_NUMBER is an RCS branch number with an odd number of components,
1521 for example '1.7.2' (never '1.7.0.2')."""
1522 if not self.branch_names.has_key(branch_number):
1523 self.branch_names[branch_number] = name
1524 # The branchlist is keyed on the revision number from which the
1525 # branch sprouts, so strip off the odd final component.
1526 sprout_rev = branch_number[:branch_number.rfind(".")]
1527 if not self.branchlist.has_key(sprout_rev):
1528 self.branchlist[sprout_rev] = []
1529 self.branchlist[sprout_rev].append(name)
1530 self.symbol_db.register_branch_creation(name)
1531 else:
1532 sys.stderr.write("%s: in '%s':\n"
1533 " branch '%s' already has name '%s',\n"
1534 " cannot also have name '%s', ignoring the latter\n"
1535 % (warning_prefix, self.fname, branch_number,
1536 self.branch_names[branch_number], name))
1538 def rev_to_branch_name(self, revision):
1539 """Return the name of the branch on which REVISION lies.
1540 REVISION is a non-branch revision number with an even number of
1541 components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
1542 For the convenience of callers, REVISION can also be a trunk
1543 revision such as '1.2', in which case just return None."""
1544 if trunk_rev.match(revision):
1545 return None
1546 return self.branch_names.get(revision[:revision.rindex(".")])
1548 def add_cvs_branch(self, revision, branch_name):
1549 """Record the root revision and branch revision for BRANCH_NAME,
1550 based on REVISION. REVISION is a CVS branch number having an even
1551 number of components where the second-to-last is '0'. For
1552 example, if it's '1.7.0.2', then record that BRANCH_NAME sprouts
1553 from 1.7 and has branch number 1.7.2."""
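# An illustrative trace of the slicing below, using the docstring's
# example branch number (a sketch only, no new behavior):
#   revision   = '1.7.0.2'
#   branch_rev = revision[:last_dot]                       # '1.7.0'
#   branch_rev = branch_rev[:last2_dot] + revision[last_dot:]   # '1.7.2'
#   ...so BRANCH_NAME is recorded as branch '1.7.2', sprouting from '1.7'.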
1554 last_dot = revision.rfind(".")
1555 branch_rev = revision[:last_dot]
1556 last2_dot = branch_rev.rfind(".")
1557 branch_rev = branch_rev[:last2_dot] + revision[last_dot:]
1558 self.set_branch_name(branch_rev, branch_name)
1560 def define_tag(self, name, revision):
1561 """Record a bidirectional mapping between symbolic NAME and REVISION.
1562 REVISION is an unprocessed revision number from the RCS file's
1563 header, for example: '1.7', '1.7.0.2', '1.1.1', or '1.1.1.1'.
1564 This function will determine what kind of symbolic name it is by
1565 inspection, and record it in the right places."""
1566 for (pattern, replacement) in Ctx().symbol_transforms:
1567 newname = pattern.sub(replacement, name)
1568 if newname != name:
1569 Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
1570 % (name, newname))
1571 name = newname
1572 if self.defined_symbols.has_key(name):
1573 err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
1574 % (error_prefix, name, self.fname)
1575 sys.stderr.write(err + "\n")
1576 self.fatal_errors.append(err)
1577 self.defined_symbols[name] = None
1578 if branch_tag.match(revision):
1579 self.add_cvs_branch(revision, name)
1580 elif vendor_tag.match(revision):
1581 self.set_branch_name(revision, name)
1582 else:
1583 if not self.taglist.has_key(revision):
1584 self.taglist[revision] = []
1585 self.taglist[revision].append(name)
1586 self.symbol_db.register_tag_creation(name)
1588 def define_revision(self, revision, timestamp, author, state,
1589 branches, next):
1591 # Record the state of our revision for later calculations
1592 self.rev_state[revision] = state
1594 # store the rev_data as a list in case we have to jigger the timestamp
1595 self.rev_data[revision] = [int(timestamp), author, None]
1597 # When on trunk, the RCS 'next' revision number points to what
1598 # humans might consider to be the 'previous' revision number. For
1599 # example, 1.3's RCS 'next' is 1.2.
1601 # However, on a branch, the RCS 'next' revision number really does
1602 # point to what humans would consider to be the 'next' revision
1603 # number. For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
1605 # In other words, in RCS, 'next' always means "where to find the next
1606 # deltatext that you need this revision to retrieve".
1608 # That said, we don't *want* RCS's behavior here, so we determine
1609 # whether we're on trunk or a branch and set self.prev_rev
1610 # accordingly.
1612 # One last thing. Note that if REVISION is a branch revision,
1613 # instead of mapping REVISION to NEXT, we instead map NEXT to
1614 # REVISION. Since we loop over all revisions in the file before
1615 # doing anything with the data we gather here, this 'reverse
1616 # assignment' effectively does the following:
1618 # 1. Gives us no 'prev' value for REVISION (in this
1619 # iteration... it may have been set in a previous iteration)
1621 # 2. Sets the 'prev' value for the revision with number NEXT to
1622 # REVISION. So when we come around to the branch revision whose
1623 # revision value is NEXT, its 'prev' and 'prev_rev' are already
1624 # set.
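# For instance (using the revision numbers from the comment above): on
# trunk, revision '1.3' has RCS 'next' == '1.2', so the code below
# records prev_rev['1.3'] = '1.2' and next_rev['1.2'] = '1.3'.  On a
# branch, revision '1.1.2.1' has RCS 'next' == '1.1.2.2', so it records
# prev_rev['1.1.2.2'] = '1.1.2.1' and next_rev['1.1.2.1'] = '1.1.2.2'.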
1625 if trunk_rev.match(revision):
1626 self.prev_rev[revision] = next
1627 self.next_rev[next] = revision
1628 elif next:
1629 self.prev_rev[next] = revision
1630 self.next_rev[revision] = next
1632 for b in branches:
1633 self.prev_rev[b] = revision
1635 # Ratchet up the highest vendor head revision, if necessary.
1636 if self.default_branch:
1637 default_branch_root = self.default_branch + "."
1638 if ((revision.find(default_branch_root) == 0)
1639 and (default_branch_root.count('.') == revision.count('.'))):
1640 # This revision is on the default branch, so record that it is
1641 # the new highest default branch head revision.
1642 self.default_branches_db[self.rel_name] = revision
1643 else:
1644 # No default branch, so make an educated guess.
1645 if revision == '1.2':
1646 # This is probably the time when the file stopped having a
1647 # default branch, so make a note of it.
1648 self.first_non_vendor_revision_date = timestamp
1649 else:
1650 m = vendor_revision.match(revision)
1651 if m and ((not self.first_non_vendor_revision_date)
1652 or (timestamp < self.first_non_vendor_revision_date)):
1653 # We're looking at a vendor revision, and it wasn't
1654 # committed after this file lost its default branch, so bump
1655 # the maximum trunk vendor revision in the permanent record.
1656 self.default_branches_db[self.rel_name] = revision
1658 if not trunk_rev.match(revision):
1659 # Check for unlabeled branches, record them. We tried to collect
1660 # all branch names when we parsed the symbolic name header
1661 # earlier, of course, but that didn't catch unlabeled branches.
1662 # If a branch is unlabeled, this is our first encounter with it,
1663 # so we have to record its data now.
1664 branch_number = revision[:revision.rindex(".")]
1665 if not self.branch_names.has_key(branch_number):
1666 branch_name = "unlabeled-" + branch_number
1667 self.set_branch_name(branch_number, branch_name)
1669 # Register the commit on this non-trunk branch
1670 branch_name = self.branch_names[branch_number]
1671 self.symbol_db.register_branch_commit(branch_name)
1673 def tree_completed(self):
1674 "The revision tree has been parsed. Analyze it for consistency."
1676 # Our algorithm depends upon the timestamps on the revisions occurring
1677 # monotonically over time. That is, we want to see rev 1.34 occur in
1678 # time before rev 1.35. If we inserted 1.35 *first* (due to the time-
1679 # sorting), and then tried to insert 1.34, we'd be screwed.
1681 # To perform the analysis, we'll simply visit all of the 'previous'
1682 # links that we have recorded and validate that the timestamp on the
1683 # previous revision is before the specified revision.
1685 # If we have to resync some nodes, then we restart the scan. Just keep
1686 # looping as long as we need to restart.
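# A small worked example (hypothetical timestamps): if previous rev 1.34
# carries t_p == 1000 and current rev 1.35 carries t_c == 990, the loop
# below rewrites 1.34's timestamp to t_c - 1 == 989 (remembering 1000 as
# the old timestamp), then keeps walking backwards in case 1.33 now also
# violates the ordering.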
1687 while 1:
1688 for current, prev in self.prev_rev.items():
1689 if not prev:
1690 # no previous revision exists (i.e. the initial revision)
1691 continue
1692 t_c = self.rev_data[current][0]
1693 t_p = self.rev_data[prev][0]
1694 if t_p >= t_c:
1695 # the previous revision occurred later than the current revision.
1696 # shove the previous revision back in time (and any before it that
1697 # may need to shift).
1699 # We sync backwards and not forwards because any given CVS
1700 # Revision has only one previous revision. However, a CVS
1701 # Revision can *be* a previous revision for many other
1702 # revisions (e.g., a revision that is the source of multiple
1703 # branches). This becomes relevant when we do the secondary
1704 # synchronization in pass 2--we can make certain that we
1705 # don't resync a revision earlier than its previous
1706 # revision, but it would be non-trivial to make sure that we
1707 # don't resync revision R *after* any revisions that have R
1708 # as a previous revision.
1709 while t_p >= t_c:
1710 self.rev_data[prev][0] = t_c - 1 # new timestamp
1711 self.rev_data[prev][2] = t_p # old timestamp
1712 delta = t_c - 1 - t_p
1713 msg = "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
1714 % (self.rel_name,
1715 prev, time.ctime(t_p), delta)
1716 Log().write(LOG_VERBOSE, msg)
1717 if (delta > COMMIT_THRESHOLD
1718 or delta < (COMMIT_THRESHOLD * -1)):
1719 fmt = "%s: Significant timestamp change for '%s' (%d seconds)"
1720 Log().write(LOG_WARN, fmt % (warning_prefix, self.rel_name,
1721 delta))
1722 current = prev
1723 prev = self.prev_rev[current]
1724 if not prev:
1725 break
1726 t_c = t_c - 1 # self.rev_data[current][0]
1727 t_p = self.rev_data[prev][0]
1729 # break from the for-loop
1730 break
1731 else:
1732 # finished the for-loop (no resyncing was performed)
1733 return
1735 def set_revision_info(self, revision, log, text):
1736 timestamp, author, old_ts = self.rev_data[revision]
1737 digest = sha.new(log + '\0' + author).hexdigest()
1738 if old_ts:
1739 # The timestamp on this revision was changed. Log it for later
1740 # resynchronization of other files' revisions that occurred
1741 # for this time and log message.
1742 self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))
1744 # "...Give back one kadam to honor the Hebrew God whose Ark this is."
1745 # -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
1747 # If revision 1.1 appears to have been created via 'cvs add'
1748 # instead of 'cvs import', then this file probably never had a
1749 # default branch, so retroactively remove its record in the
1750 # default branches db. The test is that the log message CVS uses
1751 # for 1.1 in imports is "Initial revision\n" with no period.
1752 if revision == '1.1' and log != 'Initial revision\n':
1753 if self.default_branches_db.has_key(self.rel_name):
1754 del self.default_branches_db[self.rel_name]
1756 # Get the timestamps of the previous and next revisions
1757 prev_rev = self.prev_rev[revision]
1758 prev_timestamp, ign, ign = self.rev_data.get(prev_rev, [0, None, None])
1760 next_rev = self.next_rev.get(revision)
1761 next_timestamp, ign, ign = self.rev_data.get(next_rev, [0, None, None])
1763 # How to tell if a CVSRevision is an add, a change, or a deletion:
1765 # It's a delete if RCS state is 'dead'
1767 # It's an add if RCS state is 'Exp' and
1768 # - we either have no previous revision
1769 # or
1770 # - we have a previous revision whose state is 'dead'
1772 # Anything else is a change.
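# Concretely (hypothetical file): 1.1 in state 'Exp' with no previous
# revision -> OP_ADD; 1.2 'dead' -> OP_DELETE; 1.3 'Exp' following the
# dead 1.2 -> OP_ADD again; 1.4 'Exp' following the live 1.3 -> OP_CHANGE.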
1773 if self.rev_state[revision] == 'dead':
1774 op = OP_DELETE
1775 elif ((self.prev_rev.get(revision, None) is None)
1776 or (self.rev_state[self.prev_rev[revision]] == 'dead')):
1777 op = OP_ADD
1778 else:
1779 op = OP_CHANGE
1781 def is_branch_revision(rev):
1782 """Return True if this revision is not a trunk revision,
1783 else return False."""
1784 if rev.count('.') >= 3:
1785 return True
1786 return False
1788 def is_same_line_of_development(rev1, rev2):
1789 """Return True if rev1 and rev2 are on the same line of
1790 development (i.e., both on trunk, or both on the same branch);
1791 return False otherwise. Either rev1 or rev2 can be None, in
1792 which case automatically return False."""
1793 if rev1 is None or rev2 is None:
1794 return False
1795 if rev1.count('.') == 1 and rev2.count('.') == 1:
1796 return True
1797 if rev1[0:rev1.rfind('.')] == rev2[0:rev2.rfind('.')]:
1798 return True
1799 return False
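# A few illustrative calls to the two helpers above (revision numbers
# made up for the example):
#   is_branch_revision('1.7')                           =>  False  (trunk)
#   is_branch_revision('1.7.2.1')                       =>  True   (branch)
#   is_same_line_of_development('1.6', '1.7')           =>  True   (both trunk)
#   is_same_line_of_development('1.7.2.1', '1.7.2.2')   =>  True   (same branch)
#   is_same_line_of_development('1.7', '1.7.2.1')       =>  False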
1801 # There can be an odd situation where the tip revision of a branch
1802 # is alive, but every predecessor on the branch is in state 'dead',
1803 # yet the revision from which the branch sprouts is alive. (This
1804 # is sort of a mirror image of the more common case of adding a
1805 # file on a branch, in which the first revision on the branch is
1806 # alive while the revision from which it sprouts is dead.)
1808 # In this odd situation, we must mark the first live revision on
1809 # the branch as an OP_CHANGE instead of an OP_ADD, because it
1810 # reflects, however indirectly, a change w.r.t. the source
1811 # revision from which the branch sprouts.
1813 # This is issue #89.
1814 cur_num = revision
1815 if (is_branch_revision(revision)
1816 and not self.rev_state[revision] == 'dead'):
1817 while 1:
1818 prev_num = self.prev_rev.get(cur_num, None)
1819 if not cur_num or not prev_num:
1820 break
1821 if (not is_same_line_of_development(cur_num, prev_num)
1822 and self.rev_state[cur_num] == 'dead'
1823 and not self.rev_state[prev_num] == 'dead'):
1824 op = OP_CHANGE
1825 cur_num = self.prev_rev.get(cur_num, None)
1827 if text:
1828 deltatext_code = DELTATEXT_NONEMPTY
1829 else:
1830 deltatext_code = DELTATEXT_EMPTY
1832 c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
1833 next_timestamp, op,
1834 prev_rev, revision, next_rev,
1835 self.file_in_attic, self.file_executable,
1836 self.file_size,
1837 deltatext_code, self.fname,
1838 self.mode, self.rev_to_branch_name(revision),
1839 self.taglist.get(revision, []),
1840 self.branchlist.get(revision, []))
1841 self.revs.write(str(c_rev) + "\n")
1842 StatsKeeper().record_c_rev(c_rev)
1844 if not self.metadata_db.has_key(digest):
1845 self.metadata_db[digest] = (author, log)
1847 def parse_completed(self):
1848 # Walk through all branches and tags and register them with
1849 # their parent branch in the symbol database.
1850 for revision, symbols in self.taglist.items() + self.branchlist.items():
1851 for symbol in symbols:
1852 name = self.rev_to_branch_name(revision)
1853 if name is not None:
1854 self.symbol_db.register_branch_blocker(name, symbol)
1856 self.num_files = self.num_files + 1
1858 def write_symbol_db(self):
1859 self.symbol_db.write()
1861 class SymbolingsLogger:
1862 """Manage the file that contains lines for symbol openings and
1863 closings.
1865 This data will later be used to determine valid SVNRevision ranges
1866 from which a file can be copied when creating a branch or tag in
1867 Subversion. Do this by finding "Openings" and "Closings" for each
1868 file copied onto a branch or tag.
1870 An "Opening" is the CVSRevision from which a given branch/tag
1871 sprouts on a path.
1873 The "Closing" for that branch/tag and path is the next CVSRevision
1874 on the same line of development as the opening.
1876 For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
1877 obviously sprouts from revision 1.2. Therefore, 1.2 is the opening
1878 for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
1879 'foo.c'. Note that there may be many revisions chronologically
1880 between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
1881 perhaps even including on branch BEE itself. But 1.3 is the next
1882 revision *on the same line* as 1.2; that is why it is the closing
1883 revision for those symbolic names of which 1.2 is the opening.
1885 The reason for doing all this hullabaloo is to make branch and tag
1886 creation as efficient as possible by minimizing the number of copies
1887 and deletes per creation. For example, revisions 1.2 and 1.3 of
1888 foo.c might correspond to revisions 17 and 30 in Subversion. That
1889 means that when creating branch BEE, there is some motivation to do
1890 the copy from one of 17-30. Now if there were another file,
1891 'bar.c', whose opening and closing CVSRevisions for BEE corresponded
1892 to revisions 24 and 39 in Subversion, we would know that the ideal
1893 thing would be to copy the branch from somewhere between 24 and 29,
1894 inclusive.
1895 """
1896 def __init__(self):
1897 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
1898 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
1899 self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
1900 Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)
1902 # The keys of this dictionary are *source* cvs_paths for which
1903 # we've encountered an 'opening' on the default branch. The
1904 # values are the (uncleaned) symbolic names that this path has
1905 # opened.
1906 self.open_paths_with_default_branches = { }
1908 def log_revision(self, c_rev, svn_revnum):
1909 """Log any openings found in C_REV, and if C_REV.next_rev is not
1910 None, a closing. The opening uses SVN_REVNUM, but the closing (if
1911 any) will have its revnum determined later."""
1912 for name in c_rev.tags + c_rev.branches:
1913 self._note_default_branch_opening(c_rev, name)
1914 if c_rev.op != OP_DELETE:
1915 self._log(name, svn_revnum,
1916 c_rev.cvs_path, c_rev.branch_name, OPENING)
1918 # If our c_rev has a next_rev, then that's the closing rev for
1919 # this source revision. Log it to closings for later processing
1920 # since we don't know the svn_revnum yet.
1921 if c_rev.next_rev is not None:
1922 self.closings.write('%s %s\n' %
1923 (name, c_rev.unique_key(c_rev.next_rev)))
1925 def _log(self, name, svn_revnum, cvs_path, branch_name, type):
1926 """Write out a single line to the symbol_openings_closings file
1927 representing that SVN_REVNUM of CVS_PATH on BRANCH_NAME is either the
1928 opening or closing (TYPE) of NAME (a symbolic name).
1930 TYPE should only be one of the following global constants:
1931 OPENING or CLOSING."""
1932 # 8 places gives us 999,999,999 SVN revs. That *should* be enough.
1933 self.symbolings.write(
1934 '%s %.8d %s %s %s\n'
1935 % (name, svn_revnum, type, branch_name or '*', cvs_path))
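# As a hypothetical example, an opening for branch 'BEE' of 'proj/foo.c'
# at Subversion r21 would produce a line of the form
#
#   BEE 00000021 <OPENING-constant> * proj/foo.c
#
# where '*' stands in for "no branch name", i.e. the source revision
# lives on trunk.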
1937 def close(self):
1938 """Iterate through the closings file, lookup the svn_revnum for
1939 each closing CVSRevision, and write a proper line out to the
1940 symbolings file."""
1941 # Use this to get the c_rev of our rev_key
1942 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)
1944 self.closings.close()
1945 for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
1946 (name, rev_key) = line.rstrip().split(" ", 1)
1947 svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)
1949 c_rev = cvs_revs_db.get_revision(rev_key)
1950 self._log(name, svn_revnum, c_rev.cvs_path, c_rev.branch_name, CLOSING)
1952 self.symbolings.close()
1954 def _note_default_branch_opening(self, c_rev, symbolic_name):
1955 """If C_REV is a default branch revision, log C_REV.cvs_path as an
1956 opening for SYMBOLIC_NAME."""
1957 path = c_rev.cvs_path
1958 if not self.open_paths_with_default_branches.has_key(path):
1959 self.open_paths_with_default_branches[path] = [ ]
1960 self.open_paths_with_default_branches[path].append(symbolic_name)
1962 def log_default_branch_closing(self, c_rev, svn_revnum):
1963 """If self.open_paths_with_default_branches contains
1964 C_REV.cvs_path, then log each name in
1965 self.open_paths_with_default_branches[C_REV.cvs_path] as a closing
1966 with SVN_REVNUM as the closing revision number."""
1967 path = c_rev.cvs_path
1968 if self.open_paths_with_default_branches.has_key(path):
1969 # log each symbol as a closing
1970 for name in self.open_paths_with_default_branches[path]:
1971 self._log(name, svn_revnum, path, None, CLOSING)
1972 # Remove them from the openings list as we're done with them.
1973 del self.open_paths_with_default_branches[path]
1976 class PersistenceManager:
1977 """The PersistenceManager allows us to effectively store SVNCommits
1978 to disk and retrieve them later using only their subversion revision
1979 number as the key. It also returns the subversion revision number
1980 for a given CVSRevision's unique key.
1982 All information pertinent to each SVNCommit is stored in a series of
1983 on-disk databases so that SVNCommits can be retrieved on-demand.
1985 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
1986 In 'new' mode, PersistenceManager will initialize a new set of on-disk
1987 databases and be fully-featured.
1988 In 'read' mode, PersistenceManager will open existing on-disk databases
1989 and the set_* methods will be unavailable."""
1990 def __init__(self, mode):
1991 self.mode = mode
1992 if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
1993 raise RuntimeError, "Invalid 'mode' argument to PersistenceManager"
1994 self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
1995 Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
1996 self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
1997 Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
1998 self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
1999 Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)
2000 self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
2001 self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
2002 ###PERF kff Elsewhere there are comments about sucking the tags db
2003 ### into memory. That seems like a good idea.
2004 if not Ctx().trunk_only:
2005 self.tags_db = TagsDatabase(DB_OPEN_READ)
2006 self.motivating_revnums = Database(temp(MOTIVATING_REVNUMS), mode)
2007 Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)
2009 # "branch_name" -> svn_revnum in which branch was last filled.
2010 # This is used by CVSCommit._pre_commit, to prevent creating a fill
2011 # revision which would have nothing to do.
2012 self.last_filled = {}
2014 def get_svn_revnum(self, cvs_rev_unique_key):
2015 """Return the Subversion revision number in which
2016 CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
2017 is no mapping for CVS_REV_UNIQUE_KEY."""
2018 return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))
2020 def get_svn_commit(self, svn_revnum):
2021 """Return an SVNCommit that corresponds to SVN_REVNUM.
2023 If no SVNCommit exists for revnum SVN_REVNUM, then return None.
2025 This method can throw SVNCommitInternalInconsistencyError.
2026 """
2027 svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
2028 c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
2029 if c_rev_keys is None:
2030 return None
2032 digest = None
2033 for key in c_rev_keys:
2034 c_rev = self.cvs_revisions.get_revision(key)
2035 svn_commit.add_revision(c_rev)
2036 # Set the author and log message for this commit by using
2037 # CVSRevision metadata, but only if we haven't done so already.
2038 if digest is None:
2039 digest = c_rev.digest
2040 author, log_msg = self.svn_commit_metadata[digest]
2041 svn_commit.set_author(author)
2042 svn_commit.set_log_msg(log_msg)
2044 # If we're doing a trunk-only conversion, we don't need to do any more
2045 # work.
2046 if Ctx().trunk_only:
2047 return svn_commit
2049 name, date = self._get_name_and_date(svn_revnum)
2050 if name:
2051 svn_commit.set_symbolic_name(name)
2052 svn_commit.set_date(date)
2053 if self.tags_db.has_key(name):
2054 svn_commit.is_tag = 1
2056 motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
2057 if motivating_revnum:
2058 svn_commit.set_motivating_revnum(int(motivating_revnum))
2059 svn_commit.set_date(date)
2061 if len(svn_commit.cvs_revs) and name:
2062 raise SVNCommit.SVNCommitInternalInconsistencyError(
2063 "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
2064 "symbolic name ('%s') to fill."
2065 % (_clean_symbolic_name(name),))
2067 return svn_commit
2069 def set_cvs_revs(self, svn_revnum, cvs_revs):
2070 """Record the bidirectional mapping between SVN_REVNUM and
2071 CVS_REVS."""
2072 if self.mode == DB_OPEN_READ:
2073 raise RuntimeError, \
2074 'Write operation attempted on read-only PersistenceManager'
2075 for c_rev in cvs_revs:
2076 Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
2077 self.svn2cvs_db[str(svn_revnum)] = [x.unique_key() for x in cvs_revs]
2078 for c_rev in cvs_revs:
2079 self.cvs2svn_db[c_rev.unique_key()] = svn_revnum
2081 def set_name_and_date(self, svn_revnum, name, date):
2082 """Associate symbolic name NAME and DATE with SVN_REVNUM.
2084 NAME is allowed to be None."""
2086 if self.mode == DB_OPEN_READ:
2087 raise RuntimeError, \
2088 'Write operation attempted on read-only PersistenceManager'
2089 self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
2090 self.last_filled[name] = svn_revnum
2092 def _get_name_and_date(self, svn_revnum):
2093 """Return a tuple containing the symbolic name and date associated
2094 with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
2095 associated with it."""
2096 return self.svn_commit_names_dates.get(str(svn_revnum), (None, None))
2098 def set_motivating_revnum(self, svn_revnum, motivating_revnum):
2099 """Store MOTIVATING_REVNUM as the value of SVN_REVNUM"""
2100 if self.mode == DB_OPEN_READ:
2101 raise RuntimeError, \
2102 'Write operation attempted on read-only PersistenceManager'
2103 self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
2106 class CVSCommit:
2107 """Each instance of this class contains a number of CVS Revisions
2108 that correspond to one or more Subversion Commits. After all CVS
2109 Revisions are added to the grouping, calling process_revisions will
2110 generate a Subversion Commit (or Commits) for the set of CVS
2111 Revisions in the grouping."""
2113 def __init__(self, digest, author, log):
2114 self.digest = digest
2115 self.author = author
2116 self.log = log
2118 # Symbolic names for which the last source revision has already
2119 # been seen and for which the CVSRevisionAggregator has already
2120 # generated a fill SVNCommit. See self.process_revisions().
2121 self.done_symbols = [ ]
2123 self.files = { }
2124 # Lists of CVSRevisions
2125 self.changes = [ ]
2126 self.deletes = [ ]
2128 # Start out with a t_min higher than any incoming time T, and a
2129 # t_max lower than any incoming T. This way the first T will
2130 # push t_min down to T, and t_max up to T, naturally (without any
2131 # special-casing), and successive times will then ratchet them
2132 # outward as appropriate.
2133 self.t_min = 1L<<32
2134 self.t_max = 0
2136 # This will be set to the SVNCommit that occurs in self._commit.
2137 self.motivating_commit = None
2139 # This is a list of all non-primary commits motivated by the main
2140 # commit. We gather these so that we can set their dates to the
2141 # same date as the primary commit.
2142 self.secondary_commits = [ ]
2144 # State for handling default branches.
2146 # Here is a tempting, but ultimately nugatory, bit of logic, which
2147 # I share with you so you may appreciate the less attractive, but
2148 # refreshingly non-nugatory, logic which follows it:
2150 # If some of the commits in this txn happened on a non-trunk
2151 # default branch, then those files will have to be copied into
2152 # trunk manually after being changed on the branch (because the
2153 # RCS "default branch" appears as head, i.e., trunk, in practice).
2154 # As long as those copies don't overwrite any trunk paths that
2155 # were also changed in this commit, then we can do the copies in
2156 # the same revision, because they won't cover changes that don't
2157 # appear anywhere/anywhen else. However, if some of the trunk dst
2158 # paths *did* change in this commit, then immediately copying the
2159 # branch changes would lose those trunk mods forever. So in this
2160 # case, we need to do at least that copy in its own revision. And
2161 # for simplicity's sake, if we're creating the new revision for
2162 # even one file, then we just do all such copies together in the
2163 # new revision.
2165 # Doesn't that sound nice?
2167 # Unfortunately, Subversion doesn't support copies with sources
2168 # in the current txn. All copies must be based in committed
2169 # revisions. Therefore, we generate the above-described new
2170 # revision unconditionally.
2172 # This is a list of c_revs, and a c_rev is appended for each
2173 # default branch commit that will need to be copied to trunk (or
2174 # deleted from trunk) in some generated revision following the
2175 # "regular" revision.
2176 self.default_branch_cvs_revisions = [ ]
2178 def __cmp__(self, other):
2179 # Commits should be sorted by t_max. If both self and other have
2180 # the same t_max, break the tie using t_min, and lastly, digest
2181 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
2182 or cmp(self.digest, other.digest))
2184 def has_file(self, fname):
2185 return self.files.has_key(fname)
2187 def revisions(self):
2188 return self.changes + self.deletes
2190 def opens_symbolic_name(self, name):
2191 """Returns true if any CVSRevision in this commit is on a tag or a
2192 branch or is the origin of a tag or branch."""
2193 for c_rev in self.revisions():
2194 if c_rev.opens_symbolic_name(name):
2195 return 1
2196 return 0
2198 def add_revision(self, c_rev):
2199 # Record the time range of this commit.
2201 # ### ISSUE: It's possible, though unlikely, that the time range
2202 # of a commit could get gradually expanded to be arbitrarily
2203 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
2204 # problem, and anyway deciding where to break it up would be a
2205 # judgement call. For now, we just print a warning in commit() if
2206 # this happens.
2207 if c_rev.timestamp < self.t_min:
2208 self.t_min = c_rev.timestamp
2209 if c_rev.timestamp > self.t_max:
2210 self.t_max = c_rev.timestamp
2212 if c_rev.op == OP_DELETE:
2213 self.deletes.append(c_rev)
2214 else:
2215 # OP_CHANGE or OP_ADD
2216 self.changes.append(c_rev)
2218 self.files[c_rev.fname] = 1
2220 def _pre_commit(self):
2221 """Generates any SVNCommits that must exist before the main
2222 commit."""
2224 # There may be multiple c_revs in this commit that would cause
2225 # branch B to be filled, but we only want to fill B once. On the
2226 # other hand, there might be multiple branches committed on in
2227 # this commit. Whatever the case, we should count exactly one
2228 # commit per branch, because we only fill a branch once per
2229 # CVSCommit. This list tracks which branches we've already
2230 # counted.
2231 accounted_for_sym_names = [ ]
2233 def fill_needed(c_rev, pm):
2234 """Return 1 if this is the first commit on a new branch (for
2235 this file) and we need to fill the branch; else return 0
2236 (meaning that some other file's first commit on the branch has
2237 already done the fill for us).
2239 If C_REV.op is OP_ADD, only return 1 if the branch that this
2240 commit is on has no last filled revision.
2242 PM is a PersistenceManager to query.
2243 """
2245 # Different '.' counts indicate that c_rev is now on a different
2246 # line of development (and may need a fill)
2247 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
2248 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
2249 # It should be the case that when we have a file F that
2250 # is added on branch B (thus, F on trunk is in state
2251 # 'dead'), we generate an SVNCommit to fill B iff the branch
2252 # has never been filled before.
2254 # If this c_rev.op == OP_ADD, *and* the branch has never
2255 # been filled before, then fill it now. Otherwise, no need to
2256 # fill it.
2257 if c_rev.op == OP_ADD:
2258 if pm.last_filled.get(c_rev.branch_name, None) is None:
2259 return 1
2260 elif c_rev.op == OP_CHANGE:
2261 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
2262 return 1
2263 elif c_rev.op == OP_DELETE:
2264 if pm.last_filled.get(c_rev.branch_name, None) is None:
2265 return 1
2266 return 0
2268 for c_rev in self.changes + self.deletes:
2269 # If a commit is on a branch, we must ensure that the branch
2270 # path being committed exists (in HEAD of the Subversion
2271 # repository). If it doesn't exist, we will need to fill the
2272 # branch. After the fill, the path on which we're committing
2273 # will exist.
2274 if c_rev.branch_name \
2275 and c_rev.branch_name not in accounted_for_sym_names \
2276 and c_rev.branch_name not in self.done_symbols \
2277 and fill_needed(c_rev, Ctx()._persistence_manager):
2278 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
2279 % c_rev.branch_name)
2280 svn_commit.set_symbolic_name(c_rev.branch_name)
2281 self.secondary_commits.append(svn_commit)
2282 accounted_for_sym_names.append(c_rev.branch_name)
2284 def _commit(self):
2285 """Generates the primary SVNCommit that corresponds to this
2286 CVSCommit."""
2287 # Generate an SVNCommit unconditionally. Even if the only change
2288 # in this CVSCommit is a deletion of an already-deleted file (that
2289 # is, a CVS revision in state 'dead' whose predecessor was also in
2290 # state 'dead'), the conversion will still generate a Subversion
2291 # revision containing the log message for the second dead
2292 # revision, because we don't want to lose that information.
2293 svn_commit = SVNCommit("commit")
2294 self.motivating_commit = svn_commit
2296 for c_rev in self.changes:
2297 svn_commit.add_revision(c_rev)
2298 # Only make a change if we need to. When 1.1.1.1 has an empty
2299 # deltatext, the explanation is almost always that we're looking
2300 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
2301 # such imports, CVS creates an RCS file where 1.1 has the
2302 # content, and 1.1.1.1 has an empty deltatext, i.e., the same
2303 # content as 1.1. There's no reason to reflect this non-change
2304 # in the repository, so we want to do nothing in this case. (If
2305 # we were really paranoid, we could make sure 1.1's log message
2306 # is the CVS-generated "Initial revision\n", but I think the
2307 # conditions below are strict enough.)
2308 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
2309 and (c_rev.rev == "1.1.1.1")):
2310 if c_rev.is_default_branch_revision():
2311 self.default_branch_cvs_revisions.append(c_rev)
2313 for c_rev in self.deletes:
2314 # When a file is added on a branch, CVS not only adds the file
2315 # on the branch, but generates a trunk revision (typically
2316 # 1.1) for that file in state 'dead'. We only want to add
2317 # this revision if the log message is not the standard cvs
2318 # fabricated log message.
2319 if c_rev.prev_rev is None:
2320 # c_rev.branches may be empty if the originating branch
2321 # has been excluded.
2322 if not c_rev.branches:
2323 continue
2324 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2325 % (c_rev.filename(),
2326 c_rev.branches[0]))
2327 author, log_msg = \
2328 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2329 if log_msg == cvs_generated_msg:
2330 continue
2332 svn_commit.add_revision(c_rev)
2333 if c_rev.is_default_branch_revision():
2334 self.default_branch_cvs_revisions.append(c_rev)
2336 # There is a slight chance that we didn't actually register any
2337 # CVSRevisions with our SVNCommit (see loop over self.deletes
2338 # above), so if we have no CVSRevisions, we don't flush the
2339 # svn_commit to disk and roll back our revnum.
2340 if len(svn_commit.cvs_revs) > 0:
2341 svn_commit.flush()
2342 else:
2343 # We will not be flushing this SVNCommit, so rollback the
2344 # SVNCommit revision counter.
2345 SVNCommit.revnum = SVNCommit.revnum - 1
2347 if not Ctx().trunk_only:
2348 for c_rev in self.revisions():
2349 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2351 def _post_commit(self):
2352 """Generates any SVNCommits that we can perform now that _commit
2353 has happened. That is, handle non-trunk default branches.
2354 Sometimes an RCS file has a non-trunk default branch, so a commit
2355 on that default branch would be visible in a default CVS checkout
2356 of HEAD. If we don't copy that commit over to Subversion's trunk,
2357 then there will be no Subversion tree which corresponds to that
2358 CVS checkout. Of course, in order to copy the path over, we may
2359 first need to delete the existing trunk there. """
2361 # Only generate a commit if we have default branch revs
2362 if len(self.default_branch_cvs_revisions):
2363 # Generate an SVNCommit for all of our default branch c_revs.
2364 svn_commit = SVNCommit("post-commit default branch(es)")
2365 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2366 for c_rev in self.default_branch_cvs_revisions:
2367 svn_commit.add_revision(c_rev)
2368 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2369 svn_commit.revnum)
2370 self.secondary_commits.append(svn_commit)
2372 def process_revisions(self, done_symbols):
2373 """Process all the CVSRevisions that this instance has, creating
2374 one or more SVNCommits in the process. Generate fill SVNCommits
2375 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2376 fills).
2378 Return the primary SVNCommit that corresponds to this CVSCommit.
2379 The returned SVNCommit is the commit that motivated any other
2380 SVNCommits generated in this CVSCommit."""
2381 self.done_symbols = done_symbols
2382 seconds = self.t_max - self.t_min + 1
2384 Log().write(LOG_VERBOSE, '-' * 60)
2385 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2386 if seconds == 1:
2387 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2388 % time.ctime(self.t_max))
2389 else:
2390 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2391 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2392 % (time.ctime(self.t_max), seconds))
2394 if seconds > COMMIT_THRESHOLD + 1:
2395 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2396 % (warning_prefix, COMMIT_THRESHOLD))
2398 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2399 self._commit()
2400 return self.motivating_commit
2402 self._pre_commit()
2403 self._commit()
2404 self._post_commit()
2406 for svn_commit in self.secondary_commits:
2407 svn_commit.set_date(self.motivating_commit.get_date())
2408 svn_commit.flush()
2410 return self.motivating_commit
2413 class SVNCommit:
2414 """This represents one commit to the Subversion Repository. There
2415 are three types of SVNCommits:
2417 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2419 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2421 3. Updates trunk to reflect the contents of a particular branch
2422 (this is to handle RCS default branches)."""
2424 # The revision number to assign to the next new SVNCommit.
2425 # We start at 2 because SVNRepositoryMirror uses the first commit
2426 # to create trunk, tags, and branches.
2427 revnum = 2
2429 class SVNCommitInternalInconsistencyError(Exception):
2430 """Exception raised if we encounter an impossible state in the
2431 SVNCommit Databases."""
2432 pass
2434 def __init__(self, description="", revnum=None, cvs_revs=None):
2435 """Instantiate an SVNCommit. DESCRIPTION is for debugging only.
2436 If REVNUM, the SVNCommit will correspond to that revision number;
2437 and if CVS_REVS, then they must be the exact set of CVSRevisions for
2438 REVNUM.
2440 It is an error to pass CVS_REVS without REVNUM, but you may pass
2441 REVNUM without CVS_REVS, and then add a revision at a time by
2442 invoking add_revision()."""
2443 self._description = description
2445 # Revprop metadata for this commit.
2447 # These initial values are placeholders. At least the log and the
2448 # date should be different by the time these are used.
2450 # They are private because their values should be returned encoded
2451 # in UTF8, but callers aren't required to set them in UTF8.
2452 # Therefore, accessor methods are used to set them, and
2453 # self.get_revprops() is used to get them, in dictionary form.
2454 self._author = Ctx().username
2455 self._log_msg = "This log message means an SVNCommit was used too soon."
2456 self._max_date = 0 # Latest date seen so far.
2458 self.cvs_revs = cvs_revs or []
2459 if revnum:
2460 self.revnum = revnum
2461 else:
2462 self.revnum = SVNCommit.revnum
2463 SVNCommit.revnum = SVNCommit.revnum + 1
2465 # The (uncleaned) symbolic name that is filled in this SVNCommit, if any.
2466 self.symbolic_name = None
2468 # If this commit is a default branch synchronization, this
2469 # variable represents the subversion revision number of the
2470 # *primary* commit where the default branch changes actually
2471 # happened. It is None otherwise.
2473 # It is possible for multiple synchronization commits to refer to
2474 # the same motivating commit revision number, and it is possible
2475 # for a single synchronization commit to contain CVSRevisions on
2476 # multiple different default branches.
2477 self.motivating_revnum = None
2479 # is_tag is true only if this commit is a fill of a symbolic name
2480 # that is a tag, None in all other cases.
2481 self.is_tag = None
2483 def set_symbolic_name(self, symbolic_name):
2484 "Set self.symbolic_name to SYMBOLIC_NAME."
2485 self.symbolic_name = symbolic_name
2487 def set_motivating_revnum(self, revnum):
2488 "Set self.motivating_revnum to REVNUM."
2489 self.motivating_revnum = revnum
2491 def set_author(self, author):
2492 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2493 This is the only way to set an SVNCommit's author."""
2494 self._author = author
2496 def set_log_msg(self, msg):
2497 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2498 This is the only way to set an SVNCommit's log message."""
2499 self._log_msg = msg
2501 def set_date(self, date):
2502 """Set this SVNCommit's date to DATE (an integer).
2503 Note that self.add_revision() updates this automatically based on
2504 a CVSRevision; so you may not need to call this at all, and even
2505 if you do, the value may be overwritten by a later call to
2506 self.add_revision()."""
2507 self._max_date = date
2509 def get_date(self):
2510 """Returns this SVNCommit's date as an integer."""
2511 return self._max_date
2513 def get_revprops(self):
2514 """Return the Subversion revprops for this SVNCommit."""
2515 date = format_date(self._max_date)
2516 try:
2517 utf8_author = None
2518 if self._author is not None:
2519 utf8_author = to_utf8(self._author)
2520 utf8_log = to_utf8(self.get_log_msg())
2521 return { 'svn:author' : utf8_author,
2522 'svn:log' : utf8_log,
2523 'svn:date' : date }
2524 except UnicodeError:
2525 Log().write(LOG_WARN, '%s: problem encoding author or log message:'
2526 % warning_prefix)
2527 Log().write(LOG_WARN, " author: '%s'" % self._author)
2528 Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
2529 Log().write(LOG_WARN, " date: '%s'" % date)
2530 Log().write(LOG_WARN,
2531 "(subversion rev %s) Related files:" % self.revnum)
2532 for c_rev in self.cvs_revs:
2533 Log().write(LOG_WARN, " ", c_rev.fname)
2535 Log().write(LOG_WARN, "Consider rerunning with (for example)",
2536 "'--encoding=latin1'.\n")
2537 # It's better to fall back to the original (unknown encoding) data
2538 # than to either 1) quit or 2) record nothing at all.
2539 return { 'svn:author' : self._author,
2540 'svn:log' : self.get_log_msg(),
2541 'svn:date' : date }
2543 def add_revision(self, cvs_rev):
2544 self.cvs_revs.append(cvs_rev)
2545 if cvs_rev.timestamp > self._max_date:
2546 self._max_date = cvs_rev.timestamp
2548 def _is_primary_commit(self):
2549 """Return true if this is a primary SVNCommit, false otherwise."""
2550 return not (self.symbolic_name or self.motivating_revnum)
2552 def flush(self):
2553 Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
2554 % (self.revnum, self._description))
2555 Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)
2557 if self.motivating_revnum is not None:
2558 Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
2559 self.motivating_revnum)
2561 # If we're not a primary commit, then store our date and/or our
2562 # symbolic_name
2563 if not self._is_primary_commit():
2564 Ctx()._persistence_manager.set_name_and_date(
2565 self.revnum, self.symbolic_name, self._max_date)
2567 def __str__(self):
2568 """ Print a human-readable description of this SVNCommit. This
2569 description is not intended to be machine-parseable (although
2570 we're not going to stop you if you try!)"""
2572 ret = "SVNCommit #: " + str(self.revnum) + "\n"
2573 if self.symbolic_name:
2574 ret += (" symbolic name: " + _clean_symbolic_name(self.symbolic_name)
2575 + "\n")
2576 else:
2577 ret += " NO symbolic name\n"
2578 ret += " debug description: " + self._description + "\n"
2579 ret += " cvs_revs:\n"
2580 for c_rev in self.cvs_revs:
2581 ret += " " + c_rev.unique_key() + "\n"
2582 return ret
2584 def get_log_msg(self):
2585 """Returns the actual log message for a primary commit, and the
2586 appropriate manufactured log message for a secondary commit."""
2587 if self.symbolic_name is not None:
2588 return self._log_msg_for_symbolic_name_commit()
2589 elif self.motivating_revnum is not None:
2590 return self._log_msg_for_default_branch_commit()
2591 else:
2592 return self._log_msg
2594 def _log_msg_for_symbolic_name_commit(self):
2595 """Creates a log message for a manufactured commit that fills
2596 self.symbolic_name. If self.is_tag is true, write the log message
2597 as though for a tag, else write it as though for a branch."""
2598 type = 'branch'
2599 if self.is_tag:
2600 type = 'tag'
2602 # In Python 2.2.3, we could use textwrap.fill(). Oh well :-).
2603 space_or_newline = ' '
2604 cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
2605 if len(cleaned_symbolic_name) >= 13:
2606 space_or_newline = '\n'
2608 return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
2609 % (type, space_or_newline, cleaned_symbolic_name)
2611 def _log_msg_for_default_branch_commit(self):
2612 """Creates a log message for a manufactured commit that
2613 synchronizes a non-trunk default branch with trunk."""
2614 msg = 'This commit was generated by cvs2svn to compensate for ' \
2615 'changes in r%d,\n' \
2616 'which included commits to RCS files with non-trunk default ' \
2617 'branches.\n' % self.motivating_revnum
2618 return msg
2620 class CVSRevisionAggregator:
2621 """This class groups CVSRevisions into CVSCommits that represent
2622 at least one SVNCommit."""
2623 def __init__(self):
2624 self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
2625 if not Ctx().trunk_only:
2626 self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
2627 DB_OPEN_READ)
2628 self.cvs_commits = {}
2629 self.pending_symbols = {}
2630 # A list of symbols for which we've already encountered the last
2631 # CVSRevision that is a source for that symbol. That is, the
2632 # final fill for this symbol has been done, and we never need to
2633 # fill it again.
2634 self.done_symbols = [ ]
2636 # This variable holds the most recently created primary svn_commit
2637 # object. CVSRevisionAggregator maintains this variable merely
2638 # for its date, so that it can set dates for the SVNCommits
2639 # created in self.attempt_to_commit_symbols().
2640 self.latest_primary_svn_commit = None
2642 Ctx()._symbolings_logger = SymbolingsLogger()
2643 Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
2644 Ctx()._default_branches_db = Database(temp(DEFAULT_BRANCHES_DB),
2645 DB_OPEN_READ)
2648 def process_revision(self, c_rev):
2649 # Each time we read a new line, we scan the commits we've
2650 # accumulated so far to see if any are ready for processing now.
2651 ready_queue = [ ]
2652 for digest_key, cvs_commit in self.cvs_commits.items():
2653 if cvs_commit.t_max + COMMIT_THRESHOLD < c_rev.timestamp:
2654 ready_queue.append(cvs_commit)
2655 del self.cvs_commits[digest_key]
2656 continue
2657 # If the inbound commit is on the same file as a pending commit,
2658 # close the pending commit to further changes. Don't flush it though,
2659 # as there may be other pending commits dated before this one.
2660 # ### ISSUE: the has_file() check below is not optimal.
2661 # It does fix the data loss bug where revisions would get lost
2662 # if checked in too quickly, but it can also break apart the
2663 # commits. The correct fix would require tracking the dependencies
2664 # between change sets and committing them in proper order.
2665 if cvs_commit.has_file(c_rev.fname):
2666 unused_id = digest_key + '-'
2667 # Find a string that is not already a key in
2668 # the self.cvs_commits dict.
2669 while self.cvs_commits.has_key(unused_id):
2670 unused_id = unused_id + '-'
2671 self.cvs_commits[unused_id] = cvs_commit
2672 del self.cvs_commits[digest_key]
2674 # Add this item into the set of still-available commits.
2675 if self.cvs_commits.has_key(c_rev.digest):
2676 cvs_commit = self.cvs_commits[c_rev.digest]
2677 else:
2678 author, log = self.metadata_db[c_rev.digest]
2679 self.cvs_commits[c_rev.digest] = CVSCommit(c_rev.digest,
2680 author, log)
2681 cvs_commit = self.cvs_commits[c_rev.digest]
2682 cvs_commit.add_revision(c_rev)
2684 # If there are any elements in the ready_queue at this point, they
2685 # need to be processed, because this latest rev couldn't possibly
2686 # be part of any of them. Sort them into time-order, then process
2687 # 'em.
2688 ready_queue.sort()
2690 # Make sure we attempt_to_commit_symbols for this c_rev, even if no
2691 # commits are ready.
2692 if len(ready_queue) == 0:
2693 self.attempt_to_commit_symbols(ready_queue, c_rev)
2695 for cvs_commit in ready_queue[:]:
2696 self.latest_primary_svn_commit \
2697 = cvs_commit.process_revisions(self.done_symbols)
2698 ready_queue.remove(cvs_commit)
2699 self.attempt_to_commit_symbols(ready_queue, c_rev)
2701 def flush(self):
2702 """Commit anything left in self.cvs_commits. Then inform the
2703 SymbolingsLogger that all commits are done."""
2705 ready_queue = [ ]
2706 for k, v in self.cvs_commits.items():
2707 ready_queue.append((v, k))
2709 ready_queue.sort()
2710 for cvs_commit_tuple in ready_queue[:]:
2711 self.latest_primary_svn_commit = \
2712 cvs_commit_tuple[0].process_revisions(self.done_symbols)
2713 ready_queue.remove(cvs_commit_tuple)
2714 del self.cvs_commits[cvs_commit_tuple[1]]
2715 self.attempt_to_commit_symbols([])
2717 if not Ctx().trunk_only:
2718 Ctx()._symbolings_logger.close()
2720 def attempt_to_commit_symbols(self, queued_commits, c_rev=None):
2721 """
2722 This function generates 1 SVNCommit for each symbol in
2723 self.pending_symbols that doesn't have an opening CVSRevision in
2724 either QUEUED_COMMITS or self.cvs_commits.values().
2726 If C_REV is not None, then we first add to self.pending_symbols
2727 any symbols from C_REV that C_REV is the last CVSRevision for.
2728 """
2729 # If we're not doing a trunk-only conversion, get the symbolic
2730 # names that this c_rev is the last *source* CVSRevision for and
2731 # add them to those left over from previous passes through the
2732 # aggregator.
2733 if c_rev and not Ctx().trunk_only:
2734 for sym in self.last_revs_db.get(c_rev.unique_key(), []):
2735 self.pending_symbols[sym] = None
2737 # Make a list of all symbols that still have *source* CVSRevisions
2738 # in the pending commit queue (self.cvs_commits).
2739 open_symbols = {}
2740 for sym in self.pending_symbols.keys():
2741 for cvs_commit in self.cvs_commits.values() + queued_commits:
2742 if cvs_commit.opens_symbolic_name(sym):
2743 open_symbols[sym] = None
2744 break
2746 # Sort the pending symbols so that we will always process the
2747 # symbols in the same order, regardless of the order in which the
2748 # dict hashing algorithm hands them back to us. We do this so
2749 # that our tests will get the same results on all platforms.
2750 sorted_pending_symbols_keys = self.pending_symbols.keys()
2751 sorted_pending_symbols_keys.sort()
2752 for sym in sorted_pending_symbols_keys:
2753 if open_symbols.has_key(sym): # sym is still open--don't close it.
2754 continue
2755 svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
2756 svn_commit.set_symbolic_name(sym)
2757 svn_commit.set_date(self.latest_primary_svn_commit.get_date())
2758 svn_commit.flush()
2759 self.done_symbols.append(sym)
2760 del self.pending_symbols[sym]
2763 class SymbolingsReader:
2764 """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
2765 and the SYMBOL_OFFSETS_DB. Does the heavy lifting of finding and
2766 returning the correct opening and closing Subversion revision
2767 numbers for a given symbolic name."""
2768 def __init__(self):
2769 """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
2770 reads the offsets database into memory."""
2771 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
2772 # The offsets_db is really small, and we need to read and write
2773 # from it a fair bit, so suck it into memory
2774 offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
2775 self.offsets = { }
2776 for key in offsets_db.db.keys():
2777 #print " ZOO:", key, offsets_db[key]
2778 self.offsets[key] = offsets_db[key]
2780 def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
2781 """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
2782 SymbolicNameFillingGuide object.
2784 Note that if we encounter an opening rev in this fill, but the
2785 corresponding closing rev takes place later than SVN_REVNUM, the
2786 closing will not be passed to SymbolicNameFillingGuide in this
2787 fill (and will be discarded when encountered in a later fill).
2788 This is perfectly fine, because we can still do a valid fill
2789 without the closing--we always try to fill what we can as soon as
2790 we can."""
2792 openings_closings_map = OpeningsClosingsMap(symbolic_name)
2794 # It's possible to have a branch start with a file that was added
2795 # on a branch
2796 if self.offsets.has_key(symbolic_name):
2797 # set our read offset for self.symbolings to the offset for
2798 # symbolic_name
2799 self.symbolings.seek(self.offsets[symbolic_name])
2801 while 1:
2802 fpos = self.symbolings.tell()
2803 line = self.symbolings.readline().rstrip()
2804 if not line:
2805 break
2806 name, revnum, type, branch_name, cvs_path = line.split(" ", 4)
2807 if branch_name == '*':
2808 branch_name = None
2809 svn_path = Ctx().project.make_path(cvs_path, branch_name)
2810 revnum = int(revnum)
2811 if revnum > svn_revnum or name != symbolic_name:
2812 break
2813 openings_closings_map.register(svn_path, revnum, type)
2815 # Remember the offset of the first line we did *not* consume so that
2816 # the next fill for this symbol can resume reading there.  Only update
2817 # the offset if we actually used anything we read.
2818 if not openings_closings_map.is_empty():
2819 self.offsets[symbolic_name] = fpos
2821 return SymbolicNameFillingGuide(openings_closings_map)
2824 class SvnRevisionRange:
2825 """The range of subversion revision numbers from which a path can be
2826 copied. self.opening_revnum is the number of the earliest such
2827 revision, and self.closing_revnum is one higher than the number of
2828 the last such revision. If self.closing_revnum is None, then no
2829 closings were registered."""
2831 def __init__(self, opening_revnum):
2832 self.opening_revnum = opening_revnum
2833 self.closing_revnum = None
2835 def add_closing(self, closing_revnum):
2836 # When we have a non-trunk default branch, we may have multiple
2837 # closings--only register the first closing we encounter.
2838 if self.closing_revnum is None:
2839 self.closing_revnum = closing_revnum
2841 def __str__(self):
2842 if self.closing_revnum is None:
2843 return '[%d:]' % (self.opening_revnum,)
2844 else:
2845 return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
2848 class OpeningsClosingsMap:
2849 """A dictionary of openings and closings for a symbolic name in the
2850 current SVNCommit.
2852 The user should call self.register() for the openings and closings,
2853 then self.get_node_tree() to retrieve the information as a
2854 SymbolicNameFillingGuide."""
2856 def __init__(self, symbolic_name):
2857 """Initialize OpeningsClosingsMap and prepare it for receiving
2858 openings and closings."""
2860 self.name = symbolic_name
2862 # A dictionary of SVN_PATHS to SvnRevisionRange objects.
2863 self.things = { }
2865 def register(self, svn_path, svn_revnum, type):
2866 """Register an opening or closing revision for this symbolic name.
2867 SVN_PATH is the source path that needs to be copied into
2868 self.symbolic_name, and SVN_REVNUM is either the first svn
2869 revision number that we can copy from (our opening), or the last
2870 (not inclusive) svn revision number that we can copy from (our
2871 closing). TYPE indicates whether this path is an opening or a
2872 closing.
2874 The opening for a given SVN_PATH must be passed before the closing
2875 for it to have any effect... any closing encountered before a
2876 corresponding opening will be discarded.
2878 It is not necessary to pass a corresponding closing for every
2879 opening.
2880 """
2881 # Always log an OPENING
2882 if type == OPENING:
2883 self.things[svn_path] = SvnRevisionRange(svn_revnum)
2884 # Only log a closing if we've already registered the opening for that
2885 # path.
2886 elif type == CLOSING and self.things.has_key(svn_path):
2887 self.things[svn_path].add_closing(svn_revnum)
2889 def is_empty(self):
2890 """Return true if we haven't accumulated any openings or closings,
2891 false otherwise."""
2892 return not len(self.things)
2894 def get_things(self):
2895 """Return a list of (svn_path, SvnRevisionRange) tuples for all
2896 svn_paths with registered openings or closings."""
2898 return self.things.items()
2901 class SymbolicNameFillingGuide:
2902 """A node tree representing the source paths to be copied to fill
2903 self.symbolic_name in the current SVNCommit.
2905 self._node_tree is the root of the directory tree, in the form {
2906 path_component : subnode }. Leaf nodes are instances of
2907 SvnRevisionRange. Intermediate (directory) nodes are dictionaries
2908 mapping relative names to subnodes.
2910 By walking self._node_tree and calling self.get_best_revnum() on
2911 each node, the caller can determine what subversion revision number
2912 to copy the path corresponding to that node from. self._node_tree
2913 should be treated as read-only.
2915 The caller can then descend to sub-nodes to see if their "best
2916 revnum" differs from their parents' and if it does, take appropriate
2917 actions to "patch up" the subtrees."""
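# Illustrative node tree (paths hypothetical): registering ranges for
# 'trunk/proj/foo.c' and 'trunk/proj/sub/bar.c' yields
#
#   { 'trunk': { 'proj': { 'foo.c': SvnRevisionRange,
#                          'sub': { 'bar.c': SvnRevisionRange } } } }
#
# where each leaf holds the copyable revision range for that file.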
2919 def __init__(self, openings_closings_map):
2920 """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
2921 store into it the openings and closings from
2922 OPENINGS_CLOSINGS_MAP."""
2924 self.name = openings_closings_map.name
2926 # The dictionary that holds our node tree as a map { node_key :
2927 # node }.
2928 self._node_tree = { }
2930 for svn_path, svn_revision_range in openings_closings_map.get_things():
2931 (head, tail) = _path_split(svn_path)
2932 self._get_node_for_path(head)[tail] = svn_revision_range
2934 #self.print_node_tree(self._node_tree)
2936 def _get_node_for_path(self, svn_path):
2937 """Return the node key for svn_path, creating new nodes as needed."""
2938 # Walk down the path, one node at a time.
2939 node = self._node_tree
2940 for component in svn_path.split('/'):
2941 if node.has_key(component):
2942 node = node[component]
2943 else:
2944 old_node = node
2945 node = {}
2946 old_node[component] = node
2948 return node
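# Illustrative example: called with 'trunk/proj' (hypothetical path) on
# an empty tree, the walk above creates { 'trunk': { 'proj': {} } } and
# returns the innermost dictionary, into which the caller then stores a
# leaf (an SvnRevisionRange) under the final path component.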
2950 def get_best_revnum(self, node, preferred_revnum):
2951 """Determine the best subversion revision number to use when
2952 copying the source tree beginning at NODE. Returns a tuple
2953 (revnum, score).
2955 PREFERRED_REVNUM is passed to self._best_rev and used to
2956 calculate the best_revnum."""
2957 revnum = SVN_INVALID_REVNUM
2959 # Aggregate openings and closings from the rev tree
2960 svn_revision_ranges = self._list_revnums(node)
2961 openings = [ x.opening_revnum
2962 for x in svn_revision_ranges ]
2963 closings = [ x.closing_revnum
2964 for x in svn_revision_ranges
2965 if x.closing_revnum is not None ]
2967 # Helper function for scoring the lists.
2968 def tally_frequencies(rev_list):
2969 """Takes an array of revisions (REV_LIST), for example:
2971 [21, 18, 6, 49, 39, 24, 24, 24, 24, 24, 24, 24]
2973 and adds up every occurrence of each revision and returns a sorted
2974 array of tuples containing (svn_revnum, count):
2976 [(6, 1), (18, 1), (21, 1), (24, 7), (39, 1), (49, 1)]"""
2978 s = {}
2979 for k in rev_list: # Add up the scores
2980 s[k] = s.get(k, 0) + 1
2981 a = s.items()
2982 a.sort()
2983 return a
2985 # Score the lists
2986 scores = self._score_revisions(tally_frequencies(openings),
2987 tally_frequencies(closings))
2989 revnum, max_score = self._best_rev(scores, preferred_revnum)
2991 if revnum == SVN_INVALID_REVNUM:
2992 raise FatalError("failed to find a revision "
2993 + "to copy from when copying %s" % name)
2994 return revnum, max_score
2996 def _best_rev(self, scores, preferred_rev):
2997 """Return the revision with the highest score from SCORES, a list
2998 returned by _score_revisions(). When the maximum score is shared
2999 by multiple revisions, the oldest revision is selected, unless
3000 PREFERRED_REV is one of the possibilities, in which case, it is
3001 selected."""
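# Worked example (hypothetical scores, assuming SVN_INVALID_REVNUM sorts
# below all valid revision numbers): with scores = [(2, 3), (5, 3), (7, 1)]
# and preferred_rev = 5, revision 2 reaches the maximum score first, but
# revision 5 shares that score and is preferred, so (5, 3) is returned.
# With preferred_rev = None, the oldest maximum wins and (2, 3) is returned.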
3002 max_score = 0
3003 preferred_rev_score = -1
3004 rev = SVN_INVALID_REVNUM
3005 if preferred_rev is None:
3006 # Comparison order of different types is arbitrary. Do not
3007 # expect None to compare less than int values below.
3008 # In Python 2.3 None compares with ints like negative infinity.
3009 # In Python 2.0 None compares with ints like positive infinity.
3010 preferred_rev = SVN_INVALID_REVNUM
3011 for revnum, count in scores:
3012 if count > max_score:
3013 max_score = count
3014 rev = revnum
3015 if revnum <= preferred_rev:
3016 preferred_rev_score = count
3017 if preferred_rev_score == max_score:
3018 rev = preferred_rev
3019 return rev, max_score
3021 def _score_revisions(self, openings, closings):
3022 """Return a list of revisions and scores based on OPENINGS and
3023 CLOSINGS. The returned list looks like:
3025 [(REV1, SCORE1), (REV2, SCORE2), ...]
3027 where the tuples are sorted by revision number. OPENINGS and
3028 CLOSINGS are lists of tuples [(svn_revnum, count), ...] reflecting
3029 the frequency with which svn revision numbers appeared as the
3030 opening_revnum and closing_revnum of file nodes.
3032 Each score indicates that copying the corresponding revision (or
3033 any following revision up to the next revision in the list) of the
3034 object in question would yield that many correct paths at or
3035 underneath the object. There may be other paths underneath it
3036 which are not correct and would need to be deleted or recopied;
3037 those can only be detected by descending and examining their
3038 scores.
3040 If OPENINGS is empty, return the empty list."""
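# Worked example (hypothetical input):
#   openings = [(2, 2), (5, 1)]   # two paths open at r2, one more at r5
#   closings = [(7, 1)]           # one of them closes at r7
# The cumulative opening scores give [(2, 2), (5, 3)]; applying the
# closing appends (7, 2), so the result is [(2, 2), (5, 3), (7, 2)]:
# copying at r2-r4 yields 2 correct paths, r5-r6 yields 3, and r7 or
# later yields 2.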
3041 # First look for easy out.
3042 if not openings:
3043 return []
3045 # No easy out, so wish for lexical closures and calculate the scores :-).
3046 scores = []
3047 opening_score_accum = 0
3048 for (opening_rev, opening_score) in openings:
3049 opening_score_accum = opening_score_accum + opening_score
3050 scores.append((opening_rev, opening_score_accum))
3051 min = 0
3052 for (closing_rev, closing_score) in closings:
3053 done_exact_rev = None
3054 insert_index = None
3055 insert_score = None
3056 for j in range(min, len(scores)):
3057 score_rev, score = scores[j]
3058 if score_rev >= closing_rev:
3059 if not done_exact_rev:
3060 if score_rev > closing_rev:
3061 insert_index = j
3062 insert_score = scores[j-1][1] - closing_score
3063 done_exact_rev = 1
3064 scores[j] = (score_rev, score - closing_score)
3065 else:
3066 min = j + 1
3067 if not done_exact_rev:
3068 scores.append((closing_rev, scores[-1][1] - closing_score))
3069 if insert_index is not None:
3070 scores.insert(insert_index, (closing_rev, insert_score))
3071 return scores
3073 def _list_revnums(self, node):
3074 """Return a list of all the SvnRevisionRanges (including
3075 duplicates) for all leaf nodes at and under NODE."""
3077 if isinstance(node, SvnRevisionRange):
3078 # It is a leaf node.
3079 return [ node ]
3080 else:
3081 # It is an intermediate node.
3082 revnums = []
3083 for key, subnode in node.items():
3084 revnums.extend(self._list_revnums(subnode))
3085 return revnums
3087 def get_sources(self):
3088 """Return the list of sources for this symbolic name.
3090 The Project instance defines what are legitimate sources. Raise
3091 an exception if a change occurred outside of the source
3092 directories."""
3094 return self._get_sub_sources('', self._node_tree)
3096 def _get_sub_sources(self, start_svn_path, start_node):
3097 """Return the list of sources for this symbolic name, starting the
3098 search at path START_SVN_PATH, which is node START_NODE. This is
3099 a helper method, called by get_sources() (see)."""
3101 project = Ctx().project
3102 if isinstance(start_node, SvnRevisionRange):
3103 # This implies that a change was found outside of the
3104 # legitimate sources. This should never happen.
3105 raise FatalError("change detected outside of the source directories")
3106 elif project.is_source(start_svn_path):
3107 # This is a legitimate source. Add it to list.
3108 return [ FillSource(start_svn_path, start_node) ]
3109 else:
3110 # This is a directory that is not a legitimate source. (That's
3111 # OK because it hasn't changed directly.) But directories
3112 # within it have been changed, so we need to search recursively
3113 # to find their enclosing sources.
3114 sources = []
3115 for entry, node in start_node.items():
3116 svn_path = _path_join(start_svn_path, entry)
3117 sources.extend(self._get_sub_sources(svn_path, node))
3119 return sources
3121 def print_node_tree(self, node, name='/', indent_depth=0):
3122 """For debugging purposes. Prints all nodes in TREE that are
3123 rooted at NODE. INDENT_DEPTH is used to indent the output of
3124 recursive calls."""
3125 if not indent_depth:
3126 print "TREE", "=" * 75
3127 if isinstance(node, SvnRevisionRange):
3128 print "TREE:", " " * (indent_depth * 2), name, node
3129 else:
3130 print "TREE:", " " * (indent_depth * 2), name
3131 for key, value in node.items():
3132 self.print_node_tree(value, key, (indent_depth + 1))
3135 class FillSource:
3136 """Representation of a fill source used by the symbol filler in
3137 SVNRepositoryMirror."""
3138 def __init__(self, prefix, node):
3139 """Create an unscored fill source with a prefix and a key."""
3140 self.prefix = prefix
3141 self.node = node
3142 self.score = None
3143 self.revnum = None
3145 def set_score(self, score, revnum):
3146 """Set the SCORE and REVNUM."""
3147 self.score = score
3148 self.revnum = revnum
3150 def __cmp__(self, other):
3151 """Comparison operator used to sort FillSources in descending
3152 score order."""
3153 if self.score is None or other.score is None:
3154 raise TypeError, 'Tried to compare unscored FillSource'
3155 return cmp(other.score, self.score)
3158 class SVNRepositoryMirror:
3159 """Mirror a Subversion Repository as it is constructed, one
3160 SVNCommit at a time. The mirror is skeletal; it does not contain
3161 file contents. The creation of a dumpfile or Subversion repository
3162 is handled by delegates. See self.add_delegate method for how to
3163 set delegates.
3165 The structure of the repository is kept in two databases and one
3166 hash. The revs_db database maps revisions to root node keys, and
3167 the nodes_db database maps node keys to nodes. A node is a hash
3168 from directory names to keys. Both the revs_db and the nodes_db are
3169 stored on disk and each access is expensive.
3171 The nodes_db database only has the keys for old revisions. The
3172 revision that is being constructed is kept in memory in the new_nodes
3173 hash which is cheap to access.
3175 You must invoke _start_commit between SVNCommits.
3177 *** WARNING *** All path arguments to methods in this class CANNOT
3178 have leading or trailing slashes."""
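# Illustrative lookup through the two databases (keys are hypothetical,
# and the node for the youngest revision may live in new_nodes instead):
#
#   root_key = self.revs_db['7']             # root node key for r7
#   root     = self.nodes_db[root_key]       # e.g. { 'trunk': key1, ... }
#   trunk    = self.nodes_db[root['trunk']]  # e.g. { 'proj': key2, ... }
#
# i.e. each directory node maps entry names to the keys of child nodes.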
3181 class SVNRepositoryMirrorPathExistsError(Exception):
3182 """Exception raised if an attempt is made to add a path to the
3183 repository mirror and that path already exists in the youngest
3184 revision of the repository."""
3185 pass
3187 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
3188 """Exception raised if a CVSRevision is found to have an unexpected
3189 operation (OP) value."""
3190 pass
3192 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
3193 """Exception raised if an empty SymbolicNameFillingGuide is returned
3194 during a fill where the branch in question already exists."""
3195 pass
3197 def __init__(self):
3198 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
3199 self.delegates = [ ]
3201 # This corresponds to the 'revisions' table in a Subversion fs.
3202 self.revs_db = Database(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
3203 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)
3205 # This corresponds to the 'nodes' table in a Subversion fs. (We
3206 # don't need a 'representations' or 'strings' table because we
3207 # only track metadata, not file contents.)
3208 self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
3209 Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)
3211 # Start at revision 0 without a root node. It will be created
3212 # by _open_writable_root_node.
3213 self.youngest = 0
3214 self.new_root_key = None
3215 self.new_nodes = { }
3217 if not Ctx().trunk_only:
3218 ###PERF IMPT: Suck this into memory.
3219 self.tags_db = TagsDatabase(DB_OPEN_READ)
3220 self.symbolings_reader = SymbolingsReader()
3222 def _initialize_repository(self, date):
3223 """Initialize the repository by creating the directories for
3224 trunk, tags, and branches. This method should only be called
3225 after all delegates are added to the repository mirror."""
3226 # Make a 'fake' SVNCommit so we can take advantage of the revprops
3227 # magic therein
3228 svn_commit = SVNCommit("Initialization", 1)
3229 svn_commit.set_date(date)
3230 svn_commit.set_log_msg("New repository initialized by cvs2svn.")
3232 self._start_commit(svn_commit)
3233 self._mkdir(Ctx().project.trunk_path)
3234 if not Ctx().trunk_only:
3235 self._mkdir(Ctx().project.branches_path)
3236 self._mkdir(Ctx().project.tags_path)
3238 def _start_commit(self, svn_commit):
3239 """Start a new commit."""
3240 if self.youngest > 0:
3241 self._end_commit()
3243 self.youngest = svn_commit.revnum
3244 self.new_root_key = None
3245 self.new_nodes = { }
3247 self._invoke_delegates('start_commit', svn_commit)
3249 def _end_commit(self):
3250 """Called at the end of each commit. This method copies the newly
3251 created nodes to the on-disk nodes db."""
3252 if self.new_root_key is None:
3253 # No changes were made in this revision, so we make the root node
3254 # of the new revision be the same as the last one.
3255 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3256 else:
3257 self.revs_db[str(self.youngest)] = self.new_root_key
3258 # Copy the new nodes to the nodes_db
3259 for key, value in self.new_nodes.items():
3260 self.nodes_db[key] = value
3262 def _get_node(self, key):
3263 """Returns the node contents for KEY which may refer to either
3264 self.nodes_db or self.new_nodes."""
3265 if self.new_nodes.has_key(key):
3266 return self.new_nodes[key]
3267 else:
3268 return self.nodes_db[key]
3270 def _open_readonly_node(self, path, revnum):
3271 """Open a readonly node for PATH at revision REVNUM. Returns the
3272 node key if the path exists, else None."""
3273 # Get the root key
3274 if revnum == self.youngest:
3275 if self.new_root_key is None:
3276 node_key = self.revs_db[str(self.youngest - 1)]
3277 else:
3278 node_key = self.new_root_key
3279 else:
3280 node_key = self.revs_db[str(revnum)]
3282 for component in path.split('/'):
3283 node_contents = self._get_node(node_key)
3284 if not node_contents.has_key(component):
3285 return None
3286 node_key = node_contents[component]
3288 return node_key
3290 def _open_writable_root_node(self):
3291 """Open a writable root node. The current root node is returned
3292 immediately if it is already writable. If not, create a new one by
3293 copying the contents of the root node of the previous version."""
3294 if self.new_root_key is not None:
3295 return self.new_root_key, self.new_nodes[self.new_root_key]
3297 if self.youngest < 2:
3298 new_contents = { }
3299 else:
3300 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
3301 self.new_root_key = gen_key()
3302 self.new_nodes = { self.new_root_key: new_contents }
3304 return self.new_root_key, new_contents
3306 def _open_writable_node(self, svn_path, create):
3307 """Open a writable node for the path SVN_PATH, creating SVN_PATH
3308 and any missing directories if CREATE is True."""
3309 parent_key, parent_contents = self._open_writable_root_node()
3311 # Walk up the path, one node at a time.
3312 path_so_far = None
3313 components = svn_path.split('/')
3314 for i in range(len(components)):
3315 component = components[i]
3316 this_key = this_contents = None
3317 path_so_far = _path_join(path_so_far, component)
3318 if parent_contents.has_key(component):
3319 # The component exists.
3320 this_key = parent_contents[component]
3321 if self.new_nodes.has_key(this_key):
3322 this_contents = self.new_nodes[this_key]
3323 else:
3324 # Suck the node from the nodes_db, but update the key
3325 this_contents = self.nodes_db[this_key]
3326 this_key = gen_key()
3327 self.new_nodes[this_key] = this_contents
3328 parent_contents[component] = this_key
3329 elif create:
3330 # The component does not exist, so we create it.
3331 this_contents = { }
3332 this_key = gen_key()
3333 self.new_nodes[this_key] = this_contents
3334 parent_contents[component] = this_key
3335 if i < len(components) - 1:
3336 self._invoke_delegates('mkdir', path_so_far)
3337 else:
3338 # The component does not exist and we are not instructed to
3339 # create it, so we give up.
3340 return None, None
3342 parent_key = this_key
3343 parent_contents = this_contents
3345 return this_key, this_contents
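# Illustrative copy-on-write behavior (hypothetical path): opening
# 'trunk/proj' for writing when 'proj' was last touched in an earlier
# revision copies the old 'proj' node out of nodes_db under a fresh key,
# stores the copy in new_nodes, and repoints the parent entry at the new
# key, so earlier revisions keep referring to the unchanged original node.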
3347 def _path_exists(self, path):
3348 """If PATH exists in self.youngest of the svn repository mirror,
3349 return true, else return false.
3351 PATH must not start with '/'."""
3352 return self._open_readonly_node(path, self.youngest) is not None
3354 def _fast_delete_path(self, parent_path, parent_contents, component):
3355 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3356 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3357 in PARENT_CONTENTS."""
3358 if parent_contents.has_key(component):
3359 del parent_contents[component]
3360 self._invoke_delegates('delete_path',
3361 _path_join(parent_path, component))
3363 def _delete_path(self, svn_path, should_prune=False):
3364 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
3365 all ancestor directories that are made empty when SVN_PATH is deleted.
3366 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
3368 NOTE: This function ignores requests to delete the root directory
3369 or any directory for which Ctx().project.is_unremovable() returns
3370 True, either directly or by pruning."""
3372 if svn_path == '' or Ctx().project.is_unremovable(svn_path):
3373 return
3375 (parent_path, entry,) = _path_split(svn_path)
3376 if parent_path:
3377 parent_key, parent_contents = \
3378 self._open_writable_node(parent_path, False)
3379 else:
3380 parent_key, parent_contents = self._open_writable_root_node()
3382 if parent_key is not None:
3383 self._fast_delete_path(parent_path, parent_contents, entry)
3384 # The following recursion makes pruning an O(n^2) operation in the
3385 # worst case (where n is the depth of SVN_PATH), but the worst case
3386 # is probably rare, and the constant cost is pretty low. Another
3387 # drawback is that we issue a delete for each path and not just
3388 # a single delete for the topmost directory pruned.
3389 if should_prune and len(parent_contents) == 0:
3390 self._delete_path(parent_path, True)
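# Illustrative pruning (hypothetical path): deleting
# 'trunk/proj/sub/only_file' with should_prune=True removes the file,
# then the now-empty 'sub', and keeps walking upward until it reaches a
# non-empty directory or a path protected by Ctx().project.is_unremovable().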
3392 def _mkdir(self, path):
3393 """Create PATH in the repository mirror at the youngest revision."""
3394 self._open_writable_node(path, True)
3395 self._invoke_delegates('mkdir', path)
3397 def _change_path(self, cvs_rev):
3398 """Register a change in self.youngest for the CVS_REV's svn_path
3399 in the repository mirror."""
3400 # We do not have to update the nodes because our mirror is only
3401 # concerned with the presence or absence of paths, and a file
3402 # content change does not cause any path changes.
3403 self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, 0))
3405 def _add_path(self, cvs_rev):
3406 """Add the CVS_REV's svn_path to the repository mirror."""
3407 self._open_writable_node(cvs_rev.svn_path, True)
3408 self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, 1))
3410 def _copy_path(self, src_path, dest_path, src_revnum):
3411 """Copy SRC_PATH at subversion revision number SRC_REVNUM to
3412 DEST_PATH. In the youngest revision of the repository, DEST_PATH's
3413 parent *must* exist, but DEST_PATH *cannot* exist.
3415 Return the node key and the contents of the new node at DEST_PATH
3416 as a dictionary."""
3417 # get the contents of the node of our src_path
3418 src_key = self._open_readonly_node(src_path, src_revnum)
3419 src_contents = self._get_node(src_key)
3421 # Get the parent path and the base path of the dest_path
3422 (dest_parent, dest_basename,) = _path_split(dest_path)
3423 dest_parent_key, dest_parent_contents = \
3424 self._open_writable_node(dest_parent, False)
3426 if dest_parent_contents.has_key(dest_basename):
3427 msg = "Attempt to add path '%s' to repository mirror " % dest_path
3428 msg = msg + "when it already exists in the mirror."
3429 raise self.SVNRepositoryMirrorPathExistsError, msg
3431 dest_parent_contents[dest_basename] = src_key
3432 self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
3434 # Yes sir, src_key and src_contents are also the contents of the
3435 # destination. This is a cheap copy, remember! :-)
3436 return src_key, src_contents
3438 def _fill_symbolic_name(self, svn_commit):
3439 """Performs all copies necessary to create as much of the the tag
3440 or branch SVN_COMMIT.symbolic_name as possible given the current
3441 revision of the repository mirror.
3443 The symbolic name is guaranteed to exist in the Subversion
3444 repository by the end of this call, even if there are no paths
3445 under it."""
3446 symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3447 svn_commit.symbolic_name, self.youngest)
3448 # Get the list of sources for the symbolic name.
3449 sources = symbol_fill.get_sources()
3451 if sources:
3452 if self.tags_db.has_key(svn_commit.symbolic_name):
3453 dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
3454 else:
3455 dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)
3457 dest_key = self._open_writable_node(dest_prefix, False)[0]
3458 self._fill(symbol_fill, dest_prefix, dest_key, sources)
3459 else:
3460 # We can only get here for a branch whose first commit is an add
3461 # (as opposed to a copy).
3462 dest_path = Ctx().project.get_branch_path(symbol_fill.name)
3463 if not self._path_exists(dest_path):
3464 # If our symbol_fill was empty, that means that our first
3465 # commit on the branch was to a file added on the branch, and
3466 # that this is our first fill of that branch.
3468 # This case is covered by test 16.
3470 # ...we create the branch by copying trunk from our
3471 # current revision number minus 1
3472 source_path = Ctx().project.trunk_path
3473 entries = self._copy_path(source_path, dest_path,
3474 svn_commit.revnum - 1)[1]
3475 # Now since we've just copied trunk to a branch that's
3476 # *supposed* to be empty, we delete any entries in the
3477 # copied directory.
3478 for entry in entries.keys():
3479 del_path = dest_path + '/' + entry
3480 # Delete but don't prune.
3481 self._delete_path(del_path)
3482 else:
3483 msg = "Error filling branch '" \
3484 + _clean_symbolic_name(symbol_fill.name) + "'.\n"
3485 msg = msg + "Received an empty SymbolicNameFillingGuide and\n"
3486 msg = msg + "attempted to create a branch that already exists."
3487 raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3489 def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3490 path = None, parent_source_prefix = None,
3491 preferred_revnum = None, prune_ok = None):
3492 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3493 SOURCES, and recurse into the child items.
3495 DEST_PREFIX is the prefix of the destination directory, e.g.
3496 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3497 FillSource classes that are candidates to be copied to the
3498 destination. DEST_KEY is the key in self.nodes_db to the
3499 destination, or None if the destination does not yet exist.
3501 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3502 are at the top level, e.g. '/tags/my_tag'.
3504 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3505 the parent directory, and PREFERRED_REVNUM is an int which is the
3506 source revision number that the caller (who may have copied KEY's
3507 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3508 then no revision is preferable to any other (which probably means
3509 that no copies have happened yet).
3511 PRUNE_OK means that a copy has been made in this recursion, and
3512 it's safe to prune directories that are not in
3513 SYMBOL_FILL._node_tree, provided that said directory has
3514 PARENT_SOURCE_PREFIX as its source prefix.
3516 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3517 should only be passed in by recursive calls."""
3518 # Calculate scores and revnums for all sources
3519 for source in sources:
3520 src_revnum, score = symbol_fill.get_best_revnum(source.node,
3521 preferred_revnum)
3522 source.set_score(score, src_revnum)
3524 # Sort the sources in descending score order so that we will make
3525 # an eventual copy from the source with the highest score.
3526 sources.sort()
3527 copy_source = sources[0]
3529 src_path = _path_join(copy_source.prefix, path)
3530 dest_path = _path_join(dest_prefix, path)
3532 # Figure out if we shall copy to this destination and delete any
3533 # destination path that is in the way.
3534 do_copy = 0
3535 if dest_key is None:
3536 do_copy = 1
3537 elif prune_ok and (parent_source_prefix != copy_source.prefix or
3538 copy_source.revnum != preferred_revnum):
3539 # We are about to replace the destination, so we need to remove
3540 # it before we perform the copy.
3541 self._delete_path(dest_path)
3542 do_copy = 1
3544 if do_copy:
3545 dest_key, dest_entries = self._copy_path(src_path, dest_path,
3546 copy_source.revnum)
3547 prune_ok = 1
3548 else:
3549 dest_entries = self._get_node(dest_key)
3551 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3552 # elements and the values are lists of FillSource classes where
3553 # this path element exists.
3554 src_entries = {}
3555 for source in sources:
3556 if isinstance(source.node, SvnRevisionRange):
3557 continue
3558 for entry, node in source.node.items():
3559 if not src_entries.has_key(entry):
3560 src_entries[entry] = []
3561 src_entries[entry].append(FillSource(source.prefix, node))
3563 if prune_ok:
3564 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3565 delete_list = [ ]
3566 for entry in dest_entries.keys():
3567 if not src_entries.has_key(entry):
3568 delete_list.append(entry)
3569 if delete_list:
3570 if not self.new_nodes.has_key(dest_key):
3571 dest_key, dest_entries = self._open_writable_node(dest_path, True)
3572 # Sort the delete list to get "diffable" dumpfiles.
3573 delete_list.sort()
3574 for entry in delete_list:
3575 self._fast_delete_path(dest_path, dest_entries, entry)
3577 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3578 src_keys = src_entries.keys()
3579 src_keys.sort()
3580 for src_key in src_keys:
3581 next_dest_key = dest_entries.get(src_key, None)
3582 self._fill(symbol_fill, dest_prefix, next_dest_key,
3583 src_entries[src_key], _path_join(path, src_key),
3584 copy_source.prefix, sources[0].revnum, prune_ok)
3586 def _synchronize_default_branch(self, svn_commit):
3587 """Propagate any changes that happened on a non-trunk default
3588 branch to the trunk of the repository. See
3589 CVSCommit._post_commit() for details on why this is necessary."""
3590 for cvs_rev in svn_commit.cvs_revs:
3591 svn_trunk_path = Ctx().project.make_path(cvs_rev.cvs_path)
3592 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3593 if self._path_exists(svn_trunk_path):
3594 # Delete the path on trunk...
3595 self._delete_path(svn_trunk_path)
3596 # ...and copy over from branch
3597 self._copy_path(cvs_rev.svn_path, svn_trunk_path,
3598 svn_commit.motivating_revnum)
3599 elif cvs_rev.op == OP_DELETE:
3600 # delete trunk path
3601 self._delete_path(svn_trunk_path)
3602 else:
3603 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3604 % cvs_rev.op)
3605 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3607 def commit(self, svn_commit):
3608 """Add an SVNCommit to the SVNRepository, incrementing the
3609 Repository revision number, and changing the repository. Invoke
3610 the delegates' start_commit() method."""
3612 if svn_commit.revnum == 2:
3613 self._initialize_repository(svn_commit.get_date())
3615 self._start_commit(svn_commit)
3617 if svn_commit.symbolic_name:
3618 Log().write(LOG_VERBOSE, "Filling symbolic name:",
3619 _clean_symbolic_name(svn_commit.symbolic_name))
3620 self._fill_symbolic_name(svn_commit)
3621 elif svn_commit.motivating_revnum:
3622 Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3623 % svn_commit.motivating_revnum)
3624 self._synchronize_default_branch(svn_commit)
3625 else: # This actually commits CVSRevisions
3626 if len(svn_commit.cvs_revs) > 1: plural = "s"
3627 else: plural = ""
3628 Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3629 % (len(svn_commit.cvs_revs), plural))
3630 for cvs_rev in svn_commit.cvs_revs:
3631 # See comment in CVSCommit._commit() for what this is all
3632 # about. Note that although asking self._path_exists() is
3633 # somewhat expensive, we only do it if the first two (cheap)
3634 # tests succeed first.
3635 if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3636 and (cvs_rev.rev == "1.1.1.1")
3637 and self._path_exists(cvs_rev.svn_path)):
3638 if cvs_rev.op == OP_ADD:
3639 self._add_path(cvs_rev)
3640 elif cvs_rev.op == OP_CHANGE:
3641 # Fix for Issue #74:
3643 # Here's the scenario. You have file FOO that is imported
3644 # on a non-trunk vendor branch. So in r1.1 and r1.1.1.1,
3645 # the file exists.
3647 # Moving forward in time, FOO is deleted on the default
3648 # branch (r1.1.1.2). cvs2svn determines that this delete
3649 # also needs to happen on trunk, so FOO is deleted on
3650 # trunk.
3652 # Along comes r1.2, whose op is OP_CHANGE (because r1.1 is
3653 # not 'dead', we assume it's a change). However, since
3654 # our trunk file has been deleted, svnadmin blows up--you
3655 # can't change a file that doesn't exist!
3657 # Soooo... we just check the path, and if it doesn't
3658 # exist, we do an add... if the path does exist, it's
3659 # business as usual.
3660 if not self._path_exists(cvs_rev.svn_path):
3661 self._add_path(cvs_rev)
3662 else:
3663 self._change_path(cvs_rev)
3665 if cvs_rev.op == OP_DELETE:
3666 self._delete_path(cvs_rev.svn_path, Ctx().prune)
3668 def cleanup(self):
3669 """Callback for the Cleanup.register in self.__init__."""
3670 self.revs_db = None
3671 self.nodes_db = None
3673 def add_delegate(self, delegate):
3674 """Adds DELEGATE to self.delegates.
3676 For every delegate you add, as soon as SVNRepositoryMirror
3677 performs a repository action method, SVNRepositoryMirror will call
3678 the delegate's corresponding repository action method. Multiple
3679 delegates will be called in the order that they are added. See
3680 SVNRepositoryMirrorDelegate for more information."""
3681 self.delegates.append(delegate)
3683 def _invoke_delegates(self, method, *args):
3684 """Iterate through each of our delegates, in the order that they
3685 were added, and call the delegate's method named METHOD with the
3686 arguments in ARGS."""
3687 for delegate in self.delegates:
3688 getattr(delegate, method)(*args)
3690 def finish(self):
3691 """Calls the delegate finish method."""
3692 self._end_commit()
3693 self._invoke_delegates('finish')
3694 self.cleanup()
3697 class SVNCommitItem:
3698 """A wrapper class for CVSRevision objects upon which
3699 Subversion-related data (such as properties) may be hung."""
3701 def __init__(self, c_rev, make_svn_props):
3702 self.c_rev = c_rev
3703 self.set_cvs_revnum_properties = Ctx().cvs_revnums
3704 self.eol_from_mime_type = Ctx().eol_from_mime_type
3705 self.no_default_eol = Ctx().no_default_eol
3706 self.keywords_off = Ctx().keywords_off
3707 self.mime_mapper = Ctx().mime_mapper
3709 # We begin with only a "CVS revision" property.
3710 self.svn_props = { }
3711 if self.set_cvs_revnum_properties:
3712 self.svn_props['cvs2svn:cvs-rev'] = c_rev.rev
3713 make_svn_props = True
3715 # Set mime-type and eol. These two properties are intertwingled;
3716 # follow the conditionals carefully. See also issue #39.
3717 mime_type = None
3718 eol_style = None
3719 keywords = None
3721 if self.mime_mapper:
3722 mime_type = self.mime_mapper.get_type_from_filename(c_rev.cvs_path)
3724 if not c_rev.mode == 'b':
3725 if not self.no_default_eol:
3726 eol_style = 'native'
3727 elif mime_type and self.eol_from_mime_type:
3728 if mime_type.startswith("text/"):
3729 eol_style = 'native'
3730 else:
3731 eol_style = None
3732 elif mime_type is None:
3733 # file is kb, and no other mimetype specified
3734 mime_type = 'application/octet-stream'
3736 # Set the svn:keywords property, if appropriate. See issue #2.
3737 if not self.keywords_off and (c_rev.mode is None or c_rev.mode == 'kv' or
3738 c_rev.mode == 'kvl'):
3739 keywords = SVN_KEYWORDS_VALUE
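# Illustrative outcomes of the conditionals above, assuming the
# MimeMapper has no entry for the file: a plain text file (mode None)
# gets svn:eol-style=native plus svn:keywords; a file with CVS mode 'b'
# gets svn:mime-type=application/octet-stream and neither eol-style nor
# keywords; with Ctx().no_default_eol set and no mime type, eol-style is
# left unset.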
3741 # Remember if we need to filter the EOLs. We can't use self.svn_props
3742 # because they are only set on the first revision and we need to filter
3743 # all revisions.
3744 self.needs_eol_filter = eol_style == 'native'
3746 # Remember if this file has svn:keywords set
3747 self.has_keywords = keywords is not None
3749 # If asked to fill in the Subversion properties ('svn:' ones), do so.
3750 if make_svn_props:
3751 # Tack on the executableness, if any.
3752 if c_rev.file_executable:
3753 self.svn_props['svn:executable'] = '*'
3755 # Set the svn:keywords property, if appropriate. See issue #2.
3756 if keywords:
3757 self.svn_props['svn:keywords'] = SVN_KEYWORDS_VALUE
3759 if mime_type:
3760 self.svn_props['svn:mime-type'] = mime_type
3762 if eol_style:
3763 self.svn_props['svn:eol-style'] = eol_style
3766 class SVNRepositoryMirrorDelegate:
3767 """Abstract superclass for any delegate to SVNRepositoryMirror.
3768 Subclasses must implement all of the methods below.
3770 For each method, a subclass implements, in its own way, the
3771 Subversion operation implied by the method's name. For example, for
3772 the add_path method, the DumpfileDelegate would write out a
3773 "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
3774 would merely print that the path is being added to the repository,
3775 and the RepositoryDelegate would actually cause the path to be added
3776 to the Subversion repository that it is creating."""
3779 def start_commit(self, svn_commit):
3780 """Perform any actions needed to start SVNCommit SVN_COMMIT;
3781 see subclass implementation for details."""
3782 raise NotImplementedError
3784 def mkdir(self, path):
3785 """PATH is a string; see subclass implementation for details."""
3786 raise NotImplementedError
3788 def add_path(self, s_item):
3789 """S_ITEM is an SVNCommitItem; see subclass implementation for
3790 details."""
3791 raise NotImplementedError
3793 def change_path(self, s_item):
3794 """S_ITEM is an SVNCommitItem; see subclass implementation for
3795 details."""
3796 raise NotImplementedError
3798 def delete_path(self, path):
3799 """PATH is a string; see subclass implementation for
3800 details."""
3801 raise NotImplementedError
3803 def copy_path(self, src_path, dest_path, src_revnum):
3804 """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is a
3805 subversion revision number (int); see subclass implementation for
3806 details."""
3807 raise NotImplementedError
3809 def finish(self):
3810 """Perform any cleanup necessary after all revisions have been
3811 committed."""
3812 raise NotImplementedError
3815 class DumpfileDelegate(SVNRepositoryMirrorDelegate):
3816 """Create a Subversion dumpfile."""
3818 def __init__(self, dumpfile_path=None):
3819 """Return a new DumpfileDelegate instance, attached to a dumpfile
3820 DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding.
3822 If Ctx().cvs_revnums is true, then set the 'cvs2svn:cvs-revnum'
3823 property on files, when they are changed due to a corresponding
3824 CVS revision.
3826 If Ctx().mime_mapper is not None, then it is a MimeMapper
3827 instance, used to determine whether or not to set the
3828 'svn:mime-type' property on files. But even if Ctx().mime_mapper
3829 is None, files marked with the CVS 'kb' flag will receive a mime
3830 type of "application/octet-stream".
3832 Unless Ctx().no_default_eol is true, set 'svn:eol-style' to
3833 'native' for files not marked with the CVS 'kb' flag, except as
3834 superseded by Ctx().eol_from_mime_type (see below).
3836 If Ctx().eol_from_mime_type is not None, then set 'svn:eol-style'
3837 to 'native' for all files to which Ctx().mime_mapper assigns a
3838 mime type beginning with "text/", and don't set 'svn:eol-style'
3839 for files assigned a type not beginning with "text/"."""
3841 if dumpfile_path:
3842 self.dumpfile_path = dumpfile_path
3843 else:
3844 self.dumpfile_path = Ctx().dumpfile
3846 self.dumpfile = open(self.dumpfile_path, 'wb')
3847 self._write_dumpfile_header(self.dumpfile)
3849 def _write_dumpfile_header(self, dumpfile):
3850 # Initialize the dumpfile with the standard headers.
3852 # Since the CVS repository doesn't have a UUID, and the Subversion
3853 # repository will be created with one anyway, we don't specify a
3854 # UUID in the dumpfile.
3855 dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
3857 def _utf8_path(self, path):
3858 """Return a copy of PATH encoded in UTF-8."""
3859 pieces = string.split(path, '/')
3860 # Convert each path component separately (as they may each use
3861 # different encodings).
3862 for i in range(len(pieces)):
3863 try:
3864 # Log messages can be converted with the 'replace' strategy,
3865 # but we can't afford any lossiness here.
3866 pieces[i] = to_utf8(pieces[i], 'strict')
3867 except UnicodeError:
3868 raise FatalError(
3869 "Unable to convert a path '%s' to internal encoding.\n"
3870 "Consider rerunning with (for example) '--encoding=latin1'."
3871 % (path,))
3872 return string.join(pieces, '/')
3874 def start_commit(self, svn_commit):
3875 """Emit the start of SVN_COMMIT (an SVNCommit)."""
3877 self.revision = svn_commit.revnum
3879 # The start of a new commit typically looks like this:
3881 # Revision-number: 1
3882 # Prop-content-length: 129
3883 # Content-length: 129
3885 # K 7
3886 # svn:log
3887 # V 27
3888 # Log message for revision 1.
3889 # K 10
3890 # svn:author
3891 # V 7
3892 # jrandom
3893 # K 8
3894 # svn:date
3895 # V 27
3896 # 2003-04-22T22:57:58.132837Z
3897 # PROPS-END
3899 # Notice that the length headers count everything -- not just the
3900 # length of the data but also the lengths of the lengths, including
3901 # the 'K ' or 'V ' prefixes.
3903 # The reason there are both Prop-content-length and Content-length
3904 # is that the former includes just props, while the latter includes
3905 # everything. That's the generic header form for any entity in a
3906 # dumpfile. But since revisions only have props, the two lengths
3907 # are always the same for revisions.
3909 # Calculate the total length of the props section.
3910 props = svn_commit.get_revprops()
3911 prop_names = props.keys()
3912 prop_names.sort()
3913 total_len = 10 # len('PROPS-END\n')
3914 for propname in prop_names:
3915 if props[propname] is None:
3916 continue
3917 klen = len(propname)
3918 klen_len = len('K %d' % klen)
3919 vlen = len(props[propname])
3920 vlen_len = len('V %d' % vlen)
3921 # + 4 for the four newlines within a given property's section
3922 total_len = total_len + klen + klen_len + vlen + vlen_len + 4
3924 # Print the revision header and props
3925 self.dumpfile.write('Revision-number: %d\n'
3926 'Prop-content-length: %d\n'
3927 'Content-length: %d\n'
3928 '\n'
3929 % (self.revision, total_len, total_len))
3931 for propname in prop_names:
3932 if props[propname] is None:
3933 continue
3934 self.dumpfile.write('K %d\n'
3935 '%s\n'
3936 'V %d\n'
3937 '%s\n' % (len(propname),
3938 propname,
3939 len(props[propname]),
3940 props[propname]))
3942 self.dumpfile.write('PROPS-END\n')
3943 self.dumpfile.write('\n')
3945 def mkdir(self, path):
3946 """Emit the creation of directory PATH."""
3947 self.dumpfile.write("Node-path: %s\n"
3948 "Node-kind: dir\n"
3949 "Node-action: add\n"
3950 "\n"
3951 "\n" % self._utf8_path(path))
3953 def _add_or_change_path(self, s_item, op):
3954 """Emit the addition or change corresponding to S_ITEM.
3955 OP is either the constant OP_ADD or OP_CHANGE."""
3957 # Validation stuffs
3958 if op == OP_ADD:
3959 action = 'add'
3960 elif op == OP_CHANGE:
3961 action = 'change'
3962 else:
3963 raise FatalError("_add_or_change_path() called with bad op ('%s')"
3964 % (op,))
3966 # Convenience variables
3967 c_rev = s_item.c_rev
3968 svn_props = s_item.svn_props
3970 # The property handling here takes advantage of an undocumented
3971 # but IMHO consistent feature of the Subversion dumpfile-loading
3972 # code. When a node's properties aren't mentioned (that is, the
3973 # "Prop-content-length:" header is absent, no properties are
3974 # listed at all, and there is no "PROPS-END\n" line) then no
3975 # change is made to the node's properties.
3977 # This is consistent with the way dumpfiles behave w.r.t. text
3978 # content changes, so I'm comfortable relying on it. If you
3979 # commit a change to *just* the properties of some node that
3980 # already has text contents from a previous revision, then in the
3981 # dumpfile output for the prop change, no "Text-content-length:"
3982 # nor "Text-content-md5:" header will be present, and the text of
3983 # the file will not be given. But this does not cause the file's
3984 # text to be erased! It simply remains unchanged.
3986 # This works out great for cvs2svn, due to lucky coincidences:
3988 # For files, the only properties we ever set are set in the first
3989 # revision; all other revisions (including on branches) inherit
3990 # from that. After the first revision, we never change file
3991 # properties, therefore, there is no need to remember the full set
3992 # of properties on a given file once we've set it.
3994 # For directories, the only property we set is "svn:ignore", and
3995 # while we may change it after the first revision, we always do so
3996 # based on the contents of a ".cvsignore" file -- in other words,
3997 # CVS is doing the remembering for us, so we still don't have to
3998 # preserve the previous value of the property ourselves.
4000 # Calculate the (sorted-by-name) property string and length, if any.
4001 prop_contents = ''
4002 prop_names = svn_props.keys()
4003 prop_names.sort()
4004 for pname in prop_names:
4005 pval = svn_props[pname]
4006 prop_contents = prop_contents + \
4007 'K %d\n%s\nV %d\n%s\n' \
4008 % (len(pname), pname, len(pval), pval)
4009 if prop_contents:
4010 prop_contents = prop_contents + 'PROPS-END\n'
4011 props_len = len(prop_contents)
4012 else:
4013 props_len = 0
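# Illustrative serialization: for svn_props == {'svn:executable': '*'}
# the block above is 'K 14\nsvn:executable\nV 1\n*\nPROPS-END\n', so
# props_len == 36.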
4015 props_header = ''
4016 if props_len:
4017 props_header = 'Prop-content-length: %d\n' % props_len
4019 # treat .cvsignore as a directory property
4020 dir_path, basename = os.path.split(c_rev.svn_path)
4021 if basename == ".cvsignore":
4022 ignore_vals = generate_ignores(c_rev)
4023 ignore_contents = '\n'.join(ignore_vals)
4024 ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
4025 (len(ignore_contents), ignore_contents))
4026 ignore_contents = ignore_contents + 'PROPS-END\n'
4027 ignore_len = len(ignore_contents)
4029 # write headers, then props
4030 self.dumpfile.write('Node-path: %s\n'
4031 'Node-kind: dir\n'
4032 'Node-action: change\n'
4033 'Prop-content-length: %d\n'
4034 'Content-length: %d\n'
4035 '\n'
4036 '%s'
4037 % (self._utf8_path(dir_path), ignore_len,
4038 ignore_len, ignore_contents))
4040 # If the file has keywords, we must prevent CVS/RCS from expanding
4041 # the keywords because they must be unexpanded in the repository,
4042 # or Subversion will get confused.
4043 pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
4044 c_rev, suppress_keyword_substitution=s_item.has_keywords)
4046 self.dumpfile.write('Node-path: %s\n'
4047 'Node-kind: file\n'
4048 'Node-action: %s\n'
4049 '%s' # no property header if no props
4050 'Text-content-length: '
4051 % (self._utf8_path(c_rev.svn_path),
4052 action, props_header))
4054 pos = self.dumpfile.tell()
4056 self.dumpfile.write('0000000000000000\n'
4057 'Text-content-md5: 00000000000000000000000000000000\n'
4058 'Content-length: 0000000000000000\n'
4059 '\n')
4061 if prop_contents:
4062 self.dumpfile.write(prop_contents)
4064 # Insert a filter to convert all EOLs to LFs if neccessary
4065 if s_item.needs_eol_filter:
4066 data_reader = LF_EOL_Filter(pipe.stdout)
4067 else:
4068 data_reader = pipe.stdout
4070 # Insert the rev contents, calculating length and checksum as we go.
4071 checksum = md5.new()
4072 length = 0
4073 while True:
4074 buf = data_reader.read(PIPE_READ_SIZE)
4075 if buf == '':
4076 break
4077 checksum.update(buf)
4078 length = length + len(buf)
4079 self.dumpfile.write(buf)
4081 pipe.stdout.close()
4082 error_output = pipe.stderr.read()
4083 exit_status = pipe.wait()
4084 if exit_status:
4085 raise FatalError("The command '%s' failed with exit status: %s\n"
4086 "and the following output:\n"
4087 "%s" % (pipe_cmd, exit_status, error_output))
4089 # Go back to patch up the length and checksum headers:
4090 self.dumpfile.seek(pos, 0)
4091 # We left 16 zeros for the text length; replace them with the real
4092 # length, padded on the left with spaces:
4093 self.dumpfile.write('%16d' % length)
4094 # 16... + 1 newline + len('Text-content-md5: ') == 35
4095 self.dumpfile.seek(pos + 35, 0)
4096 self.dumpfile.write(checksum.hexdigest())
4097 # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
4098 self.dumpfile.seek(pos + 84, 0)
4099 # The content length is the length of property data, text data,
4100 # and any metadata around/inside them.
4101 self.dumpfile.write('%16d' % (length + props_len))
4102 # Jump back to the end of the stream
4103 self.dumpfile.seek(0, 2)
4105 # This record is done (write two newlines -- one to terminate
4106 # contents that weren't themselves newline-terminated, and one to
4107 # provide a blank line for readability).
4108 self.dumpfile.write('\n\n')
4110 def add_path(self, s_item):
4111 """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
4112 self._add_or_change_path(s_item, OP_ADD)
4114 def change_path(self, s_item):
4115 """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
4116 self._add_or_change_path(s_item, OP_CHANGE)
4118 def delete_path(self, path):
4119 """Emit the deletion of PATH."""
4120 self.dumpfile.write('Node-path: %s\n'
4121 'Node-action: delete\n'
4122 '\n' % self._utf8_path(path))
4124 def copy_path(self, src_path, dest_path, src_revnum):
4125 """Emit the copying of SRC_PATH at SRC_REV to DEST_PATH."""
4126 # We don't need to include "Node-kind:" for copies; the loader
4127 # ignores it anyway and just uses the source kind instead.
4128 self.dumpfile.write('Node-path: %s\n'
4129 'Node-action: add\n'
4130 'Node-copyfrom-rev: %d\n'
4131 'Node-copyfrom-path: /%s\n'
4132 '\n'
4133 % (self._utf8_path(dest_path),
4134 src_revnum,
4135 self._utf8_path(src_path)))
4137 def finish(self):
4138 """Perform any cleanup necessary after all revisions have been
4139 committed."""
4140 self.dumpfile.close()
4143 class RepositoryDelegate(DumpfileDelegate):
4144 """Creates a new Subversion Repository. DumpfileDelegate does all
4145 of the heavy lifting."""
4146 def __init__(self):
4147 self.svnadmin = Ctx().svnadmin
4148 self.target = Ctx().target
4149 if not Ctx().existing_svnrepos:
4150 Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
4151 if not Ctx().fs_type:
4152 # User didn't say what kind of repository (bdb, fsfs, etc).
4153 # We still pass --bdb-txn-nosync. It's a no-op if the default
4154 # repository type doesn't support it, but we definitely want
4155 # it if BDB is the default.
4156 run_command('%s create %s "%s"' % (self.svnadmin,
4157 "--bdb-txn-nosync",
4158 self.target))
4159 elif Ctx().fs_type == 'bdb':
4160 # User explicitly specified bdb.
4162 # Since this is a BDB repository, pass --bdb-txn-nosync,
4163 # because it gives us a 4-5x speed boost (if cvs2svn is
4164 # creating the repository, cvs2svn should be the only program
4165 # accessing the svn repository (until cvs is done, at least)).
4166 # But we'll turn no-sync off in self.finish(), unless
4167 # instructed otherwise.
4168 run_command('%s create %s %s "%s"' % (self.svnadmin,
4169 "--fs-type=bdb",
4170 "--bdb-txn-nosync",
4171 self.target))
4172 else:
4173 # User specified something other than bdb.
4174 run_command('%s create %s "%s"' % (self.svnadmin,
4175 "--fs-type=%s" % Ctx().fs_type,
4176 self.target))
4178 # Since the output of this run is a repository, not a dumpfile,
4179 # the temporary dumpfiles we create should go in the tmpdir.
4180 DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))
4182 # This is 1 if a commit is in progress, otherwise None.
4183 self._commit_in_progress = None
4185 self.dumpfile = open(self.dumpfile_path, 'w+b')
4186 self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
4187 self.target ], True)
4188 self.loader_pipe.stdout.close()
4189 try:
4190 self._write_dumpfile_header(self.loader_pipe.stdin)
4191 except IOError:
4192 raise FatalError("svnadmin failed with the following output while "
4193 "loading the dumpfile:\n"
4194 + self.loader_pipe.stderr.read())
4196 def _feed_pipe(self):
4197 """Feed the revision stored in the dumpfile to the svnadmin
4198 load pipe."""
4199 self.dumpfile.seek(0)
4200 while 1:
4201 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
4202 if not len(data):
4203 break
4204 try:
4205 self.loader_pipe.stdin.write(data)
4206 except IOError:
4207 raise FatalError("svnadmin failed with the following output "
4208 "while loading the dumpfile:\n"
4209 + self.loader_pipe.stderr.read())
4211 def start_commit(self, svn_commit):
4212 """Start a new commit. If a commit is already in progress, close
4213 the dumpfile, load it into the svn repository, open a new
4214 dumpfile, and write the header into it."""
4215 if self._commit_in_progress:
4216 self._feed_pipe()
4217 self.dumpfile.seek(0)
4218 self.dumpfile.truncate()
4219 DumpfileDelegate.start_commit(self, svn_commit)
4220 self._commit_in_progress = 1
4222 def finish(self):
4223 """Loads the last commit into the repository."""
4224 self._feed_pipe()
4225 self.dumpfile.close()
4226 self.loader_pipe.stdin.close()
4227 error_output = self.loader_pipe.stderr.read()
4228 exit_status = self.loader_pipe.wait()
4229 if exit_status:
4230 raise FatalError('svnadmin load failed with exit status: %s\n'
4231 'and the following output:\n'
4232 '%s' % (exit_status, error_output,))
4233 os.remove(self.dumpfile_path)
4235 # If this is a BDB repository, and we created the repository, and
4236 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
4237 # line in the DB_CONFIG file, because txn syncing should be on by
4238 # default in BDB repositories.
4240 # We determine if this is a BDB repository by looking for the
4241 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
4242 # checking Ctx().fs_type. That way this code will Do The Right
4243 # Thing in all circumstances.
4244 db_config = os.path.join(self.target, "db/DB_CONFIG")
4245 if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
4246 and os.path.exists(db_config)):
4247 no_sync = 'set_flags DB_TXN_NOSYNC\n'
4249 contents = open(db_config, 'r').readlines()
4250 index = contents.index(no_sync)
4251 contents[index] = '# ' + no_sync
4252 contents = open(db_config, 'w').writelines(contents)
4255 class StdoutDelegate(SVNRepositoryMirrorDelegate):
4256 """Makes no changes to the disk, but writes out information to
4257 STDOUT about what the SVNRepositoryMirror is doing. Of course, our
4258 print statements will state that we're doing something, when in
4259 reality, we aren't doing anything other than printing out that we're
4260 doing something. Kind of zen, really."""
4261 def __init__(self, total_revs):
4262 self.total_revs = total_revs
4264 def start_commit(self, svn_commit):
4265 """Prints out the Subversion revision number of the commit that is
4266 being started."""
4267 Log().write(LOG_VERBOSE, "=" * 60)
4268 Log().write(LOG_NORMAL, "Starting Subversion r%d / %d" %
4269 (svn_commit.revnum, self.total_revs))
4271 def mkdir(self, path):
4272 """Print a line stating that we are creating directory PATH."""
4273 Log().write(LOG_VERBOSE, " New Directory", path)
4275 def add_path(self, s_item):
4276 """Print a line stating that we are 'adding' s_item.c_rev.svn_path."""
4277 Log().write(LOG_VERBOSE, " Adding", s_item.c_rev.svn_path)
4279 def change_path(self, s_item):
4280 """Print a line stating that we are 'changing' s_item.c_rev.svn_path."""
4281 Log().write(LOG_VERBOSE, " Changing", s_item.c_rev.svn_path)
4283 def delete_path(self, path):
4284 """Print a line stating that we are 'deleting' PATH."""
4285 Log().write(LOG_VERBOSE, " Deleting", path)
4287 def copy_path(self, src_path, dest_path, src_revnum):
4288 """Print a line stating that we are 'copying' revision SRC_REVNUM
4289 of SRC_PATH to DEST_PATH."""
4290 Log().write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
4291 Log().write(LOG_VERBOSE, " to", dest_path)
4293 def finish(self):
4294 """State that we are done creating our repository."""
4295 Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
4296 Log().write(LOG_QUIET, "Done.")
4298 # This should be local to pass1,
4299 # but Python 2.0 does not support nested scopes.
4300 OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
4301 def pass1():
4302 Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
4303 cd = CollectData()
4305 def visit_file(baton, dirname, files):
4306 cd = baton
4307 for fname in files:
4308 if fname[-2:] != ',v':
4309 continue
4310 cd.found_valid_file = 1
4311 pathname = os.path.join(dirname, fname)
4312 if dirname[-6:] == OS_SEP_PLUS_ATTIC:
4313 # drop the 'Attic' portion from the pathname for the canonical name.
4314 cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
4315 else:
4316 # If this file also exists in the attic, it's a fatal error
4317 attic_path = os.path.join(dirname, 'Attic', fname)
4318 if os.path.exists(attic_path):
4319 err = "%s: A CVS repository cannot contain both %s and %s" \
4320 % (error_prefix, pathname, attic_path)
4321 sys.stderr.write(err + '\n')
4322 cd.fatal_errors.append(err)
4323 cd.set_fname(pathname, pathname)
4324 Log().write(LOG_NORMAL, pathname)
4325 try:
4326 cvs2svn_rcsparse.parse(open(pathname, 'rb'), cd)
4327 except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
4328 RuntimeError):
4329 err = "%s: '%s' is not a valid ,v file" \
4330 % (error_prefix, pathname)
4331 sys.stderr.write(err + '\n')
4332 cd.fatal_errors.append(err)
4333 except:
4334 Log().write(LOG_WARN,
4335 "Exception occurred while parsing %s" % pathname)
4336 raise
4338 os.path.walk(Ctx().cvsroot, visit_file, cd)
4339 Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')
4341 cd.write_symbol_db()
4343 if len(cd.fatal_errors) > 0:
4344 raise FatalException("Pass 1 complete.\n"
4345 + "=" * 75 + "\n"
4346 + "Error summary:\n"
4347 + "\n".join(cd.fatal_errors) + "\n"
4348 + "Exited due to fatal error(s).\n")
4350 if cd.found_valid_file is None:
4351 raise FatalException(
4352 "\n"
4353 "No RCS files found in your CVS Repository!\n"
4354 "Are you absolutely certain you are pointing cvs2svn\n"
4355 "at a CVS repository?\n"
4356 "\n"
4357 "Exited due to fatal error(s).\n")
4359 StatsKeeper().reset_c_rev_info()
4360 StatsKeeper().archive()
4361 Log().write(LOG_QUIET, "Done")
4363 def pass2():
4364 "Pass 2: clean up the revision information."
4366 symbol_db = SymbolDatabase()
4367 symbol_db.read()
4369 # Convert the list of regexps to a list of strings
4370 excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
4372 error_detected = 0
4374 Log().write(LOG_QUIET, "Checking for blocked exclusions...")
4375 blocked_excludes = symbol_db.find_blocked_excludes(excludes)
4376 if blocked_excludes:
4377 for branch, blockers in blocked_excludes.items():
4378 sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
4379 "excluded because the following symbols depend "
4380 "on it:\n" % (branch))
4381 for blocker in blockers:
4382 sys.stderr.write(" '%s'\n" % (blocker))
4383 sys.stderr.write("\n")
4384 error_detected = 1
4386 Log().write(LOG_QUIET, "Checking for forced tags with commits...")
4387 invalid_forced_tags = [ ]
4388 for forced_tag in Ctx().forced_tags:
4389 if excludes.has_key(forced_tag):
4390 continue
4391 if symbol_db.branch_has_commit(forced_tag):
4392 invalid_forced_tags.append(forced_tag)
4393 if invalid_forced_tags:
4394 sys.stderr.write(error_prefix + ": The following branches cannot be "
4395 "forced to be tags because they have commits:\n")
4396 for tag in invalid_forced_tags:
4397 sys.stderr.write(" '%s'\n" % (tag))
4398 sys.stderr.write("\n")
4399 error_detected = 1
4401 Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
4402 mismatches = symbol_db.find_mismatches(excludes)
4403 def is_not_forced(mismatch):
4404 name = mismatch[0]
4405 return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
4406 mismatches = filter(is_not_forced, mismatches)
4407 if mismatches:
4408 sys.stderr.write(error_prefix + ": The following symbols are tags "
4409 "in some files and branches in others.\nUse "
4410 "--force-tag, --force-branch and/or --exclude to "
4411 "resolve the symbols.\n")
4412 for name, tag_count, branch_count, commit_count in mismatches:
4413 sys.stderr.write(" '%s' is a tag in %d files, a branch in "
4414 "%d files and has commits in %d files.\n"
4415 % (name, tag_count, branch_count, commit_count))
4416 error_detected = 1
4418 # Bail out now if we found errors
4419 if error_detected:
4420 sys.exit(1)
4422 # Create the tags database
4423 tags_db = TagsDatabase(DB_OPEN_NEW)
4424 for tag in symbol_db.tags.keys():
4425 if tag not in Ctx().forced_branches:
4426 tags_db[tag] = None
4427 for tag in Ctx().forced_tags:
4428 tags_db[tag] = None
4430 Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")
4432 # We may have recorded some changes in revisions' timestamp. We need to
4433 # scan for any other files which may have had the same log message and
4434 # occurred at "the same time" and change their timestamps, too.
4436 # read the resync data file
4437 def read_resync(fname):
4438 "Read the .resync file into memory."
4440 ### note that we assume that we can hold the entire resync file in
4441 ### memory. really large repositories with whacky timestamps could
4442 ### bust this assumption. should that ever happen, then it is possible
4443 ### to split the resync file into pieces and make multiple passes,
4444 ### using each piece.
4447 # A digest maps to a sequence of lists which specify a lower and upper
4448 # time bound for matching up the commit. We keep a sequence of these
4449 # because a number of checkins with the same log message (e.g. an empty
4450 # log message) could need to be remapped. We also make them a list
4451 # because we will dynamically expand the lower/upper bound as we find
4452 # commits that fall into a particular msg and time range.
4454 # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
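# Illustrative example, with made-up values rather than data from a real
# run: assuming COMMIT_THRESHOLD were 300 seconds, a resync line of the
# form "3c2a1b00 <digest> 3c2a1c2c" would produce
#   resync['<digest>'] = [[0x3c2a1b00 - 150, 0x3c2a1b00 + 150, 0x3c2a1c2c]]
# i.e. any commit sharing that log-message digest whose original timestamp
# falls inside the window gets remapped to the new time 0x3c2a1c2c.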
4456 resync = { }
4458 for line in fileinput.FileInput(fname):
4459 t1 = int(line[:8], 16)
4460 digest = line[9:DIGEST_END_IDX]
4461 t2 = int(line[DIGEST_END_IDX+1:], 16)
4462 t1_l = t1 - COMMIT_THRESHOLD/2
4463 t1_u = t1 + COMMIT_THRESHOLD/2
4464 if resync.has_key(digest):
4465 resync[digest].append([t1_l, t1_u, t2])
4466 else:
4467 resync[digest] = [ [t1_l, t1_u, t2] ]
4469 # For each digest, sort the resync items in it in increasing order,
4470 # based on the lower time bound.
4471 digests = resync.keys()
4472 for digest in digests:
4473 (resync[digest]).sort()
4475 return resync
4477 resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))
4479 output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
4480 Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)
4482 tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
4483 Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)
4485 # process the revisions file, looking for items to clean up
4486 for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
4487 c_rev = CVSRevision(Ctx(), line[:-1])
4489 # Skip this entire revision if it's on an excluded branch
4490 if excludes.has_key(c_rev.branch_name):
4491 continue
4493 new_prev_ts = None
4494 if c_rev.prev_rev is not None:
4495 new_prev_ts = tweaked_timestamps_db.get(
4496 c_rev.unique_key(c_rev.prev_rev), None)
4497 if new_prev_ts:
4498 c_rev.prev_timestamp = new_prev_ts
4500 new_next_ts = None
4501 if c_rev.next_rev is not None:
4502 new_next_ts = tweaked_timestamps_db.get(
4503 c_rev.unique_key(c_rev.next_rev), None)
4504 if new_next_ts:
4505 c_rev.next_timestamp = new_next_ts
4507 # Remove all references to excluded tags and branches
4508 def not_excluded(symbol, excludes=excludes):
4509 return not excludes.has_key(symbol)
4510 c_rev.branches = filter(not_excluded, c_rev.branches)
4511 c_rev.tags = filter(not_excluded, c_rev.tags)
4513 # Convert all branches that are forced to be tags
4514 for forced_tag in Ctx().forced_tags:
4515 if forced_tag in c_rev.branches:
4516 c_rev.branches.remove(forced_tag)
4517 c_rev.tags.append(forced_tag)
4519 # Convert all tags that are forced to be branches
4520 for forced_branch in Ctx().forced_branches:
4521 if forced_branch in c_rev.tags:
4522 c_rev.tags.remove(forced_branch)
4523 c_rev.branches.append(forced_branch)
4525 # see if this is "near" any of the resync records we
4526 # have recorded for this digest [of the log message].
4527 for record in resync.get(c_rev.digest, []):
4528 if record[2] == c_rev.timestamp:
4529 # This means that either c_rev is the same revision that
4530 # caused the resync record to exist, or c_rev is a different
4531 # CVS revision that happens to have the same timestamp. In
4532 # either case, we don't have to do anything, so we...
4533 continue
4535 if record[0] <= c_rev.timestamp <= record[1]:
4536 # bingo! We probably want to remap the time on this c_rev,
4537 # unless the remapping would be useless because the new time
4538 # would fall outside the COMMIT_THRESHOLD window for this
4539 # commit group.
4540 new_timestamp = record[2]
4541 # If the new timestamp is earlier than that of our previous revision
4542 if new_timestamp < c_rev.prev_timestamp:
4543 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4544 + " to time %s, which is before previous the time of"
4545 + " revision %s (%s):")
4546 Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4547 c_rev.cvs_path, new_timestamp,
4548 c_rev.prev_rev, c_rev.prev_timestamp))
4549 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
4550 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4551 # attempted resync time, then sync back to c_rev.prev_timestamp
4552 # + 1...
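# Worked example with made-up numbers: if c_rev.prev_timestamp were 1000,
# the attempted new_timestamp 900 and COMMIT_THRESHOLD 300, then
# (1001 - 900) = 101 < 300, so the revision would be resynced to 1001;
# had new_timestamp been 600, (1001 - 600) = 401 >= 300 and the original
# timestamp would be left untouched.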
4553 if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
4554 new_timestamp = c_rev.prev_timestamp + 1
4555 Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4556 new_timestamp))
4557 else:
4558 Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4559 warning_prefix)
4560 continue
4562 # If the new timestamp is later than that of our next revision
4563 elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
4564 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4565 + " to time %s, which is after time of next"
4566 + " revision %s (%s):")
4567 Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4568 c_rev.cvs_path, new_timestamp,
4569 c_rev.next_rev, c_rev.next_timestamp))
4570 # If resyncing our rev to c_rev.next_timestamp - 1 will place
4571 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4572 # attempted resync time, then sync forward to c_rev.next_timestamp
4573 # - 1...
4574 if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
4575 new_timestamp = c_rev.next_timestamp - 1
4576 Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4577 new_timestamp))
4578 else:
4579 Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4580 warning_prefix)
4581 continue
4583 # Fix for Issue #71: Avoid resyncing two consecutive revisions
4584 # to the same timestamp.
4585 elif (new_timestamp == c_rev.prev_timestamp
4586 or new_timestamp == c_rev.next_timestamp):
4587 continue
4589 # Adjust the time range: keep COMMIT_THRESHOLD/2 of slack beyond the
4590 # timestamps of the earliest/latest commit seen so far in this group.
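# For instance (hypothetical numbers): with a record of [850, 1150, t2],
# a commit at timestamp 1200 and COMMIT_THRESHOLD of 300, the window
# widens to [850, max(1150, 1200 + 150)] = [850, 1350].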
4591 record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
4592 record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)
4594 msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
4595 % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
4596 new_timestamp - c_rev.timestamp)
4597 Log().write(LOG_VERBOSE, msg)
4599 c_rev.timestamp = new_timestamp
4600 tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp
4602 # stop looking for hits
4603 break
4605 output.write(str(c_rev) + "\n")
4606 Log().write(LOG_QUIET, "Done")
4608 def pass3():
4609 Log().write(LOG_QUIET, "Sorting CVS revisions...")
4610 sort_file(temp(DATAFILE + CLEAN_REVS_SUFFIX),
4611 temp(DATAFILE + SORTED_REVS_SUFFIX))
4612 Cleanup().register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
4613 Log().write(LOG_QUIET, "Done")
4615 def pass4():
4616 """Iterate through sorted revs, storing them in a database.
4617 If we're not doing a trunk-only conversion, generate the
4618 LastSymbolicNameDatabase, which contains the last CVSRevision
4619 that is a source for each tag or branch."""
4621 Log().write(LOG_QUIET,
4622 "Copying CVS revision data from flat file to database...")
4623 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
4624 if not Ctx().trunk_only:
4625 Log().write(LOG_QUIET,
4626 "Finding last CVS revisions for all symbolic names...")
4627 last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)
4628 else:
4629 # This is to avoid testing Ctx().trunk_only every time around the loop
4630 class DummyLSNDB:
4631 def noop(*args): pass
4632 log_revision = noop
4633 create_database = noop
4634 last_sym_name_db = DummyLSNDB()
4636 for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
4637 c_rev = CVSRevision(Ctx(), line[:-1])
4638 cvs_revs_db.log_revision(c_rev)
4639 last_sym_name_db.log_revision(c_rev)
4640 StatsKeeper().record_c_rev(c_rev)
4642 last_sym_name_db.create_database()
4643 StatsKeeper().archive()
4644 Log().write(LOG_QUIET, "Done")
4646 def pass5():
4647 """
4648 Generate the SVNCommit <-> CVSRevision mapping
4649 databases. CVSCommit._commit also calls SymbolingsLogger to register
4650 CVSRevisions that represent an opening or closing for a path on a
4651 branch or tag. See SymbolingsLogger for more details.
4652 """
4653 Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")
4655 aggregator = CVSRevisionAggregator()
4656 for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
4657 c_rev = CVSRevision(Ctx(), line[:-1])
4658 if not (Ctx().trunk_only and c_rev.branch_name is not None):
4659 aggregator.process_revision(c_rev)
4660 aggregator.flush()
4662 StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
4663 StatsKeeper().archive()
4664 Log().write(LOG_QUIET, "Done")
4666 def pass6():
4667 Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")
4669 if not Ctx().trunk_only:
4670 sort_file(temp(SYMBOL_OPENINGS_CLOSINGS),
4671 temp(SYMBOL_OPENINGS_CLOSINGS_SORTED))
4672 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
4673 Log().write(LOG_QUIET, "Done")
4675 def pass7():
4676 Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")
4678 def generate_offsets_for_symbolings():
4679 """This function iterates through all the lines in
4680 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
4681 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
4682 where SYMBOLIC_NAME is first encountered. This will allow us to
4683 seek to the various offsets in the file and sequentially read only
4684 the openings and closings that we need."""
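# Illustration with hypothetical contents (the symbol names, revision
# numbers and keys below are invented): if the sorted file held the lines
#   BRANCH_A 12 <cvs-rev-key>
#   BRANCH_A 15 <cvs-rev-key>
#   TAG_B 20 <cvs-rev-key>
# offsets_db would map 'BRANCH_A' to offset 0 and 'TAG_B' to the offset of
# the third line, so later passes can seek straight to a symbol's records
# instead of rescanning the whole file.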
4686 ###PERF This is a fine example of a db that can be in-memory and
4687 #just flushed to disk when we're done. Later, it can just be sucked
4688 #back into memory.
4689 offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
4690 Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)
4692 file = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
4693 old_sym = ""
4694 while 1:
4695 fpos = file.tell()
4696 line = file.readline()
4697 if not line:
4698 break
4699 sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
4700 if sym != old_sym:
4701 Log().write(LOG_VERBOSE, " ", sym)
4702 old_sym = sym
4703 offsets_db[sym] = fpos
4705 if not Ctx().trunk_only:
4706 generate_offsets_for_symbolings()
4707 Log().write(LOG_QUIET, "Done.")
4709 def pass8():
4710 svncounter = 2 # Repository initialization is 1.
4711 repos = SVNRepositoryMirror()
4712 persistence_manager = PersistenceManager(DB_OPEN_READ)
4714 if Ctx().target:
4715 if not Ctx().dry_run:
4716 repos.add_delegate(RepositoryDelegate())
4717 Log().write(LOG_QUIET, "Starting Subversion Repository.")
4718 else:
4719 if not Ctx().dry_run:
4720 repos.add_delegate(DumpfileDelegate())
4721 Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")
4723 repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))
4725 while 1:
4726 svn_commit = persistence_manager.get_svn_commit(svncounter)
4727 if not svn_commit:
4728 break
4729 repos.commit(svn_commit)
4730 svncounter += 1
4732 repos.finish()
4734 _passes = [
4735 pass1,
4736 pass2,
4737 pass3,
4738 pass4,
4739 pass5,
4740 pass6,
4741 pass7,
4742 pass8,
4743 ]
4746 class Ctx:
4747 """Session state for this run of cvs2svn. For example, run-time
4748 options are stored here. This class is a Borg, see
4749 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
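# A minimal sketch of what being a Borg means in practice: every Ctx()
# instance shares the same __dict__, so, for example,
#   Ctx().verbose = 1
#   assert Ctx().verbose == 1   # visible through any other Ctx() instance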
4751 __shared_state = { }
4752 def __init__(self):
4753 self.__dict__ = self.__shared_state
4754 if self.__dict__:
4755 return
4756 # Else, initialize to defaults.
4757 self.cvsroot = None
4758 self.target = None
4759 self.dumpfile = DUMPFILE
4760 self.tmpdir = '.'
4761 self.verbose = 0
4762 self.quiet = 0
4763 self.prune = 1
4764 self.existing_svnrepos = 0
4765 self.dump_only = 0
4766 self.dry_run = 0
4767 self.trunk_only = 0
4768 self.trunk_base = "trunk"
4769 self.tags_base = "tags"
4770 self.branches_base = "branches"
4771 self.encoding = ["ascii"]
4772 self.mime_types_file = None
4773 self.mime_mapper = None
4774 self.no_default_eol = 0
4775 self.eol_from_mime_type = 0
4776 self.keywords_off = 0
4777 self.use_cvs = None
4778 self.svnadmin = "svnadmin"
4779 self.username = None
4780 self.print_help = 0
4781 self.skip_cleanup = 0
4782 self.cvs_revnums = 0
4783 self.bdb_txn_nosync = 0
4784 self.fs_type = None
4785 self.forced_branches = []
4786 self.forced_tags = []
4787 self.excludes = []
4788 self.symbol_transforms = []
4790 class MimeMapper:
4791 """A class that provides mappings from file names to MIME types.
4792 Note that we should really be using Python's 'mimetypes' module.
4793 See http://cvs2svn.tigris.org/servlets/ReadMsg?list=dev&msgNo=266
4794 for more."""
4796 def __init__(self):
4797 self.mappings = { }
4799 def set_mime_types_file(self, mime_types_file):
4800 for line in fileinput.input(mime_types_file):
4801 if line.startswith("#"):
4802 continue
4804 # format of a line is something like
4805 # text/plain c h cpp
4806 extensions = line.split()
4807 if len(extensions) < 2:
4808 continue
4809 type = extensions.pop(0)
4810 for ext in extensions:
4811 if self.mappings.has_key(ext) and self.mappings[ext] != type:
4812 sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
4813 % (warning_prefix, ext, self.mappings[ext], type))
4814 self.mappings[ext] = type
4817 def get_type_from_filename(self, filename):
4818 basename, extension = os.path.splitext(os.path.basename(filename))
4820 # Extension includes the dot, so strip it (will leave extension
4821 # empty if filename ends with a dot, which is ok):
4822 extension = extension[1:]
4824 # If there is no extension (or the file ends with a period), use
4825 # the base name for mapping. This allows us to set mappings for
4826 # files such as README or Makefile:
4827 if not extension:
4828 extension = basename
4829 return self.mappings.get(extension, None)
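# Usage sketch for the MimeMapper above (the mime.types contents are
# hypothetical): given a file containing the lines "image/png png" and
# "text/plain README", get_type_from_filename('icons/logo.png') returns
# 'image/png', and the extensionless 'README' is looked up by its basename,
# so it maps to 'text/plain'.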
4832 def convert(start_pass, end_pass):
4833 "Convert a CVS repository to an SVN repository."
4835 cleanup = Cleanup()
4836 times = [ None ] * (end_pass + 1)
4837 times[start_pass - 1] = time.time()
4838 StatsKeeper().set_start_time(time.time())
4839 for i in range(start_pass - 1, end_pass):
4840 Log().write(LOG_QUIET, '----- pass %d -----' % (i + 1))
4841 _passes[i]()
4842 times[i + 1] = time.time()
4843 StatsKeeper().log_duration_for_pass(times[i + 1] - times[i], i + 1)
4844 # Dispose of items in Ctx() not intended to live past the end of the pass
4845 # (Identified by exactly one leading underscore)
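# For example (the attribute name is hypothetical), a per-pass cache stored
# as Ctx()._open_db would be deleted here, while double-underscore names and
# the name-mangled '_Ctx__shared_state' survive across passes.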
4846 for attr in dir(Ctx()):
4847 if (len(attr) > 2 and attr[0] == '_' and attr[1] != '_'
4848 and not attr[:6] == "_Ctx__"):
4849 delattr(Ctx(), attr)
4850 if not Ctx().skip_cleanup:
4851 cleanup.cleanup(_passes[i])
4852 StatsKeeper().set_end_time(time.time())
4854 Log().write(LOG_QUIET, StatsKeeper())
4855 if end_pass < 4:
4856 Log().write(LOG_QUIET,
4857 '(These are unaltered CVS repository stats and do not\n'
4858 ' reflect tags or branches excluded via --exclude)\n')
4859 Log().write(LOG_NORMAL, StatsKeeper().timings())
4862 def normalize_ttb_path(opt, path):
4863 """Normalize a path to be used for --trunk, --tags, or --branches.
4865 1. Strip leading, trailing, and duplicated '/'.
4866 2. Verify that the path is not empty.
4868 Return the normalized path.
4870 If the path is invalid, write an error message and exit."""
4872 norm_path = _path_join(*path.split('/'))
4873 if not norm_path:
4874 raise FatalError("cannot pass an empty path to %s." % (opt,))
4875 return norm_path
4878 def verify_paths_disjoint(*paths):
4879 """Verify that all of the paths in the argument list are disjoint.
4881 If any of the paths is nested in another one (i.e., in the sense
4882 that 'a/b/c/d' is nested in 'a/b'), or any two paths are identical,
4883 write an error message and exit."""
4885 paths = [(path.split('/'), path) for path in paths]
4886 # If all overlapping elements are equal, a shorter list is
4887 # considered "less than" a longer one. Therefore if any paths are
4888 # nested, this sort will leave at least one such pair adjacent, in
4889 # the order [nest,nestling].
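# Hypothetical example: for the inputs 'trunk', 'branches' and
# 'branches/1.x', sorting places ['branches'] immediately before
# ['branches', '1.x'], so the prefix comparison below detects the nesting
# and raises FatalError.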
4890 paths.sort()
4891 for i in range(1, len(paths)):
4892 split_path1, path1 = paths[i - 1]
4893 split_path2, path2 = paths[i]
4894 if len(split_path1) <= len(split_path2) \
4895 and split_path2[:len(split_path1)] == split_path1:
4896 raise FatalError("paths %s and %s are not disjoint." % (path1, path2,))
4899 def usage():
4900 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
4901 % os.path.basename(sys.argv[0])
4902 print ' --help, -h print this usage message and exit with success'
4903 print ' --version print the version number'
4904 print ' -q quiet'
4905 print ' -v verbose'
4906 print ' -s PATH path for SVN repos'
4907 print ' -p START[:END] start at pass START, end at pass END of %d' \
4908 % len(_passes)
4909 print ' If only START is given, run only pass START'
4910 print ' (implicitly enables --skip-cleanup)'
4911 print ' --existing-svnrepos load into existing SVN repository'
4912 print ' --dumpfile=PATH name of intermediate svn dumpfile'
4913 print ' --tmpdir=PATH directory to use for tmp data (defaults to cwd)'
4914 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
4915 print ' --dry-run do not create a repository or a dumpfile;'
4916 print ' just print what would happen.'
4917 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
4918 print ' (only use this if having problems with RCS)'
4919 print ' --svnadmin=PATH path to the svnadmin program'
4920 print ' --trunk-only convert only trunk commits, not tags or branches'
4921 print ' --trunk=PATH path for trunk (default: %s)' \
4922 % Ctx().trunk_base
4923 print ' --branches=PATH path for branches (default: %s)' \
4924 % Ctx().branches_base
4925 print ' --tags=PATH path for tags (default: %s)' \
4926 % Ctx().tags_base
4927 print ' --no-prune don\'t prune empty directories'
4928 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
4929 print ' --encoding=ENC encoding of log messages in CVS repos'
4930 print ' Multiple of these options may be passed, where they'
4931 print ' will be treated as an ordered list of encodings to'
4932 print ' attempt (with "ascii" as a hardcoded last resort)'
4933 print ' --force-branch=NAME force NAME to be a branch'
4934 print ' --force-tag=NAME force NAME to be a tag'
4935 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
4936 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
4937 print ' use Python regexp and reference syntax respectively'
4938 print ' --username=NAME username for cvs2svn-synthesized commits'
4939 print ' --skip-cleanup prevent the deletion of intermediate files'
4940 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
4941 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
4942 print ' --cvs-revnums record CVS revision numbers as file properties'
4943 print ' --mime-types=FILE specify an apache-style mime.types file for\n' \
4944 ' setting svn:mime-type'
4945 print ' --eol-from-mime-type set svn:eol-style by mime type (only with --mime-types)'
4946 print ' --no-default-eol don\'t set svn:eol-style by CVS defaults'
4947 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
4948 print ' cvs2svn sets svn:keywords on non-binary files to'
4949 print ' "%s")' % SVN_KEYWORDS_VALUE
4951 def main():
4952 # Convenience var, so we don't have to keep instantiating this Borg.
4953 ctx = Ctx()
4955 profiling = None
4956 start_pass = 1
4957 end_pass = len(_passes)
4959 try:
4960 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
4961 [ "help", "create", "trunk=",
4962 "username=", "existing-svnrepos",
4963 "branches=", "tags=", "encoding=",
4964 "force-branch=", "force-tag=", "exclude=",
4965 "use-cvs", "mime-types=",
4966 "eol-from-mime-type", "no-default-eol",
4967 "trunk-only", "no-prune", "dry-run",
4968 "dump-only", "dumpfile=", "tmpdir=",
4969 "svnadmin=", "skip-cleanup", "cvs-revnums",
4970 "bdb-txn-nosync", "fs-type=",
4971 "version", "profile",
4972 "keywords-off", "symbol-transform="])
4973 except getopt.GetoptError, e:
4974 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
4975 usage()
4976 sys.exit(1)
4978 for opt, value in opts:
4979 if opt == '--version':
4980 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
4981 sys.exit(0)
4982 elif opt == '-p':
4983 # Don't cleanup if we're doing incrementals.
4984 ctx.skip_cleanup = 1
4985 if value.find(':') > 0:
4986 start_pass, end_pass = map(int, value.split(':'))
4987 else:
4988 end_pass = start_pass = int(value)
4989 if start_pass > len(_passes) or start_pass < 1:
4990 raise FatalError(
4991 'illegal value (%d) for starting pass. Must be 1 through %d.'
4992 % (int(start_pass), len(_passes),))
4993 if end_pass < start_pass or end_pass > len(_passes):
4994 raise FatalError(
4995 'illegal value (%d) for ending pass. Must be %d through %d.'
4996 % (int(end_pass), int(start_pass), len(_passes),))
4997 elif (opt == '--help') or (opt == '-h'):
4998 ctx.print_help = 1
4999 elif opt == '-v':
5000 Log().log_level = LOG_VERBOSE
5001 ctx.verbose = 1
5002 elif opt == '-q':
5003 Log().log_level = LOG_QUIET
5004 ctx.quiet = 1
5005 elif opt == '-s':
5006 ctx.target = value
5007 elif opt == '--existing-svnrepos':
5008 ctx.existing_svnrepos = 1
5009 elif opt == '--dumpfile':
5010 ctx.dumpfile = value
5011 elif opt == '--tmpdir':
5012 ctx.tmpdir = value
5013 elif opt == '--use-cvs':
5014 ctx.use_cvs = 1
5015 elif opt == '--svnadmin':
5016 ctx.svnadmin = value
5017 elif opt == '--trunk-only':
5018 ctx.trunk_only = 1
5019 elif opt == '--trunk':
5020 ctx.trunk_base = normalize_ttb_path(opt, value)
5021 elif opt == '--branches':
5022 ctx.branches_base = normalize_ttb_path(opt, value)
5023 elif opt == '--tags':
5024 ctx.tags_base = normalize_ttb_path(opt, value)
5025 elif opt == '--no-prune':
5026 ctx.prune = None
5027 elif opt == '--dump-only':
5028 ctx.dump_only = 1
5029 elif opt == '--dry-run':
5030 ctx.dry_run = 1
5031 elif opt == '--encoding':
5032 ctx.encoding.insert(-1, value)
5033 elif opt == '--force-branch':
5034 ctx.forced_branches.append(value)
5035 elif opt == '--force-tag':
5036 ctx.forced_tags.append(value)
5037 elif opt == '--exclude':
5038 try:
5039 ctx.excludes.append(re.compile('^' + value + '$'))
5040 except re.error, e:
5041 raise FatalError("'%s' is not a valid regexp." % (value,))
5042 elif opt == '--mime-types':
5043 ctx.mime_types_file = value
5044 elif opt == '--eol-from-mime-type':
5045 ctx.eol_from_mime_type = 1
5046 elif opt == '--no-default-eol':
5047 ctx.no_default_eol = 1
5048 elif opt == '--keywords-off':
5049 ctx.keywords_off = 1
5050 elif opt == '--username':
5051 ctx.username = value
5052 elif opt == '--skip-cleanup':
5053 ctx.skip_cleanup = 1
5054 elif opt == '--cvs-revnums':
5055 ctx.cvs_revnums = 1
5056 elif opt == '--bdb-txn-nosync':
5057 ctx.bdb_txn_nosync = 1
5058 elif opt == '--fs-type':
5059 ctx.fs_type = value
5060 elif opt == '--create':
5061 sys.stderr.write(warning_prefix +
5062 ': The behaviour produced by the --create option is now the '
5063 'default,\nand passing the option is deprecated.\n')
5064 elif opt == '--profile':
5065 profiling = 1
5066 elif opt == '--symbol-transform':
5067 [pattern, replacement] = value.split(":")
5068 try:
5069 pattern = re.compile(pattern)
5070 except re.error, e:
5071 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5072 ctx.symbol_transforms.append((pattern, replacement,))
5074 if ctx.print_help:
5075 usage()
5076 sys.exit(0)
5078 # Consistency check for options and arguments.
5079 if len(args) == 0:
5080 usage()
5081 sys.exit(1)
5083 if len(args) > 1:
5084 sys.stderr.write(error_prefix +
5085 ": must pass only one CVS repository.\n")
5086 usage()
5087 sys.exit(1)
5089 ctx.cvsroot = args[0]
5091 if not os.path.isdir(ctx.cvsroot):
5092 raise FatalError("the given CVS repository path '%s' is not an "
5093 "existing directory." % ctx.cvsroot)
5095 if ctx.use_cvs:
5096 ctx.cvs_repository = CVSRepositoryViaCVS()
5097 else:
5098 ctx.cvs_repository = CVSRepositoryViaRCS()
5100 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5101 raise FatalError("must pass one of '-s' or '--dump-only'.")
5103 def not_both(opt1val, opt1name, opt2val, opt2name):
5104 if opt1val and opt2val:
5105 raise FatalError("cannot pass both '%s' and '%s'."
5106 % (opt1name, opt2name,))
5108 not_both(ctx.target, '-s',
5109 ctx.dump_only, '--dump-only')
5111 not_both(ctx.dump_only, '--dump-only',
5112 ctx.existing_svnrepos, '--existing-svnrepos')
5114 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5115 ctx.existing_svnrepos, '--existing-svnrepos')
5117 not_both(ctx.dump_only, '--dump-only',
5118 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5120 not_both(ctx.quiet, '-q',
5121 ctx.verbose, '-v')
5123 not_both(ctx.fs_type, '--fs-type',
5124 ctx.existing_svnrepos, '--existing-svnrepos')
5126 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5127 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5128 % ctx.fs_type)
5130 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5131 ctx.project = Project(ctx.cvsroot,
5132 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5134 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5135 raise FatalError("the svn-repos-path '%s' is not an "
5136 "existing directory." % ctx.target)
5138 if not ctx.dump_only and not ctx.existing_svnrepos \
5139 and (not ctx.dry_run) and os.path.exists(ctx.target):
5140 raise FatalError("the svn-repos-path '%s' exists.\n"
5141 "Remove it, or pass '--existing-svnrepos'."
5142 % ctx.target)
5144 if ctx.target and not ctx.dry_run:
5145 # Verify that svnadmin can be executed. The 'help' subcommand
5146 # should be harmless.
5147 try:
5148 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5149 except CommandFailedException, e:
5150 raise FatalError(
5151 '%s\n'
5152 'svnadmin could not be executed. Please ensure that it is\n'
5153 'installed and/or use the --svnadmin option.' % (e,))
5155 if ctx.mime_types_file:
5156 ctx.mime_mapper = MimeMapper()
5157 ctx.mime_mapper.set_mime_types_file(ctx.mime_types_file)
5159 # Make sure the tmp directory exists. Note that we don't check if
5160 # it's empty -- we want to be able to use, for example, "." to hold
5161 # tempfiles. But if we *did* want to check if it were empty, we'd do
5162 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5163 if not os.path.exists(ctx.tmpdir):
5164 os.mkdir(ctx.tmpdir)
5165 elif not os.path.isdir(ctx.tmpdir):
5166 raise FatalError(
5167 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5168 " exists and is not a directory. Please make it be a directory,\n"
5169 " or specify some other directory for temporary files."
5170 % (ctx.tmpdir,))
5172 # But do lock the tmpdir, to avoid process clash.
5173 try:
5174 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5175 except OSError, e:
5176 if e.errno == errno.EACCES:
5177 raise FatalError("Permission denied:"
5178 + " No write access to directory '%s'." % ctx.tmpdir)
5179 if e.errno == errno.EEXIST:
5180 raise FatalError(
5181 "cvs2svn is using directory '%s' for temporary files, but\n"
5182 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5183 " cvs2svn process is currently using '%s' as its temporary\n"
5184 " workspace. If you are certain that is not the case,\n"
5185 " then remove the '%s/cvs2svn.lock' subdirectory."
5186 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5187 raise
5188 try:
5189 if profiling:
5190 import hotshot
5191 prof = hotshot.Profile('cvs2svn.hotshot')
5192 prof.runcall(convert, start_pass, end_pass)
5193 prof.close()
5194 else:
5195 convert(start_pass, end_pass)
5196 finally:
5197 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5198 except: pass
5201 if __name__ == '__main__':
5202 try:
5203 main()
5204 except FatalException, e:
5205 sys.stderr.write(str(e))
5206 sys.exit(1)