Fix brokenness when gdbm is the selected anydbm module.
[cvs2svn.git] / cvs2svn
blob0984e54881f07e40692684bdb7aa01f5036681c1
1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 import cvs2svn_rcsparse
23 import os
24 import sys
25 import sha
26 import re
27 import time
28 import fileinput
29 import fnmatch
30 import string
31 import getopt
32 import stat
33 import md5
34 import marshal
35 import errno
36 import popen2
37 import types
38 import ConfigParser
39 import UserDict
40 try:
41 # Try to get access to a bunch of encodings for use with --encoding.
42 # See http://cjkpython.i18n.org/ for details.
43 import iconv_codec
44 except ImportError:
45 pass
# Warnings and errors start with these strings.  They are typically
# followed by a colon and a space, as in "%s: " ==> "WARNING: ".
warning_prefix = "WARNING"
error_prefix = "ERROR"

# Make sure this Python is recent enough (0x02020000 is Python 2.2.0).
if sys.hexversion < 0x02020000:
    # The message starts directly with the prefix ("ERROR: ..."), in the
    # same form as every other error message in this file.
    sys.stderr.write("%s: Python 2.2 or higher required, "
                     "see www.python.org.\n" % error_prefix)
    sys.exit(1)
58 # Pretend we have true booleans on older python versions
59 try:
60 True
61 except:
62 True = 1
63 False = 0
# Opening pipes was a mess before Python 2.4, because some methods did
# not exist on some platforms, and some behaved differently on others.
# Python 2.4 solved this by adding the subprocess module, but since we
# cannot require such a new version, we cannot use it directly, but
# must implement a simplified Popen using the best means necessary.
#
# The SimplePopen class only has the following members and methods, all
# behaving as documented in the subprocess.Popen class:
# - stdin
# - stdout
# - stderr
# - wait
#
# The stderr member exists in every variant.  In the first two variants
# it is None when CAPTURE_STDERR is false; the last-resort variant
# always captures stderr.
try:
    # First try subprocess.Popen...
    import subprocess

    class SimplePopen:
        """Run CMD with pipes attached, using subprocess.Popen."""

        def __init__(self, cmd, capture_stderr):
            """Start CMD; capture its stderr only if CAPTURE_STDERR is true."""
            if capture_stderr:
                stderr = subprocess.PIPE
            else:
                stderr = None
            self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE,
                                           stderr=stderr)
            self.stdin = self._popen.stdin
            self.stdout = self._popen.stdout
            # Popen.stderr is None when stderr was not redirected, so the
            # attribute can be assigned unconditionally.
            self.stderr = self._popen.stderr
            self.wait = self._popen.wait
except ImportError:
    if hasattr(popen2, 'Popen3'):
        # ...then try popen2.Popen3...
        class SimplePopen:
            """Run CMD with pipes attached, using popen2.Popen3."""

            def __init__(self, cmd, capture_stderr):
                """Start CMD; capture its stderr only if CAPTURE_STDERR is
                true."""
                self._popen3 = popen2.Popen3(cmd, capture_stderr)
                self.stdin = self._popen3.tochild
                self.stdout = self._popen3.fromchild
                # Popen3.childerr is None when stderr is not captured.
                self.stderr = self._popen3.childerr
                self.wait = self._popen3.wait
    else:
        # ...and if all fails, use popen2.popen3...
        class SimplePopen:
            """Run CMD with pipes attached, using popen2.popen3().

            This variant always captures stderr, whatever CAPTURE_STDERR
            says."""

            def __init__(self, cmd, capture_stderr):
                """Start CMD, flattening an argument list into a command
                string first."""
                if type(cmd) != types.StringType:
                    cmd = argv_to_command_string(cmd)
                self.stdout, self.stdin, self.stderr = \
                    popen2.popen3(cmd, mode='b')

            def wait(self):
                """Close the pipes and return the first true value that a
                close() call returns, if any."""
                return self.stdout.close() or self.stdin.close() or \
                       self.stderr.close()
# DBM module selection

# 1. If we have bsddb3, it is probably newer than bsddb.  Fake bsddb = bsddb3,
#    so that the dbhash module used by anydbm will use bsddb3.
try:
    import bsddb3
except ImportError:
    pass
else:
    sys.modules['bsddb'] = sys.modules['bsddb3']

# 2. These DBM modules are not good for cvs2svn.
import anydbm
if anydbm._defaultmod.__name__ in ('dumbdbm', 'dbm'):
    sys.stderr.write(
        '%s: your installation of Python does not contain a suitable\n'
        'DBM module -- cvs2svn cannot continue.\n'
        'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n'
        % error_prefix)
    sys.exit(1)

# 3. If we are using the old bsddb185 module, then try to prefer gdbm
#    instead.  Unfortunately, gdbm appears not to be trouble free, either.
if hasattr(anydbm._defaultmod, 'bsddb'):
    if not hasattr(anydbm._defaultmod.bsddb, '__version__'):
        try:
            gdbm = __import__('gdbm')
        except ImportError:
            sys.stderr.write(
                '%s: The version of the bsddb module found '
                'on your computer has been reported to malfunction on some datasets, '
                'causing KeyError exceptions. You may wish to upgrade your Python to '
                'version 2.3 or later.\n' % warning_prefix)
        else:
            anydbm._defaultmod = gdbm
151 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
152 branch_tag = re.compile('^[0-9.]+\\.0\\.[0-9]+$')
153 vendor_tag = re.compile('^[0-9]+\\.[0-9]+\\.[0-9]+$')
155 SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
157 # This really only matches standard '1.1.1.*'-style vendor revisions.
158 # One could conceivably have a file whose default branch is 1.1.3 or
159 # whatever, or was that at some point in time, with vendor revisions
160 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
161 # is the only time this regexp gets used), we'd have no basis for
162 # assuming that the non-standard vendor branch had ever been the
163 # default branch anyway, so we don't want this to match them anyway.
164 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
166 # If this run's output is a repository, then (in the tmpdir) we use
167 # a dumpfile of this name for repository loads.
169 # If this run's output is a dumpfile, then this is default name of
170 # that dumpfile, but in the current directory (unless the user has
171 # specified a dumpfile path, of course, in which case it will be
172 # wherever the user said).
173 DUMPFILE = 'cvs2svn-dump'
175 # This file appears with different suffixes at different stages of
176 # processing. CVS revisions are cleaned and sorted here, for commit
177 # grouping. See design-notes.txt for details.
178 DATAFILE = 'cvs2svn-data'
180 # This file contains a marshalled copy of all the statistics that we
181 # gather throughout the various runs of cvs2svn. The data is stored as a
182 # marshalled dictionary.
183 STATISTICS_FILE = 'cvs2svn-statistics'
185 # This text file contains records (1 per line) that describe svn
186 # filesystem paths that are the opening and closing source revisions
187 # for copies to tags and branches. The format is as follows:
189 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
191 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
192 # SVN_REVNUM are the primary and secondary sorting criteria for
193 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
194 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
195 # A sorted version of the above file.
196 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
198 # This file is a temporary file for storing symbolic_name -> closing
199 # CVSRevision until the end of our pass where we can look up the
200 # corresponding SVNRevNum for the closing revs and write these out to
201 # the SYMBOL_OPENINGS_CLOSINGS.
202 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
204 # Skeleton version of an svn filesystem.
205 # (These supersede and will eventually replace the two above.)
206 # See class SVNRepositoryMirror for how these work.
207 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
208 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
210 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
211 # SYMBOL_OPENINGS_CLOSINGS_SORTED
212 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
214 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
215 # the CVSRevision is the last such that is a source for those symbolic
216 # names. For example, if branch B's number is 1.3.0.2 in this CVS
217 # file, and this file's 1.3 is the latest (by date) revision among
218 # *all* CVS files that is a source for branch B, then the
219 # CVSRevision.unique_key() corresponding to this file at 1.3 would
220 # list at least B in its list.
221 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
223 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
224 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
225 ### the s-revs data in this database.
226 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
228 # Lists all symbolic names that are tags. Keys are strings (symbolic
229 # names), values are ignorable.
230 TAGS_DB = 'cvs2svn-tags.db'
232 # A list of all tags. Each line consists of the tag name and the number
233 # of files in which it exists, separated by a space.
234 TAGS_LIST = 'cvs2svn-tags.txt'
236 # A list of all branches. The file is stored as a plain text file
237 # to make it easy to look at in an editor. Each line contains the
238 # branch name, the number of files where the branch is created, the
239 # commit count, and a list of tags and branches that are defined on
240 # revisions in the branch.
241 BRANCHES_LIST = 'cvs2svn-branches.txt'
243 # These two databases provide a bidirectional mapping between
244 # CVSRevision.unique_key()s and Subversion revision numbers.
246 # The first maps CVSRevision.unique_key() to a number; the values are
247 # not unique.
249 # The second maps a number to a list of CVSRevision.unique_key()s.
250 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
251 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
253 # This database maps svn_revnums to tuples of (symbolic_name, date).
255 # The svn_revnums are the revision numbers of all non-primary
256 # SVNCommits. No primary SVNCommit has a key in this database.
258 # The date is stored for all commits in this database.
260 # For commits that fill symbolic names, the symbolic_name is stored.
261 # For commits that are default branch syncs, the symbolic_name is None.
262 SVN_COMMIT_NAMES_DATES = 'cvs2svn-svn-commit-names-and-dates.db'
264 # This database maps svn_revnums of a default branch synchronization
265 # commit to the svn_revnum of the primary SVNCommit that motivated it.
267 # (NOTE: Secondary commits that fill branches and tags also have a
268 # motivating commit, but we do not record it because it is (currently)
269 # not needed for anything.)
271 # This mapping is used when generating the log message for the commit
272 # that synchronizes the default branch with trunk.
273 MOTIVATING_REVNUMS = 'cvs2svn-svn-motivating-commit-revnums.db'
275 # How many bytes to read at a time from a pipe. 128 kiB should be
276 # large enough to be efficient without wasting too much memory.
277 PIPE_READ_SIZE = 128 * 1024
279 # Record the default RCS branches, if any, for CVS filepaths.
281 # The keys are CVS filepaths, relative to the top of the repository
282 # and with the ",v" stripped off, so they match the cvs paths used in
283 # Commit.commit(). The values are vendor branch revisions, such as
284 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
285 # represents the highest vendor branch revision thought to have ever
286 # been head of the default branch.
288 # The reason we record a specific vendor revision, rather than a
289 # default branch number, is that there are two cases to handle:
291 # One case is simple. The RCS file lists a default branch explicitly
292 # in its header, such as '1.1.1'. In this case, we know that every
293 # revision on the vendor branch is to be treated as head of trunk at
294 # that point in time.
296 # But there's also a degenerate case. The RCS file does not currently
297 # have a default branch, yet we can deduce that for some period in the
298 # past it probably *did* have one. For example, the file has vendor
299 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
300 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
301 # case, we should record 1.1.1.96 as the last vendor revision to have
302 # been the head of the default branch.
303 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
305 # Records the author and log message for each changeset.
306 # The keys are author+log digests, the same kind used to identify
307 # unique revisions in the .revs, etc files. Each value is a tuple
308 # of two elements: '(author logmessage)'.
309 METADATA_DB = "cvs2svn-metadata.db"
311 # A temporary on-disk hash that maps CVSRevision unique keys to a new
312 # timestamp for that CVSRevision. These new timestamps are created in
313 # pass2, and this hash is used exclusively in pass2.
314 TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"
316 REVS_SUFFIX = '.revs'
317 CLEAN_REVS_SUFFIX = '.c-revs'
318 SORTED_REVS_SUFFIX = '.s-revs'
319 RESYNC_SUFFIX = '.resync'
321 SVN_INVALID_REVNUM = -1
323 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
325 # Things that can happen to a file.
326 OP_NOOP = '-'
327 OP_ADD = 'A'
328 OP_DELETE = 'D'
329 OP_CHANGE = 'C'
331 # A deltatext either does or doesn't represent some change.
332 DELTATEXT_NONEMPTY = 'N'
333 DELTATEXT_EMPTY = 'E'
335 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
337 # Constants used in SYMBOL_OPENINGS_CLOSINGS
338 OPENING = 'O'
339 CLOSING = 'C'
class FatalException(Exception):
    """Signal an error from which the program cannot recover.

    When main() lets this exception escape, the global layer of the
    program catches it, prints its string representation, and ends the
    program with an exit code of 1."""
class FatalError(FatalException):
    """A FatalException whose message starts with error_prefix."""

    def __init__(self, msg):
        """Build the message as error_prefix, ': ', MSG and a newline."""
        text = '%s: %s\n' % (error_prefix, msg)
        FatalException.__init__(self, text)
def temp(basename):
    """Return the path of BASENAME inside the directory Ctx().tmpdir.

    This exists only to keep call sites short."""
    tmpdir = Ctx().tmpdir
    return os.path.join(tmpdir, basename)
365 # Since the unofficial set also includes [/\] we need to translate those
366 # into ones that don't conflict with Subversion limitations.
367 def _clean_symbolic_name(name):
368 """Return symbolic name NAME, translating characters that Subversion
369 does not allow in a pathname."""
370 name = name.replace('/','++')
371 name = name.replace('\\','--')
372 return name
374 def _path_join(*components):
375 """Join two or more pathname COMPONENTS, inserting '/' as needed.
376 Empty component are skipped."""
377 return string.join(filter(None, components), '/')
379 def _path_split(path):
380 """Split the svn pathname PATH into a pair, (HEAD, TAIL).
382 This is similar to os.path.split(), but always uses '/' as path
383 separator. PATH is an svn path, which should not start with a '/'.
384 HEAD is everything before the last slash, and TAIL is everything
385 after. If PATH ends in a slash, TAIL will be empty. If there is no
386 slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
387 TAIL are empty."""
389 pos = path.rfind('/')
390 if pos == -1:
391 return ('', path,)
392 else:
393 return (path[:pos], path[pos+1:],)
def to_utf8(value, mode='replace'):
    """Return VALUE re-encoded as UTF-8.

    The encodings in Ctx().encoding are tried in order as the source
    encoding of VALUE, with MODE passed as the error-handling mode; the
    result of the first one that works is returned.  Each failed attempt
    is logged at verbose level.  Raise UnicodeError when every source
    encoding fails."""
    ### FIXME: The 'replace' default mode should be an option,
    ### like --encoding is.
    for encoding in Ctx().encoding:
        try:
            decoded = unicode(value, encoding, mode)
            return decoded.encode('utf8')
        except UnicodeError:
            Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
                        % (encoding, value))
    raise UnicodeError
def run_command(command):
    """Run COMMAND with os.system(); raise FatalError if the result is
    nonzero."""
    status = os.system(command)
    if status:
        raise FatalError('Command failed: "%s"' % (command,))
class CommandFailedException(Exception):
    """Raised by check_command_runs() when the checked command fails."""
def check_command_runs(cmd, cmdname):
    """Verify that the command CMD runs cleanly.

    CMD is a list or string, as accepted by SimplePopen.  CMDNAME is the
    name used for the command in exception messages.

    Three conditions must hold: (1) starting the command raises no
    OSError; (2) it exits with status 0; (3) it writes nothing to stderr.
    Raise a CommandFailedException describing the problem otherwise."""
    try:
        pipe = SimplePopen(cmd, True)
    except OSError:
        e = sys.exc_info()[1]
        raise CommandFailedException('error executing %s: %s' % (cmdname, e,))

    # The command gets no input; its regular output is read and discarded.
    pipe.stdin.close()
    pipe.stdout.read()
    errmsg = pipe.stderr.read()
    status = pipe.wait()
    if status == 0 and not errmsg:
        return

    msg = 'error executing %s: status %s' % (cmdname, status,)
    if errmsg:
        msg = '%s, error output:\n%s' % (msg, errmsg,)
    raise CommandFailedException(msg)
class CVSRepository:
    """A CVS repository from which data can be extracted."""

    def __init__(self, cvs_repos_path):
        """CVS_REPOS_PATH is the top of the CVS repository (at least as
        far as this run is concerned).  Raise FatalError if it is not an
        existing directory."""
        if not os.path.isdir(cvs_repos_path):
            raise FatalError("The specified CVS repository path '%s' is not an "
                             "existing directory." % cvs_repos_path)

        self.cvs_repos_path = os.path.normpath(cvs_repos_path)
        # Matches the repository path at the start of a filename, when it
        # is followed by a path separator or by the end of the string.
        prefix_pattern = '^%s(%s|$)' % (re.escape(self.cvs_repos_path),
                                        re.escape(os.sep))
        self.cvs_prefix_re = re.compile(prefix_pattern)

    def get_cvs_path(self, fname):
        """Return FNAME relative to cvs_repos_path, with ',v' removed.

        FNAME is a filesystem name that has to be within
        self.cvs_repos_path; FatalError is raised if it is not.  In the
        result a trailing ',v' is stripped off if present and os.sep is
        converted to '/'."""
        relative, hits = self.cvs_prefix_re.subn('', fname, 1)
        if hits != 1:
            raise FatalError(
                "get_cvs_path: '%s' is not a sub-path of '%s'"
                % (fname, self.cvs_repos_path,))
        if relative[-2:] == ',v':
            relative = relative[:-2]
        return relative.replace(os.sep, '/')

    def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
        """Return a command string, and the pipe created using that
        string.  C_REV is a CVSRevision.  If SUPPRESS_KEYWORD_SUBSTITUTION
        is True, then suppress the substitution of RCS/CVS keywords in the
        output.  The pipe returns the text of that CVS Revision.

        This base implementation only raises NotImplementedError."""
        raise NotImplementedError
class CVSRepositoryViaRCS(CVSRepository):
    """A CVSRepository accessed via RCS."""

    def __init__(self, cvs_repos_path):
        """Set up the repository at CVS_REPOS_PATH and check that the
        'co' command runs; raise FatalError if it does not."""
        CVSRepository.__init__(self, cvs_repos_path)
        try:
            check_command_runs([ 'co', '-V' ], 'co')
        except CommandFailedException:
            e = sys.exc_info()[1]
            raise FatalError('%s\n'
                             'Please check that co is installed and in your PATH\n'
                             '(it is a part of the RCS software).' % (e,))

    def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
        """Return the 'co' argument list for C_REV together with a pipe
        running it.  '-kk' is added to the arguments when
        SUPPRESS_KEYWORD_SUBSTITUTION is true.  The pipe's stdin is
        already closed."""
        keyword_flags = [ ]
        if suppress_keyword_substitution:
            keyword_flags.append('-kk')
        pipe_cmd = ([ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
                    + keyword_flags
                    + [ c_rev.rcs_path() ])
        pipe = SimplePopen(pipe_cmd, True)
        pipe.stdin.close()
        return pipe_cmd, pipe
class CVSRepositoryViaCVS(CVSRepository):
    """A CVSRepository accessed via CVS."""

    def __init__(self, cvs_repos_path):
        """Locate the repository root above CVS_REPOS_PATH, export it as
        CVSROOT, and pick global arguments that the installed cvs
        accepts.  Raise FatalError if no root is found or cvs cannot be
        run."""
        CVSRepository.__init__(self, cvs_repos_path)

        # Ascend above the specified root if necessary, to find the
        # cvs_repository_root (a directory containing a CVSROOT directory)
        # and the cvs_module (the path of the conversion root within the
        # cvs repository).  NB: cvs_module must be separated by '/' *not*
        # by os.sep .
        self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
        self.cvs_module = ""
        while not os.path.isdir(
                os.path.join(self.cvs_repository_root, 'CVSROOT')):
            # Step up one directory:
            parent, module_component = os.path.split(self.cvs_repository_root)
            if parent == self.cvs_repository_root:
                # Hit the root (of the drive, on Windows) without finding a
                # CVSROOT dir.
                raise FatalError(
                    "the path '%s' is not a CVS repository, nor a path "
                    "within a CVS repository. A CVS repository contains "
                    "a CVSROOT directory within its root directory."
                    % (self.cvs_repos_path,))
            self.cvs_repository_root = parent
            self.cvs_module = module_component + "/" + self.cvs_module

        os.environ['CVSROOT'] = self.cvs_repository_root

        # Try the argument sets in order of preference and keep the first
        # one with which 'cvs --version' runs cleanly.
        for candidate in ([ "-q", "-R" ], [ "-q" ]):
            self.global_arguments = candidate
            try:
                check_command_runs(
                    [ 'cvs' ] + candidate + [ '--version' ], 'cvs')
            except CommandFailedException:
                e = sys.exc_info()[1]
            else:
                break
        else:
            # Neither argument set worked; report the last failure.
            raise FatalError(
                '%s\n'
                'Please check that cvs is installed and in your PATH.' % (e,))

    def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
        """Return the 'cvs co' argument list for C_REV together with a
        pipe running it.  '-kk' is added to the arguments when
        SUPPRESS_KEYWORD_SUBSTITUTION is true.  The pipe's stdin is
        already closed."""
        keyword_flags = [ ]
        if suppress_keyword_substitution:
            keyword_flags.append('-kk')
        pipe_cmd = ([ 'cvs' ] + self.global_arguments
                    + [ 'co', '-r' + c_rev.rev, '-p' ]
                    + keyword_flags
                    + [ self.cvs_module + c_rev.cvs_path ])
        pipe = SimplePopen(pipe_cmd, True)
        pipe.stdin.close()
        return pipe_cmd, pipe
def generate_ignores(c_rev):
    """Return the list of ignore patterns found in the text of C_REV.

    The text is read through the pipe that
    Ctx().cvs_repository.get_co_pipe() creates for C_REV.  Patterns are
    separated by any whitespace.  A lone '!' discards every pattern seen
    before it.  Raise FatalError if the command exits with a nonzero
    status."""
    # Read in props.  The chunks are collected in a list and joined once,
    # rather than growing a string by repeated concatenation.
    pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
    chunks = [ ]
    buf = pipe.stdout.read(PIPE_READ_SIZE)
    while buf:
        chunks.append(buf)
        buf = pipe.stdout.read(PIPE_READ_SIZE)
    pipe.stdout.close()
    error_output = pipe.stderr.read()
    exit_status = pipe.wait()
    if exit_status:
        raise FatalError("The command '%s' failed with exit status: %s\n"
                         "and the following output:\n"
                         "%s" % (pipe_cmd, exit_status, error_output))
    raw_ignore_val = ''.join(chunks)

    # split() with no argument splits on any run of whitespace and never
    # yields empty strings, so no separate empty-line check is needed.
    ignore_vals = [ ]
    for ignore in raw_ignore_val.split():
        if ignore == '!':
            # Reset the list if we encounter a '!'
            # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
            ignore_vals = [ ]
        else:
            ignore_vals.append(ignore)
    return ignore_vals
# Return a string that has not been returned by gen_key() before.
# The counter starts as a plain 0: Python 2.2 and later promote an
# overflowing int to a long automatically, and '%x' formats both alike.
gen_key_base = 0
def gen_key():
    """Return the current counter value in lowercase hex and advance
    the counter by one."""
    global gen_key_base
    current = gen_key_base
    gen_key_base = current + 1
    return '%x' % current
# ============================================================================
# This code is copied with a few modifications from:
#   subversion/subversion/bindings/swig/python/svn/core.py

if sys.platform == "win32":
    _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')

    def escape_shell_arg(arg):
        """Return ARG quoted for use as one argument on a Windows command
        line."""
        # The (very strange) parsing rules used by the C runtime library are
        # described at:
        # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp

        # double up slashes, but only if they are followed by a quote character
        doubled = _escape_shell_arg_re.sub(r'\1\1\2', arg)

        # surround by quotes and escape quotes inside
        return '"' + doubled.replace('"', '"^""') + '"'

    def argv_to_command_string(argv):
        """Flatten a list of command line arguments into a command string.

        The resulting command string is expected to be passed to the system
        shell which os functions like popen() and system() invoke internally.
        """
        # According to cmd's usage notes (cmd /?), it parses the command line
        # by "seeing if the first character is a quote character and if so,
        # stripping the leading character and removing the last quote
        # character."  So to prevent the argument string from being changed
        # we add an extra set of quotes around it here.
        escaped = map(escape_shell_arg, argv)
        return '"' + " ".join(escaped) + '"'

else:
    def escape_shell_arg(str):
        """Return STR wrapped in single quotes, with each embedded single
        quote written as '\\''."""
        # NOTE: the parameter name shadows the builtin; it is kept as is
        # so that the signature stays unchanged.
        return "'" + str.replace("'", "'\\''") + "'"

    def argv_to_command_string(argv):
        """Flatten a list of command line arguments into a command string.

        The resulting command string is expected to be passed to the system
        shell which os functions like popen() and system() invoke internally.
        """
        escaped = map(escape_shell_arg, argv)
        return " ".join(escaped)
# ============================================================================
def format_date(date):
    """Return an svn-compatible date string for DATE (seconds since epoch).

    The result is expressed in UTC, with the fractional seconds always
    written as zeros."""
    # A Subversion date looks like "2002-09-29T14:44:59.000000Z"
    utc = time.gmtime(date)
    return time.strftime("%Y-%m-%dT%H:%M:%S", utc) + ".000000Z"
def sort_file(infile, outfile):
    """Sort the lines of INFILE into OUTFILE with the external 'sort'
    command, using Ctx().tmpdir for sort's temporary files.

    LC_ALL is forced to 'C' while the command runs and is put back to its
    previous state afterwards, even when the command fails.  A failing
    command makes run_command() raise FatalError."""
    # sort the log files

    # GNU sort will sort our dates differently (incorrectly!) if our
    # LC_ALL is anything but 'C', so temporarily set it to 'C'.
    lc_all_tmp = os.environ.get('LC_ALL', None)
    os.environ['LC_ALL'] = 'C'
    try:
        # The -T option to sort has a nice side effect.  The Win32 sort is
        # case insensitive and cannot be used, and since it does not
        # understand the -T option and dies if we try to use it, there is
        # no risk that we use that sort by accident.
        run_command('sort -T %s %s > %s' % (Ctx().tmpdir, infile, outfile))
    finally:
        # Restore the caller's environment whether or not sort succeeded.
        if lc_all_tmp is None:
            del os.environ['LC_ALL']
        else:
            os.environ['LC_ALL'] = lc_all_tmp
def match_regexp_list(regexp_list, string):
    """Return True if at least one compiled regexp in REGEXP_LIST matches
    at the start of STRING, and False otherwise."""
    for pattern in regexp_list:
        if pattern.match(string) is None:
            continue
        return True
    return False
class LF_EOL_Filter:
    """Wrap a stream so that every end-of-line marker read from it (CRLF,
    CR or LF) comes out as a single LF."""

    def __init__(self, stream):
        """STREAM is the object whose read(size) method supplies the data."""
        self.stream = stream
        # True when the previous chunk ended in a CR that has been held
        # back until it is known whether an LF follows it.
        self.carry_cr = False
        self.eof = False

    def read(self, size):
        """Read SIZE units at a time from the stream until there is
        converted data to return, and return it.  An empty result means
        end of file."""
        while True:
            chunk = self.stream.read(size)
            self.eof = len(chunk) == 0
            if self.carry_cr:
                chunk = '\r' + chunk
                self.carry_cr = False
            if not self.eof and chunk.endswith('\r'):
                # Hold the CR back; the next chunk may start with its LF.
                self.carry_cr = True
                chunk = chunk[:-1]
            chunk = chunk.replace('\r\n', '\n').replace('\r', '\n')
            if self.eof or chunk:
                return chunk
# These constants represent the log levels that this script supports
LOG_WARN = -1
LOG_QUIET = 0
LOG_NORMAL = 1
LOG_VERBOSE = 2
class Log:
    """A simple logging facility.  Each line will be timestamped if
    self.use_timestamps is true.  This class is a Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    __shared_state = {}

    def __init__(self):
        """Attach to the shared state, filling in the defaults the first
        time an instance is created."""
        self.__dict__ = self.__shared_state
        if self.__dict__:
            return
        self.log_level = LOG_NORMAL
        # Set this to true if you want to see timestamps on each line output.
        self.use_timestamps = None
        self.logger = sys.stdout

    def _timestamp(self):
        """Output a detailed timestamp at the beginning of each line output."""
        # %M is the minute; %m would print the month a second time.
        self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))

    def write(self, log_level, *args):
        """This is the public method to use for writing to a file.  Only
        messages whose LOG_LEVEL is <= self.log_level will be printed.  If
        there are multiple ARGS, they will be separated by a space."""
        if log_level > self.log_level:
            return
        if self.use_timestamps:
            self._timestamp()
        self.logger.write(' '.join(map(str, args)) + "\n")
        # Ensure that log output doesn't get out-of-order with respect to
        # stderr output.
        self.logger.flush()
class Cleanup:
    """This singleton class manages any files created by cvs2svn.  When
    you first create a file, call Cleanup.register, passing the
    filename, and the last pass that you need the file.  After the end
    of that pass, your file will be cleaned up after running an optional
    callback.  This class is a Borg, see
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""

    __shared_state = {}

    def __init__(self):
        """Attach to the shared state, creating the empty registries the
        first time an instance is created."""
        self.__dict__ = self.__shared_state
        if self.__dict__:
            return
        # Maps a pass to a dictionary whose keys are the registered files.
        self._log = {}
        # Maps a file to the callback to run before it is removed.
        self._callbacks = {}

    def register(self, file, which_pass, callback=None):
        """Register FILE for cleanup at the end of WHICH_PASS, running
        function CALLBACK prior to removal.  Registering a given FILE is
        idempotent; you may register as many times as you wish, but it
        will only be cleaned up once.

        Note that if a file is registered multiple times, only the first
        callback registered for that file will be called at cleanup
        time.  Also note that if you register a database file you must
        close the database before cleanup, e.g. using a callback."""
        files_for_pass = self._log.setdefault(which_pass, {})
        files_for_pass[file] = 1
        if callback and file not in self._callbacks:
            self._callbacks[file] = callback

    def cleanup(self, which_pass):
        """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
        registered = self._log.get(which_pass)
        if registered is None:
            return
        for file in registered:
            Log().write(LOG_VERBOSE, "Deleting", file)
            callback = self._callbacks.get(file)
            if callback is not None:
                callback()
            os.unlink(file)
791 # Always use these constants for opening databases.
792 DB_OPEN_READ = 'r'
793 DB_OPEN_NEW = 'n'
class AbstractDatabase(UserDict.DictMixin):
    """An abstract base class for anydbm-based databases."""

    def __init__(self, filename, mode):
        """Open the anydbm database FILENAME with MODE and keep it in
        self.db."""
        # pybsddb3 has a bug which prevents it from working with
        # Berkeley DB 4.2 if you open the db with 'n' ("new").  This
        # causes the DB_TRUNCATE flag to be passed, which is disallowed
        # for databases protected by lock and transaction support
        # (bsddb databases use locking from bsddb version 4.2.4 onwards).
        #
        # Therefore, manually perform the removal (we can do this, because
        # we know that for bsddb - but *not* anydbm in general - the database
        # consists of one file with the name we specify, rather than several
        # based on that name).
        using_dbhash = anydbm._defaultmod.__name__ == 'dbhash'
        if using_dbhash and mode == 'n':
            if os.path.isfile(filename):
                os.unlink(filename)
            mode = 'c'

        self.db = anydbm.open(filename, mode)

        # Import implementations for many mapping interface methods.
        # Note that we specifically do not do this for any method which handles
        # *values*, because our derived classes may define __getitem__ and
        # __setitem__ to override the storage of values, and grabbing methods
        # directly from the dbm object would bypass this.
        delegated = ('__delitem__', 'keys', '__iter__', 'has_key',
                     '__contains__', 'iterkeys', 'clear')
        for meth_name in delegated:
            meth_ref = getattr(self.db, meth_name, None)
            if meth_ref:
                setattr(self, meth_name, meth_ref)

    def __delitem__(self, key):
        "gdbm does not define a __delitem__ we can assign."
        del self.db[key]
class SDatabase(AbstractDatabase):
    """A database that can only store strings; values go to and from
    the underlying dbm object unchanged."""

    def __getitem__(self, key):
        stored = self.db[key]
        return stored

    def __setitem__(self, key, value):
        self.db[key] = value
class Database(AbstractDatabase):
    """A database that uses the marshal module to store built-in types."""

    def __getitem__(self, key):
        serialized = self.db[key]
        return marshal.loads(serialized)

    def __setitem__(self, key, value):
        serialized = marshal.dumps(value)
        self.db[key] = serialized
class StatsKeeper:
  """Collect conversion statistics in state shared by all instances.

  Every instance rebinds its __dict__ to one class-level dictionary, so
  whatever is recorded through one StatsKeeper is visible through any
  other.  The 'data' dictionary can be saved to and restored from
  self.filename with archive() and unarchive()."""

  __shared_state = { }

  def __init__(self):
    self.__dict__ = self.__shared_state
    if self.__dict__:
      # An earlier instance already initialized the shared state.
      return
    self.filename = temp(STATISTICS_FILE)
    Cleanup().register(self.filename, pass8)
    # This can get kinda large, so we don't store it in our data dict.
    self.repos_files = { }

    if os.path.exists(self.filename):
      self.unarchive()
    else:
      self.data = { 'cvs_revs_count' : 0,
                    'tags': { },
                    'branches' : { },
                    'repos_size' : 0,
                    'repos_file_count' : 0,
                    'svn_rev_count' : None,
                    # Sentinel that is larger than any timestamp seen.
                    'first_rev_date' : 2 ** 32,
                    'last_rev_date' : 0,
                    'pass_timings' : { },
                    'start_time' : 0,
                    'end_time' : 0,
                    }

  def log_duration_for_pass(self, duration, pass_num):
    """Record that pass PASS_NUM took DURATION seconds."""
    self.data['pass_timings'][pass_num] = duration

  def set_start_time(self, start):
    """Record START as the start time of the whole run."""
    self.data['start_time'] = start

  def set_end_time(self, end):
    """Record END as the end time of the whole run."""
    self.data['end_time'] = end

  def _bump_item(self, key, amount=1):
    """Add AMOUNT to the counter stored under KEY."""
    self.data[key] = self.data[key] + amount

  def reset_c_rev_info(self):
    """Forget the revision count, tags and branches seen so far."""
    self.data['cvs_revs_count'] = 0
    self.data['tags'] = { }
    self.data['branches'] = { }

  def record_c_rev(self, c_rev):
    """Fold the CVSRevision C_REV into the statistics."""
    self._bump_item('cvs_revs_count')

    for tag in c_rev.tags:
      self.data['tags'][tag] = None
    for branch in c_rev.branches:
      self.data['branches'][branch] = None

    if c_rev.timestamp < self.data['first_rev_date']:
      self.data['first_rev_date'] = c_rev.timestamp

    if c_rev.timestamp > self.data['last_rev_date']:
      self.data['last_rev_date'] = c_rev.timestamp

    # Only add the size if this is the first time we see the file.
    if c_rev.fname not in self.repos_files:
      self._bump_item('repos_size', c_rev.file_size)
      self.repos_files[c_rev.fname] = None

    self.data['repos_file_count'] = len(self.repos_files)

  def set_svn_rev_count(self, count):
    """Record COUNT as the number of Subversion revisions."""
    self.data['svn_rev_count'] = count

  def svn_rev_count(self):
    """Return the recorded Subversion revision count (may be None)."""
    return self.data['svn_rev_count']

  def archive(self):
    """Write self.data to self.filename in marshal format."""
    # Marshal output is binary, so the file must not be opened in text
    # mode, and it is closed explicitly so the data is flushed.
    f = open(self.filename, 'wb')
    try:
      f.write(marshal.dumps(self.data))
    finally:
      f.close()

  def unarchive(self):
    """Replace self.data with the contents of self.filename."""
    f = open(self.filename, 'rb')
    try:
      self.data = marshal.loads(f.read())
    finally:
      f.close()

  def __str__(self):
    """Return the multi-line statistics summary."""
    svn_revs_str = ""
    if self.data['svn_rev_count'] is not None:
      svn_revs_str = ('Total SVN Commits: %10s\n'
                      % self.data['svn_rev_count'])

    return ('\n'
            'cvs2svn Statistics:\n'
            '------------------\n'
            'Total CVS Files: %10i\n'
            'Total CVS Revisions: %10i\n'
            'Total Unique Tags: %10i\n'
            'Total Unique Branches: %10i\n'
            'CVS Repos Size in KB: %10i\n'
            '%s'
            'First Revision Date: %s\n'
            'Last Revision Date: %s\n'
            '------------------'
            % (self.data['repos_file_count'],
               self.data['cvs_revs_count'],
               len(self.data['tags']),
               len(self.data['branches']),
               # Floor division keeps the integer result of the old '/'.
               (self.data['repos_size'] // 1024),
               svn_revs_str,
               time.ctime(self.data['first_rev_date']),
               time.ctime(self.data['last_rev_date']),
               ))

  def timings(self):
    """Return a report of the per-pass and total durations."""
    passes = list(self.data['pass_timings'].keys())
    passes.sort()
    report = 'Timings:\n------------------\n'

    def desc(val):
      if val == 1: return "second"
      return "seconds"

    for pass_num in passes:
      duration = int(self.data['pass_timings'][pass_num])
      report = report + ('pass %d:%6d %s\n'
                         % (pass_num, duration, desc(duration)))

    total = int(self.data['end_time'] - self.data['start_time'])
    report = report + ('total: %6d %s' % (total, desc(total)))
    return report
class LastSymbolicNameDatabase:
  """Passing every CVSRevision in s-revs to this class will result in
  a Database whose key is the last CVS Revision a symbolic name was
  seen in, and whose value is a list of all symbolic names that were
  last seen in that revision."""

  def __init__(self, mode):
    """Open the backing Database in MODE and start with no symbols."""
    self.symbols = {}
    self.symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), mode)
    Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)

  def log_revision(self, c_rev):
    """Note C_REV as the latest revision for each symbol it carries.

    Once we've gone through all the revs, symbols.keys() will be a
    list of all tags and branches, and their corresponding values will
    be a key into the last CVS revision that they were used in."""
    # Gather last CVS Revision for symbolic name info and tag info
    for tag in c_rev.tags:
      self.symbols[tag] = c_rev.unique_key()
    # Compare the op by value: identity of equal strings is an
    # implementation detail and must not decide the outcome.
    if c_rev.op != OP_DELETE:
      for branch in c_rev.branches:
        self.symbols[branch] = c_rev.unique_key()

  def create_database(self):
    """Write the inversion of self.symbols to the database.

    The result maps each CVS rev unique_key to the list of symbols
    that close in that rev."""
    for sym, rev_unique_key in self.symbols.items():
      ary = self.symbol_revs_db.get(rev_unique_key, [])
      ary.append(sym)
      self.symbol_revs_db[rev_unique_key] = ary
class CVSRevisionDatabase:
  """Persistent store of CVSRevision objects, looked up by the string
  each one returns from unique_key()."""

  def __init__(self, mode):
    """Open the backing database in MODE (same meaning as the MODE
    argument to Database or anydbm.open())."""
    self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
    Cleanup().register(temp(CVS_REVS_DB), pass8)

  def log_revision(self, c_rev):
    """Store the string form of the CVSRevision C_REV under its key."""
    serialized = str(c_rev)
    key = c_rev.unique_key()
    self.cvs_revs_db[key] = serialized

  def get_revision(self, unique_key):
    """Rebuild and return the CVSRevision saved under UNIQUE_KEY."""
    ctx = Ctx()
    return CVSRevision(ctx, self.cvs_revs_db[unique_key])
def TagsDatabase(mode):
  """Open, in MODE, the database recording which symbolic names are tags.

  Each key is a tag name.
  The value has no meaning, and should be set to None."""
  tags_db = SDatabase(temp(TAGS_DB), mode)
  cleanup = Cleanup()
  cleanup.register(temp(TAGS_DB), pass8)
  return tags_db
class Project:
  """A project within a CVS repository."""

  def __init__(self, project_cvs_repos_path,
               trunk_path, branches_path, tags_path):
    """Create a new Project record.

    PROJECT_CVS_REPOS_PATH is the main CVS directory for this project
    (within the filesystem).  TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH
    are the full, normalized directory names in svn for the
    corresponding part of the repository.

    Raise FatalError if PROJECT_CVS_REPOS_PATH does not start with the
    repository path held by Ctx()."""

    self.project_cvs_repos_path = project_cvs_repos_path
    prefix = Ctx().cvs_repository.cvs_repos_path
    if not self.project_cvs_repos_path.startswith(prefix):
      raise FatalError("Project '%s' must start with '%s'"
                       % (self.project_cvs_repos_path, prefix,))
    # The project's main directory as a cvs_path:
    self.project_cvs_path = self.project_cvs_repos_path[len(prefix):]
    if self.project_cvs_path.startswith(os.sep):
      self.project_cvs_path = self.project_cvs_path[1:]
    self.trunk_path = trunk_path
    self.branches_path = branches_path
    self.tags_path = tags_path
    verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)

  def is_source(self, svn_path):
    """Return True iff SVN_PATH is a legitimate source for this project.

    Legitimate paths are self.trunk_path or any directory directly
    under self.branches_path."""

    if svn_path == self.trunk_path:
      return True

    (head, tail,) = _path_split(svn_path)
    if head == self.branches_path:
      return True

    return False

  def is_unremovable(self, svn_path):
    """Return True iff the specified path must not be removed."""

    return svn_path in [self.trunk_path, self.branches_path, self.tags_path]

  def get_branch_path(self, branch_name):
    """Return the svnpath for the branch named BRANCH_NAME."""

    return _path_join(self.branches_path, _clean_symbolic_name(branch_name))

  def get_tag_path(self, tag_name):
    """Return the svnpath for the tag named TAG_NAME."""

    return _path_join(self.tags_path, _clean_symbolic_name(tag_name))

  def _relative_name(self, cvs_path):
    """Convert CVS_PATH into a name relative to this project's root directory.

    CVS_PATH has to begin (textually) with self.project_cvs_path.
    Remove prefix and optional '/'.  If CVS_PATH is exactly the project
    path, the result is the empty string.

    Raise FatalError if CVS_PATH does not start with the project path."""

    if not cvs_path.startswith(self.project_cvs_path):
      raise FatalError(
          "_relative_name: '%s' is not a sub-path of '%s'"
          % (cvs_path, self.project_cvs_path,))
    l = len(self.project_cvs_path)
    # Slice instead of indexing: when nothing follows the prefix,
    # cvs_path[l] would raise IndexError.
    if cvs_path[l:l + 1] == os.sep:
      l += 1
    return cvs_path[l:]

  def make_trunk_path(self, cvs_path):
    """Return the trunk path for CVS_PATH.

    Return the svn path for this file on trunk."""

    return _path_join(self.trunk_path, self._relative_name(cvs_path))

  def make_branch_path(self, branch_name, cvs_path):
    """Return the svn path for CVS_PATH on branch BRANCH_NAME."""

    return _path_join(self.get_branch_path(branch_name),
                      self._relative_name(cvs_path))
class CVSRevision:
  """One revision of one RCS file, convertible to and from a single
  line of a revs file."""

  def __init__(self, ctx, *args):
    """Initialize a new CVSRevision with Ctx object CTX, and ARGS.

    If CTX is None, the following members and methods of the
    instantiated CVSRevision class object will be unavailable (or
    simply will not work correctly, if at all):
       cvs_path
       svn_path
       is_default_branch_revision()

    (Note that this class treats CTX as const, because the caller
    likely passed in a Borg instance of a Ctx.  The reason this class
    takes CTX as a parameter, instead of just instantiating a Ctx
    itself, is that this class should be usable outside cvs2svn.)

    If there is one argument in ARGS, it is a string, in the format of
    a line from a revs file.  Do *not* include a trailing newline.

    If there are multiple ARGS, there must be 17 of them,
    comprising a parsed revs line:
       timestamp       -->  (int) date stamp for this cvs revision
       digest          -->  (string) digest of author+logmsg
       prev_timestamp  -->  (int) date stamp for the previous cvs revision
       next_timestamp  -->  (int) date stamp for the next cvs revision
       op              -->  (char) OP_ADD, OP_CHANGE, or OP_DELETE
       prev_rev        -->  (string or None) previous CVS rev, e.g., "1.2"
       rev             -->  (string) this CVS rev, e.g., "1.3"
       next_rev        -->  (string or None) next CVS rev, e.g., "1.4"
       file_in_attic   -->  (char or None) true if RCS file is in Attic
       file_executable -->  (char or None) true if RCS file has exec bit set.
       file_size       -->  (int) size of the RCS file
       deltatext_code  -->  (char) 'N' if non-empty deltatext, else 'E'
       fname           -->  (string) relative path of file in CVS repos
       mode            -->  (string or None) "kkv", "kb", etc.
       branch_name     -->  (string or None) branch on which this rev occurred
       tags            -->  (list of strings) all tags on this revision
       branches        -->  (list of strings) all branches rooted in this rev

    The two forms of initialization are equivalent.

    Raise TypeError for any other number of arguments.

    WARNING: Due to the resync process in pass2, prev_timestamp or
    next_timestamp may be incorrect in the c-revs or s-revs files."""

    self._ctx = ctx
    if len(args) == 17:
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.fname,
       self.mode, self.branch_name, self.tags, self.branches) = args
    elif len(args) == 1:
      # The first 15 fields are space-separated; the 16th holds the
      # tags, the branch count, the branches and finally the file name
      # (which may itself contain spaces).
      data = args[0].split(' ', 15)
      (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
       self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
       self.file_executable, self.file_size, self.deltatext_code,
       self.mode, self.branch_name, numtags, remainder) = data
      # Patch up data items which are not simple strings
      self.timestamp = int(self.timestamp, 16)
      if self.prev_timestamp == "*":
        self.prev_timestamp = 0
      else:
        self.prev_timestamp = int(self.prev_timestamp)
      if self.next_timestamp == "*":
        self.next_timestamp = 0
      else:
        self.next_timestamp = int(self.next_timestamp)
      if self.prev_rev == "*":
        self.prev_rev = None
      if self.next_rev == "*":
        self.next_rev = None
      if self.file_in_attic == "*":
        self.file_in_attic = None
      if self.file_executable == "*":
        self.file_executable = None
      self.file_size = int(self.file_size)
      if self.mode == "*":
        self.mode = None
      if self.branch_name == "*":
        self.branch_name = None
      numtags = int(numtags)
      tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
      self.tags = tags_and_numbranches_and_remainder[:-2]
      numbranches = int(tags_and_numbranches_and_remainder[-2])
      remainder = tags_and_numbranches_and_remainder[-1]
      branches_and_fname = remainder.split(' ', numbranches)
      self.branches = branches_and_fname[:-1]
      self.fname = branches_and_fname[-1]
    else:
      raise TypeError('CVSRevision() takes 2 or 18 arguments (%d given)'
                      % (len(args) + 1))
    if ctx is not None:
      self.cvs_path = ctx.cvs_repository.get_cvs_path(self.fname)
      if self.branch_name:
        self.svn_path = ctx.project.make_branch_path(self.branch_name,
                                                     self.cvs_path)
      else:
        self.svn_path = ctx.project.make_trunk_path(self.cvs_path)

  def unique_key(self, revnum="0"):
    """Return the string that identifies one revision of this file.

    The 'primary key' of a CVS Revision is the revision number + the
    filename.  To provide a unique key (say, for a dict), we just glom
    them together in a string.  By passing in self.prev_rev or
    self.next_rev, you can get the unique key for their respective
    CVSRevisions; if that REVNUM is None, return None."""
    # "0" is the 'argument omitted' marker.  Compare it by value:
    # identity of equal strings is an implementation detail.
    if revnum == "0":
      revnum = self.rev
    elif revnum is None:
      return None
    return revnum + "/" + self.fname

  def __str__(self):
    """Return this revision as a revs-file line (no trailing newline)."""
    return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
            % (self.timestamp, self.digest, self.prev_timestamp or "*",
               self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
               self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
               (self.file_executable or "*"),
               self.file_size,
               self.deltatext_code, (self.mode or "*"),
               (self.branch_name or "*"),
               len(self.tags), self.tags and " " or "", " ".join(self.tags),
               len(self.branches), self.branches and " " or "",
               " ".join(self.branches),
               self.fname, ))

  def opens_symbolic_name(self, name):
    """Return 1 if this CVSRevision is the opening CVSRevision for
    NAME (for this RCS file), else 0."""
    if name in self.tags:
      return 1
    if name in self.branches:
      # If this c_rev opens a branch and our op is OP_DELETE, then
      # that means that the file that this c_rev belongs to was
      # created on the branch, so for all intents and purposes, this
      # c_rev is *technically* not an opening.  See Issue #62 for more
      # information.
      if self.op != OP_DELETE:
        return 1
    return 0

  def is_default_branch_revision(self):
    """Return 1 if SELF.rev of SELF.cvs_path is a default branch
    revision according to DEFAULT_BRANCHES_DB (see the conditions
    documented there), else return None."""
    val = self._ctx._default_branches_db.get(self.cvs_path, None)
    if val is not None:
      val_last_dot = val.rindex(".")
      our_last_dot = self.rev.rindex(".")
      default_branch = val[:val_last_dot]
      our_branch = self.rev[:our_last_dot]
      default_rev_component = int(val[val_last_dot + 1:])
      our_rev_component = int(self.rev[our_last_dot + 1:])
      if (default_branch == our_branch
          and our_rev_component <= default_rev_component):
        return 1
    # else
    return None

  def rcs_path(self):
    """Returns the actual filesystem path to the RCS file of this
    CVSRevision."""
    if self.file_in_attic is None:
      return self.fname
    else:
      basepath, filename = os.path.split(self.fname)
      return os.path.join(basepath, 'Attic', filename)

  def filename(self):
    "Return the last path component of self.fname, minus the ',v'"
    return os.path.split(self.fname)[-1][:-2]
class SymbolDatabase:
  """This database records information on all symbols in the RCS
  files.  It is created in pass 1 and it is used in pass 2."""

  def __init__(self):
    # A hash that maps tag names to commit counts
    self.tags = { }
    # A hash that maps branch names to lists of the format
    # [ create_count, commit_count, blockers ], where blockers
    # is a hash that lists the symbols that depend on the
    # branch.  The blockers hash is used as a set, so the
    # values are not used.
    self.branches = { }

  def register_tag_creation(self, name):
    """Register the creation of the tag NAME."""
    self.tags[name] = self.tags.get(name, 0) + 1

  def _branch(self, name):
    """Helper function to get a branch node that will create and
    initialize the node if it does not exist."""
    if name not in self.branches:
      self.branches[name] = [ 0, 0, { } ]
    return self.branches[name]

  def register_branch_creation(self, name):
    """Register the creation of the branch NAME."""
    self._branch(name)[0] += 1

  def register_branch_commit(self, name):
    """Register a commit on the branch NAME."""
    self._branch(name)[1] += 1

  def register_branch_blocker(self, name, blocker):
    """Register BLOCKER as a blocker on the branch NAME."""
    self._branch(name)[2][blocker] = None

  def branch_has_commit(self, name):
    """Return non-zero if NAME has commits.  Returns a false value if
    NAME is not a branch or if it has no commits."""
    return name in self.branches and self.branches[name][1]

  def find_excluded_symbols(self, regexp_list):
    """Returns a hash of all symbols that match the regexps in
    REGEXP_LIST.  The hash is used as a set so the values are
    not used."""
    excludes = { }
    for tag in self.tags:
      if match_regexp_list(regexp_list, tag):
        excludes[tag] = None
    for branch in self.branches:
      if match_regexp_list(regexp_list, branch):
        excludes[branch] = None
    return excludes

  def find_branch_exclude_blockers(self, branch, excludes):
    """Find all blockers of BRANCH, excluding the ones in the hash
    EXCLUDES.  The result is empty unless BRANCH itself is in
    EXCLUDES."""
    blockers = { }
    if branch in excludes:
      for blocker in self.branches[branch][2]:
        if blocker not in excludes:
          blockers[blocker] = None
    return blockers

  def find_blocked_excludes(self, excludes):
    """Find all branches in EXCLUDES that have blocking symbols that
    are not themselves excluded.  Return a hash that maps branch names
    to a hash of blockers.  The hash of blockers is used as a set so
    the values are not used."""
    blocked_branches = { }
    for branch in self.branches:
      blockers = self.find_branch_exclude_blockers(branch, excludes)
      if blockers:
        blocked_branches[branch] = blockers
    return blocked_branches

  def find_mismatches(self, excludes=None):
    """Find all symbols that are defined as both tags and branches,
    excluding the ones in EXCLUDES.  Returns a list of 4-tuples with
    the symbol name, tag count, branch count and commit count."""
    if excludes is None:
      excludes = { }
    mismatches = [ ]
    for branch in self.branches:
      if branch not in excludes and branch in self.tags:
        mismatches.append((branch,                    # name
                           self.tags[branch],         # tag count
                           self.branches[branch][0],  # branch count
                           self.branches[branch][1])) # commit count
    return mismatches

  def read(self):
    """Read the symbol database from files."""
    # Each tags line is "<tag> <count>".
    f = open(temp(TAGS_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        tag, count = line.split()
        self.tags[tag] = int(count)
    finally:
      f.close()

    # Each branches line is "<branch> <creates> <commits> [blocker...]".
    f = open(temp(BRANCHES_LIST))
    try:
      while 1:
        line = f.readline()
        if not line:
          break
        words = line.split()
        self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
        for blocker in words[3:]:
          self.branches[words[0]][2][blocker] = None
    finally:
      f.close()

  def write(self):
    """Store the symbol database to files."""
    f = open(temp(TAGS_LIST), "w")
    try:
      Cleanup().register(temp(TAGS_LIST), pass2)
      for tag, count in self.tags.items():
        f.write("%s %d\n" % (tag, count))
    finally:
      # Close explicitly so the data is on disk before anyone reads it.
      f.close()

    f = open(temp(BRANCHES_LIST), "w")
    try:
      Cleanup().register(temp(BRANCHES_LIST), pass2)
      for branch, info in self.branches.items():
        f.write("%s %d %d" % (branch, info[0], info[1]))
        if info[2]:
          f.write(" ")
          f.write(" ".join(info[2].keys()))
        f.write("\n")
    finally:
      f.close()
1422 class CollectData(cvs2svn_rcsparse.Sink):
def __init__(self):
  """Open the output files and databases and reset the counters.

  The revs and resync files are opened for writing, and the default
  branches and metadata databases are created with DB_OPEN_NEW.  Each
  of these is registered with Cleanup() together with a pass
  identifier.
  NOTE(review): presumably that identifier names the pass after which
  the file may be deleted -- confirm against Cleanup.register()."""
  self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
  self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
  self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                       DB_OPEN_NEW)
  Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
  self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
  Cleanup().register(temp(METADATA_DB), pass8)
  # Error messages collected while parsing (define_tag appends here).
  self.fatal_errors = []
  self.num_files = 0
  self.symbol_db = SymbolDatabase()

  # 1 if we've collected data for at least one file, None otherwise.
  self.found_valid_file = None

  # See set_fname() for initializations of other variables.
def set_fname(self, canonical_name, filename):
  """Reset all per-file state in preparation for parsing FILENAME.

  FILENAME is the absolute filesystem path of the RCS file, while
  CANONICAL_NAME is that same path with any 'Attic' component taken
  out (the two are equal when the file is not in the Attic)."""
  self.fname = canonical_name

  # File metadata is worked out here, once per file, so that it need
  # not be recomputed later for every single CVS *revision*.
  self.cvs_path = Ctx().cvs_repository.get_cvs_path(self.fname)

  # Differing names mean the 'Attic' component was stripped to form
  # the canonical name.
  if canonical_name != filename:
    self.file_in_attic = 1
  else:
    self.file_in_attic = None

  file_stat = os.stat(filename)
  # Size of the file, in bytes.
  self.file_size = file_stat[stat.ST_SIZE]

  # 1 when the owner-executable bit is set, None otherwise.
  if file_stat[stat.ST_MODE] & stat.S_IXUSR:
    self.file_executable = 1
  else:
    self.file_executable = None

  # revision -> [timestamp, author, old-timestamp]
  self.rev_data = { }

  # revision number -> number of the previous revision on the same
  # line of development.
  #
  # The first revision R on a branch has as its 'previous' the
  # revision from which R sprouted.
  #
  # This cannot be derived arithmetically (because of cvsadmin -o),
  # hence the explicit map.
  #
  # A revision with no predecessor maps to None.
  self.prev_rev = { }

  # The same links as self.prev_rev, pointing the other way: looking
  # up a revision yields the next revision number.
  #
  # In contrast to self.prev_rev, a revision with no successor simply
  # has no entry.
  self.next_rev = { }

  # revision -> state.  When set_revision_info runs, every revision
  # of the file is known, and the state of a revision's prev_rev is
  # what tells an add apart from a change; without it the two are
  # indistinguishable, and so the add/change/delete op could not be
  # determined.
  self.rev_state = { }

  # branch number (e.g. '1.7.2') -> branch name (e.g.
  # 'Release_1_0_dev').
  self.branch_names = { }

  # RCS flags (used for keyword expansion).
  self.mode = None

  # revision number (e.g. '1.7') -> list of the names of the branches
  # sprouting from it, e.g. ['Release_1_0_dev',
  # 'experimental_driver', ...].
  self.branchlist = { }

  # Same shape as self.branchlist, but each list holds the tag names
  # attached to the key revision.
  self.taglist = { }

  # RCS branch number of the default branch, if there is one.
  # rcsparse says "principal branch" where CVS and RCS say "default
  # branch"; we follow CVS and RCS, although the rcsparse setter is
  # still named 'set_principal_branch'.
  self.default_branch = None

  # When the RCS file no longer has a default branch but does have
  # vendor revisions, we guess that those revisions *were* the head
  # of the default branch until 1.2 was committed, at which point
  # trunk became the default.  This is the commit date of 1.2.
  self.first_non_vendor_revision_date = None

  # Every symbol defined so far for the current file, used to catch
  # a symbol being defined more than once (which happens easily with
  # --symbol-transform).
  self.defined_symbols = { }
def set_principal_branch(self, branch):
  """Record BRANCH as the current file's default branch.

  rcsparse calls this the "principal branch"; CVS and RCS call it the
  "default branch"."""
  self.default_branch = branch
def set_expansion(self, mode):
  """Record MODE, the RCS flags (used for keyword expansion) of the
  current file."""
  self.mode = mode
1543 def set_branch_name(self, branch_number, name):
1544 """Record that BRANCH_NUMBER is the branch number for branch NAME,
1545 and that NAME sprouts from BRANCH_NUMBER .
1546 BRANCH_NUMBER is an RCS branch number with an odd number of components,
1547 for example '1.7.2' (never '1.7.0.2')."""
1548 if not self.branch_names.has_key(branch_number):
1549 self.branch_names[branch_number] = name
1550 # The branchlist is keyed on the revision number from which the
1551 # branch sprouts, so strip off the odd final component.
1552 sprout_rev = branch_number[:branch_number.rfind(".")]
1553 self.branchlist.setdefault(sprout_rev, []).append(name)
1554 self.symbol_db.register_branch_creation(name)
1555 else:
1556 sys.stderr.write("%s: in '%s':\n"
1557 " branch '%s' already has name '%s',\n"
1558 " cannot also have name '%s', ignoring the latter\n"
1559 % (warning_prefix, self.fname, branch_number,
1560 self.branch_names[branch_number], name))
def rev_to_branch_name(self, revision):
  """Look up the name of the branch that REVISION is on.

  REVISION is a non-branch revision number with an even number of
  components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
  As a convenience to callers, a trunk revision such as '1.2' is also
  accepted, and the answer for it is None."""
  if not trunk_rev.match(revision):
    branch_number = revision[:revision.rindex(".")]
    return self.branch_names.get(branch_number)
  return None
def add_cvs_branch(self, revision, branch_name):
  """Register BRANCH_NAME under the RCS branch number implied by
  REVISION.

  REVISION is a CVS branch number: an even number of components with
  '0' as the second-to-last.  Given '1.7.0.2', for instance,
  BRANCH_NAME is recorded as sprouting from 1.7 with branch number
  1.7.2."""
  tail_start = revision.rfind(".")
  # Everything before the final component, e.g. '1.7.0'.
  without_tail = revision[:tail_start]
  # Drop the '0' component as well, e.g. '1.7'.
  sprout_rev = without_tail[:without_tail.rfind(".")]
  self.set_branch_name(sprout_rev + revision[tail_start:], branch_name)
def define_tag(self, name, revision):
  """Associate the symbolic NAME with REVISION, in both directions.

  REVISION is a revision number exactly as it appears in the RCS
  file's header, for example: '1.7', '1.7.0.2', or '1.1.1' or
  '1.1.1.1'.  Its form decides whether NAME is recorded as a CVS
  branch, a vendor branch or a plain tag.  The symbol transforms from
  Ctx() are applied to NAME first, and a NAME that was already defined
  for this file is reported as a fatal error."""
  for (pattern, replacement) in Ctx().symbol_transforms:
    transformed = pattern.sub(replacement, name)
    if transformed == name:
      continue
    Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
                % (name, transformed))
    name = transformed

  if self.defined_symbols.has_key(name):
    err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
          % (error_prefix, name, self.fname)
    sys.stderr.write(err + "\n")
    self.fatal_errors.append(err)
  self.defined_symbols[name] = None

  if branch_tag.match(revision):
    self.add_cvs_branch(revision, name)
    return
  if vendor_tag.match(revision):
    self.set_branch_name(revision, name)
    return
  # Neither kind of branch number, so NAME is an ordinary tag.
  tags_here = self.taglist.setdefault(revision, [])
  tags_here.append(name)
  self.symbol_db.register_tag_creation(name)
def define_revision(self, revision, timestamp, author, state,
                    branches, next):
  """Record the data of one revision of the current file.

  REVISION is the revision number, TIMESTAMP its date (stored as
  int(TIMESTAMP)), AUTHOR its author and STATE its RCS state.
  BRANCHES holds the revision numbers that get REVISION as their
  previous revision, and NEXT is the RCS 'next' revision number (see
  the comments below for what that means).

  Besides filling in the per-revision maps, this updates the default
  branches database where appropriate, gives a name to a branch not
  named before, and registers a commit on the branch of any non-trunk
  revision."""

  # Record the state of our revision for later calculations
  self.rev_state[revision] = state

  # store the rev_data as a list in case we have to jigger the timestamp
  self.rev_data[revision] = [int(timestamp), author, None]

  # When on trunk, the RCS 'next' revision number points to what
  # humans might consider to be the 'previous' revision number.  For
  # example, 1.3's RCS 'next' is 1.2.
  #
  # However, on a branch, the RCS 'next' revision number really does
  # point to what humans would consider to be the 'next' revision
  # number.  For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
  #
  # In other words, in RCS, 'next' always means "where to find the next
  # deltatext that you need this revision to retrieve".
  #
  # That said, we don't *want* RCS's behavior here, so we determine
  # whether we're on trunk or a branch and set self.prev_rev
  # accordingly.
  #
  # One last thing.  Note that if REVISION is a branch revision,
  # instead of mapping REVISION to NEXT, we instead map NEXT to
  # REVISION.  Since we loop over all revisions in the file before
  # doing anything with the data we gather here, this 'reverse
  # assignment' effectively does the following:
  #
  # 1. Gives us no 'prev' value for REVISION (in this
  # iteration... it may have been set in a previous iteration)
  #
  # 2. Sets the 'prev' value for the revision with number NEXT to
  # REVISION.  So when we come around to the branch revision whose
  # revision value is NEXT, its 'prev' and 'prev_rev' are already
  # set.
  if trunk_rev.match(revision):
    self.prev_rev[revision] = next
    self.next_rev[next] = revision
  elif next:
    self.prev_rev[next] = revision
    self.next_rev[revision] = next

  for b in branches:
    self.prev_rev[b] = revision

  # Ratchet up the highest vendor head revision, if necessary.
  if self.default_branch:
    default_branch_root = self.default_branch + "."
    # The revision must begin with the default branch number and have
    # exactly one more component than it.
    if ((revision.find(default_branch_root) == 0)
        and (default_branch_root.count('.') == revision.count('.'))):
      # This revision is on the default branch, so record that it is
      # the new highest default branch head revision.
      self.default_branches_db[self.cvs_path] = revision
  else:
    # No default branch, so make an educated guess.
    if revision == '1.2':
      # This is probably the time when the file stopped having a
      # default branch, so make a note of it.
      self.first_non_vendor_revision_date = timestamp
    else:
      m = vendor_revision.match(revision)
      if m and ((not self.first_non_vendor_revision_date)
                or (timestamp < self.first_non_vendor_revision_date)):
        # We're looking at a vendor revision, and it wasn't
        # committed after this file lost its default branch, so bump
        # the maximum trunk vendor revision in the permanent record.
        self.default_branches_db[self.cvs_path] = revision

  if not trunk_rev.match(revision):
    # Check for unlabeled branches, record them.  We tried to collect
    # all branch names when we parsed the symbolic name header
    # earlier, of course, but that didn't catch unlabeled branches.
    # If a branch is unlabeled, this is our first encounter with it,
    # so we have to record its data now.
    branch_number = revision[:revision.rindex(".")]
    if not self.branch_names.has_key(branch_number):
      branch_name = "unlabeled-" + branch_number
      self.set_branch_name(branch_number, branch_name)

    # Register the commit on this non-trunk branch
    branch_name = self.branch_names[branch_number]
    self.symbol_db.register_branch_commit(branch_name)
def tree_completed(self):
  """Check the parsed revision tree for consistency, resyncing times.

  The conversion algorithm depends on revision timestamps increasing
  monotonically along each line of history: rev 1.34 has to occur in
  time before rev 1.35, because inserting 1.35 first (which is what a
  time-sorted stream would do) and 1.34 afterwards cannot work.

  Every 'previous revision' link recorded in self.prev_rev is visited
  and the predecessor's timestamp is required to be strictly earlier
  than the successor's.  A predecessor that is not earlier is moved to
  one second before its successor, and the adjustment is carried
  further back along the chain for as long as the ordering is still
  violated.  After any resync the scan restarts from scratch; the
  method returns once a complete scan needed no adjustment."""
  while 1:
    resynced = 0
    for rev, parent in self.prev_rev.items():
      if not parent:
        # The initial revision has no predecessor to compare against.
        continue

      rev_time = self.rev_data[rev][0]
      parent_time = self.rev_data[parent][0]
      if parent_time < rev_time:
        # Already in the right order.
        continue

      # The predecessor is stamped no earlier than its successor, so
      # shove the predecessor back in time (together with any of its
      # own predecessors that then need to shift as well).
      #
      # We sync backwards rather than forwards because a CVS revision
      # has exactly one previous revision, whereas it can *be* the
      # previous revision of many others (e.g. a revision from which
      # several branches sprout).  This matters for the secondary
      # synchronization in pass 2: making sure a revision is not
      # resynced to before its previous revision is easy, but making
      # sure revision R is not resynced to *after* any revision that
      # has R as its previous revision would be non-trivial.
      while parent_time >= rev_time:
        new_time = rev_time - 1
        self.rev_data[parent][0] = new_time       # new timestamp
        self.rev_data[parent][2] = parent_time    # old timestamp
        delta = new_time - parent_time
        Log().write(LOG_VERBOSE,
                    "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds"
                    % (self.cvs_path, parent, time.ctime(parent_time),
                       delta))
        if delta > COMMIT_THRESHOLD or delta < (COMMIT_THRESHOLD * -1):
          warning = "%s: Significant timestamp change for '%s' (%d seconds)"
          Log().write(LOG_WARN,
                      warning % (warning_prefix, self.cvs_path, delta))

        # Step one link back: the revision just moved becomes the
        # successor, now carrying its new timestamp.
        rev = parent
        parent = self.prev_rev[rev]
        if not parent:
          break
        rev_time = new_time
        parent_time = self.rev_data[parent][0]

      # Timestamps changed, so abandon this scan and start a new one.
      resynced = 1
      break

    if not resynced:
      # A complete scan found nothing to fix.
      return
def set_revision_info(self, revision, log, text):
  """Write out the CVSRevision record for REVISION of the current file.

  LOG is the revision's log message.  TEXT is its deltatext; only
  whether it is empty or not is recorded.

  Besides appending the CVSRevision to self.revs, this logs a resync
  line if the revision's timestamp was changed earlier, may retract
  the file's entry in the default branches db, and stores the
  (author, log) pair in the metadata db under the commit digest."""
  timestamp, author, old_ts = self.rev_data[revision]
  digest = sha.new(log + '\0' + author).hexdigest()
  if old_ts:
    # The timestamp of this revision was changed.  Log that, so other
    # files' revisions that share this time and log message can be
    # resynchronized later.
    self.resync.write('%08lx %s %08lx\n' % (old_ts, digest, timestamp))

  # "...Give back one kadam to honor the Hebrew God whose Ark this is."
  #       -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
  #
  # If revision 1.1 looks like the product of 'cvs add' rather than of
  # 'cvs import', the file probably never had a default branch, so
  # retroactively drop its record from the default branches db.  The
  # test: on imports CVS gives 1.1 the log message
  # "Initial revision\n", with no period.
  if revision == '1.1' and log != 'Initial revision\n':
    try:
      del self.default_branches_db[self.cvs_path]
    except KeyError:
      pass

  # Timestamps of the neighbouring revisions (0 where there is none).
  prev_rev = self.prev_rev[revision]
  prev_timestamp = self.rev_data.get(prev_rev, [0, None, None])[0]
  next_rev = self.next_rev.get(revision)
  next_timestamp = self.rev_data.get(next_rev, [0, None, None])[0]

  # Classify the revision:
  #   - RCS state 'dead'                               -> delete
  #   - no previous revision, or previous one is dead  -> add
  #   - anything else                                  -> change
  state = self.rev_state[revision]
  if state == 'dead':
    op = OP_DELETE
  elif prev_rev is None or self.rev_state[prev_rev] == 'dead':
    op = OP_ADD
  else:
    op = OP_CHANGE

  def same_line_of_development(rev1, rev2):
    """Return True if REV1 and REV2 are both on trunk or both on the
    same branch, else False.  If either is None, return False."""
    if rev1 is None or rev2 is None:
      return False
    if rev1.count('.') == 1 and rev2.count('.') == 1:
      return True
    return rev1[:rev1.rfind('.')] == rev2[:rev2.rfind('.')]

  # Issue #89: the tip of a branch can be alive while every earlier
  # revision on the branch is 'dead', even though the revision the
  # branch sprouts from is alive.  (A mirror image of the more common
  # case of a file added on a branch, where the first branch revision
  # is alive and the sprout point is dead.)  The first live revision
  # on such a branch has to be an OP_CHANGE rather than an OP_ADD,
  # since it does, however indirectly, change the sprout revision.
  #
  # Three or more dots means REVISION is not on trunk.
  if revision.count('.') >= 3 and state != 'dead':
    cur_num = revision
    while 1:
      prev_num = self.prev_rev.get(cur_num, None)
      if not prev_num:
        break
      if (self.rev_state[cur_num] == 'dead'
          and self.rev_state[prev_num] != 'dead'
          and not same_line_of_development(cur_num, prev_num)):
        op = OP_CHANGE
      cur_num = prev_num

  deltatext_code = DELTATEXT_EMPTY
  if text:
    deltatext_code = DELTATEXT_NONEMPTY

  c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
                      next_timestamp, op,
                      prev_rev, revision, next_rev,
                      self.file_in_attic, self.file_executable,
                      self.file_size,
                      deltatext_code, self.fname,
                      self.mode, self.rev_to_branch_name(revision),
                      self.taglist.get(revision, []),
                      self.branchlist.get(revision, []))
  self.revs.write(str(c_rev) + "\n")
  StatsKeeper().record_c_rev(c_rev)

  # Store the metadata only once per digest.
  if not self.metadata_db.has_key(digest):
    self.metadata_db[digest] = (author, log)
def parse_completed(self):
  """Finish the current file: register its symbols and count it.

  Each tag and branch recorded in self.taglist and self.branchlist is
  registered in the symbol database as a blocker of the branch its
  source revision lives on.  Symbols whose source revision has no
  branch name (rev_to_branch_name() returns None) are not registered.
  Finally the count of processed files is incremented."""
  sprouts = list(self.taglist.items()) + list(self.branchlist.items())
  for revision, symbols in sprouts:
    for symbol in symbols:
      name = self.rev_to_branch_name(revision)
      if name is None:
        continue
      self.symbol_db.register_branch_blocker(name, symbol)

  self.num_files += 1
def write_symbol_db(self):
  """Tell the symbol database to write itself out, by delegating to
  self.symbol_db.write()."""
  self.symbol_db.write()
class SymbolingsLogger:
  """Manage the file that contains lines for symbol openings and
  closings.

  The data is used later to determine the valid SVNRevision ranges
  from which a file can be copied when a branch or tag is created in
  Subversion.  That is done by finding the "Opening" and the "Closing"
  of each file copied onto a branch or tag.

  An "Opening" is the CVSRevision from which a given branch/tag
  sprouts on a path.

  The "Closing" for that branch/tag and path is the next CVSRevision
  on the same line of development as the opening.

  For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
  obviously sprouts from revision 1.2.  Therefore 1.2 is the opening
  for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
  'foo.c'.  There may be many revisions chronologically between 1.2
  and 1.3 (revisions on branches of 'foo.c', perhaps even on BEE
  itself), but 1.3 is the next revision *on the same line* as 1.2, and
  that is why it is the closing revision for the symbolic names of
  which 1.2 is the opening.

  The point of all this is to make branch and tag creation as
  efficient as possible by minimizing the number of copies and deletes
  per creation.  For example, revisions 1.2 and 1.3 of foo.c might
  correspond to revisions 17 and 30 in Subversion, which means that
  branch BEE is best copied from one of 17-30.  If another file,
  'bar.c', has opening and closing CVSRevisions for BEE corresponding
  to revisions 24 and 39 in Subversion, then the ideal source for the
  branch copy lies somewhere between 24 and 29, inclusive.
  """

  def __init__(self):
    """Open the symbolings file and the temporary closings file for
    writing, and register both files with Cleanup."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
    self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
    Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)

    # Maps each *source* cvs_path noted by
    # _note_default_branch_opening() to the list of (uncleaned)
    # symbolic names that the path has opened.
    self.open_paths_with_default_branches = { }

  def log_revision(self, c_rev, svn_revnum):
    """Log the openings found in C_REV and, if C_REV.next_rev is not
    None, the matching closings.

    An opening is written with SVN_REVNUM (unless C_REV is a delete);
    a closing is only queued in the closings file, because its revnum
    is not known until later."""
    for name in c_rev.tags + c_rev.branches:
      self._note_default_branch_opening(c_rev, name)
      if c_rev.op != OP_DELETE:
        self._log(name, svn_revnum,
                  c_rev.cvs_path, c_rev.branch_name, OPENING)

      if c_rev.next_rev is None:
        continue

      # The next revision closes this source revision.  Its svn_revnum
      # is not known yet, so remember it for processing in close().
      closing_key = c_rev.unique_key(c_rev.next_rev)
      self.closings.write('%s %s\n' % (name, closing_key))

  def _log(self, name, svn_revnum, cvs_path, branch_name, type):
    """Write one line to the symbolings file, recording that
    SVN_REVNUM of CVS_PATH on BRANCH_NAME is the opening or closing
    (TYPE) of the symbolic name NAME.

    A false BRANCH_NAME is written as '*'.  TYPE should be one of the
    global constants OPENING or CLOSING."""
    # The revnum is zero-padded to 8 digits, which keeps the field
    # width constant for revnums up to 99,999,999.
    fields = (name, svn_revnum, type, branch_name or '*', cvs_path)
    self.symbolings.write('%s %.8d %s %s %s\n' % fields)

  def close(self):
    """Convert the queued closings into symbolings lines, then close
    the symbolings file.

    For each line of the closings file, look up the svn_revnum and the
    CVSRevision of the closing revision and log a CLOSING line."""
    # Needed to turn a revision key back into its CVSRevision.
    cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)

    self.closings.close()
    for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
      name, rev_key = line.rstrip().split(" ", 1)
      svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)
      closing_rev = cvs_revs_db.get_revision(rev_key)
      self._log(name, svn_revnum,
                closing_rev.cvs_path, closing_rev.branch_name, CLOSING)

    self.symbolings.close()

  def _note_default_branch_opening(self, c_rev, symbolic_name):
    """Remember that C_REV.cvs_path has opened SYMBOLIC_NAME, so that
    log_default_branch_closing() can close it later.

    NOTE(review): the name suggests this is meant only for default
    branch revisions, but no such check is made here."""
    names = self.open_paths_with_default_branches.setdefault(
        c_rev.cvs_path, [])
    names.append(symbolic_name)

  def log_default_branch_closing(self, c_rev, svn_revnum):
    """If self.open_paths_with_default_branches contains
    C_REV.cvs_path, log every name stored for that path as a closing
    with SVN_REVNUM as the closing revision number, and then forget
    the path."""
    path = c_rev.cvs_path
    try:
      names = self.open_paths_with_default_branches[path]
    except KeyError:
      return

    for name in names:
      self._log(name, svn_revnum, path, None, CLOSING)

    # These openings are finished with.
    del self.open_paths_with_default_branches[path]
class PersistenceManager:
  """Store SVNCommits on disk and retrieve them again by revision.

  The PersistenceManager lets us store SVNCommits to disk and fetch
  them later using only their Subversion revision number as the key.
  It also returns the Subversion revision number for a given
  CVSRevision's unique key.

  Everything pertinent to an SVNCommit is kept in a series of on-disk
  databases, so SVNCommits can be retrieved on demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
  In 'new' mode, PersistenceManager initializes a new set of on-disk
  databases and is fully featured.
  In 'read' mode, PersistenceManager opens existing on-disk databases
  and the set_* methods are unavailable."""

  def __init__(self, mode):
    """Open the databases in MODE.  Raise RuntimeError if MODE is
    neither DB_OPEN_NEW nor DB_OPEN_READ."""
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
    Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
    self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
    Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
    self.svn_commit_names_dates = Database(temp(SVN_COMMIT_NAMES_DATES), mode)
    Cleanup().register(temp(SVN_COMMIT_NAMES_DATES), pass8)

    # These two are always opened read-only, whatever MODE is.
    self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
    self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)

    ###PERF kff Elsewhere there are comments about sucking the tags db
    ### into memory.  That seems like a good idea.
    if not Ctx().trunk_only:
      self.tags_db = TagsDatabase(DB_OPEN_READ)
    self.motivating_revnums = SDatabase(temp(MOTIVATING_REVNUMS), mode)
    Cleanup().register(temp(MOTIVATING_REVNUMS), pass8)

    # "branch_name" -> svn_revnum in which the branch was last filled.
    # CVSCommit._pre_commit uses this to avoid creating a fill
    # revision which would have nothing to do.
    self.last_filled = {}

  def _check_writable(self):
    """Raise RuntimeError if this PersistenceManager is read-only."""
    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager')

  def get_svn_revnum(self, cvs_rev_unique_key):
    """Return the Subversion revision number in which
    CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
    is no mapping for CVS_REV_UNIQUE_KEY."""
    revnum = self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM)
    return int(revnum)

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None.

    This method can throw SVNCommitInternalInconsistencyError.
    """
    svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
    c_rev_keys = self.svn2cvs_db.get(str(svn_revnum), None)
    if c_rev_keys is None:
      return None

    digest = None
    for key in c_rev_keys:
      c_rev = self.cvs_revisions.get_revision(key)
      svn_commit.add_revision(c_rev)
      if digest is not None:
        continue
      # The first CVSRevision supplies the author and log message of
      # the whole commit.
      digest = c_rev.digest
      author, log_msg = self.svn_commit_metadata[digest]
      svn_commit.set_author(author)
      svn_commit.set_log_msg(log_msg)

    # A trunk-only conversion needs nothing more.
    if Ctx().trunk_only:
      return svn_commit

    name, date = self._get_name_and_date(svn_revnum)
    if name:
      svn_commit.set_symbolic_name(name)
      svn_commit.set_date(date)
      if self.tags_db.has_key(name):
        svn_commit.is_tag = 1

    motivating_revnum = self.motivating_revnums.get(str(svn_revnum), None)
    if motivating_revnum:
      svn_commit.set_motivating_revnum(int(motivating_revnum))
      svn_commit.set_date(date)

    if name and len(svn_commit.cvs_revs):
      raise SVNCommit.SVNCommitInternalInconsistencyError(
          "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
          "symbolic name ('%s') to fill."
          % (_clean_symbolic_name(name),))

    return svn_commit

  def set_cvs_revs(self, svn_revnum, cvs_revs):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS.  Raise RuntimeError in read-only mode."""
    self._check_writable()
    keys = [c_rev.unique_key() for c_rev in cvs_revs]
    for key in keys:
      Log().write(LOG_VERBOSE, " ", key)
    self.svn2cvs_db[str(svn_revnum)] = keys
    for key in keys:
      self.cvs2svn_db[key] = svn_revnum

  def set_name_and_date(self, svn_revnum, name, date):
    """Associate symbolic name NAME and DATE with SVN_REVNUM, and note
    SVN_REVNUM as the revision in which NAME was last filled.

    NAME is allowed to be None.  Raise RuntimeError in read-only
    mode."""
    self._check_writable()
    self.svn_commit_names_dates[str(svn_revnum)] = (name, date)
    self.last_filled[name] = svn_revnum

  def _get_name_and_date(self, svn_revnum):
    """Return a tuple containing the symbolic name and date associated
    with SVN_REVNUM, or (None, None) if SVN_REVNUM has no such data
    associated with it."""
    nothing = (None, None)
    return self.svn_commit_names_dates.get(str(svn_revnum), nothing)

  def set_motivating_revnum(self, svn_revnum, motivating_revnum):
    """Store MOTIVATING_REVNUM as the value of SVN_REVNUM.  Raise
    RuntimeError in read-only mode."""
    self._check_writable()
    self.motivating_revnums[str(svn_revnum)] = str(motivating_revnum)
class CVSCommit:
  """A group of CVS Revisions that becomes one or more Subversion
  Commits.

  Once every CVS Revision of the group has been added, calling
  process_revisions generates the Subversion Commit (or Commits) for
  the set of CVS Revisions in the grouping."""

  def __init__(self, digest, author, log):
    """Create an empty grouping for the commit identified by DIGEST,
    with AUTHOR and log message LOG."""
    self.digest = digest
    self.author = author
    self.log = log

    # Symbolic names whose last source revision has already been seen
    # and for which the CVSRevisionAggregator has already generated a
    # fill SVNCommit.  See self.process_revisions().
    self.done_symbols = [ ]

    self.files = { }
    # Lists of CVSRevisions
    self.changes = [ ]
    self.deletes = [ ]

    # t_min starts above any incoming time T and t_max below any
    # incoming T, so the first T pulls t_min down to T and pushes
    # t_max up to T without special-casing; later times then ratchet
    # the two outward as appropriate.  (2 ** 32 == 1 << 32.)
    self.t_min = 2 ** 32
    self.t_max = 0

    # This will be set to the SVNCommit that occurs in self._commit.
    self.motivating_commit = None

    # All non-primary commits motivated by the main commit.  They are
    # gathered so their dates can be set to the date of the primary
    # commit.
    self.secondary_commits = [ ]

    # State for handling default branches.
    #
    # Here is a tempting, but ultimately nugatory, bit of logic, shared
    # so that the less attractive, but refreshingly non-nugatory,
    # logic which follows it may be appreciated:
    #
    # If some of the commits in this txn happened on a non-trunk
    # default branch, those files have to be copied into trunk
    # manually after being changed on the branch (because the RCS
    # "default branch" appears as head, i.e., trunk, in practice).  As
    # long as those copies don't overwrite any trunk paths that were
    # also changed in this commit, the copies could go into the same
    # revision, since they would not cover changes that appear nowhere
    # and nowhen else.  If some of the trunk destination paths *did*
    # change in this commit, though, copying the branch changes right
    # away would lose those trunk mods forever, so at least that copy
    # needs a revision of its own.  And for simplicity's sake, if a new
    # revision is created for even one file, all such copies are done
    # together in it.
    #
    # Doesn't that sound nice?
    #
    # Unfortunately, Subversion doesn't support copies with sources in
    # the current txn.  All copies must be based in committed
    # revisions.  Therefore the new revision described above is
    # generated unconditionally.
    #
    # This list receives a c_rev for each default branch commit that
    # will need to be copied to trunk (or deleted from trunk) in some
    # generated revision following the "regular" revision.
    self.default_branch_cvs_revisions = [ ]

  def __cmp__(self, other):
    """Order commits by t_max, breaking ties on t_min and then on
    digest."""
    return cmp((self.t_max, self.t_min, self.digest),
               (other.t_max, other.t_min, other.digest))

  def has_file(self, fname):
    """Return true if a revision of FNAME has been added to this
    commit."""
    return fname in self.files

  def revisions(self):
    """Return a new list of all CVSRevisions in this commit: the
    changes followed by the deletes."""
    return self.changes + self.deletes

  def opens_symbolic_name(self, name):
    """Return 1 if any CVSRevision in this commit reports that it
    opens the symbolic name NAME, else 0."""
    for c_rev in self.revisions():
      if c_rev.opens_symbolic_name(name):
        return 1
    return 0

  def add_revision(self, c_rev):
    """Add C_REV to this commit, widening the commit's time range to
    include C_REV's timestamp."""
    # ### ISSUE: It's possible, though unlikely, that the time range
    # of a commit gradually expands to be arbitrarily longer than
    # COMMIT_THRESHOLD.  That may not be a huge problem, and deciding
    # where to break the commit up would be a judgement call anyway.
    # For now, process_revisions() just logs a warning if it happens.
    self.t_min = min(self.t_min, c_rev.timestamp)
    self.t_max = max(self.t_max, c_rev.timestamp)

    if c_rev.op == OP_DELETE:
      bucket = self.deletes
    else:
      # OP_CHANGE or OP_ADD
      bucket = self.changes
    bucket.append(c_rev)

    self.files[c_rev.fname] = 1

  def _pre_commit(self):
    """Generate any SVNCommits that must exist before the main
    commit."""

    # Several c_revs in this commit may call for branch B to be
    # filled, but B must be filled only once.  Several branches may
    # also be committed on in this one commit.  Either way exactly one
    # fill is counted per branch, because a branch is filled only once
    # per CVSCommit.  This list tracks the branches already counted.
    accounted_for_sym_names = [ ]

    def fill_needed(c_rev, pm):
      """Return 1 if this is the first commit on a new branch (for
      this file) and the branch needs to be filled; else return 0
      (meaning that some other file's first commit on the branch has
      already done the fill).

      If C_REV.op is OP_ADD or OP_DELETE, return 1 only if the branch
      that this commit is on has no last filled revision.

      PM is a PersistenceManager to query.
      """
      # Equal '.' counts: c_rev is on the same line of development as
      # its predecessor, so no fill can be needed.
      if c_rev.rev.count('.') == c_rev.prev_rev.count('.'):
        return 0

      svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
      # When a file F is added on branch B (so F on trunk is in state
      # 'dead'), an SVNCommit to fill B is generated iff the branch has
      # never been filled before.  A change, on the other hand, needs
      # a fill whenever its source revision is newer than the last
      # fill of the branch.
      if c_rev.op in (OP_ADD, OP_DELETE):
        if pm.last_filled.get(c_rev.branch_name, None) is None:
          return 1
      elif c_rev.op == OP_CHANGE:
        if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
          return 1
      return 0

    for c_rev in self.changes + self.deletes:
      # A commit on a branch requires that the branch path being
      # committed to exists (in HEAD of the Subversion repository).
      # If it doesn't, the branch has to be filled first; after the
      # fill, the path exists.
      branch = c_rev.branch_name
      if not branch:
        continue
      if branch in accounted_for_sym_names or branch in self.done_symbols:
        continue
      if not fill_needed(c_rev, Ctx()._persistence_manager):
        continue

      svn_commit = SVNCommit("pre-commit symbolic name '%s'" % branch)
      svn_commit.set_symbolic_name(c_rev.branch_name)
      self.secondary_commits.append(svn_commit)
      accounted_for_sym_names.append(c_rev.branch_name)

  def _is_fabricated_delete(self, c_rev):
    """Return true if the deletion C_REV should be kept out of the
    primary commit.

    That is the case for a delete without a previous revision that
    either has no branches or carries the log message CVS fabricates
    when a file is added on a branch."""
    if c_rev.prev_rev is not None:
      return 0
    # c_rev.branches may be empty if the originating branch has been
    # excluded.
    if not c_rev.branches:
      return 1
    cvs_generated_msg = ('file %s was initially added on branch %s.\n'
                         % (c_rev.filename(), c_rev.branches[0]))
    metadata = Ctx()._persistence_manager.svn_commit_metadata
    author, log_msg = metadata[c_rev.digest]
    return log_msg == cvs_generated_msg

  def _commit(self):
    """Generate the primary SVNCommit that corresponds to this
    CVSCommit."""
    # The SVNCommit is created unconditionally.  Even if the only
    # change in this CVSCommit is a deletion of an already-deleted
    # file (a CVS revision in state 'dead' whose predecessor was also
    # 'dead'), a Subversion revision carrying the log message of the
    # second dead revision is still wanted, so as not to lose that
    # information.
    svn_commit = SVNCommit("commit")
    self.motivating_commit = svn_commit

    for c_rev in self.changes:
      svn_commit.add_revision(c_rev)
      # An empty deltatext on 1.1.1.1 almost always means an imported
      # file whose 1.1 and 1.1.1.1 are identical: on such imports CVS
      # creates an RCS file in which 1.1 has the content and 1.1.1.1
      # an empty deltatext.  There is no reason to reflect that
      # non-change in the repository, so nothing is done for it.  (A
      # really paranoid check would also require 1.1's log message to
      # be the CVS-generated "Initial revision\n", but these
      # conditions should be strict enough.)
      if c_rev.deltatext_code == DELTATEXT_EMPTY and c_rev.rev == "1.1.1.1":
        continue
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    for c_rev in self.deletes:
      # When a file is added on a branch, CVS also generates a trunk
      # revision (typically 1.1) for it in state 'dead'.  Such a
      # revision is added only if its log message is not the standard
      # one fabricated by cvs.
      if self._is_fabricated_delete(c_rev):
        continue

      svn_commit.add_revision(c_rev)
      if c_rev.is_default_branch_revision():
        self.default_branch_cvs_revisions.append(c_rev)

    # There is a slight chance that no CVSRevision was registered with
    # the SVNCommit (see the loop over self.deletes above).  In that
    # case the commit is not flushed to disk and the SVNCommit
    # revision counter is rolled back instead.
    if len(svn_commit.cvs_revs) > 0:
      svn_commit.flush()
    else:
      SVNCommit.revnum = SVNCommit.revnum - 1

    if not Ctx().trunk_only:
      for c_rev in self.revisions():
        Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)

  def _post_commit(self):
    """Generate any SVNCommits that can be performed now that _commit
    has happened.  That is, handle non-trunk default branches.

    Sometimes an RCS file has a non-trunk default branch, so a commit
    on that default branch would be visible in a default CVS checkout
    of HEAD.  Unless that commit is copied over to Subversion's trunk,
    no Subversion tree corresponds to that CVS checkout.  Of course,
    in order to copy the path over, the existing trunk there may first
    have to be deleted."""
    # Nothing to generate without default branch revs.
    if not len(self.default_branch_cvs_revisions):
      return

    # One SVNCommit covers all of our default branch c_revs.
    svn_commit = SVNCommit("post-commit default branch(es)")
    svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
    for c_rev in self.default_branch_cvs_revisions:
      svn_commit.add_revision(c_rev)
      Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
                                                          svn_commit.revnum)
    self.secondary_commits.append(svn_commit)

  def process_revisions(self, done_symbols):
    """Process all the CVSRevisions that this instance has, creating
    one or more SVNCommits in the process.  Generate fill SVNCommits
    only for symbols not in DONE_SYMBOLS (avoids unnecessary fills).

    Return the primary SVNCommit that corresponds to this CVSCommit.
    The returned SVNCommit is the commit that motivated any other
    SVNCommits generated in this CVSCommit."""
    self.done_symbols = done_symbols
    seconds = self.t_max - self.t_min + 1

    Log().write(LOG_VERBOSE, '-' * 60)
    Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
    if seconds != 1:
      Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
      Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
                  % (time.ctime(self.t_max), seconds))
    else:
      Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
                  % time.ctime(self.t_max))

    if seconds > COMMIT_THRESHOLD + 1:
      Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
                  % (warning_prefix, COMMIT_THRESHOLD))

    # A trunk-only conversion performs just the primary commit.
    trunk_only = Ctx().trunk_only
    if not trunk_only:
      self._pre_commit()
    self._commit()
    if trunk_only:
      return self.motivating_commit
    self._post_commit()

    # Secondary commits take the date of the commit that motivated
    # them.
    for svn_commit in self.secondary_commits:
      svn_commit.set_date(self.motivating_commit.get_date())
      svn_commit.flush()

    return self.motivating_commit
2433 class SVNCommit:
2434 """This represents one commit to the Subversion Repository. There
2435 are three types of SVNCommits:
2437 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2439 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2441 3. Updates trunk to reflect the contents of a particular branch
2442 (this is to handle RCS default branches)."""
2444 # The revision number to assign to the next new SVNCommit.
2445 # We start at 2 because SVNRepositoryMirror uses the first commit
2446 # to create trunk, tags, and branches.
2447 revnum = 2
class SVNCommitInternalInconsistencyError(Exception):
  """Exception raised if we encounter an impossible state in the
  SVNCommit Databases, e.g. a stored commit that has both CVS
  revisions and a symbolic name to fill."""
  pass
def __init__(self, description="", revnum=None, cvs_revs=None):
  """Instantiate an SVNCommit.  DESCRIPTION is for debugging only.

  If REVNUM is given, the SVNCommit corresponds to that revision
  number; otherwise it takes the next number from the class-wide
  SVNCommit.revnum counter, which is then advanced.  If CVS_REVS is
  given, it must be the exact set of CVSRevisions for REVNUM.

  It is an error to pass CVS_REVS without REVNUM, but you may pass
  REVNUM without CVS_REVS, and then add a revision at a time by
  invoking add_revision()."""
  self._description = description

  # Revprop metadata for this commit.
  #
  # The initial values are placeholders; at least the log and the date
  # should have been replaced by the time they are used.
  #
  # They are private because their values should be returned encoded
  # in UTF8, while callers aren't required to set them in UTF8.  So
  # accessor methods set them, and self.get_revprops() returns them
  # in dictionary form.
  self._author = Ctx().username
  self._log_msg = "This log message means an SVNCommit was used too soon."
  self._max_date = 0  # Latest date seen so far.

  self.cvs_revs = cvs_revs or []

  if not revnum:
    # No explicit number: claim the next one from the shared counter.
    revnum = SVNCommit.revnum
    SVNCommit.revnum = revnum + 1
  self.revnum = revnum

  # The (uncleaned) symbolic name that is filled in this SVNCommit, if
  # any.
  self.symbolic_name = None

  # For a default branch synchronization commit, the subversion
  # revision number of the *primary* commit where the default branch
  # changes actually happened.  None otherwise.
  #
  # Several synchronization commits may refer to the same motivating
  # commit revision number, and a single synchronization commit may
  # contain CVSRevisions on several different default branches.
  self.motivating_revnum = None

  # True only if this commit is a fill of a symbolic name that is a
  # tag; None in all other cases.
  self.is_tag = None
2503 def set_symbolic_name(self, symbolic_name):
2504 "Set self.symbolic_name to SYMBOLIC_NAME."
2505 self.symbolic_name = symbolic_name
2507 def set_motivating_revnum(self, revnum):
2508 "Set self.motivating_revnum to REVNUM."
2509 self.motivating_revnum = revnum
2511 def set_author(self, author):
2512 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2513 This is the only way to set an SVNCommit's author."""
2514 self._author = author
2516 def set_log_msg(self, msg):
2517 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2518 This is the only way to set an SVNCommit's log message."""
2519 self._log_msg = msg
2521 def set_date(self, date):
2522 """Set this SVNCommit's date to DATE (an integer).
2523 Note that self.add_revision() updates this automatically based on
2524 a CVSRevision; so you may not need to call this at all, and even
2525 if you do, the value may be overwritten by a later call to
2526 self.add_revision()."""
2527 self._max_date = date
2529 def get_date(self):
2530 """Returns this SVNCommit's date as an integer."""
2531 return self._max_date
2533 def get_revprops(self):
2534 """Return the Subversion revprops for this SVNCommit."""
2535 date = format_date(self._max_date)
2536 try:
2537 utf8_author = None
2538 if self._author is not None:
2539 utf8_author = to_utf8(self._author)
2540 utf8_log = to_utf8(self.get_log_msg())
2541 return { 'svn:author' : utf8_author,
2542 'svn:log' : utf8_log,
2543 'svn:date' : date }
2544 except UnicodeError:
2545 Log().write(LOG_WARN, '%s: problem encoding author or log message:'
2546 % warning_prefix)
2547 Log().write(LOG_WARN, " author: '%s'" % self._author)
2548 Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
2549 Log().write(LOG_WARN, " date: '%s'" % date)
2550 Log().write(LOG_WARN,
2551 "(subversion rev %s) Related files:" % self.revnum)
2552 for c_rev in self.cvs_revs:
2553 Log().write(LOG_WARN, " ", c_rev.fname)
2555 Log().write(LOG_WARN, "Consider rerunning with (for example)",
2556 "'--encoding=latin1'.\n")
2557 # It's better to fall back to the original (unknown encoding) data
2558 # than to either 1) quit or 2) record nothing at all.
2559 return { 'svn:author' : self._author,
2560 'svn:log' : self.get_log_msg(),
2561 'svn:date' : date }
2563 def add_revision(self, cvs_rev):
2564 self.cvs_revs.append(cvs_rev)
2565 if cvs_rev.timestamp > self._max_date:
2566 self._max_date = cvs_rev.timestamp
2568 def _is_primary_commit(self):
2569 """Return true if this is a primary SVNCommit, false otherwise."""
2570 return not (self.symbolic_name or self.motivating_revnum)
2572 def flush(self):
2573 Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
2574 % (self.revnum, self._description))
2575 Ctx()._persistence_manager.set_cvs_revs(self.revnum, self.cvs_revs)
2577 if self.motivating_revnum is not None:
2578 Ctx()._persistence_manager.set_motivating_revnum(self.revnum,
2579 self.motivating_revnum)
2581 # If we're not a primary commit, then store our date and/or our
2582 # symbolic_name
2583 if not self._is_primary_commit():
2584 Ctx()._persistence_manager.set_name_and_date(
2585 self.revnum, self.symbolic_name, self._max_date)
2587 def __str__(self):
2588 """ Print a human-readable description of this SVNCommit. This
2589 description is not intended to be machine-parseable (although
2590 we're not going to stop you if you try!)"""
2592 ret = "SVNCommit #: " + str(self.revnum) + "\n"
2593 if self.symbolic_name:
2594 ret += (" symbolic name: " + _clean_symbolic_name(self.symbolic_name)
2595 + "\n")
2596 else:
2597 ret += " NO symbolic name\n"
2598 ret += " debug description: " + self._description + "\n"
2599 ret += " cvs_revs:\n"
2600 for c_rev in self.cvs_revs:
2601 ret += " " + c_rev.unique_key() + "\n"
2602 return ret
2604 def get_log_msg(self):
2605 """Returns the actual log message for a primary commit, and the
2606 appropriate manufactured log message for a secondary commit."""
2607 if self.symbolic_name is not None:
2608 return self._log_msg_for_symbolic_name_commit()
2609 elif self.motivating_revnum is not None:
2610 return self._log_msg_for_default_branch_commit()
2611 else:
2612 return self._log_msg
2614 def _log_msg_for_symbolic_name_commit(self):
2615 """Creates a log message for a manufactured commit that fills
2616 self.symbolic_name. If self.is_tag is true, write the log message
2617 as though for a tag, else write it as though for a branch."""
2618 type = 'branch'
2619 if self.is_tag:
2620 type = 'tag'
2622 # In Python 2.2.3, we could use textwrap.fill(). Oh well :-).
2623 space_or_newline = ' '
2624 cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
2625 if len(cleaned_symbolic_name) >= 13:
2626 space_or_newline = '\n'
2628 return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
2629 % (type, space_or_newline, cleaned_symbolic_name)
2631 def _log_msg_for_default_branch_commit(self):
2632 """Creates a log message for a manufactured commit that
2633 synchronizes a non-trunk default branch with trunk."""
2634 msg = 'This commit was generated by cvs2svn to compensate for ' \
2635 'changes in r%d,\n' \
2636 'which included commits to RCS files with non-trunk default ' \
2637 'branches.\n' % self.motivating_revnum
2638 return msg
class CVSRevisionAggregator:
  """Collects CVSRevisions into CVSCommits, each of which represents
  at least one SVNCommit."""

  def __init__(self):
    """Open the databases that are read during aggregation and install
    the symbolings logger, persistence manager and default branches
    database on the shared Ctx()."""
    # A map { key : CVSCommit } of the CVS commits currently being
    # accumulated.  While a CVSCommit still accepts further
    # CVSRevisions its key is CVSRevision.digest.  Once it has been
    # closed (because an inbound commit wanted to affect a file that
    # was already within the CVSCommit), its key is the digest with
    # some number of '-' appended.
    self.cvs_commits = {}

    # A map { symbol : None } of the symbolic names whose last source
    # CVSRevision has already been processed but which have not been
    # closed yet.
    self.pending_symbols = {}

    # A list of closed symbols: the last CVSRevision that is a source
    # for the symbol has been seen, the final fill has been done, and
    # the symbol never needs to be filled again.
    self.done_symbols = [ ]

    # The most recently created primary svn_commit object.  It is kept
    # only for its date, which is copied onto the SVNCommits created
    # in self._attempt_to_commit_symbols().
    self.latest_primary_svn_commit = None

    self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
    if not Ctx().trunk_only:
      self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
                                   DB_OPEN_READ)

    Ctx()._symbolings_logger = SymbolingsLogger()
    Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
    Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
                                           DB_OPEN_READ)

  def _extract_ready_commits(self, timestamp):
    """Remove from self.cvs_commits, and return as a list, every
    active commit that expires by TIMESTAMP."""
    expired_keys = [ key
                     for key, commit in self.cvs_commits.items()
                     if commit.t_max + COMMIT_THRESHOLD < timestamp ]
    expired_commits = [ self.cvs_commits[key] for key in expired_keys ]
    for key in expired_keys:
      del self.cvs_commits[key]
    return expired_commits

  def process_revision(self, c_rev):
    """Add C_REV to the accumulating commits, and process every
    accumulated commit that has expired by C_REV's timestamp."""
    # Each time a new revision comes in, find the accumulating commits
    # that are ready for processing.
    ready_queue = self._extract_ready_commits(c_rev.timestamp)

    # If the inbound commit is on the same file as a pending commit,
    # close the pending commit to further changes.  Don't flush it
    # though, as there may be other pending commits dated before it.
    # ### ISSUE: the has_file() check below is not optimal.
    # It does fix the dataloss bug where revisions would get lost
    # if checked in too quickly, but it can also break apart the
    # commits.  The correct fix would require tracking the dependencies
    # between change sets and committing them in proper order.
    #
    # The loop runs over a snapshot because it re-keys the dictionary.
    for digest_key, pending_commit in list(self.cvs_commits.items()):
      if not pending_commit.has_file(c_rev.fname):
        continue
      # Find a string that is not already a key in self.cvs_commits.
      closed_key = digest_key + '-'
      while closed_key in self.cvs_commits:
        closed_key = closed_key + '-'
      self.cvs_commits[closed_key] = pending_commit
      del self.cvs_commits[digest_key]

    # Add this revision to the still-open commit for its digest,
    # creating that commit first if necessary.
    cvs_commit = self.cvs_commits.get(c_rev.digest)
    if cvs_commit is None:
      author, log = self.metadata_db[c_rev.digest]
      cvs_commit = CVSCommit(c_rev.digest, author, log)
      self.cvs_commits[c_rev.digest] = cvs_commit
    cvs_commit.add_revision(c_rev)

    if not ready_queue:
      # Make sure we _add_pending_symbols() for this c_rev and
      # _attempt_to_commit_symbols(), even if no commits are ready.
      self._add_pending_symbols(c_rev)
      self._attempt_to_commit_symbols(ready_queue)
      return

    # Everything in the ready_queue has to be processed now, because
    # this latest rev couldn't possibly be part of any of it.  Sort
    # the commits into time-order, then process them.
    ready_queue.sort()
    while ready_queue:
      ready_commit = ready_queue.pop(0)
      self.latest_primary_svn_commit = \
          ready_commit.process_revisions(self.done_symbols)
      self._add_pending_symbols(c_rev)
      self._attempt_to_commit_symbols(ready_queue)

  def flush(self):
    """Commit anything left in self.cvs_commits.  Then inform the
    SymbolingsLogger that all commits are done."""
    ready_queue = [ (cvs_commit, key)
                    for key, cvs_commit in self.cvs_commits.items() ]
    ready_queue.sort()

    while ready_queue:
      (cvs_commit, key) = ready_queue.pop(0)
      self.latest_primary_svn_commit = \
          cvs_commit.process_revisions(self.done_symbols)
      del self.cvs_commits[key]
      self._attempt_to_commit_symbols([])

    if not Ctx().trunk_only:
      Ctx()._symbolings_logger.close()

  def _add_pending_symbols(self, c_rev):
    """Add to self.pending_symbols any symbols from C_REV for which
    C_REV is the last CVSRevision.

    Unless this is a trunk-only conversion, look up the symbolic names
    for which C_REV is the last *source* CVSRevision and add them to
    those left over from previous passes through the aggregator."""
    if Ctx().trunk_only:
      return
    for sym in self.last_revs_db.get(c_rev.unique_key(), []):
      self.pending_symbols[sym] = None

  def _attempt_to_commit_symbols(self, queued_commits):
    """Generate one SVNCommit for each symbol in self.pending_symbols
    that doesn't have an opening CVSRevision in either QUEUED_COMMITS
    or self.cvs_commits.values()."""
    # The commits that may still hold *source* CVSRevisions: the
    # pending commit queue (self.cvs_commits) plus queued_commits.
    active_commits = list(self.cvs_commits.values()) + queued_commits

    closeable_symbols = []
    for sym in self.pending_symbols:
      still_open = False
      for cvs_commit in active_commits:
        if cvs_commit.opens_symbolic_name(sym):
          still_open = True
          break
      if not still_open:
        closeable_symbols.append(sym)

    # Sort the closeable symbols so that they are always processed in
    # the same order, regardless of the order in which the dict
    # hashing algorithm hands them back.  This keeps test results the
    # same on all platforms.
    closeable_symbols.sort()
    for sym in closeable_symbols:
      svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
      svn_commit.set_symbolic_name(sym)
      svn_commit.set_date(self.latest_primary_svn_commit.get_date())
      svn_commit.flush()
      self.done_symbols.append(sym)
      del self.pending_symbols[sym]
class SymbolingsReader:
  """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
  and the SYMBOL_OFFSETS_DB.  Does the heavy lifting of finding and
  returning the correct opening and closing Subversion revision
  numbers for a given symbolic name."""

  def __init__(self):
    """Open the SYMBOL_OPENINGS_CLOSINGS_SORTED file for reading and
    copy the offsets database into memory."""
    self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')

    # The offsets database is really small and is read and written a
    # fair bit, so keep its contents in an ordinary dictionary.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
    self.offsets = { }
    for symbol in offsets_db:
      self.offsets[symbol] = offsets_db[symbol]

  def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
    """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
    SymbolicNameFillingGuide object.

    Note that if we encounter an opening rev in this fill, but the
    corresponding closing rev takes place later than SVN_REVNUM, the
    closing will not be passed to SymbolicNameFillingGuide in this
    fill (and will be discarded when encountered in a later fill).
    This is perfectly fine, because we can still do a valid fill
    without the closing--we always try to fill what we can as soon as
    we can."""
    openings_closings_map = OpeningsClosingsMap(symbolic_name)

    # A symbol may have no recorded offset: it's possible to have a
    # branch start with a file that was added on a branch.
    if symbolic_name not in self.offsets:
      return SymbolicNameFillingGuide(openings_closings_map)

    # Start reading at the offset recorded for symbolic_name.
    self.symbolings.seek(self.offsets[symbolic_name])

    while True:
      line_start = self.symbolings.tell()
      line = self.symbolings.readline().rstrip()
      if not line:
        break
      fields = line.split(" ", 4)
      name, revnum, kind, branch_name, cvs_path = fields
      if branch_name == '*':
        svn_path = Ctx().project.make_trunk_path(cvs_path)
      else:
        svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
      revnum = int(revnum)
      if revnum > svn_revnum or name != symbolic_name:
        break
      openings_closings_map.register(svn_path, revnum, kind)

    # If anything that was read got used, remember the offset of the
    # beginning of the last line read as the new starting point for
    # this symbol.
    if not openings_closings_map.is_empty():
      self.offsets[symbolic_name] = line_start

    return SymbolicNameFillingGuide(openings_closings_map)
class SvnRevisionRange:
  """The range of subversion revision numbers from which a path can be
  copied.

  self.opening_revnum is the number of the earliest such revision and
  self.closing_revnum is one higher than the number of the last such
  revision.  self.closing_revnum stays None as long as no closing has
  been registered."""

  def __init__(self, opening_revnum):
    """Create a range that opens at OPENING_REVNUM and has no closing
    yet."""
    self.closing_revnum = None
    self.opening_revnum = opening_revnum

  def add_closing(self, closing_revnum):
    """Register CLOSING_REVNUM as the closing of this range, unless a
    closing has already been registered."""
    # With a non-trunk default branch there may be multiple closings;
    # only the first one encountered counts.
    if self.closing_revnum is not None:
      return
    self.closing_revnum = closing_revnum

  def __str__(self):
    """Return '[OPENING:]' for a range without a closing, and
    '[OPENING:CLOSING]' otherwise."""
    text = '[%d:' % (self.opening_revnum,)
    if self.closing_revnum is not None:
      text = text + '%d' % (self.closing_revnum,)
    return text + ']'
class OpeningsClosingsMap:
  """A dictionary of openings and closings for a symbolic name in the
  current SVNCommit.

  The user should call self.register() for the openings and closings
  and then read the accumulated ranges back with self.get_things()."""

  def __init__(self, symbolic_name):
    """Create an empty map for SYMBOLIC_NAME, ready to receive
    openings and closings."""
    self.name = symbolic_name

    # Maps each svn_path to its SvnRevisionRange object.
    self.things = { }

  def register(self, svn_path, svn_revnum, type):
    """Register an opening or closing revision for this symbolic name.

    SVN_PATH is the source path that needs to be copied into
    self.symbolic_name, and SVN_REVNUM is either the first svn
    revision number that we can copy from (our opening), or the last
    (not inclusive) svn revision number that we can copy from (our
    closing).  TYPE indicates whether this path is an opening or a
    closing.

    The opening for a given SVN_PATH must be passed before the closing
    for it to have any effect... any closing encountered before a
    corresponding opening will be discarded.

    It is not necessary to pass a corresponding closing for every
    opening."""
    if type == OPENING:
      # An opening is always recorded, replacing any earlier range
      # stored for the same path.
      self.things[svn_path] = SvnRevisionRange(svn_revnum)
      return

    if type != CLOSING:
      return

    # A closing only counts if the opening for that path is already
    # registered.
    revision_range = self.things.get(svn_path)
    if revision_range is not None:
      revision_range.add_closing(svn_revnum)

  def is_empty(self):
    """Return true if no openings or closings have been accumulated,
    false otherwise."""
    return len(self.things) == 0

  def get_things(self):
    """Return a list of (svn_path, SvnRevisionRange) tuples for all
    svn_paths with registered openings or closings."""
    return self.things.items()
class SymbolicNameFillingGuide:
  """A node tree representing the source paths to be copied to fill
  self.name in the current SVNCommit.

  self._node_tree is the root of the directory tree, in the form {
  path_component : subnode }.  Leaf nodes are instances of
  SvnRevisionRange.  Intermediate (directory) nodes are dictionaries
  mapping relative names to subnodes.

  By walking self._node_tree and calling self.get_best_revnum() on
  each node, the caller can determine what subversion revision number
  to copy the path corresponding to that node from.  self._node_tree
  should be treated as read-only.

  The caller can then descend to sub-nodes to see if their "best
  revnum" differs from their parents' and if it does, take appropriate
  actions to "patch up" the subtrees."""

  def __init__(self, openings_closings_map):
    """Initialize a SymbolicNameFillingGuide for the symbolic name of
    OPENINGS_CLOSINGS_MAP and store into it the openings and closings
    from OPENINGS_CLOSINGS_MAP."""
    self.name = openings_closings_map.name

    # The dictionary that holds our node tree as a map { node_key :
    # node }.
    self._node_tree = { }

    for svn_path, svn_revision_range in openings_closings_map.get_things():
      (head, tail) = _path_split(svn_path)
      self._get_node_for_path(head)[tail] = svn_revision_range

    #self.print_node_tree(self._node_tree)

  def _get_node_for_path(self, svn_path):
    """Return the (directory) node for SVN_PATH, creating new nodes as
    needed."""
    # Walk down the path, one node at a time.
    node = self._node_tree
    for component in svn_path.split('/'):
      if component in node:
        node = node[component]
      else:
        old_node = node
        node = {}
        old_node[component] = node

    return node

  def get_best_revnum(self, node, preferred_revnum):
    """Determine the best subversion revision number to use when
    copying the source tree beginning at NODE.

    Return a tuple (REVNUM, MAX_SCORE): the chosen subversion revision
    number and the score it achieved.  PREFERRED_REVNUM is passed to
    best_rev() and used to calculate the best revnum.

    Raise FatalError if no revision to copy from can be found."""

    def score_revisions(svn_revision_ranges):
      """Return a list of revisions and scores based on
      SVN_REVISION_RANGES.  The returned list looks like:

         [(REV1 SCORE1), (REV2 SCORE2), ...]

      where the tuples are sorted by revision number.
      SVN_REVISION_RANGES is a list of SvnRevisionRange objects.

      For each svn revision that appears as either an opening_revnum
      or closing_revnum for one of the svn_revision_ranges, output a
      tuple indicating how many of the SvnRevisionRanges include that
      svn_revision in its range.  A score thus indicates that copying
      the corresponding revision (or any following revision up to the
      next revision in the list) of the object in question would yield
      that many correct paths at or underneath the object.  There may
      be other paths underneath it which are not correct and would
      need to be deleted or recopied; those can only be detected by
      descending and examining their scores.

      If SVN_REVISION_RANGES is empty, return the empty list."""
      openings = [ x.opening_revnum
                   for x in svn_revision_ranges ]
      closings = [ x.closing_revnum
                   for x in svn_revision_ranges
                   if x.closing_revnum is not None ]

      # First look for easy out.
      if not openings:
        return []

      # Create a list with both openings (which increment the total)
      # and closings (which decrement the total):
      things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
      # Sort by revision number:
      things.sort()
      # Initialize output list with zeroth element of things.  This
      # element must exist, because it was already verified that
      # openings is not empty.
      scores = [ things[0] ]
      total = scores[-1][1]
      for (rev, change) in things[1:]:
        total += change
        if rev == scores[-1][0]:
          # Same revision as last entry; modify last entry:
          scores[-1] = (rev, total)
        else:
          # Previously-unseen revision; create new entry:
          scores.append((rev, total))
      return scores

    def best_rev(scores, preferred_rev):
      """Return the revision with the highest score from SCORES, a list
      returned by score_revisions(), together with that score.  When
      the maximum score is shared by multiple revisions, the oldest
      revision is selected, unless PREFERRED_REV is one of the
      possibilities, in which case, it is selected."""
      max_score = 0
      preferred_rev_score = -1
      rev = SVN_INVALID_REVNUM
      if preferred_rev is None:
        # Comparison order of different types is arbitrary.  Do not
        # expect None to compare less than int values below.
        preferred_rev = SVN_INVALID_REVNUM
      for revnum, count in scores:
        if count > max_score:
          max_score = count
          rev = revnum
        if revnum <= preferred_rev:
          preferred_rev_score = count
      if preferred_rev_score == max_score:
        rev = preferred_rev
      return rev, max_score

    # Aggregate openings and closings from the rev tree
    svn_revision_ranges = self._list_revnums(node)

    # Score the lists
    scores = score_revisions(svn_revision_ranges)

    revnum, max_score = best_rev(scores, preferred_revnum)

    if revnum == SVN_INVALID_REVNUM:
      # The symbol name lives on the instance; a bare `name` is not
      # defined in this scope and would mask this error with a
      # NameError.
      raise FatalError("failed to find a revision "
                       + "to copy from when copying %s" % self.name)
    return revnum, max_score

  def _list_revnums(self, node):
    """Return a list of all the SvnRevisionRanges (including
    duplicates) for all leaf nodes at and under NODE."""
    if isinstance(node, SvnRevisionRange):
      # It is a leaf node.
      return [ node ]
    else:
      # It is an intermediate node.
      revnums = []
      for key, subnode in node.items():
        revnums.extend(self._list_revnums(subnode))
      return revnums

  def get_sources(self):
    """Return the list of sources for this symbolic name.

    The Project instance defines what are legitimate sources.  Raise
    an exception if a change occurred outside of the source
    directories."""
    return self._get_sub_sources('', self._node_tree)

  def _get_sub_sources(self, start_svn_path, start_node):
    """Return the list of sources for this symbolic name, starting the
    search at path START_SVN_PATH, which is node START_NODE.  This is
    a helper method, called by get_sources() (see).

    Raise FatalError if a leaf node is reached without passing through
    a legitimate source."""
    project = Ctx().project
    if isinstance(start_node, SvnRevisionRange):
      # This implies that a change was found outside of the
      # legitimate sources.  This should never happen.  A bare `raise`
      # has no active exception to re-raise here, so raise an explicit
      # error that names the offending path.
      raise FatalError("found a change outside of the legitimate sources "
                       "at '%s' while filling %s"
                       % (start_svn_path, self.name))
    elif project.is_source(start_svn_path):
      # This is a legitimate source.  Add it to list.
      return [ FillSource(start_svn_path, start_node) ]
    else:
      # This is a directory that is not a legitimate source.  (That's
      # OK because it hasn't changed directly.)  But directories
      # within it have been changed, so we need to search recursively
      # to find their enclosing sources.
      sources = []
      for entry, node in start_node.items():
        svn_path = _path_join(start_svn_path, entry)
        sources.extend(self._get_sub_sources(svn_path, node))

      return sources

  def print_node_tree(self, node, name='/', indent_depth=0):
    """For debugging purposes.  Prints all nodes in TREE that are
    rooted at NODE.  INDENT_DEPTH is used to indent the output of
    recursive calls."""
    # Each print is given one pre-formatted string so that the
    # statement parses the same way on Python 2 and Python 3.
    padding = " " * (indent_depth * 2)
    if not indent_depth:
      print("TREE " + "=" * 75)
    if isinstance(node, SvnRevisionRange):
      print("TREE: %s %s %s" % (padding, name, node))
    else:
      print("TREE: %s %s" % (padding, name))
      for key, value in node.items():
        self.print_node_tree(value, key, (indent_depth + 1))
class FillSource:
  """A candidate source for a fill, as used by the symbol filler in
  SVNRepositoryMirror."""

  def __init__(self, prefix, node):
    """Create a fill source for PREFIX and NODE.  It has neither a
    score nor a revision number until set_score() is called."""
    self.prefix = prefix
    self.node = node
    self.revnum = None
    self.score = None

  def set_score(self, score, revnum):
    """Store SCORE and REVNUM on this fill source."""
    self.revnum = revnum
    self.score = score

  def __cmp__(self, other):
    """Order FillSources so that sorting puts the highest score first.
    Raise TypeError if either operand has not been scored."""
    for source in (self, other):
      if source.score is None:
        raise TypeError('Tried to compare unscored FillSource')
    # The operands are swapped on purpose: descending score order.
    return cmp(other.score, self.score)
3168 class SVNRepositoryMirror:
3169 """Mirror a Subversion Repository as it is constructed, one
3170 SVNCommit at a time. The mirror is skeletal; it does not contain
3171 file contents. The creation of a dumpfile or Subversion repository
3172 is handled by delegates. See self.add_delegate method for how to
3173 set delegates.
3175 The structure of the repository is kept in two databases and one
3176 hash. The revs_db database maps revisions to root node keys, and
3177 the nodes_db database maps node keys to nodes. A node is a hash
3178 from directory names to keys. Both the revs_db and the nodes_db are
3179 stored on disk and each access is expensive.
3181 The nodes_db database only has the keys for old revisions. The
3182 revision that is being constructed is kept in memory in the new_nodes
3183 hash which is cheap to access.
3185 You must invoke _start_commit between SVNCommits.
3187 *** WARNING *** All path arguments to methods in this class CANNOT
3188 have leading or trailing slashes.
class SVNRepositoryMirrorPathExistsError(Exception):
  """Raised when a path is added to the repository mirror although it
  already exists in the youngest revision of the repository."""
class SVNRepositoryMirrorUnexpectedOperationError(Exception):
  """Raised when a CVSRevision turns out to carry an unexpected
  operation (OP) value."""
class SVNRepositoryMirrorInvalidFillOperationError(Exception):
  """Raised when an empty SymbolicNameFillingGuide comes back during a
  fill although the branch in question already exists."""
def __init__(self):
  """Create the mirror databases and reset the in-memory state, so
  that the SVNRepositoryMirror is ready for SVNCommits."""
  self.delegates = [ ]

  # Start at revision 0 without a root node.  The root node is
  # created by _open_writable_root_node.
  self.youngest = 0
  self.new_root_key = None
  self.new_nodes = { }

  # This corresponds to the 'revisions' table in a Subversion fs.
  self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)

  # This corresponds to the 'nodes' table in a Subversion fs.  (No
  # 'representations' or 'strings' table is needed because only
  # metadata is tracked, not file contents.)
  self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
  Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)

  if Ctx().trunk_only:
    # The symbol-related helpers are only set up when the conversion
    # is not trunk-only.
    return

  ###PERF IMPT: Suck this into memory.
  self.tags_db = TagsDatabase(DB_OPEN_READ)
  self.symbolings_reader = SymbolingsReader()
def _initialize_repository(self, date):
  """Initialize the repository by creating the trunk directory and,
  unless the conversion is trunk-only, the branches and tags
  directories.  DATE becomes the date of the initial commit.

  This method should only be called after all delegates have been
  added to the repository mirror."""
  # A 'fake' SVNCommit for revision 1 lets the initialization take
  # advantage of the revprops handling of SVNCommit.
  svn_commit = SVNCommit("Initialization", revnum=1)
  svn_commit.set_log_msg("New repository initialized by cvs2svn.")
  svn_commit.set_date(date)

  self._start_commit(svn_commit)
  self._mkdir(Ctx().project.trunk_path)
  if Ctx().trunk_only:
    return
  self._mkdir(Ctx().project.branches_path)
  self._mkdir(Ctx().project.tags_path)
def _start_commit(self, svn_commit):
  """Start a new commit for SVN_COMMIT, ending the previous commit
  first if there was one."""
  commit_in_progress = self.youngest > 0
  if commit_in_progress:
    self._end_commit()

  # Begin the new revision with no root node and no new nodes.
  self.youngest = svn_commit.revnum
  self.new_nodes = { }
  self.new_root_key = None

  self._invoke_delegates('start_commit', svn_commit)
3259 def _end_commit(self):
3260 """Called at the end of each commit. This method copies the newly
3261 created nodes to the on-disk nodes db."""
3262 if self.new_root_key is None:
3263 # No changes were made in this revision, so we make the root node
3264 # of the new revision be the same as the last one.
3265 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3266 else:
3267 self.revs_db[str(self.youngest)] = self.new_root_key
3268 # Copy the new nodes to the nodes_db
3269 for key, value in self.new_nodes.items():
3270 self.nodes_db[key] = value
3272 def _get_node(self, key):
3273 """Returns the node contents for KEY which may refer to either
3274 self.nodes_db or self.new_nodes."""
3275 if self.new_nodes.has_key(key):
3276 return self.new_nodes[key]
3277 else:
3278 return self.nodes_db[key]
3280 def _open_readonly_node(self, path, revnum):
3281 """Open a readonly node for PATH at revision REVNUM. Returns the
3282 node key and node contents if the path exists, else (None, None)."""
3283 # Get the root key
3284 if revnum == self.youngest:
3285 if self.new_root_key is None:
3286 node_key = self.revs_db[str(self.youngest - 1)]
3287 else:
3288 node_key = self.new_root_key
3289 else:
3290 node_key = self.revs_db[str(revnum)]
3292 for component in path.split('/'):
3293 node_contents = self._get_node(node_key)
3294 node_key = node_contents.get(component, None)
3295 if node_key is None:
3296 return None
3298 return node_key
def _open_writable_root_node(self):
  """Return the (key, contents) pair of a root node that may be
  modified.

  If the commit in progress already has a writable root, it is
  returned immediately.  Otherwise a new root is registered under a
  freshly generated key; its contents start out as the contents of
  the preceding revision's root, or empty when self.youngest < 2."""
  root_key = self.new_root_key
  if root_key is not None:
    return root_key, self.new_nodes[root_key]

  if self.youngest >= 2:
    root_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
  else:
    root_contents = { }
  root_key = gen_key()
  self.new_root_key = root_key
  self.new_nodes = { root_key: root_contents }

  return root_key, root_contents
def _open_writable_node(self, svn_path, create):
  """Return the (key, contents) pair of a writable node for SVN_PATH.

  Every node along SVN_PATH that still lives only in self.nodes_db is
  re-registered under a new key in self.new_nodes.  If a component is
  missing, it is created when CREATE is true (a 'mkdir' is reported
  to the delegates for each created component except the last);
  otherwise (None, None) is returned."""
  parent_key, parent_contents = self._open_writable_root_node()

  # Descend the path, one component at a time.
  path_so_far = None
  components = svn_path.split('/')
  last_index = len(components) - 1
  index = 0
  for component in components:
    path_so_far = _path_join(path_so_far, component)
    node_key = parent_contents.get(component, None)
    if node_key is None:
      if not create:
        # The component is missing and we may not create it: give up.
        return None, None
      # The component is missing, so create an empty node for it.
      node_contents = { }
      node_key = gen_key()
      self.new_nodes[node_key] = node_contents
      parent_contents[component] = node_key
      if index < last_index:
        self._invoke_delegates('mkdir', path_so_far)
    else:
      # The component exists; make sure it is writable.
      node_contents = self.new_nodes.get(node_key, None)
      if node_contents is None:
        # Take the contents from nodes_db but register them under a
        # new key, and point the parent at that key.
        node_contents = self.nodes_db[node_key]
        node_key = gen_key()
        self.new_nodes[node_key] = node_contents
        parent_contents[component] = node_key

    parent_key, parent_contents = node_key, node_contents
    index = index + 1

  return node_key, node_contents
3355 def _path_exists(self, path):
3356 """If PATH exists in self.youngest of the svn repository mirror,
3357 return true, else return None.
3359 PATH must not start with '/'."""
3360 return self._open_readonly_node(path, self.youngest) is not None
def _fast_delete_path(self, parent_path, parent_contents, component):
  """Remove the entry COMPONENT from PARENT_CONTENTS, the contents of
  the directory PARENT_PATH, and report the deletion to the
  delegates.  If PARENT_CONTENTS has no such entry, do nothing."""
  if component not in parent_contents:
    return
  del parent_contents[component]
  deleted_path = _path_join(parent_path, component)
  self._invoke_delegates('delete_path', deleted_path)
def _delete_path(self, svn_path, should_prune=False):
  """Remove SVN_PATH from the tree.  If SHOULD_PRUNE is true, also
  remove every ancestor directory that becomes empty as a result,
  much like the -P option to 'cvs checkout'.

  NOTE: Requests to delete the root directory, or any directory for
  which Ctx().project.is_unremovable() returns True, are ignored,
  whether they are made directly or arise from pruning."""
  if svn_path == '':
    return
  if Ctx().project.is_unremovable(svn_path):
    return

  parent_path, entry = _path_split(svn_path)
  if not parent_path:
    parent_key, parent_contents = self._open_writable_root_node()
  else:
    parent_key, parent_contents = \
        self._open_writable_node(parent_path, False)

  if parent_key is None:
    # The parent directory could not be opened; nothing to delete.
    return

  self._fast_delete_path(parent_path, parent_contents, entry)
  # Pruning by recursion is O(n^2) in the worst case (n being the
  # depth of SVN_PATH), but that case is probably rare and the
  # constant cost is low.  A further drawback is that one delete is
  # issued per pruned path rather than a single delete for the
  # topmost pruned directory.
  if should_prune and not parent_contents:
    self._delete_path(parent_path, True)
3400 def _mkdir(self, path):
3401 """Create PATH in the repository mirror at the youngest revision."""
3402 self._open_writable_node(path, True)
3403 self._invoke_delegates('mkdir', path)
def _change_path(self, cvs_rev):
  """Report to the delegates that the file at CVS_REV's svn_path
  changed in self.youngest."""
  # The mirror itself needs no update: it only tracks which paths
  # exist, and changing a file's content neither adds nor removes a
  # path.
  changed_item = SVNCommitItem(cvs_rev, False)
  self._invoke_delegates('change_path', changed_item)
def _add_path(self, cvs_rev):
  """Create CVS_REV's svn_path in the repository mirror and report
  the addition to the delegates."""
  self._open_writable_node(cvs_rev.svn_path, True)
  added_item = SVNCommitItem(cvs_rev, True)
  self._invoke_delegates('add_path', added_item)
def _copy_path(self, src_path, dest_path, src_revnum):
  """Copy SRC_PATH as of Subversion revision SRC_REVNUM to DEST_PATH.

  In the youngest revision of the repository, the parent of
  DEST_PATH *must* exist and DEST_PATH itself *cannot* exist; if it
  does, SVNRepositoryMirrorPathExistsError is raised.

  Return the node key and the contents (a dictionary) of the new
  node at DEST_PATH."""
  # Look up the node being copied.
  copied_key = self._open_readonly_node(src_path, src_revnum)
  copied_contents = self._get_node(copied_key)

  # Open the directory that is to receive the copy.
  parent_path, basename = _path_split(dest_path)
  parent_key, parent_contents = \
      self._open_writable_node(parent_path, False)

  if parent_contents.has_key(basename):
    raise self.SVNRepositoryMirrorPathExistsError(
        ("Attempt to add path '%s' to repository mirror " % dest_path)
        + "when it already exists in the mirror.")

  parent_contents[basename] = copied_key
  self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)

  # This is a cheap copy: the destination simply shares the key and
  # the contents of the source node.
  return copied_key, copied_contents
def _fill_symbolic_name(self, svn_commit):
  """Perform all the copies needed to create as much of the tag or
  branch SVN_COMMIT.symbolic_name as the current revision of the
  repository mirror allows.

  By the end of this call the symbolic name is guaranteed to exist
  in the Subversion repository, even if there are no paths under
  it."""
  symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
      svn_commit.symbolic_name, self.youngest)
  # The candidate sources to copy the symbolic name from.
  sources = symbol_fill.get_sources()

  if sources:
    if self.tags_db.has_key(svn_commit.symbolic_name):
      dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
    else:
      dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)

    dest_key, dest_contents = self._open_writable_node(dest_prefix, False)
    self._fill(symbol_fill, dest_prefix, dest_key, sources)
    return

  # We can only get here for a branch whose first commit is an add
  # (as opposed to a copy).
  dest_path = Ctx().project.get_branch_path(symbol_fill.name)
  if self._path_exists(dest_path):
    raise self.SVNRepositoryMirrorInvalidFillOperationError(
        "Error filling branch '"
        + _clean_symbolic_name(symbol_fill.name) + "'.\n"
        + "Received an empty SymbolicNameFillingGuide and\n"
        + "attempted to create a branch that already exists.")

  # An empty symbol_fill means that the first commit on the branch
  # was to a file added on the branch, and that this is the first
  # fill of that branch.
  #
  # This case is covered by test 16.
  #
  # Create the branch by copying trunk as of the revision just
  # before the current one...
  trunk_path = Ctx().project.trunk_path
  copied_key, entries = self._copy_path(trunk_path, dest_path,
                                        svn_commit.revnum - 1)
  # ...and, since the branch is *supposed* to start out empty, delete
  # every entry of the copied directory (without pruning).
  for entry in entries:
    self._delete_path(dest_path + '/' + entry)
def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
          path = None, parent_source_prefix = None,
          preferred_revnum = None, prune_ok = None):
  """Fill the tag or branch at DEST_PREFIX + PATH from the best of
  SOURCES, then recurse into the child items.

  DEST_PREFIX is the prefix of the destination directory, e.g.
  '/tags/my_tag' or '/branches/my_branch'.  SOURCES is a list of
  FillSource instances that are candidates to be copied to the
  destination.  DEST_KEY is the key in self.nodes_db of the
  destination, or None if the destination does not exist yet.

  PATH is relative to DEST_PREFIX; None means the top level, e.g.
  '/tags/my_tag'.

  PARENT_SOURCE_PREFIX is the source prefix that was used to copy the
  parent directory.  PREFERRED_REVNUM is the source revision number
  (an int) that the caller, who may have copied the parent, used for
  its copy; None means that no revision is preferable to any other
  (which probably means that no copies have happened yet).

  PRUNE_OK means that a copy has been made in this recursion, so it
  is safe to prune directories that are not in
  SYMBOL_FILL._node_tree, provided that said directory has a source
  prefix of one of the PARENT_SOURCE_PREFIX.

  Only recursive calls should pass PATH, PARENT_SOURCE_PREFIX,
  PREFERRED_REVNUM, and PRUNE_OK."""
  # Score every source and note the revision it would be copied from.
  for source in sources:
    best_revnum, score = symbol_fill.get_best_revnum(source.node,
                                                     preferred_revnum)
    source.set_score(score, best_revnum)

  # After sorting, the source with the highest score comes first;
  # that is the one any copy will be made from.
  sources.sort()
  copy_source = sources[0]

  src_path = _path_join(copy_source.prefix, path)
  dest_path = _path_join(dest_prefix, path)

  # Decide whether to copy to this destination, first removing any
  # existing destination path that is in the way.
  if dest_key is None:
    must_copy = True
  else:
    must_copy = prune_ok and (parent_source_prefix != copy_source.prefix
                              or copy_source.revnum != preferred_revnum)
    if must_copy:
      # The destination is about to be replaced, so remove it first.
      self._delete_path(dest_path)

  if must_copy:
    dest_key, dest_entries = self._copy_path(src_path, dest_path,
                                             copy_source.revnum)
    prune_ok = 1
  else:
    dest_entries = self._get_node(dest_key)

  # Map each child path element to the list of FillSource instances
  # in which that element exists.
  src_entries = {}
  for source in sources:
    if not isinstance(source.node, SvnRevisionRange):
      for entry, node in source.node.items():
        candidates = src_entries.setdefault(entry, [])
        candidates.append(FillSource(source.prefix, node))

  if prune_ok:
    # Remove the destination entries that no source provides.
    delete_list = [entry for entry in dest_entries
                   if entry not in src_entries]
    if delete_list:
      if dest_key not in self.new_nodes:
        dest_key, dest_entries = self._open_writable_node(dest_path, True)
      # Sort the delete list to get "diffable" dumpfiles.
      delete_list.sort()
      for entry in delete_list:
        self._fast_delete_path(dest_path, dest_entries, entry)

  # Recurse into the children in alphabetical order.
  child_names = list(src_entries.keys())
  child_names.sort()
  for child_name in child_names:
    self._fill(symbol_fill, dest_prefix,
               dest_entries.get(child_name, None),
               src_entries[child_name], _path_join(path, child_name),
               copy_source.prefix, copy_source.revnum, prune_ok)
def _synchronize_default_branch(self, svn_commit):
  """Carry the changes that SVN_COMMIT's revisions made on a
  non-trunk default branch over to the trunk of the repository.  See
  CVSCommit._post_commit() for details on why this is necessary."""
  for cvs_rev in svn_commit.cvs_revs:
    svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
    op = cvs_rev.op
    if op == OP_ADD or op == OP_CHANGE:
      # Replace whatever is on trunk with a copy from the branch.
      if self._path_exists(svn_trunk_path):
        self._delete_path(svn_trunk_path)
      self._copy_path(cvs_rev.svn_path, svn_trunk_path,
                      svn_commit.motivating_revnum)
    elif op == OP_DELETE:
      # The file went away on the branch, so remove it from trunk.
      self._delete_path(svn_trunk_path)
    else:
      raise self.SVNRepositoryMirrorUnexpectedOperationError(
          "Unknown CVSRevision operation '%s' in default branch sync."
          % cvs_rev.op)
def commit(self, svn_commit):
  """Apply the SVNCommit SVN_COMMIT to the repository mirror, making
  SVN_COMMIT.revnum the youngest revision.  The delegates are told
  about the new commit and about every change made in it.

  When SVN_COMMIT is revision 2, the repository is initialized
  first."""
  if svn_commit.revnum == 2:
    self._initialize_repository(svn_commit.get_date())

  self._start_commit(svn_commit)

  if svn_commit.symbolic_name:
    Log().write(LOG_VERBOSE, "Filling symbolic name:",
                _clean_symbolic_name(svn_commit.symbolic_name))
    self._fill_symbolic_name(svn_commit)
    return

  if svn_commit.motivating_revnum:
    Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
                % svn_commit.motivating_revnum)
    self._synchronize_default_branch(svn_commit)
    return

  # This is a commit of actual CVSRevisions.
  rev_count = len(svn_commit.cvs_revs)
  if rev_count > 1:
    plural = "s"
  else:
    plural = ""
  Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
              % (rev_count, plural))
  for cvs_rev in svn_commit.cvs_revs:
    # See the comment in CVSCommit._commit() for what this test is
    # all about.  self._path_exists() is somewhat expensive, so it is
    # only consulted when the two cheap tests before it succeed.
    skip_add_or_change = ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
                          and (cvs_rev.rev == "1.1.1.1")
                          and self._path_exists(cvs_rev.svn_path))
    if not skip_add_or_change:
      if cvs_rev.op == OP_ADD:
        self._add_path(cvs_rev)
      elif cvs_rev.op == OP_CHANGE:
        # Fix for Issue #74:
        #
        # Here's the scenario.  You have file FOO that is imported
        # on a non-trunk vendor branch.  So in r1.1 and r1.1.1.1,
        # the file exists.
        #
        # Moving forward in time, FOO is deleted on the default
        # branch (r1.1.1.2).  cvs2svn determines that this delete
        # also needs to happen on trunk, so FOO is deleted on
        # trunk.
        #
        # Along comes r1.2, whose op is OP_CHANGE (because r1.1 is
        # not 'dead', we assume it's a change).  However, since
        # our trunk file has been deleted, svnadmin blows up--you
        # can't change a file that doesn't exist!
        #
        # So we check the path: if it doesn't exist we do an add,
        # and if it does exist it's business as usual.
        if self._path_exists(cvs_rev.svn_path):
          self._change_path(cvs_rev)
        else:
          self._add_path(cvs_rev)

    if cvs_rev.op == OP_DELETE:
      self._delete_path(cvs_rev.svn_path, Ctx().prune)
def cleanup(self):
  """Drop the references to the revs and nodes databases.  This is
  the callback for the Cleanup.register in self.__init__."""
  self.revs_db = self.nodes_db = None
def add_delegate(self, delegate):
  """Append DELEGATE to self.delegates.

  Whenever SVNRepositoryMirror performs a repository action method,
  it calls the corresponding repository action method of every
  delegate that has been added.  Multiple delegates are called in
  the order in which they were added.  See
  SVNRepositoryMirrorDelegate for more information."""
  registered = self.delegates
  registered.append(delegate)
3690 def _invoke_delegates(self, method, *args):
3691 """Iterate through each of our delegates, in the order that they
3692 were added, and call the delegate's method named METHOD with the
3693 arguments in ARGS."""
3694 for delegate in self.delegates:
3695 getattr(delegate, method)(*args)
def finish(self):
  """End the last commit, call the finish method of every delegate,
  and release the databases."""
  # Flush the commit that is still in progress.
  self._end_commit()
  # Let every delegate wrap up its own output.
  self._invoke_delegates('finish')
  # Drop our references to the revs and nodes databases.
  self.cleanup()
class SVNCommitItem:
  """Pairs a CVSRevision object with the Subversion-related data
  (such as properties) that belongs to it."""

  def __init__(self, c_rev, svn_props_changed):
    """Store C_REV and work out the properties for this file.

    SVN_PROPS_CHANGED tells whether the svn: properties are known to
    have changed since the last revision.

    The properties are filled in by the SVNPropertySetters listed in
    Ctx().svn_property_setters; afterwards a couple of them are read
    back for our own purposes."""
    self.c_rev = c_rev

    # Whether the svn properties of this file changed, i.e., whether
    # they have to be written to the dumpfile.
    self.svn_props_changed = svn_props_changed

    # The properties of this item as a map { key : value }.  A VALUE
    # of None means that the property should not be set.
    self.svn_props = { }

    for setter in Ctx().svn_property_setters:
      setter.set_properties(self)

    # Remember whether the EOLs need to be filtered.  (self.svn_props
    # could be consulted directly instead, since it is initialized
    # for each revision.)
    eol_style = self.svn_props.get('svn:eol-style', None)
    self.needs_eol_filter = eol_style is not None

    keywords = self.svn_props.get('svn:keywords', None)
    self.has_keywords = keywords is not None
class SVNPropertySetter:
  """Abstract base for objects that know how to set properties on an
  SVNCommitItem."""

  def set_properties(self, s_item):
    """Set whatever properties can be determined for S_ITEM.
    Subclasses must override this method."""
    raise NotImplementedError()
class SVNRepositoryMirrorDelegate:
  """Abstract superclass of every delegate to SVNRepositoryMirror.
  A subclass must implement all of the methods below.

  Each method carries out, in the subclass's own way, the Subversion
  operation that its name implies.  Taking add_path as an example:
  the DumpfileDelegate would write out a "Node-add:" command to a
  Subversion dumpfile, the StdoutDelegate would merely print that the
  path is being added to the repository, and the RepositoryDelegate
  would actually cause the path to be added to the Subversion
  repository that it is creating.
  """

  def start_commit(self, svn_commit):
    """Do whatever is needed to start the SVNCommit SVN_COMMIT; see
    the subclass implementation for details."""
    raise NotImplementedError()

  def mkdir(self, path):
    """PATH is a string; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def add_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementation
    for details."""
    raise NotImplementedError()

  def change_path(self, s_item):
    """S_ITEM is an SVNCommitItem; see the subclass implementation
    for details."""
    raise NotImplementedError()

  def delete_path(self, path):
    """PATH is a string; see the subclass implementation for
    details."""
    raise NotImplementedError()

  def copy_path(self, src_path, dest_path, src_revnum):
    """SRC_PATH and DEST_PATH are strings and SRC_REVNUM is a
    subversion revision number (int); see the subclass
    implementation for details."""
    raise NotImplementedError()

  def finish(self):
    """Do whatever cleanup is needed once all revisions have been
    committed."""
    raise NotImplementedError()
3795 class DumpfileDelegate(SVNRepositoryMirrorDelegate):
3796 """Create a Subversion dumpfile."""
def __init__(self, dumpfile_path=None):
  """Set up a DumpfileDelegate that writes to the dumpfile
  DUMPFILE_PATH, or to Ctx().dumpfile if DUMPFILE_PATH is not given.

  The dumpfile is opened for binary writing and the dumpfile header
  is written to it right away."""
  if not dumpfile_path:
    dumpfile_path = Ctx().dumpfile
  self.dumpfile_path = dumpfile_path

  self.dumpfile = open(self.dumpfile_path, 'wb')
  self._write_dumpfile_header(self.dumpfile)
3809 def _write_dumpfile_header(self, dumpfile):
3810 # Initialize the dumpfile with the standard headers.
3812 # Since the CVS repository doesn't have a UUID, and the Subversion
3813 # repository will be created with one anyway, we don't specify a
3814 # UUID in the dumpflie
3815 dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
def _utf8_path(self, path):
  """Return a copy of PATH encoded in UTF-8.  Raise FatalError if
  any component of PATH cannot be converted."""
  # Each path component is converted on its own, because the
  # components may each use a different encoding.
  converted = []
  for piece in path.split('/'):
    try:
      # Log messages can be converted with the 'replace' strategy,
      # but no lossiness can be tolerated in a path.
      converted.append(to_utf8(piece, 'strict'))
    except UnicodeError:
      raise FatalError(
          "Unable to convert a path '%s' to internal encoding.\n"
          "Consider rerunning with (for example) '--encoding=latin1'."
          % (path,))
  return '/'.join(converted)
3834 def _string_for_prop(self, name, value):
3835 """Return a property in the form needed for the dumpfile."""
3837 return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)
def start_commit(self, svn_commit):
  """Write the revision record that opens SVN_COMMIT (an SVNCommit)
  and remember its revision number in self.revision."""
  self.revision = svn_commit.revnum

  # A new commit typically starts like this:
  #
  #   Revision-number: 1
  #   Prop-content-length: 129
  #   Content-length: 129
  #
  #   K 7
  #   svn:log
  #   V 27
  #   Log message for revision 1.
  #   K 10
  #   svn:author
  #   V 7
  #   jrandom
  #   K 8
  #   svn:date
  #   V 27
  #   2003-04-22T22:57:58.132837Z
  #   PROPS-END
  #
  # The length headers count everything -- not only the data but also
  # the lengths of the lengths, including the 'K ' or 'V ' prefixes.
  #
  # Both Prop-content-length and Content-length appear because the
  # former covers just the props while the latter covers everything;
  # that is the generic header form for any entity in a dumpfile.
  # Revisions only have props, so for them the two lengths are always
  # the same.

  # Format the revision properties, sorted by name; a property whose
  # value is None is left out.
  props = svn_commit.get_revprops()
  names = list(props.keys())
  names.sort()
  formatted = [self._string_for_prop(name, props[name])
               for name in names
               if props[name] is not None]
  prop_block = ''.join(formatted) + 'PROPS-END\n'
  prop_block_len = len(prop_block)

  # Emit the revision header, then the props, then a separator line.
  header = ('Revision-number: %d\n'
            'Prop-content-length: %d\n'
            'Content-length: %d\n'
            '\n'
            % (self.revision, prop_block_len, prop_block_len))
  self.dumpfile.write(header)
  self.dumpfile.write(prop_block)
  self.dumpfile.write('\n')
def mkdir(self, path):
  """Write the node record that creates the directory PATH."""
  node_record = ("Node-path: %s\n"
                 "Node-kind: dir\n"
                 "Node-action: add\n"
                 "\n"
                 "\n" % self._utf8_path(path))
  self.dumpfile.write(node_record)
def _add_or_change_path(self, s_item, op):
  """Write the node record that adds or changes the file of S_ITEM.

  OP is either the constant OP_ADD or OP_CHANGE; any other value
  raises FatalError.  FatalError is also raised if the command that
  supplies the file's contents exits with a non-zero status."""
  # Validate OP and translate it into a dumpfile node action.
  if op == OP_ADD:
    action = 'add'
  elif op == OP_CHANGE:
    action = 'change'
  else:
    raise FatalError("_add_or_change_path() called with bad op ('%s')"
                     % (op,))

  c_rev = s_item.c_rev

  # The property handling here relies on an undocumented but
  # consistent feature of the Subversion dumpfile-loading code: when
  # a node's properties aren't mentioned at all (no
  # "Prop-content-length:" header, no properties, no "PROPS-END\n"
  # line), the node's properties are left unchanged.
  #
  # Dumpfiles treat text content the same way.  If a change to *just*
  # the properties of a node with existing text is committed, the
  # dumpfile output for it has no "Text-content-length:" nor
  # "Text-content-md5:" header and no file text, yet the file's text
  # is not erased -- it simply remains unchanged.
  #
  # This works out well for cvs2svn, due to lucky coincidences:
  #
  # For files, the only properties we ever set are set in the first
  # revision; all other revisions (including on branches) inherit
  # from that.  Since file properties never change afterwards, there
  # is no need to remember the full set of properties of a file once
  # it has been set.
  #
  # For directories, the only property we set is "svn:ignore".  It
  # may change after the first revision, but always based on the
  # contents of a ".cvsignore" file -- CVS does the remembering for
  # us, so the previous value need not be preserved here either.

  # Build the (sorted-by-name) property block and its header, if any.
  if s_item.svn_props_changed:
    svn_props = s_item.svn_props
    prop_names = list(svn_props.keys())
    prop_names.sort()
    prop_pieces = []
    for pname in prop_names:
      pvalue = svn_props[pname]
      if pvalue is not None:
        prop_pieces.append(self._string_for_prop(pname, pvalue))
    prop_pieces.append('PROPS-END\n')
    prop_contents = ''.join(prop_pieces)
    props_header = 'Prop-content-length: %d\n' % len(prop_contents)
  else:
    prop_contents = ''
    props_header = ''

  # A .cvsignore file is also turned into a property of its
  # directory.
  dir_path, basename = os.path.split(c_rev.svn_path)
  if basename == ".cvsignore":
    ignore_value = '\n'.join(generate_ignores(c_rev))
    ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n'
                       % (len(ignore_value), ignore_value)) + 'PROPS-END\n'
    ignore_len = len(ignore_contents)

    # Headers first, then the props.
    self.dumpfile.write('Node-path: %s\n'
                        'Node-kind: dir\n'
                        'Node-action: change\n'
                        'Prop-content-length: %d\n'
                        'Content-length: %d\n'
                        '\n'
                        '%s'
                        % (self._utf8_path(dir_path), ignore_len,
                           ignore_len, ignore_contents))

  # If the file has keywords, CVS/RCS must be kept from expanding
  # them: they have to be unexpanded in the repository, or Subversion
  # will get confused.
  pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
      c_rev, suppress_keyword_substitution=s_item.has_keywords)

  self.dumpfile.write('Node-path: %s\n'
                      'Node-kind: file\n'
                      'Node-action: %s\n'
                      '%s'  # no property header if no props
                      'Text-content-length: '
                      % (self._utf8_path(c_rev.svn_path),
                         action, props_header))

  # Remember where the placeholder headers start, so that they can be
  # patched once the text length and checksum are known.
  length_pos = self.dumpfile.tell()

  self.dumpfile.write('0000000000000000\n'
                      'Text-content-md5: 00000000000000000000000000000000\n'
                      'Content-length: 0000000000000000\n'
                      '\n')

  if prop_contents:
    self.dumpfile.write(prop_contents)

  # Convert all EOLs to LFs on the way through if necessary.
  if s_item.needs_eol_filter:
    data_reader = LF_EOL_Filter(pipe.stdout)
  else:
    data_reader = pipe.stdout

  # Copy the revision's contents into the dumpfile, accumulating the
  # length and the checksum along the way.
  checksum = md5.new()
  length = 0
  buf = data_reader.read(PIPE_READ_SIZE)
  while buf != '':
    checksum.update(buf)
    length = length + len(buf)
    self.dumpfile.write(buf)
    buf = data_reader.read(PIPE_READ_SIZE)

  pipe.stdout.close()
  error_output = pipe.stderr.read()
  exit_status = pipe.wait()
  if exit_status:
    raise FatalError("The command '%s' failed with exit status: %s\n"
                     "and the following output:\n"
                     "%s" % (pipe_cmd, exit_status, error_output))

  # Patch up the placeholder headers.  The text length replaces the
  # 16 zeros, padded on the left with spaces.
  self.dumpfile.seek(length_pos, 0)
  self.dumpfile.write('%16d' % length)
  # 16 + 1 newline + len('Text-content-md5: ') == 35
  md5_pos = length_pos + 35
  self.dumpfile.seek(md5_pos, 0)
  self.dumpfile.write(checksum.hexdigest())
  # 35 + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
  content_length_pos = length_pos + 84
  self.dumpfile.seek(content_length_pos, 0)
  # The content length covers the property data, the text data, and
  # any metadata around/inside them.
  self.dumpfile.write('%16d' % (length + len(prop_contents)))
  # Return to the end of the stream.
  self.dumpfile.seek(0, 2)

  # Finish the record with two newlines: one to terminate contents
  # that weren't themselves newline-terminated, and one to provide a
  # blank line for readability.
  self.dumpfile.write('\n\n')
def add_path(self, s_item):
  """Write the node record for the addition described by S_ITEM, an
  SVNCommitItem."""
  self._add_or_change_path(s_item, op=OP_ADD)
def change_path(self, s_item):
  """Write the node record for the change described by S_ITEM, an
  SVNCommitItem."""
  self._add_or_change_path(s_item, op=OP_CHANGE)
def delete_path(self, path):
  """Write the node record that deletes PATH."""
  node_record = ('Node-path: %s\n'
                 'Node-action: delete\n'
                 '\n' % self._utf8_path(path))
  self.dumpfile.write(node_record)
def copy_path(self, src_path, dest_path, src_revnum):
  """Write the node record that copies SRC_PATH as of revision
  SRC_REVNUM to DEST_PATH."""
  # "Node-kind:" is left out for copies: the loader ignores it anyway
  # and uses the kind of the source instead.
  utf8_dest = self._utf8_path(dest_path)
  utf8_src = self._utf8_path(src_path)
  self.dumpfile.write('Node-path: %s\n'
                      'Node-action: add\n'
                      'Node-copyfrom-rev: %d\n'
                      'Node-copyfrom-path: /%s\n'
                      '\n'
                      % (utf8_dest, src_revnum, utf8_src))
def finish(self):
  """Close the dumpfile; called once all revisions have been
  committed."""
  dumpfile = self.dumpfile
  dumpfile.close()
4090 class RepositoryDelegate(DumpfileDelegate):
4091 """Creates a new Subversion Repository. DumpfileDelegate does all
4092 of the heavy lifting."""
def __init__(self):
  """Create the target repository if necessary and start the
  'svnadmin load' process that the revisions will be fed to.

  Unless Ctx().existing_svnrepos is set, a new repository is created
  at Ctx().target with 'svnadmin create', using the filesystem type
  Ctx().fs_type if one was given.  The temporary dumpfile that holds
  one revision at a time is then opened for reading and writing, and
  the dumpfile header is sent to the loader.

  Raise FatalError if the header cannot be written to the loader."""
  self.svnadmin = Ctx().svnadmin
  self.target = Ctx().target
  if not Ctx().existing_svnrepos:
    Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
    fs_type = Ctx().fs_type
    if not fs_type:
      # User didn't say what kind of repository (bdb, fsfs, etc).
      # We still pass --bdb-txn-nosync.  It's a no-op if the default
      # repository type doesn't support it, but we definitely want
      # it if BDB is the default.
      create_options = "--bdb-txn-nosync"
    elif fs_type == 'bdb':
      # User explicitly specified bdb.
      #
      # Since this is a BDB repository, pass --bdb-txn-nosync,
      # because it gives us a 4-5x speed boost (if cvs2svn is
      # creating the repository, cvs2svn should be the only program
      # accessing the svn repository (until cvs is done, at least)).
      # But we'll turn no-sync off in self.finish(), unless
      # instructed otherwise.
      create_options = "--fs-type=bdb --bdb-txn-nosync"
    else:
      # User specified something other than bdb.
      create_options = "--fs-type=%s" % fs_type
    run_command('%s create %s "%s"' % (self.svnadmin,
                                       create_options,
                                       self.target))

  # Since the output of this run is a repository, not a dumpfile,
  # the temporary dumpfiles we create should go in the tmpdir.
  DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))

  # This is 1 if a commit is in progress, otherwise None.
  self._commit_in_progress = None

  # DumpfileDelegate.__init__ left a write-only handle open on the
  # temporary dumpfile.  Close it explicitly before reopening the
  # file for reading and writing: otherwise the old handle is leaked
  # until it is garbage collected, and its buffered header could be
  # flushed into the file after the reopen has truncated it.
  self.dumpfile.close()
  self.dumpfile = open(self.dumpfile_path, 'w+b')

  self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
                                   self.target ], True)
  self.loader_pipe.stdout.close()
  try:
    self._write_dumpfile_header(self.loader_pipe.stdin)
  except IOError:
    raise FatalError("svnadmin failed with the following output while "
                     "loading the dumpfile:\n"
                     + self.loader_pipe.stderr.read())
4143 def _feed_pipe(self):
4144 """Feed the revision stored in the dumpfile to the svnadmin
4145 load pipe."""
4146 self.dumpfile.seek(0)
4147 while 1:
4148 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
4149 if not len(data):
4150 break
4151 try:
4152 self.loader_pipe.stdin.write(data)
4153 except IOError:
4154 raise FatalError("svnadmin failed with the following output "
4155 "while loading the dumpfile:\n"
4156 + self.loader_pipe.stderr.read())
4158 def start_commit(self, svn_commit):
4159 """Start a new commit. If a commit is already in progress, close
4160 the dumpfile, load it into the svn repository, open a new
4161 dumpfile, and write the header into it."""
4162 if self._commit_in_progress:
4163 self._feed_pipe()
4164 self.dumpfile.seek(0)
4165 self.dumpfile.truncate()
4166 DumpfileDelegate.start_commit(self, svn_commit)
4167 self._commit_in_progress = 1
4169 def finish(self):
4170 """Loads the last commit into the repository."""
4171 self._feed_pipe()
4172 self.dumpfile.close()
4173 self.loader_pipe.stdin.close()
4174 error_output = self.loader_pipe.stderr.read()
4175 exit_status = self.loader_pipe.wait()
4176 if exit_status:
4177 raise FatalError('svnadmin load failed with exit status: %s\n'
4178 'and the following output:\n'
4179 '%s' % (exit_status, error_output,))
4180 os.remove(self.dumpfile_path)
4182 # If this is a BDB repository, and we created the repository, and
4183 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
4184 # line in the DB_CONFIG file, because txn syncing should be on by
4185 # default in BDB repositories.
4187 # We determine if this is a BDB repository by looking for the
4188 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
4189 # checking Ctx().fs_type. That way this code will Do The Right
4190 # Thing in all circumstances.
4191 db_config = os.path.join(self.target, "db/DB_CONFIG")
4192 if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
4193 and os.path.exists(db_config)):
4194 no_sync = 'set_flags DB_TXN_NOSYNC\n'
4196 contents = open(db_config, 'r').readlines()
4197 index = contents.index(no_sync)
4198 contents[index] = '# ' + no_sync
4199 contents = open(db_config, 'w').writelines(contents)
class StdoutDelegate(SVNRepositoryMirrorDelegate):
  """A delegate that touches nothing on disk: it only reports, through
  the Log, each action the SVNRepositoryMirror announces. The
  messages say that something is being done even though this class
  itself does nothing but print them."""

  def __init__(self, total_revs):
    # Total number of Subversion revisions, shown as "rN / TOTAL".
    self.total_revs = total_revs

  def start_commit(self, svn_commit):
    """Report the Subversion revision number of the commit that is
    starting."""
    log = Log()
    log.write(LOG_VERBOSE, "=" * 60)
    progress = "Starting Subversion r%d / %d" % (svn_commit.revnum,
                                                 self.total_revs)
    log.write(LOG_NORMAL, progress)

  def mkdir(self, path):
    """Report the creation of directory PATH."""
    Log().write(LOG_VERBOSE, " New Directory", path)

  def add_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being added."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Adding", svn_path)

  def change_path(self, s_item):
    """Report that s_item.c_rev.svn_path is being changed."""
    svn_path = s_item.c_rev.svn_path
    Log().write(LOG_VERBOSE, " Changing", svn_path)

  def delete_path(self, path):
    """Report the deletion of PATH."""
    Log().write(LOG_VERBOSE, " Deleting", path)

  def copy_path(self, src_path, dest_path, src_revnum):
    """Report the copy of revision SRC_REVNUM of SRC_PATH to
    DEST_PATH."""
    log = Log()
    log.write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
    log.write(LOG_VERBOSE, " to", dest_path)

  def finish(self):
    """Report that the repository has been created."""
    log = Log()
    log.write(LOG_VERBOSE, "Finished creating Subversion repository.")
    log.write(LOG_QUIET, "Done.")
def pass1():
  """Pass 1: walk the CVS repository and parse every ',v' file.

  Each RCS file found under the project's CVS repository path is fed
  through cvs2svn_rcsparse into a CollectData sink. Problems with
  individual files are collected and reported together at the end.

  Raises FatalException if any file was invalid, if a file exists both
  in and out of an Attic directory, or if no ',v' file was found."""
  OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
  Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
  cd = CollectData()

  def visit_file(baton, dirname, files):
    """os.path.walk() callback: parse each ',v' file in DIRNAME."""
    cd = baton
    for fname in files:
      if fname[-2:] != ',v':
        continue
      cd.found_valid_file = 1
      pathname = os.path.join(dirname, fname)
      if dirname[-6:] == OS_SEP_PLUS_ATTIC:
        # drop the 'Attic' portion from the pathname for the canonical name.
        cd.set_fname(os.path.join(dirname[:-6], fname), pathname)
      else:
        # If this file also exists in the attic, it's a fatal error
        attic_path = os.path.join(dirname, 'Attic', fname)
        if os.path.exists(attic_path):
          err = "%s: A CVS repository cannot contain both %s and %s" \
                % (error_prefix, pathname, attic_path)
          sys.stderr.write(err + '\n')
          cd.fatal_errors.append(err)
        cd.set_fname(pathname, pathname)
      Log().write(LOG_NORMAL, pathname)
      try:
        rcs_file = open(pathname, 'rb')
        try:
          cvs2svn_rcsparse.parse(rcs_file, cd)
        finally:
          # Close explicitly: a repository may hold a very large number
          # of files and we must not rely on garbage collection to
          # release the handles.
          rcs_file.close()
      except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
              RuntimeError):
        err = "%s: '%s' is not a valid ,v file" \
              % (error_prefix, pathname)
        sys.stderr.write(err + '\n')
        cd.fatal_errors.append(err)
      except:
        # Anything else is unexpected: say which file triggered it and
        # let the exception propagate.
        Log().write(LOG_WARN,
                    "Exception occurred while parsing %s" % pathname)
        raise

  os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
  Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')

  cd.write_symbol_db()

  if len(cd.fatal_errors) > 0:
    raise FatalException("Pass 1 complete.\n"
                         + "=" * 75 + "\n"
                         + "Error summary:\n"
                         + "\n".join(cd.fatal_errors) + "\n"
                         + "Exited due to fatal error(s).\n")

  if cd.found_valid_file is None:
    raise FatalException(
        "\n"
        "No RCS files found in your CVS Repository!\n"
        "Are you absolutely certain you are pointing cvs2svn\n"
        "at a CVS repository?\n"
        "\n"
        "Exited due to fatal error(s).\n")

  StatsKeeper().reset_c_rev_info()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass2():
  """Pass 2: clean up the revision information.

  Validates the symbol configuration (blocked exclusions, forced tags
  that have commits, tag/branch mismatches) and exits with status 1 if
  any problem is found. Otherwise it creates the tags database and
  rewrites the revisions file into the "clean revs" file, dropping
  revisions on excluded branches, filtering and converting symbols,
  and resynchronizing timestamps according to the resync file."""

  symbol_db = SymbolDatabase()
  symbol_db.read()

  # Convert the list of regexps to a list of strings
  excludes = symbol_db.find_excluded_symbols(Ctx().excludes)

  error_detected = 0

  Log().write(LOG_QUIET, "Checking for blocked exclusions...")
  blocked_excludes = symbol_db.find_blocked_excludes(excludes)
  if blocked_excludes:
    for branch, blockers in blocked_excludes.items():
      sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
                       "excluded because the following symbols depend "
                       "on it:\n" % (branch))
      for blocker in blockers:
        sys.stderr.write(" '%s'\n" % (blocker))
      sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for forced tags with commits...")
  invalid_forced_tags = [ ]
  for forced_tag in Ctx().forced_tags:
    if excludes.has_key(forced_tag):
      continue
    if symbol_db.branch_has_commit(forced_tag):
      invalid_forced_tags.append(forced_tag)
  if invalid_forced_tags:
    sys.stderr.write(error_prefix + ": The following branches cannot be "
                     "forced to be tags because they have commits:\n")
    for tag in invalid_forced_tags:
      sys.stderr.write(" '%s'\n" % (tag))
    sys.stderr.write("\n")
    error_detected = 1

  Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
  mismatches = symbol_db.find_mismatches(excludes)
  def is_not_forced(mismatch):
    name = mismatch[0]
    return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
  mismatches = filter(is_not_forced, mismatches)
  if mismatches:
    sys.stderr.write(error_prefix + ": The following symbols are tags "
                     "in some files and branches in others.\nUse "
                     "--force-tag, --force-branch and/or --exclude to "
                     "resolve the symbols.\n")
    for name, tag_count, branch_count, commit_count in mismatches:
      sys.stderr.write(" '%s' is a tag in %d files, a branch in "
                       "%d files and has commits in %d files.\n"
                       % (name, tag_count, branch_count, commit_count))
    error_detected = 1

  # Bail out now if we found errors
  if error_detected:
    sys.exit(1)

  # Create the tags database
  tags_db = TagsDatabase(DB_OPEN_NEW)
  for tag in symbol_db.tags:
    if tag not in Ctx().forced_branches:
      tags_db[tag] = None
  for tag in Ctx().forced_tags:
    tags_db[tag] = None

  Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")

  # We may have recorded some changes in revisions' timestamp. We need to
  # scan for any other files which may have had the same log message and
  # occurred at "the same time" and change their timestamps, too.

  # read the resync data file
  def read_resync(fname):
    "Read the .resync file into memory."

    ### note that we assume that we can hold the entire resync file in
    ### memory. really large repositories with whacky timestamps could
    ### bust this assumption. should that ever happen, then it is possible
    ### to split the resync file into pieces and make multiple passes,
    ### using each piece.

    #
    # A digest maps to a sequence of lists which specify a lower and upper
    # time bound for matching up the commit. We keep a sequence of these
    # because a number of checkins with the same log message (e.g. an empty
    # log message) could need to be remapped. We also make them a list
    # because we will dynamically expand the lower/upper bound as we find
    # commits that fall into a particular msg and time range.
    #
    # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
    #
    resync = { }

    for line in fileinput.FileInput(fname):
      t1 = int(line[:8], 16)
      digest = line[9:DIGEST_END_IDX]
      t2 = int(line[DIGEST_END_IDX+1:], 16)
      t1_l = t1 - COMMIT_THRESHOLD/2
      t1_u = t1 + COMMIT_THRESHOLD/2
      resync.setdefault(digest, []).append([t1_l, t1_u, t2])

    # For each digest, sort the resync items in it in increasing order,
    # based on the lower time bound.
    for val in resync.values():
      val.sort()

    return resync

  resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))

  output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
  Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)

  tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
  Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)

  # Predicate used to drop references to excluded tags and branches.
  # It does not depend on the revision, so define it once rather than
  # once per input line.
  def not_excluded(symbol, excludes=excludes):
    return not excludes.has_key(symbol)

  # process the revisions file, looking for items to clean up
  for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
    c_rev = CVSRevision(Ctx(), line[:-1])

    # Skip this entire revision if it's on an excluded branch
    if excludes.has_key(c_rev.branch_name):
      continue

    # Pick up any timestamp that was already tweaked for the
    # neighbouring revisions of this file.
    new_prev_ts = None
    if c_rev.prev_rev is not None:
      new_prev_ts = tweaked_timestamps_db.get(
        c_rev.unique_key(c_rev.prev_rev), None)
    if new_prev_ts:
      c_rev.prev_timestamp = new_prev_ts

    new_next_ts = None
    if c_rev.next_rev is not None:
      new_next_ts = tweaked_timestamps_db.get(
        c_rev.unique_key(c_rev.next_rev), None)
    if new_next_ts:
      c_rev.next_timestamp = new_next_ts

    # Remove all references to excluded tags and branches
    c_rev.branches = filter(not_excluded, c_rev.branches)
    c_rev.tags = filter(not_excluded, c_rev.tags)

    # Convert all branches that are forced to be tags
    for forced_tag in Ctx().forced_tags:
      if forced_tag in c_rev.branches:
        c_rev.branches.remove(forced_tag)
        c_rev.tags.append(forced_tag)

    # Convert all tags that are forced to be branches
    for forced_branch in Ctx().forced_branches:
      if forced_branch in c_rev.tags:
        c_rev.tags.remove(forced_branch)
        c_rev.branches.append(forced_branch)

    # see if this is "near" any of the resync records we
    # have recorded for this digest [of the log message].
    for record in resync.get(c_rev.digest, []):
      if record[2] == c_rev.timestamp:
        # This means that either c_rev is the same revision that
        # caused the resync record to exist, or c_rev is a different
        # CVS revision that happens to have the same timestamp. In
        # either case, we don't have to do anything, so we...
        continue

      if record[0] <= c_rev.timestamp <= record[1]:
        # bingo! We probably want to remap the time on this c_rev,
        # unless the remapping would be useless because the new time
        # would fall outside the COMMIT_THRESHOLD window for this
        # commit group.
        new_timestamp = record[2]
        # If the new timestamp is earlier than that of our previous revision
        if new_timestamp < c_rev.prev_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is before previous the time of"
                  + " revision %s (%s):")
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.prev_rev, c_rev.prev_timestamp))
          # If resyncing our rev to c_rev.prev_timestamp + 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync back to c_rev.prev_timestamp
          # + 1...
          if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.prev_timestamp + 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # If the new timestamp is later than that of our next revision
        elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
          desc = ("%s: Attempt to set timestamp of revision %s on file %s"
                  + " to time %s, which is after time of next"
                  + " revision %s (%s):")
          # Report the *next* revision here: the message pairs it with
          # next_timestamp, so naming prev_rev would mislead the reader.
          Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
                                        c_rev.cvs_path, new_timestamp,
                                        c_rev.next_rev, c_rev.next_timestamp))
          # If resyncing our rev to c_rev.next_timestamp - 1 will place
          # the timestamp of c_rev within COMMIT_THRESHOLD of the
          # attempted resync time, then sync forward to c_rev.next_timestamp
          # - 1...
          if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
            new_timestamp = c_rev.next_timestamp - 1
            Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
                                                          new_timestamp))
          else:
            Log().write(LOG_WARN, "%s: Timestamp left untouched" %
                        warning_prefix)
            continue

        # Fix for Issue #71: Avoid resyncing two consecutive revisions
        # to the same timestamp.
        elif (new_timestamp == c_rev.prev_timestamp
              or new_timestamp == c_rev.next_timestamp):
          continue

        # adjust the time range. we want the COMMIT_THRESHOLD from the
        # bounds of the earlier/latest commit in this group.
        record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
        record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)

        msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
              % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
                 new_timestamp - c_rev.timestamp)
        Log().write(LOG_VERBOSE, msg)

        c_rev.timestamp = new_timestamp
        tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp

        # stop looking for hits
        break

    output.write(str(c_rev) + "\n")

  # Flush and release the clean-revs file explicitly; the next pass
  # reads it back from disk.
  output.close()
  Log().write(LOG_QUIET, "Done")
def pass3():
  """Pass 3: sort the cleaned-up revisions file into the sorted
  revisions file and register the result for later cleanup."""
  Log().write(LOG_QUIET, "Sorting CVS revisions...")
  clean_revs_path = temp(DATAFILE + CLEAN_REVS_SUFFIX)
  sorted_revs_path = temp(DATAFILE + SORTED_REVS_SUFFIX)
  sort_file(clean_revs_path, sorted_revs_path)
  Cleanup().register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
  Log().write(LOG_QUIET, "Done")
def pass4():
  """Pass 4: load the sorted revisions into a database.

  Every line of the sorted revisions file becomes a CVSRevision that is
  logged in the CVSRevisionDatabase. Unless this is a trunk-only
  conversion, the LastSymbolicNameDatabase (the last CVSRevision that
  is a source for each tag or branch) is built as well."""
  Log().write(LOG_QUIET,
              "Copying CVS revision data from flat file to database...")
  cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
  if Ctx().trunk_only:
    # A do-nothing stand-in, so that the loop below need not test
    # Ctx().trunk_only for every revision.
    class DummyLSNDB:
      def noop(*args): pass
      log_revision = noop
      create_database = noop
    last_sym_name_db = DummyLSNDB()
  else:
    Log().write(LOG_QUIET,
                "Finding last CVS revisions for all symbolic names...")
    last_sym_name_db = LastSymbolicNameDatabase(DB_OPEN_NEW)

  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    # Strip the trailing newline before parsing the record.
    c_rev = CVSRevision(Ctx(), rev_line[:-1])
    cvs_revs_db.log_revision(c_rev)
    last_sym_name_db.log_revision(c_rev)
    StatsKeeper().record_c_rev(c_rev)

  last_sym_name_db.create_database()
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass5():
  """Pass 5: generate the SVNCommit <-> CVSRevision mapping databases.

  CVSCommit._commit also calls SymbolingsLogger to register
  CVSRevisions that represent an opening or closing for a path on a
  branch or tag. See SymbolingsLogger for more details."""
  Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")

  aggregator = CVSRevisionAggregator()
  sorted_revs = fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX))
  for rev_line in sorted_revs:
    c_rev = CVSRevision(Ctx(), rev_line[:-1])
    # In a trunk-only conversion, revisions on any branch are ignored.
    if Ctx().trunk_only and c_rev.branch_name is not None:
      continue
    aggregator.process_revision(c_rev)
  aggregator.flush()

  stats = StatsKeeper()
  stats.set_svn_rev_count(SVNCommit.revnum - 1)
  StatsKeeper().archive()
  Log().write(LOG_QUIET, "Done")
def pass6():
  """Pass 6: sort the symbol openings/closings file (skipped for
  trunk-only conversions, which have no symbols to fill)."""
  Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")

  if not Ctx().trunk_only:
    unsorted_path = temp(SYMBOL_OPENINGS_CLOSINGS)
    sorted_path = temp(SYMBOL_OPENINGS_CLOSINGS_SORTED)
    sort_file(unsorted_path, sorted_path)
    Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
  Log().write(LOG_QUIET, "Done")
def pass7():
  """Pass 7: record, for every symbolic name, the file offset at which
  its entries start in the sorted openings/closings file. Nothing is
  generated for trunk-only conversions."""
  Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")

  def generate_offsets_for_symbolings():
    """This function iterates through all the lines in
    SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
    SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
    where SYMBOLIC_NAME is first encountered. This will allow us to
    seek to the various offsets in the file and sequentially read only
    the openings and closings that we need."""

    ###PERF This is a fine example of a db that can be in-memory and
    #just flushed to disk when we're done. Later, it can just be sucked
    #back into memory.
    offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
    Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)

    # Named so as not to shadow the 'file' builtin; closed explicitly
    # once every line has been scanned.
    symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
    try:
      old_sym = ""
      while 1:
        # tell() must be taken before readline() so that the offset
        # points at the start of the line about to be read.
        fpos = symbolings.tell()
        line = symbolings.readline()
        if not line:
          break
        # Unpacking all three fields also validates the line's shape.
        sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
        if sym != old_sym:
          Log().write(LOG_VERBOSE, " ", sym)
          old_sym = sym
          offsets_db[sym] = fpos
    finally:
      symbolings.close()

  if not Ctx().trunk_only:
    generate_offsets_for_symbolings()
  Log().write(LOG_QUIET, "Done.")
def pass8():
  """Pass 8: replay every stored SVNCommit through the
  SVNRepositoryMirror, whose delegates produce the repository or
  dumpfile (unless this is a dry run) and the progress output."""
  repos = SVNRepositoryMirror()
  persistence_manager = PersistenceManager(DB_OPEN_READ)

  dry_run = Ctx().dry_run
  if Ctx().target:
    if not dry_run:
      repos.add_delegate(RepositoryDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Repository.")
  else:
    if not dry_run:
      repos.add_delegate(DumpfileDelegate())
    Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")

  repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))

  # Repository initialization is revision 1, so real commits start at 2.
  svncounter = 2
  svn_commit = persistence_manager.get_svn_commit(svncounter)
  while svn_commit:
    repos.commit(svn_commit)
    svncounter += 1
    svn_commit = persistence_manager.get_svn_commit(svncounter)

  repos.finish()
# The conversion passes, in execution order. Pass number N (1-based)
# is _passes[N - 1]; convert() indexes this list, and usage() reports
# its length as the total number of passes.
_passes = [
  pass1,
  pass2,
  pass3,
  pass4,
  pass5,
  pass6,
  pass7,
  pass8,
  ]
class Ctx:
  """Session state for this run of cvs2svn. For example, run-time
  options are stored here. This class is a Borg, see
  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531.

  Every instance shares a single attribute dictionary, so Ctx() can be
  called anywhere to read or modify the same settings."""

  # The one dictionary used as __dict__ by every instance.
  __shared_state = { }
  def __init__(self):
    self.__dict__ = self.__shared_state
    # A non-empty shared dictionary means an earlier instance has
    # already installed the defaults; do not overwrite them.
    if self.__dict__:
      return
    # Else, initialize to defaults.
    self.target = None
    self.dumpfile = DUMPFILE
    self.tmpdir = '.'
    self.verbose = 0
    self.quiet = 0
    self.prune = 1
    self.existing_svnrepos = 0
    self.dump_only = 0
    self.dry_run = 0
    self.trunk_only = 0
    self.trunk_base = "trunk"
    self.tags_base = "tags"
    self.branches_base = "branches"
    self.encoding = ["ascii"]
    self.mime_types_file = None
    self.auto_props_file = None
    self.auto_props_ignore_case = False
    self.no_default_eol = 0
    self.eol_from_mime_type = 0
    self.keywords_off = 0
    self.use_cvs = None
    self.svnadmin = "svnadmin"
    self.username = None
    self.print_help = 0
    self.skip_cleanup = 0
    self.bdb_txn_nosync = 0
    self.fs_type = None
    self.forced_branches = []
    self.forced_tags = []
    self.excludes = []
    self.symbol_transforms = []
    self.svn_property_setters = []
class CVSRevisionNumberSetter(SVNPropertySetter):
  """Record the CVS revision number in the cvs2svn:cvs-rev property."""

  def set_properties(self, s_item):
    """Store s_item's CVS revision number and flag its properties as
    changed."""
    props = s_item.svn_props
    props['cvs2svn:cvs-rev'] = s_item.c_rev.rev
    s_item.svn_props_changed = True
class MimeMapper(SVNPropertySetter):
  """A class that provides mappings from file names to MIME types."""

  def __init__(self, mime_types_file):
    """Load the extension -> MIME type table from MIME_TYPES_FILE.

    A warning is written to stderr when an extension is mapped to two
    different types; the later mapping wins."""
    self.mappings = { }

    # Use a private FileInput instance, like the rest of this file,
    # rather than fileinput.input(), which shares module-global state.
    for line in fileinput.FileInput(mime_types_file):
      if line.startswith("#"):
        continue

      # format of a line is something like
      # text/plain c h cpp
      extensions = line.split()
      if len(extensions) < 2:
        continue
      mime_type = extensions.pop(0)
      for ext in extensions:
        if self.mappings.has_key(ext) and self.mappings[ext] != mime_type:
          sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
                           % (warning_prefix, ext, self.mappings[ext],
                              mime_type))
        self.mappings[ext] = mime_type

  def set_properties(self, s_item):
    """Set svn:mime-type on S_ITEM if its file name has a known
    mapping."""
    basename, extension = os.path.splitext(
      os.path.basename(s_item.c_rev.cvs_path)
      )

    # Extension includes the dot, so strip it (will leave extension
    # empty if filename ends with a dot, which is ok):
    extension = extension[1:]

    # If there is no extension (or the file ends with a period), use
    # the base name for mapping. This allows us to set mappings for
    # files such as README or Makefile:
    if not extension:
      extension = basename

    mime_type = self.mappings.get(extension, None)
    if mime_type is not None:
      s_item.svn_props['svn:mime-type'] = mime_type
class AutoPropsPropertySetter(SVNPropertySetter):
  """Set arbitrary svn properties based on an auto-props configuration.

  This class supports both case-sensitive and case-insensitive
  pattern matching. The 'correct' behavior is not quite clear,
  because subversion itself does an inconsistent job of handling case
  in auto-props patterns; see
  http://subversion.tigris.org/issues/show_bug.cgi?id=2036."""

  class Pattern:
    """Describes the properties to be set for files matching a pattern."""
    def __init__(self, pattern, propdict):
      # A glob-like pattern:
      self.pattern = pattern
      # A dictionary of properties that should be set:
      self.propdict = propdict

    def match(self, basename):
      """Does the file with the specified basename match pattern?"""
      return fnmatch.fnmatch(basename, self.pattern)

  def __init__(self, configfilename, ignore_case):
    """Read the auto-props section(s) of CONFIGFILENAME.

    If IGNORE_CASE is true, section names, patterns and file names are
    all lower-cased before being compared."""
    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      # Stop ConfigParser from transforming option names (here: the
      # patterns) so that their case survives.
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    config_file = open(configfilename)
    try:
      config.readfp(config_file)
    finally:
      config_file.close()
    self.patterns = []
    for section in config.sections():
      if self.transform_case(section) == 'auto-props':
        for (pattern, value) in config.items(section):
          if value:
            self._add_pattern(pattern, value)

  def squash_case(self, s):
    """Case transform for case-insensitive matching."""
    return s.lower()

  def preserve_case(self, s):
    """Case transform for case-sensitive matching (identity)."""
    return s

  def _add_pattern(self, pattern, value):
    """Register PATTERN with the properties encoded in VALUE, a
    ';'-separated list of 'name=value' or bare 'name' items (a bare
    name is stored with the value None)."""
    props = value.split(';')
    propdict = {}
    for prop in props:
      s = prop.split('=', 1)
      if len(s) == 1:
        propdict[s[0]] = None
      else:
        propdict[s[0]] = s[1]
    self.patterns.append(
      self.Pattern(self.transform_case(pattern), propdict))

  def get_propdict(self, path):
    """Return the properties that apply to PATH, merged over every
    matching pattern. When two patterns disagree about a property, the
    first value is kept and a warning is logged."""
    basename = self.transform_case(os.path.basename(path))
    propdict = {}
    for pattern in self.patterns:
      if pattern.match(basename):
        for (key,value) in pattern.propdict.items():
          if propdict.has_key(key):
            if propdict[key] != value:
              Log().write(
                LOG_WARN,
                "Contradictory values set for property '%s' for file %s."
                % (key, path,))
          else:
            propdict[key] = value

    return propdict

  def set_properties(self, s_item):
    """Apply the auto-props for S_ITEM's path. Properties that are
    already set are left alone; a warning is logged if the existing
    value differs from the auto-props value."""
    propdict = self.get_propdict(s_item.c_rev.cvs_path)
    for (k,v) in propdict.items():
      if s_item.svn_props.has_key(k):
        if s_item.svn_props[k] != v:
          Log().write(
            LOG_WARN,
            "Property '%s' already set for file %s."
            % (k, s_item.c_rev.cvs_path,))
      else:
        s_item.svn_props[k] = v
class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
  """Give binary files a fallback mime type when none has been set."""

  def set_properties(self, s_item):
    """Set svn:mime-type to application/octet-stream for a binary
    (mode 'b') file that has no mime type yet."""
    if s_item.svn_props.has_key('svn:mime-type'):
      return
    if s_item.c_rev.mode == 'b':
      s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
class BinaryFileEOLStyleSetter(SVNPropertySetter):
  """Force the eol-style of binary files to None."""

  def set_properties(self, s_item):
    """Set svn:eol-style to None when the file's mode is 'b'."""
    is_binary = (s_item.c_rev.mode == 'b')
    if is_binary:
      s_item.svn_props['svn:eol-style'] = None
class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
  """Derive the eol-style from the mime type when it is still unknown.

  The mime-type property must already have been set for this setter
  to have any effect. See also issue #39."""

  def set_properties(self, s_item):
    """Set svn:eol-style to 'native' for text/* mime types and to
    None for any other known mime type."""
    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    mime_type = props.get('svn:mime-type', None)
    if mime_type is None:
      return
    if mime_type.startswith("text/"):
      props['svn:eol-style'] = 'native'
    else:
      props['svn:eol-style'] = None
class DefaultEOLStyleSetter(SVNPropertySetter):
  """Apply a default eol-style to files that do not have one yet."""

  def __init__(self, value):
    """Remember VALUE as the default eol-style."""
    self.value = value

  def set_properties(self, s_item):
    """Set svn:eol-style to the default unless it is already present."""
    props = s_item.svn_props
    if props.has_key('svn:eol-style'):
      return
    props['svn:eol-style'] = self.value
class KeywordsPropertySetter(SVNPropertySetter):
  """Set svn:keywords according to the file's CVS mode. See
  issue #2."""

  def __init__(self, value):
    """Remember VALUE, the svn:keywords value to use whenever the
    property is set."""
    self.value = value

  def set_properties(self, s_item):
    """Set svn:keywords, unless already present, for files whose mode
    is None, 'kv' or 'kvl'."""
    props = s_item.svn_props
    if props.has_key('svn:keywords'):
      return
    if s_item.c_rev.mode in [None, 'kv', 'kvl']:
      props['svn:keywords'] = self.value
class ExecutablePropertySetter(SVNPropertySetter):
  """Mirror c_rev.file_executable into the svn:executable property."""

  def set_properties(self, s_item):
    """Set svn:executable to '*' when the CVS file is executable."""
    if not s_item.c_rev.file_executable:
      return
    s_item.svn_props['svn:executable'] = '*'
def convert(start_pass, end_pass):
  """Run passes START_PASS through END_PASS (1-based, inclusive) of the
  CVS to SVN conversion, timing each pass and cleaning up after it."""

  cleanup = Cleanup()
  # times[k] is the time at which pass k finished (pass k + 1 started).
  times = [ None ] * (end_pass + 1)
  times[start_pass - 1] = time.time()
  StatsKeeper().set_start_time(time.time())
  for index in range(start_pass - 1, end_pass):
    pass_number = index + 1
    run_pass = _passes[index]
    Log().write(LOG_QUIET, '----- pass %d -----' % (pass_number))
    run_pass()
    times[pass_number] = time.time()
    elapsed = times[pass_number] - times[index]
    StatsKeeper().log_duration_for_pass(elapsed, pass_number)
    # Throw away the Ctx() items that must not outlive the pass: those
    # whose name has exactly one leading underscore (which excludes
    # dunder names and the name-mangled "_Ctx__" class state).
    for attr in dir(Ctx()):
      if len(attr) <= 2:
        continue
      if attr[0] != '_' or attr[1] == '_':
        continue
      if attr[:6] == "_Ctx__":
        continue
      delattr(Ctx(), attr)
    if not Ctx().skip_cleanup:
      cleanup.cleanup(_passes[index])
  StatsKeeper().set_end_time(time.time())

  Log().write(LOG_QUIET, StatsKeeper())
  if end_pass < 4:
    Log().write(LOG_QUIET,
                '(These are unaltered CVS repository stats and do not\n'
                ' reflect tags or branches excluded via --exclude)\n')
  Log().write(LOG_NORMAL, StatsKeeper().timings())
def normalize_ttb_path(opt, path):
  """Return PATH normalized for use with --trunk, --tags, or --branches.

  The path is split on '/' and its components are rejoined with
  _path_join, the intent being to strip leading, trailing, and
  duplicated '/'.

  OPT is the option name, used only in the error message. Raises
  FatalError if the normalized path is empty."""

  components = path.split('/')
  norm_path = _path_join(*components)
  if norm_path:
    return norm_path
  raise FatalError("cannot pass an empty path to %s." % (opt,))
def verify_paths_disjoint(*paths):
  """Check that no two of the given paths overlap.

  Raises FatalError if one path is nested in another (in the sense
  that 'a/b/c/d' is nested in 'a/b') or if two paths are identical."""

  split_paths = []
  for path in paths:
    split_paths.append((path.split('/'), path))
  # A list that is a prefix of another sorts before it, so after
  # sorting, any nesting shows up in at least one adjacent pair, in
  # the order [nest, nestling].
  split_paths.sort()
  previous = None
  for current in split_paths:
    if previous is not None:
      outer_parts, outer_path = previous
      inner_parts, inner_path = current
      depth = len(outer_parts)
      if depth <= len(inner_parts) and inner_parts[:depth] == outer_parts:
        raise FatalError("paths %s and %s are not disjoint."
                         % (outer_path, inner_path,))
    previous = current
def usage():
  """Print the command-line usage summary to standard output.

  The summary mentions the name this script was invoked under, the
  number of conversion passes, the current trunk/branches/tags base
  paths held by Ctx(), and the default svn:keywords value."""

  ctx = Ctx()
  lines = [
    'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path'
    % os.path.basename(sys.argv[0]),
    ' --help, -h print this usage message and exit with success',
    ' --version print the version number',
    ' -q quiet',
    ' -v verbose',
    ' -s PATH path for SVN repos',
    ' -p START[:END] start at pass START, end at pass END of %d'
    % len(_passes),
    ' If only START is given, run only pass START',
    ' (implicitly enables --skip-cleanup)',
    ' --existing-svnrepos load into existing SVN repository',
    ' --dumpfile=PATH name of intermediate svn dumpfile',
    ' --tmpdir=PATH directory to use for tmp data (default to cwd)',
    " --profile profile with 'hotshot' (into file cvs2svn.hotshot)",
    ' --dry-run do not create a repository or a dumpfile;',
    ' just print what would happen.',
    " --use-cvs use CVS instead of RCS 'co' to extract data",
    ' (only use this if having problems with RCS)',
    ' --svnadmin=PATH path to the svnadmin program',
    ' --trunk-only convert only trunk commits, not tags nor branches',
    ' --trunk=PATH path for trunk (default: %s)'
    % ctx.trunk_base,
    ' --branches=PATH path for branches (default: %s)'
    % ctx.branches_base,
    ' --tags=PATH path for tags (default: %s)'
    % ctx.tags_base,
    " --no-prune don't prune empty directories",
    " --dump-only just produce a dumpfile, don't commit to a repos",
    ' --encoding=ENC encoding of paths and log messages in CVS repos',
    ' Multiple of these options may be passed, where they',
    ' will be treated as an ordered list of encodings to',
    ' attempt (with "ascii" as a hardcoded last resort)',
    ' --force-branch=NAME force NAME to be a branch',
    ' --force-tag=NAME force NAME to be a tag',
    ' --exclude=REGEXP exclude branches and tags matching REGEXP',
    ' --symbol-transform=P:S transform symbol names from P to S where P and S',
    ' use Python regexp and reference syntax respectively',
    ' --username=NAME username for cvs2svn-synthesized commits',
    ' --skip-cleanup prevent the deletion of intermediate files',
    ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"',
    ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"',
    ' --cvs-revnums record CVS revision numbers as file properties',
    ' --auto-props=FILE set file properties from the auto-props section',
    ' of a file in svn config format',
    ' --auto-props-ignore-case Ignore case when matching auto-props patterns',
    ' --mime-types=FILE specify an apache-style mime.types file for',
    ' setting svn:mime-type',
    ' --eol-from-mime-type set svn:eol-style from mime type if known',
    " --no-default-eol don't set svn:eol-style to 'native' for",
    ' non-binary files with undetermined mime types',
    " --keywords-off don't set svn:keywords on any files (by default,",
    ' cvs2svn sets svn:keywords on non-binary files to',
    ' "%s")' % SVN_KEYWORDS_VALUE,
    ]

  # One print per line, in order.  The single-argument parenthesized
  # form is an ordinary print statement under Python 2.
  for line in lines:
    print(line)


5060 def main():
5061 # Convenience var, so we don't have to keep instantiating this Borg.
5062 ctx = Ctx()
5064 profiling = None
5065 start_pass = 1
5066 end_pass = len(_passes)
5068 try:
5069 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
5070 [ "help", "create", "trunk=",
5071 "username=", "existing-svnrepos",
5072 "branches=", "tags=", "encoding=",
5073 "force-branch=", "force-tag=", "exclude=",
5074 "use-cvs", "mime-types=",
5075 "auto-props=", "auto-props-ignore-case",
5076 "eol-from-mime-type", "no-default-eol",
5077 "trunk-only", "no-prune", "dry-run",
5078 "dump-only", "dumpfile=", "tmpdir=",
5079 "svnadmin=", "skip-cleanup", "cvs-revnums",
5080 "bdb-txn-nosync", "fs-type=",
5081 "version", "profile",
5082 "keywords-off", "symbol-transform="])
5083 except getopt.GetoptError, e:
5084 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
5085 usage()
5086 sys.exit(1)
5088 for opt, value in opts:
5089 if opt == '--version':
5090 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
5091 sys.exit(0)
5092 elif opt == '-p':
5093 # Don't cleanup if we're doing incrementals.
5094 ctx.skip_cleanup = 1
5095 if value.find(':') > 0:
5096 start_pass, end_pass = map(int, value.split(':'))
5097 else:
5098 end_pass = start_pass = int(value)
5099 if start_pass > len(_passes) or start_pass < 1:
5100 raise FatalError(
5101 'illegal value (%d) for starting pass. Must be 1 through %d.'
5102 % (int(start_pass), len(_passes),))
5103 if end_pass < start_pass or end_pass > len(_passes):
5104 raise FatalError(
5105 'illegal value (%d) for ending pass. Must be %d through %d.'
5106 % (int(end_pass), int(start_pass), len(_passes),))
5107 elif (opt == '--help') or (opt == '-h'):
5108 ctx.print_help = 1
5109 elif opt == '-v':
5110 Log().log_level = LOG_VERBOSE
5111 ctx.verbose = 1
5112 elif opt == '-q':
5113 Log().log_level = LOG_QUIET
5114 ctx.quiet = 1
5115 elif opt == '-s':
5116 ctx.target = value
5117 elif opt == '--existing-svnrepos':
5118 ctx.existing_svnrepos = 1
5119 elif opt == '--dumpfile':
5120 ctx.dumpfile = value
5121 elif opt == '--tmpdir':
5122 ctx.tmpdir = value
5123 elif opt == '--use-cvs':
5124 ctx.use_cvs = 1
5125 elif opt == '--svnadmin':
5126 ctx.svnadmin = value
5127 elif opt == '--trunk-only':
5128 ctx.trunk_only = 1
5129 elif opt == '--trunk':
5130 ctx.trunk_base = normalize_ttb_path(opt, value)
5131 elif opt == '--branches':
5132 ctx.branches_base = normalize_ttb_path(opt, value)
5133 elif opt == '--tags':
5134 ctx.tags_base = normalize_ttb_path(opt, value)
5135 elif opt == '--no-prune':
5136 ctx.prune = None
5137 elif opt == '--dump-only':
5138 ctx.dump_only = 1
5139 elif opt == '--dry-run':
5140 ctx.dry_run = 1
5141 elif opt == '--encoding':
5142 ctx.encoding.insert(-1, value)
5143 elif opt == '--force-branch':
5144 ctx.forced_branches.append(value)
5145 elif opt == '--force-tag':
5146 ctx.forced_tags.append(value)
5147 elif opt == '--exclude':
5148 try:
5149 ctx.excludes.append(re.compile('^' + value + '$'))
5150 except re.error, e:
5151 raise FatalError("'%s' is not a valid regexp." % (value,))
5152 elif opt == '--mime-types':
5153 ctx.mime_types_file = value
5154 elif opt == '--auto-props':
5155 ctx.auto_props_file = value
5156 elif opt == '--auto-props-ignore-case':
5157 ctx.auto_props_ignore_case = True
5158 elif opt == '--eol-from-mime-type':
5159 ctx.eol_from_mime_type = 1
5160 elif opt == '--no-default-eol':
5161 ctx.no_default_eol = 1
5162 elif opt == '--keywords-off':
5163 ctx.keywords_off = 1
5164 elif opt == '--username':
5165 ctx.username = value
5166 elif opt == '--skip-cleanup':
5167 ctx.skip_cleanup = 1
5168 elif opt == '--cvs-revnums':
5169 ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5170 elif opt == '--bdb-txn-nosync':
5171 ctx.bdb_txn_nosync = 1
5172 elif opt == '--fs-type':
5173 ctx.fs_type = value
5174 elif opt == '--create':
5175 sys.stderr.write(warning_prefix +
5176 ': The behaviour produced by the --create option is now the '
5177 'default,\nand passing the option is deprecated.\n')
5178 elif opt == '--profile':
5179 profiling = 1
5180 elif opt == '--symbol-transform':
5181 [pattern, replacement] = value.split(":")
5182 try:
5183 pattern = re.compile(pattern)
5184 except re.error, e:
5185 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5186 ctx.symbol_transforms.append((pattern, replacement,))
5188 if ctx.print_help:
5189 usage()
5190 sys.exit(0)
5192 # Consistency check for options and arguments.
5193 if len(args) == 0:
5194 usage()
5195 sys.exit(1)
5197 if len(args) > 1:
5198 sys.stderr.write(error_prefix +
5199 ": must pass only one CVS repository.\n")
5200 usage()
5201 sys.exit(1)
5203 cvsroot = args[0]
5205 if ctx.use_cvs:
5206 ctx.cvs_repository = CVSRepositoryViaCVS(cvsroot)
5207 else:
5208 ctx.cvs_repository = CVSRepositoryViaRCS(cvsroot)
5210 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5211 raise FatalError("must pass one of '-s' or '--dump-only'.")
5213 def not_both(opt1val, opt1name, opt2val, opt2name):
5214 if opt1val and opt2val:
5215 raise FatalError("cannot pass both '%s' and '%s'."
5216 % (opt1name, opt2name,))
5218 not_both(ctx.target, '-s',
5219 ctx.dump_only, '--dump-only')
5221 not_both(ctx.dump_only, '--dump-only',
5222 ctx.existing_svnrepos, '--existing-svnrepos')
5224 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5225 ctx.existing_svnrepos, '--existing-svnrepos')
5227 not_both(ctx.dump_only, '--dump-only',
5228 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5230 not_both(ctx.quiet, '-q',
5231 ctx.verbose, '-v')
5233 not_both(ctx.fs_type, '--fs-type',
5234 ctx.existing_svnrepos, '--existing-svnrepos')
5236 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5237 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5238 % ctx.fs_type)
5240 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5241 ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
5242 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5244 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5245 raise FatalError("the svn-repos-path '%s' is not an "
5246 "existing directory." % ctx.target)
5248 if not ctx.dump_only and not ctx.existing_svnrepos \
5249 and (not ctx.dry_run) and os.path.exists(ctx.target):
5250 raise FatalError("the svn-repos-path '%s' exists.\n"
5251 "Remove it, or pass '--existing-svnrepos'."
5252 % ctx.target)
5254 if ctx.target and not ctx.dry_run:
5255 # Verify that svnadmin can be executed. The 'help' subcommand
5256 # should be harmless.
5257 try:
5258 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5259 except CommandFailedException, e:
5260 raise FatalError(
5261 '%s\n'
5262 'svnadmin could not be executed. Please ensure that it is\n'
5263 'installed and/or use the --svnadmin option.' % (e,))
5265 if ctx.mime_types_file:
5266 ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5268 if ctx.auto_props_file:
5269 ctx.svn_property_setters.append(AutoPropsPropertySetter(
5270 ctx.auto_props_file, ctx.auto_props_ignore_case))
5272 ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5273 ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5275 if ctx.eol_from_mime_type:
5276 ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5278 if ctx.no_default_eol:
5279 ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5280 else:
5281 ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5283 if not ctx.keywords_off:
5284 ctx.svn_property_setters.append(
5285 KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5287 ctx.svn_property_setters.append(ExecutablePropertySetter())
5289 # Make sure the tmp directory exists. Note that we don't check if
5290 # it's empty -- we want to be able to use, for example, "." to hold
5291 # tempfiles. But if we *did* want check if it were empty, we'd do
5292 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5293 if not os.path.exists(ctx.tmpdir):
5294 os.mkdir(ctx.tmpdir)
5295 elif not os.path.isdir(ctx.tmpdir):
5296 raise FatalError(
5297 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5298 " exists and is not a directory. Please make it be a directory,\n"
5299 " or specify some other directory for temporary files."
5300 % (ctx.tmpdir,))
5302 # But do lock the tmpdir, to avoid process clash.
5303 try:
5304 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5305 except OSError, e:
5306 if e.errno == errno.EACCES:
5307 raise FatalError("Permission denied:"
5308 + " No write access to directory '%s'." % ctx.tmpdir)
5309 if e.errno == errno.EEXIST:
5310 raise FatalError(
5311 "cvs2svn is using directory '%s' for temporary files, but\n"
5312 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5313 " cvs2svn process is currently using '%s' as its temporary\n"
5314 " workspace. If you are certain that is not the case,\n"
5315 " then remove the '%s/cvs2svn.lock' subdirectory."
5316 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5317 raise
5318 try:
5319 if profiling:
5320 import hotshot
5321 prof = hotshot.Profile('cvs2svn.hotshot')
5322 prof.runcall(convert, start_pass, end_pass)
5323 prof.close()
5324 else:
5325 convert(start_pass, end_pass)
5326 finally:
5327 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5328 except: pass
5331 if __name__ == '__main__':
5332 try:
5333 main()
5334 except FatalException, e:
5335 sys.stderr.write(str(e))
5336 sys.exit(1)