1 #!/usr/bin/env python
2 # (Be in -*- python -*- mode.)
4 # cvs2svn: ...
6 # ====================================================================
7 # Copyright (c) 2000-2004 CollabNet. All rights reserved.
9 # This software is licensed as described in the file COPYING, which
10 # you should have received as part of this distribution. The terms
11 # are also available at http://subversion.tigris.org/license-1.html.
12 # If newer versions of this license are posted there, you may use a
13 # newer version instead, at your option.
15 # This software consists of voluntary contributions made by many
16 # individuals. For exact contribution history, see the revision
17 # history and logs, available at http://cvs2svn.tigris.org/.
18 # ====================================================================
20 VERSION = 'r' + "$LastChangedRevision$"[22:-2]
22 from __future__ import generators
24 import cvs2svn_rcsparse
25 import os
26 import sys
27 import sha
28 import re
29 import time
30 import fileinput
31 import fnmatch
32 import getopt
33 import stat
34 import md5
35 import marshal
36 import errno
37 import popen2
38 import types
39 import ConfigParser
40 try:
41 # Try to get access to a bunch of encodings for use with --encoding.
42 # See http://cjkpython.i18n.org/ for details.
43 import iconv_codec
44 except ImportError:
45 pass
47 # Warnings and errors start with these strings. They are typically
48 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
49 warning_prefix = "WARNING"
50 error_prefix = "ERROR"
52 # Make sure this Python is recent enough.
53 if sys.hexversion < 0x02020000:
54 sys.stderr.write("%s: Python 2.2 or higher required, "
55 "see www.python.org.\n" % error_prefix)
56 sys.exit(1)
58 # Pretend we have true booleans on older python versions
59 try:
60 True
61 except:
62 True = 1
63 False = 0
65 # Opening pipes was a mess before Python 2.4, because some methods did
66 # not exist on some platforms, and some behaved differently on others.
67 # Python 2.4 solved this by adding the subprocess module, but since we
68 # cannot require such a new version, we cannot use it directly, but
69 # must implement a simplified Popen using the best means available.
71 # The SimplePopen class only has the following members and methods, all
72 # behaving as documented in the subprocess.Popen class:
73 # - stdin
74 # - stdout
75 # - stderr
76 # - wait
77 try:
78 # First try subprocess.Popen...
79 import subprocess
80 class SimplePopen:
81 def __init__(self, cmd, capture_stderr):
82 if capture_stderr:
83 stderr = subprocess.PIPE
84 else:
85 stderr = None
86 self._popen = subprocess.Popen(cmd, stdin=subprocess.PIPE,
87 stdout=subprocess.PIPE, stderr=stderr)
88 self.stdin = self._popen.stdin
89 self.stdout = self._popen.stdout
90 if capture_stderr:
91 self.stderr = self._popen.stderr
92 self.wait = self._popen.wait
93 except ImportError:
94 if hasattr(popen2, 'Popen3'):
95 # ...then try popen2.Popen3...
96 class SimplePopen:
97 def __init__(self, cmd, capture_stderr):
98 self._popen3 = popen2.Popen3(cmd, capture_stderr)
99 self.stdin = self._popen3.tochild
100 self.stdout = self._popen3.fromchild
101 if capture_stderr:
102 self.stderr = self._popen3.childerr
103 self.wait = self._popen3.wait
104 else:
105 # ...and if all fails, use popen2.popen3...
106 class SimplePopen:
107 def __init__(self, cmd, capture_stderr):
108 if type(cmd) != types.StringType:
109 cmd = argv_to_command_string(cmd)
110 self.stdout, self.stdin, self.stderr = popen2.popen3(cmd, mode='b')
111 def wait(self):
112 return self.stdout.close() or self.stdin.close() or \
113 self.stderr.close()
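# Illustrative usage of SimplePopen (a sketch, not code cvs2svn itself runs;
# assumes a POSIX 'cat' binary is on the PATH):
#
#   pipe = SimplePopen(['cat'], True)
#   pipe.stdin.write('hello\n')
#   pipe.stdin.close()
#   data = pipe.stdout.read()      # -> 'hello\n'
#   status = pipe.wait()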
115 # DBM module selection
117 # 1. If we have bsddb3, it is probably newer than bsddb. Fake bsddb = bsddb3,
118 # so that the dbhash module used by anydbm will use bsddb3.
119 try:
120 import bsddb3
121 sys.modules['bsddb'] = sys.modules['bsddb3']
122 except ImportError:
123 pass
125 # 2. These DBM modules are not good for cvs2svn.
126 import anydbm
127 if (anydbm._defaultmod.__name__ == 'dumbdbm'
128 or anydbm._defaultmod.__name__ == 'dbm'):
129 sys.stderr.write(
130 error_prefix
131 + ': your installation of Python does not contain a suitable\n'
132 + 'DBM module -- cvs2svn cannot continue.\n'
133 + 'See http://python.org/doc/current/lib/module-anydbm.html to solve.\n')
134 sys.exit(1)
136 # 3. If we are using the old bsddb185 module, then prefer gdbm instead.
137 # Unfortunately, gdbm appears not to be trouble free, either.
138 if hasattr(anydbm._defaultmod, 'bsddb') \
139 and not hasattr(anydbm._defaultmod.bsddb, '__version__'):
140 try:
141 gdbm = __import__('gdbm')
142 except ImportError:
143 sys.stderr.write(warning_prefix +
144 ': The version of the bsddb module found '
145 'on your computer has been reported to malfunction on some datasets, '
146 'causing KeyError exceptions. You may wish to upgrade your Python to '
147 'version 2.3 or later.\n')
148 else:
149 anydbm._defaultmod = gdbm
151 trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
152 cvs_branch_tag = re.compile('^((?:[0-9]+\\.[0-9]+\\.)+)0\\.([0-9]+)$')
153 rcs_branch_tag = re.compile('^(?:[0-9]+\\.[0-9]+\\.)+[0-9]+$')
155 SVN_KEYWORDS_VALUE = 'Author Date Id Revision'
157 # This really only matches standard '1.1.1.*'-style vendor revisions.
158 # One could conceivably have a file whose default branch is 1.1.3 or
159 # whatever, or was that at some point in time, with vendor revisions
160 # 1.1.3.1, 1.1.3.2, etc. But with the default branch gone now (which
161 # is the only time this regexp gets used), we'd have no basis for
162 # assuming that the non-standard vendor branch had ever been the
163 # default branch anyway, so we don't want this to match them anyway.
164 vendor_revision = re.compile('^(1\\.1\\.1)\\.([0-9])+$')
166 # If this run's output is a repository, then (in the tmpdir) we use
167 # a dumpfile of this name for repository loads.
169 # If this run's output is a dumpfile, then this is the default name of
170 # that dumpfile, but in the current directory (unless the user has
171 # specified a dumpfile path, of course, in which case it will be
172 # wherever the user said).
173 DUMPFILE = 'cvs2svn-dump'
175 # This file appears with different suffixes at different stages of
176 # processing. CVS revisions are cleaned and sorted here, for commit
177 # grouping. See design-notes.txt for details.
178 DATAFILE = 'cvs2svn-data'
180 # This file contains a marshalled copy of all the statistics that we
181 # gather throughout the various runs of cvs2svn. The data is stored as a
182 # marshalled dictionary.
183 STATISTICS_FILE = 'cvs2svn-statistics'
185 # This text file contains records (1 per line) that describe svn
186 # filesystem paths that are the opening and closing source revisions
187 # for copies to tags and branches. The format is as follows:
189 # SYMBOL_NAME SVN_REVNUM TYPE SVN_PATH
191 # Where type is either OPENING or CLOSING. The SYMBOL_NAME and
192 # SVN_REVNUM are the primary and secondary sorting criteria for
193 # creating SYMBOL_OPENINGS_CLOSINGS_SORTED.
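# An illustrative record (hypothetical symbol and path, using the
# single-letter OPENING/CLOSING codes defined further below) might look like:
#
#   MY_BRANCH 42 O trunk/src/foo.c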
194 SYMBOL_OPENINGS_CLOSINGS = 'cvs2svn-symbolic-names.txt'
195 # A sorted version of the above file.
196 SYMBOL_OPENINGS_CLOSINGS_SORTED = 'cvs2svn-symbolic-names-s.txt'
198 # This file is a temporary file for storing symbolic_name -> closing
199 # CVSRevision until the end of our pass where we can look up the
200 # corresponding SVNRevNum for the closing revs and write these out to
201 # the SYMBOL_OPENINGS_CLOSINGS.
202 SYMBOL_CLOSINGS_TMP = 'cvs2svn-symbolic-names-closings-tmp.txt'
204 # Skeleton version of an svn filesystem.
205 # (These supersede and will eventually replace the two above.)
206 # See class SVNRepositoryMirror for how these work.
207 SVN_MIRROR_REVISIONS_DB = 'cvs2svn-svn-revisions.db'
208 SVN_MIRROR_NODES_DB = 'cvs2svn-svn-nodes.db'
210 # Offsets pointing to the beginning of each SYMBOLIC_NAME in
211 # SYMBOL_OPENINGS_CLOSINGS_SORTED
212 SYMBOL_OFFSETS_DB = 'cvs2svn-symbolic-name-offsets.db'
214 # Maps CVSRevision.unique_key()s to lists of symbolic names, where
215 # the CVSRevision is the last such that is a source for those symbolic
216 # names. For example, if branch B's number is 1.3.0.2 in this CVS
217 # file, and this file's 1.3 is the latest (by date) revision among
218 # *all* CVS files that is a source for branch B, then the
219 # CVSRevision.unique_key() corresponding to this file at 1.3 would
220 # list at least B in its list.
221 SYMBOL_LAST_CVS_REVS_DB = 'cvs2svn-symbol-last-cvs-revs.db'
223 # Maps CVSRevision.unique_key() to corresponding line in s-revs.
224 ###PERF Or, we could map to an offset into s-revs, instead of dup'ing
225 ### the s-revs data in this database.
226 CVS_REVS_DB = 'cvs2svn-cvs-revs.db'
228 # Lists all symbolic names that are tags. Keys are strings (symbolic
229 # names), values are ignorable.
230 TAGS_DB = 'cvs2svn-tags.db'
232 # A list of all tags. Each line consists of the tag name and the number
233 # of files in which it exists, separated by a space.
234 TAGS_LIST = 'cvs2svn-tags.txt'
236 # A list of all branches. The file is stored as a plain text file
237 # to make it easy to look at in an editor. Each line contains the
238 # branch name, the number of files where the branch is created, the
239 # commit count, and a list of tags and branches that are defined on
240 # revisions in the branch.
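# An illustrative line (hypothetical branch name, counts and blockers),
# in the format written by SymbolDatabase.write() below:
#
#   RELEASE_1_BRANCH 17 5 RELEASE_1_0 RELEASE_1_1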
241 BRANCHES_LIST = 'cvs2svn-branches.txt'
243 # These two databases provide a bidirectional mapping between
244 # CVSRevision.unique_key()s and Subversion revision numbers.
246 # The first maps CVSRevision.unique_key() to a number; the values are
247 # not unique.
249 # The second maps Subversion revision numbers to tuples (c_rev_keys,
250 # motivating_revnum, symbolic_name, date).
252 # c_rev_keys is a list of CVSRevision.unique_key()s.
254 # If the SVNCommit is a default branch synchronization,
255 # motivating_revnum is the svn_revnum of the primary SVNCommit that
256 # motivated it; otherwise it is None. (NOTE: Secondary commits that
257 # fill branches and tags also have a motivating commit, but we do not
258 # record it because it is (currently) not needed for anything.)
259 # motivating_revnum is used when generating the log message for the
260 # commit that synchronizes the default branch with trunk.
262 # symbolic_name is the symbolic name associated with the commit (if it
263 # filled a symbolic name) or None otherwise.
265 # date is the date of the commit.
266 CVS_REVS_TO_SVN_REVNUMS = 'cvs2svn-cvs-revs-to-svn-revnums.db'
267 SVN_REVNUMS_TO_CVS_REVS = 'cvs2svn-svn-revnums-to-cvs-revs.db'
269 # How many bytes to read at a time from a pipe. 128 kiB should be
270 # large enough to be efficient without wasting too much memory.
271 PIPE_READ_SIZE = 128 * 1024
273 # Record the default RCS branches, if any, for CVS filepaths.
275 # The keys are CVS filepaths, relative to the top of the repository
276 # and with the ",v" stripped off, so they match the cvs paths used in
277 # Commit.commit(). The values are vendor branch revisions, such as
278 # '1.1.1.1', or '1.1.1.2', or '1.1.1.96'. The vendor branch revision
279 # represents the highest vendor branch revision thought to have ever
280 # been head of the default branch.
282 # The reason we record a specific vendor revision, rather than a
283 # default branch number, is that there are two cases to handle:
285 # One case is simple. The RCS file lists a default branch explicitly
286 # in its header, such as '1.1.1'. In this case, we know that every
287 # revision on the vendor branch is to be treated as head of trunk at
288 # that point in time.
290 # But there's also a degenerate case. The RCS file does not currently
291 # have a default branch, yet we can deduce that for some period in the
292 # past it probably *did* have one. For example, the file has vendor
293 # revisions 1.1.1.1 -> 1.1.1.96, all of which are dated before 1.2,
294 # and then it has 1.1.1.97 -> 1.1.1.100 dated after 1.2. In this
295 # case, we should record 1.1.1.96 as the last vendor revision to have
296 # been the head of the default branch.
297 DEFAULT_BRANCHES_DB = 'cvs2svn-default-branches.db'
299 # Records the author and log message for each changeset.
300 # The keys are author+log digests, the same kind used to identify
301 # unique revisions in the .revs, etc files. Each value is a tuple
302 # of two elements: '(author logmessage)'.
303 METADATA_DB = "cvs2svn-metadata.db"
305 # A temporary on-disk hash that maps CVSRevision unique keys to a new
306 # timestamp for that CVSRevision. These new timestamps are created in
307 # pass2, and this hash is used exclusively in pass2.
308 TWEAKED_TIMESTAMPS_DB = "cvs2svn-fixed-timestamps.db"
310 REVS_SUFFIX = '.revs'
311 CLEAN_REVS_SUFFIX = '.c-revs'
312 SORTED_REVS_SUFFIX = '.s-revs'
313 RESYNC_SUFFIX = '.resync'
315 SVN_INVALID_REVNUM = -1
317 COMMIT_THRESHOLD = 5 * 60 # flush a commit if a 5 minute gap occurs
319 # Things that can happen to a file.
320 OP_NOOP = '-'
321 OP_ADD = 'A'
322 OP_DELETE = 'D'
323 OP_CHANGE = 'C'
325 # A deltatext either does or doesn't represent some change.
326 DELTATEXT_NONEMPTY = 'N'
327 DELTATEXT_EMPTY = 'E'
329 DIGEST_END_IDX = 9 + (sha.digestsize * 2)
331 # Constants used in SYMBOL_OPENINGS_CLOSINGS
332 OPENING = 'O'
333 CLOSING = 'C'
336 class FatalException(Exception):
337 """Exception thrown on a non-recoverable error.
339 If this exception is thrown by main(), it is caught by the global
340 layer of the program, its string representation is printed, and the
341 program is ended with an exit code of 1."""
343 pass
346 class FatalError(FatalException):
347 """A FatalException that prepends error_prefix to the message."""
349 def __init__(self, msg):
350 """Use (error_prefix + ': ' + MSG + '\n') as the error message."""
352 FatalException.__init__(self, '%s: %s\n' % (error_prefix, msg,))
355 def temp(basename):
356 """Return a path to BASENAME in Ctx().tmpdir.
357 This is a convenience function to save horizontal space in source."""
359 return os.path.join(Ctx().tmpdir, basename)
362 # Since CVS symbolic names can (unofficially) contain [/\], we translate
363 # those characters into ones that Subversion allows in pathnames.
364 def _clean_symbolic_name(name):
365 """Return symbolic name NAME, translating characters that Subversion
366 does not allow in a pathname."""
368 name = name.replace('/','++')
369 name = name.replace('\\','--')
370 return name
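# Illustrative sketch of the translation performed by _clean_symbolic_name:
#
#   _clean_symbolic_name('releases/1.0')  -> 'releases++1.0'
#   _clean_symbolic_name('win\\nt')       -> 'win--nt'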
373 def _path_join(*components):
374 """Join two or more pathname COMPONENTS, inserting '/' as needed.
375 Empty components are skipped."""
377 return '/'.join(filter(None, components))
380 def _path_split(path):
381 """Split the svn pathname PATH into a pair, (HEAD, TAIL).
383 This is similar to os.path.split(), but always uses '/' as path
384 separator. PATH is an svn path, which should not start with a '/'.
385 HEAD is everything before the last slash, and TAIL is everything
386 after. If PATH ends in a slash, TAIL will be empty. If there is no
387 slash in PATH, HEAD will be empty. If PATH is empty, both HEAD and
388 TAIL are empty."""
390 pos = path.rfind('/')
391 if pos == -1:
392 return ('', path,)
393 else:
394 return (path[:pos], path[pos+1:],)
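# Illustrative results of the two path helpers above (a sketch):
#
#   _path_join('trunk', 'src', 'foo.c')  -> 'trunk/src/foo.c'
#   _path_join('', 'foo.c')              -> 'foo.c'
#   _path_split('trunk/src/foo.c')       -> ('trunk/src', 'foo.c')
#   _path_split('foo.c')                 -> ('', 'foo.c')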
397 def to_utf8(value, mode='replace'):
398 """Return VALUE re-encoded as UTF-8, trying the encodings in Ctx.encoding
399 as valid source encodings. Raise UnicodeError on failure of all
400 source encodings."""
402 ### FIXME: The 'replace' default mode should be an option,
403 ### like --encoding is.
404 for encoding in Ctx().encoding:
405 try:
406 return unicode(value, encoding, mode).encode('utf8')
407 except UnicodeError:
408 Log().write(LOG_VERBOSE, "Encoding '%s' failed for string '%s'"
409 % (encoding, value))
410 raise UnicodeError
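# Illustrative sketch: with Ctx().encoding set (via --encoding) to, say,
# ['ascii', 'latin_1'], a Latin-1 input falls through to the second encoding:
#
#   to_utf8('na\xefve')   -> 'na\xc3\xafve'   (UTF-8 for u'na\xefve')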
413 ctrl_characters_regexp = re.compile('[\\\x00-\\\x1f\\\x7f]')
415 def verify_filename_legal(filename):
416 """Verify that FILENAME does not include any control characters. If
417 it does, raise a FatalError."""
419 m = ctrl_characters_regexp.search(filename)
420 if m:
421 raise FatalError(
422 "Character %r in filename %r is not supported by subversion."
423 % (m.group(), filename,))
426 def run_command(command):
427 if os.system(command):
428 raise FatalError('Command failed: "%s"' % (command,))
431 class CommandFailedException(Exception):
432 """Exception raised if check_command_runs() fails."""
434 pass
437 def check_command_runs(cmd, cmdname):
438 """Check whether the command CMD can be executed without errors.
440 CMD is a list or string, as accepted by SimplePopen. CMDNAME is the
441 name of the command as it should be included in exception error
442 messages.
444 This function checks three things: (1) the command can be run
445 without throwing an OSError; (2) it exits with status=0; (3) it
446 doesn't output anything to stderr. If any of these conditions is
447 not met, raise a CommandFailedException describing the problem."""
449 try:
450 pipe = SimplePopen(cmd, True)
451 except OSError, e:
452 raise CommandFailedException('error executing %s: %s' % (cmdname, e,))
453 pipe.stdin.close()
454 pipe.stdout.read()
455 errmsg = pipe.stderr.read()
456 status = pipe.wait()
457 if status or errmsg:
458 msg = 'error executing %s: status %s' % (cmdname, status,)
459 if errmsg:
460 msg += ', error output:\n%s' % (errmsg,)
461 raise CommandFailedException(msg)
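# Illustrative usage (a sketch, mirroring how CVSRepositoryViaRCS below
# verifies that RCS's 'co' binary is available):
#
#   try:
#     check_command_runs(['co', '-V'], 'co')
#   except CommandFailedException, e:
#     raise FatalError(str(e))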
464 class CVSRepository:
465 """A CVS repository from which data can be extracted."""
467 def __init__(self, cvs_repos_path):
468 """CVS_REPOS_PATH is the top of the CVS repository (at least as
469 far as this run is concerned)."""
471 if not os.path.isdir(cvs_repos_path):
472 raise FatalError("The specified CVS repository path '%s' is not an "
473 "existing directory." % cvs_repos_path)
475 self.cvs_repos_path = os.path.normpath(cvs_repos_path)
476 self.cvs_prefix_re = re.compile(
477 r'^' + re.escape(self.cvs_repos_path)
478 + r'(' + re.escape(os.sep) + r'|$)')
480 def get_cvs_path(self, fname):
481 """Return the path to FNAME relative to cvs_repos_path, with ',v' removed.
483 FNAME is a filesystem name that has to be within
484 self.cvs_repos_path. Return the filename relative to
485 self.cvs_repos_path, with ',v' stripped off if present, and with
486 os.sep converted to '/'."""
488 (tail, n) = self.cvs_prefix_re.subn('', fname, 1)
489 if n != 1:
490 raise FatalError(
491 "get_cvs_path: '%s' is not a sub-path of '%s'"
492 % (fname, self.cvs_repos_path,))
493 if tail.endswith(',v'):
494 tail = tail[:-2]
495 return tail.replace(os.sep, '/')
497 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
498 """Return a command string, and a pipe from which the file
499 contents of C_REV can be read. C_REV is a CVSRevision. If
500 SUPPRESS_KEYWORD_SUBSTITUTION is True, then suppress the
501 substitution of RCS/CVS keywords in the output. Standard output
502 of the pipe returns the text of that CVS Revision.
504 The command string that is returned is provided for use in error
505 messages; it is not escaped in such a way that it could
506 necessarily be executed."""
508 raise NotImplementedError
511 class CVSRepositoryViaRCS(CVSRepository):
512 """A CVSRepository accessed via RCS."""
514 def __init__(self, cvs_repos_path):
515 CVSRepository.__init__(self, cvs_repos_path)
516 try:
517 check_command_runs([ 'co', '-V' ], 'co')
518 except CommandFailedException, e:
519 raise FatalError('%s\n'
520 'Please check that co is installed and in your PATH\n'
521 '(it is a part of the RCS software).' % (e,))
523 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
524 pipe_cmd = [ 'co', '-q', '-x,v', '-p' + c_rev.rev ]
525 if suppress_keyword_substitution:
526 pipe_cmd.append('-kk')
527 pipe_cmd.append(c_rev.rcs_path())
528 pipe = SimplePopen(pipe_cmd, True)
529 pipe.stdin.close()
530 return ' '.join(pipe_cmd), pipe
533 class CVSRepositoryViaCVS(CVSRepository):
534 """A CVSRepository accessed via CVS."""
536 def __init__(self, cvs_repos_path):
537 CVSRepository.__init__(self, cvs_repos_path)
538 # Ascend above the specified root if necessary, to find the
539 # cvs_repository_root (a directory containing a CVSROOT directory)
540 # and the cvs_module (the path of the conversion root within the
541 # cvs repository) NB: cvs_module must be separated by '/' *not* by
542 # os.sep .
543 def is_cvs_repository_root(path):
544 return os.path.isdir(os.path.join(path, 'CVSROOT'))
546 self.cvs_repository_root = os.path.abspath(self.cvs_repos_path)
547 self.cvs_module = ""
548 while not is_cvs_repository_root(self.cvs_repository_root):
549 # Step up one directory:
550 prev_cvs_repository_root = self.cvs_repository_root
551 self.cvs_repository_root, module_component = \
552 os.path.split(self.cvs_repository_root)
553 if self.cvs_repository_root == prev_cvs_repository_root:
554 # Hit the root (of the drive, on Windows) without finding a
555 # CVSROOT dir.
556 raise FatalError(
557 "the path '%s' is not a CVS repository, nor a path "
558 "within a CVS repository. A CVS repository contains "
559 "a CVSROOT directory within its root directory."
560 % (self.cvs_repos_path,))
562 self.cvs_module = module_component + "/" + self.cvs_module
564 os.environ['CVSROOT'] = self.cvs_repository_root
566 def cvs_ok(global_arguments):
567 check_command_runs(
568 [ 'cvs' ] + global_arguments + [ '--version' ], 'cvs')
570 self.global_arguments = [ "-q", "-R" ]
571 try:
572 cvs_ok(self.global_arguments)
573 except CommandFailedException, e:
574 self.global_arguments = [ "-q" ]
575 try:
576 cvs_ok(self.global_arguments)
577 except CommandFailedException, e:
578 raise FatalError(
579 '%s\n'
580 'Please check that cvs is installed and in your PATH.' % (e,))
582 def get_co_pipe(self, c_rev, suppress_keyword_substitution=False):
583 pipe_cmd = [ 'cvs' ] + self.global_arguments + \
584 [ 'co', '-r' + c_rev.rev, '-p' ]
585 if suppress_keyword_substitution:
586 pipe_cmd.append('-kk')
587 pipe_cmd.append(self.cvs_module + c_rev.cvs_path)
588 pipe = SimplePopen(pipe_cmd, True)
589 pipe.stdin.close()
590 return ' '.join(pipe_cmd), pipe
593 def generate_ignores(c_rev):
594 # Read in props
595 pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(c_rev)
596 buf = pipe.stdout.read(PIPE_READ_SIZE)
597 raw_ignore_val = ""
598 while buf:
599 raw_ignore_val += buf
600 buf = pipe.stdout.read(PIPE_READ_SIZE)
601 pipe.stdout.close()
602 error_output = pipe.stderr.read()
603 exit_status = pipe.wait()
604 if exit_status:
605 raise FatalError("The command '%s' failed with exit status: %s\n"
606 "and the following output:\n"
607 "%s" % (pipe_cmd, exit_status, error_output))
609 # Tweak props: First, convert any spaces to newlines...
610 raw_ignore_val = '\n'.join(raw_ignore_val.split())
611 raw_ignores = raw_ignore_val.split('\n')
612 ignore_vals = [ ]
613 for ignore in raw_ignores:
614 # Reset the list if we encounter a '!'
615 # See http://cvsbook.red-bean.com/cvsbook.html#cvsignore
616 if ignore == '!':
617 ignore_vals = [ ]
618 continue
619 # Skip empty lines
620 if len(ignore) == 0:
621 continue
622 ignore_vals.append(ignore)
623 return ignore_vals
626 class KeyGenerator:
627 """Generate a series of unique strings."""
629 def __init__(self):
630 self.key_base = 0L
632 def gen_key(self):
633 """Generate and return a previously-unused key."""
635 key = '%x' % self.key_base
636 self.key_base += 1
638 return key
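# Illustrative sketch of the keys produced (unpadded lowercase hex):
#
#   kg = KeyGenerator()
#   kg.gen_key()   -> '0'
#   kg.gen_key()   -> '1'
#   ...            # after ten calls the keys continue 'a', 'b', ...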
641 # ============================================================================
642 # This code is copied with a few modifications from:
643 # subversion/subversion/bindings/swig/python/svn/core.py
645 if sys.platform == "win32":
646 _escape_shell_arg_re = re.compile(r'(\\+)(\"|$)')
648 def escape_shell_arg(arg):
649 # The (very strange) parsing rules used by the C runtime library are
650 # described at:
651 # http://msdn.microsoft.com/library/en-us/vclang/html/_pluslang_Parsing_C.2b2b_.Command.2d.Line_Arguments.asp
653 # double up slashes, but only if they are followed by a quote character
654 arg = re.sub(_escape_shell_arg_re, r'\1\1\2', arg)
656 # surround by quotes and escape quotes inside
657 arg = '"' + arg.replace('"', '"^""') + '"'
658 return arg
661 def argv_to_command_string(argv):
662 """Flatten a list of command line arguments into a command string.
664 The resulting command string is expected to be passed to the system
665 shell which os functions like popen() and system() invoke internally."""
668 # According to cmd's usage notes (cmd /?), it parses the command line by
669 # "seeing if the first character is a quote character and if so, stripping
670 # the leading character and removing the last quote character."
671 # So to prevent the argument string from being changed we add an extra set
672 # of quotes around it here.
673 return '"' + ' '.join(map(escape_shell_arg, argv)) + '"'
675 else:
676 def escape_shell_arg(arg):
677 return "'" + arg.replace("'", "'\\''") + "'"
679 def argv_to_command_string(argv):
680 """Flatten a list of command line arguments into a command string.
682 The resulting command string is expected to be passed to the system
683 shell which os functions like popen() and system() invoke internally."""
686 return ' '.join(map(escape_shell_arg, argv))
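# Illustrative sketch (POSIX branch): every argument is wrapped in single
# quotes, with embedded quotes escaped, so the shell passes it through verbatim:
#
#   argv_to_command_string(['echo', "it's here"])
#     produces the command string:  'echo' 'it'\''s here'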
689 # ============================================================================
691 def format_date(date):
692 """Return an svn-compatible date string for DATE (seconds since epoch).
694 A Subversion date looks like '2002-09-29T14:44:59.000000Z'."""
696 return time.strftime("%Y-%m-%dT%H:%M:%S.000000Z", time.gmtime(date))
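# Illustrative sketch, using the sample date from the docstring above:
#
#   format_date(1033310699)   -> '2002-09-29T14:44:59.000000Z'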
699 def sort_file(infilename, outfilename):
700 """Sort file INFILENAME, storing the results to OUTFILENAME."""
702 # GNU sort will sort our dates differently (incorrectly!) if our
703 # LC_ALL is anything but 'C', so if LC_ALL is set, temporarily set
704 # it to 'C'
705 lc_all_tmp = os.environ.get('LC_ALL', None)
706 os.environ['LC_ALL'] = 'C'
707 try:
708 # The -T option to sort has a nice side effect. The Win32 sort is
709 # case insensitive and cannot be used, and since it does not
710 # understand the -T option and dies if we try to use it, there is
711 # no risk that we use that sort by accident.
712 run_command('sort -T %s %s > %s'
713 % (Ctx().tmpdir, infilename, outfilename))
714 finally:
715 if lc_all_tmp is None:
716 del os.environ['LC_ALL']
717 else:
718 os.environ['LC_ALL'] = lc_all_tmp
721 def match_regexp_list(regexp_list, s):
722 """Test whether string S matches any of the compiled regexps in
723 REGEXP_LIST."""
725 for regexp in regexp_list:
726 if regexp.match(s):
727 return True
728 return False
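# Illustrative sketch (note that match_regexp_list uses match(), which
# anchors at the start of the string):
#
#   regexps = [ re.compile('^RELEASE_'), re.compile('^vendor') ]
#   match_regexp_list(regexps, 'RELEASE_1_0')   -> True
#   match_regexp_list(regexps, 'experimental')  -> False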
731 class LF_EOL_Filter:
732 """Filter a stream and convert all end-of-line markers (CRLF, CR or LF)
733 into LFs only."""
735 def __init__(self, stream):
736 self.stream = stream
737 self.carry_cr = False
738 self.eof = False
740 def read(self, size):
741 while True:
742 buf = self.stream.read(size)
743 self.eof = len(buf) == 0
744 if self.carry_cr:
745 buf = '\r' + buf
746 self.carry_cr = False
747 if not self.eof and buf[-1] == '\r':
748 self.carry_cr = True
749 buf = buf[:-1]
750 buf = buf.replace('\r\n', '\n')
751 buf = buf.replace('\r', '\n')
752 if len(buf) > 0 or self.eof:
753 return buf
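# Illustrative sketch using an in-memory stream:
#
#   from StringIO import StringIO
#   f = LF_EOL_Filter(StringIO('one\r\ntwo\rthree\n'))
#   f.read(8192)   -> 'one\ntwo\nthree\n'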
756 # These constants represent the log levels that this script supports
757 LOG_WARN = -1
758 LOG_QUIET = 0
759 LOG_NORMAL = 1
760 LOG_VERBOSE = 2
762 class Log:
763 """A simple logging facility. Each line will be timestamped if
764 self.use_timestamps is True. This class is a Borg, see
765 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
767 __shared_state = {}
769 def __init__(self):
770 self.__dict__ = self.__shared_state
771 if self.__dict__:
772 return
773 self.log_level = LOG_NORMAL
774 # Set this to true if you want to see timestamps on each line output.
775 self.use_timestamps = None
776 self.logger = sys.stdout
778 def _timestamp(self):
779 """Output a detailed timestamp at the beginning of each line output."""
781 self.logger.write(time.strftime('[%Y-%m-%d %I:%M:%S %Z] - '))
783 def write(self, log_level, *args):
784 """This is the public method to use for writing to a file. Only
785 messages whose LOG_LEVEL is <= self.log_level will be printed. If
786 there are multiple ARGS, they will be separated by a space."""
788 if log_level > self.log_level:
789 return
790 if self.use_timestamps:
791 self._timestamp()
792 self.logger.write(' '.join(map(str,args)) + "\n")
793 # Ensure that log output doesn't get out-of-order with respect to
794 # stderr output.
795 self.logger.flush()
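# Illustrative usage (a sketch; because Log is a Borg, every Log() instance
# shares the same log_level and output stream):
#
#   Log().log_level = LOG_VERBOSE
#   Log().write(LOG_VERBOSE, 'Deleting', '/tmp/foo')   # prints "Deleting /tmp/foo"
#   Log().write(LOG_WARN, 'something went wrong')      # printed at any log level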
798 class Cleanup:
799 """This singleton class manages any files created by cvs2svn. When
800 you first create a file, call Cleanup.register, passing the
801 filename, and the last pass that you need the file. After the end
802 of that pass, your file will be cleaned up after running an optional
803 callback. This class is a Borg, see
804 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
806 __shared_state = {}
808 def __init__(self):
809 self.__dict__ = self.__shared_state
810 if self.__dict__:
811 return
812 self._log = {}
813 self._callbacks = {}
815 def register(self, file, which_pass, callback=None):
816 """Register FILE for cleanup at the end of WHICH_PASS, running
817 function CALLBACK prior to removal. Registering a given FILE is
818 idempotent; you may register as many times as you wish, but it
819 will only be cleaned up once.
821 Note that if a file is registered multiple times, only the first
822 callback registered for that file will be called at cleanup
823 time. Also note that if you register a database file you must
824 close the database before cleanup, e.g. using a callback."""
826 self._log.setdefault(which_pass, {})[file] = 1
827 if callback and not self._callbacks.has_key(file):
828 self._callbacks[file] = callback
830 def cleanup(self, which_pass):
831 """Clean up all files, and invoke callbacks, for pass WHICH_PASS."""
833 if not self._log.has_key(which_pass):
834 return
835 for file in self._log[which_pass]:
836 Log().write(LOG_VERBOSE, "Deleting", file)
837 if self._callbacks.has_key(file):
838 self._callbacks[file]()
839 os.unlink(file)
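# Illustrative usage (a sketch; 'cvs2svn-example.txt' is a hypothetical file
# and pass2 is one of the pass functions defined later in this script):
#
#   f = open(temp('cvs2svn-example.txt'), 'w')
#   Cleanup().register(temp('cvs2svn-example.txt'), pass2, f.close)
#   ...
#   Cleanup().cleanup(pass2)   # calls f.close(), then unlinks the file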
842 # Always use these constants for opening databases.
843 DB_OPEN_READ = 'r'
844 DB_OPEN_NEW = 'n'
847 class AbstractDatabase:
848 """An abstract base class for anydbm-based databases."""
850 def __init__(self, filename, mode):
851 """A convenience function for opening an anydbm database."""
853 # pybsddb3 has a bug which prevents it from working with
854 # Berkeley DB 4.2 if you open the db with 'n' ("new"). This
855 # causes the DB_TRUNCATE flag to be passed, which is disallowed
856 # for databases protected by lock and transaction support
857 # (bsddb databases use locking from bsddb version 4.2.4 onwards).
859 # Therefore, manually perform the removal (we can do this, because
860 # we know that for bsddb - but *not* anydbm in general - the database
861 # consists of one file with the name we specify, rather than several
862 # based on that name).
863 if mode == 'n' and anydbm._defaultmod.__name__ == 'dbhash':
864 if os.path.isfile(filename):
865 os.unlink(filename)
866 mode = 'c'
868 self.db = anydbm.open(filename, mode)
870 # Import implementations for many mapping interface methods. Note
871 # that we specifically do not do this for any method which handles
872 # *values*, because our derived classes define __getitem__ and
873 # __setitem__ to override the storage of values, and grabbing
874 # methods directly from the dbm object would bypass this.
875 for meth_name in ('__delitem__', 'keys',
876 '__iter__', 'has_key', '__contains__', 'iterkeys', 'clear'):
877 meth_ref = getattr(self.db, meth_name, None)
878 if meth_ref:
879 setattr(self, meth_name, meth_ref)
881 def __delitem__(self, key):
882 # gdbm defines a __delitem__ method, but it cannot be assigned. So
883 # this method provides a fallback definition via explicit delegation:
884 del self.db[key]
886 def __iter__(self):
887 for key in self.keys():
888 yield key
890 def has_key(self, key):
891 try:
892 self.db[key]
893 return True
894 except KeyError:
895 return False
897 def __contains__(self, key):
898 return self.has_key(key)
900 def iterkeys(self):
901 return self.__iter__()
903 def clear(self):
904 for key in self.keys():
905 del self[key]
907 def items(self):
908 return [(key, self[key],) for key in self.keys()]
910 def values(self):
911 return [self[key] for key in self.keys()]
913 def get(self, key, default=None):
914 try:
915 return self[key]
916 except KeyError:
917 return default
920 class SDatabase(AbstractDatabase):
921 """A database that can only store strings."""
923 def __getitem__(self, key):
924 return self.db[key]
926 def __setitem__(self, key, value):
927 self.db[key] = value
930 class Database(AbstractDatabase):
931 """A database that uses the marshal module to store built-in types."""
933 def __getitem__(self, key):
934 return marshal.loads(self.db[key])
936 def __setitem__(self, key, value):
937 self.db[key] = marshal.dumps(value)
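# Illustrative sketch of the difference between the two classes above:
# SDatabase stores raw strings, while Database marshals built-in Python
# types.  (Assumes Ctx().tmpdir is set up; the file name is hypothetical.)
#
#   db = Database(temp('cvs2svn-example.db'), DB_OPEN_NEW)
#   db['k'] = { 'tags' : ['T1', 'T2'], 'count' : 3 }
#   db['k']['count']   -> 3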
940 class StatsKeeper:
941 __shared_state = { }
943 def __init__(self):
944 self.__dict__ = self.__shared_state
945 if self.__dict__:
946 return
947 self.filename = temp(STATISTICS_FILE)
948 Cleanup().register(self.filename, pass8)
949 # This can get kinda large, so we don't store it in our data dict.
950 self.repos_files = { }
952 if os.path.exists(self.filename):
953 self.unarchive()
954 else:
955 self.data = { 'cvs_revs_count' : 0,
956 'tags': { },
957 'branches' : { },
958 'repos_size' : 0,
959 'repos_file_count' : 0,
960 'svn_rev_count' : None,
961 'first_rev_date' : 1L<<32,
962 'last_rev_date' : 0,
963 'pass_timings' : { },
964 'start_time' : 0,
965 'end_time' : 0,
966 }
968 def log_duration_for_pass(self, duration, pass_num):
969 self.data['pass_timings'][pass_num] = duration
971 def set_start_time(self, start):
972 self.data['start_time'] = start
974 def set_end_time(self, end):
975 self.data['end_time'] = end
977 def _bump_item(self, key, amount=1):
978 self.data[key] += amount
980 def reset_c_rev_info(self):
981 self.data['cvs_revs_count'] = 0
982 self.data['tags'] = { }
983 self.data['branches'] = { }
985 def record_c_rev(self, c_rev):
986 self._bump_item('cvs_revs_count')
988 for tag in c_rev.tags:
989 self.data['tags'][tag] = None
990 for branch in c_rev.branches:
991 self.data['branches'][branch] = None
993 if c_rev.timestamp < self.data['first_rev_date']:
994 self.data['first_rev_date'] = c_rev.timestamp
996 if c_rev.timestamp > self.data['last_rev_date']:
997 self.data['last_rev_date'] = c_rev.timestamp
999 # Only add the size if this is the first time we see the file.
1000 if not self.repos_files.has_key(c_rev.fname):
1001 self._bump_item('repos_size', c_rev.file_size)
1002 self.repos_files[c_rev.fname] = None
1004 self.data['repos_file_count'] = len(self.repos_files)
1006 def set_svn_rev_count(self, count):
1007 self.data['svn_rev_count'] = count
1009 def svn_rev_count(self):
1010 return self.data['svn_rev_count']
1012 def archive(self):
1013 open(self.filename, 'w').write(marshal.dumps(self.data))
1015 def unarchive(self):
1016 self.data = marshal.loads(open(self.filename, 'r').read())
1018 def __str__(self):
1019 svn_revs_str = ""
1020 if self.data['svn_rev_count'] is not None:
1021 svn_revs_str = ('Total SVN Commits: %10s\n'
1022 % self.data['svn_rev_count'])
1024 return ('\n' \
1025 'cvs2svn Statistics:\n' \
1026 '------------------\n' \
1027 'Total CVS Files: %10i\n' \
1028 'Total CVS Revisions: %10i\n' \
1029 'Total Unique Tags: %10i\n' \
1030 'Total Unique Branches: %10i\n' \
1031 'CVS Repos Size in KB: %10i\n' \
1032 '%s' \
1033 'First Revision Date: %s\n' \
1034 'Last Revision Date: %s\n' \
1035 '------------------' \
1036 % (self.data['repos_file_count'],
1037 self.data['cvs_revs_count'],
1038 len(self.data['tags']),
1039 len(self.data['branches']),
1040 (self.data['repos_size'] / 1024),
1041 svn_revs_str,
1042 time.ctime(self.data['first_rev_date']),
1043 time.ctime(self.data['last_rev_date']),
1044 ))
1046 def timings(self):
1047 passes = self.data['pass_timings'].keys()
1048 passes.sort()
1049 output = 'Timings:\n------------------\n'
1051 def desc(val):
1052 if val == 1: return "second"
1053 return "seconds"
1055 for pass_num in passes:
1056 duration = int(self.data['pass_timings'][pass_num])
1057 p_str = ('pass %d:%6d %s\n'
1058 % (pass_num, duration, desc(duration)))
1059 output += p_str
1061 total = int(self.data['end_time'] - self.data['start_time'])
1062 output += ('total: %6d %s' % (total, desc(total)))
1063 return output
1066 class LastSymbolicNameDatabase:
1067 """Passing every CVSRevision in s-revs to this class will result in
1068 a Database whose key is the last CVS Revision a symbolic name was
1069 seen in, and whose value is a list of all symbolic names that were
1070 last seen in that revision."""
1072 def __init__(self):
1073 self.symbols = {}
1075 # Once we've gone through all the revs,
1076 # symbols.keys() will be a list of all tags and branches, and
1077 # their corresponding values will be a key into the last CVS revision
1078 # that they were used in.
1079 def log_revision(self, c_rev):
1080 # Gather last CVS Revision for symbolic name info and tag info
1081 for tag in c_rev.tags:
1082 self.symbols[tag] = c_rev.unique_key()
1083 if c_rev.op != OP_DELETE:
1084 for branch in c_rev.branches:
1085 self.symbols[branch] = c_rev.unique_key()
1087 # Creates an inversion of symbols above--a dictionary of lists (key
1088 # = CVS rev unique_key: val = list of symbols that close in that
1089 # rev).
1090 def create_database(self):
1091 symbol_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB), DB_OPEN_NEW)
1092 Cleanup().register(temp(SYMBOL_LAST_CVS_REVS_DB), pass5)
1093 for sym, rev_unique_key in self.symbols.items():
1094 ary = symbol_revs_db.get(rev_unique_key, [])
1095 ary.append(sym)
1096 symbol_revs_db[rev_unique_key] = ary
1099 class CVSRevisionDatabase:
1100 """A Database to store CVSRevision objects and retrieve them by their
1101 unique_key()."""
1103 def __init__(self, mode):
1104 """Initialize an instance, opening database in MODE (like the MODE
1105 argument to Database or anydbm.open())."""
1107 self.cvs_revs_db = SDatabase(temp(CVS_REVS_DB), mode)
1108 Cleanup().register(temp(CVS_REVS_DB), pass8)
1110 def log_revision(self, c_rev):
1111 """Add C_REV, a CVSRevision, to the database."""
1113 self.cvs_revs_db[c_rev.unique_key()] = str(c_rev)
1115 def get_revision(self, unique_key):
1116 """Return the CVSRevision stored under UNIQUE_KEY."""
1118 return CVSRevision(Ctx(), self.cvs_revs_db[unique_key])
1121 class TagsDatabase:
1122 """A Database to record symbolic names that are tags.
1124 Each key is a tag name. The value has no meaning, and is set to the
1125 empty string. (Since an SDatabase is used, the key cannot be set to
1126 None.)"""
1128 def __init__(self, mode):
1129 self.db = SDatabase(temp(TAGS_DB), mode)
1130 Cleanup().register(temp(TAGS_DB), pass8)
1132 def add(self, item):
1133 self.db[item] = ''
1135 def remove(self, item):
1136 del self.db[item]
1138 def __contains__(self, item):
1139 return self.db.has_key(item)
1142 class Project:
1143 """A project within a CVS repository."""
1145 def __init__(self, project_cvs_repos_path,
1146 trunk_path, branches_path, tags_path):
1147 """Create a new Project record.
1149 PROJECT_CVS_REPOS_PATH is the main CVS directory for this project
1150 (within the filesystem). TRUNK_PATH, BRANCHES_PATH, and TAGS_PATH
1151 are the full, normalized directory names in svn for the
1152 corresponding part of the repository."""
1154 self.project_cvs_repos_path = project_cvs_repos_path
1155 prefix = Ctx().cvs_repository.cvs_repos_path
1156 if not self.project_cvs_repos_path.startswith(prefix):
1157 raise FatalError("Project '%s' must start with '%s'"
1158 % (self.project_cvs_repos_path, prefix,))
1159 # The project's main directory as a cvs_path:
1160 self.project_cvs_path = self.project_cvs_repos_path[len(prefix):]
1161 if self.project_cvs_path.startswith(os.sep):
1162 self.project_cvs_path = self.project_cvs_path[1:]
1163 self.trunk_path = trunk_path
1164 self.branches_path = branches_path
1165 self.tags_path = tags_path
1166 verify_paths_disjoint(self.trunk_path, self.branches_path, self.tags_path)
1168 def is_source(self, svn_path):
1169 """Return True iff SVN_PATH is a legitimate source for this project.
1171 Legitimate paths are self.trunk_path or any directory directly
1172 under self.branches_path."""
1174 if svn_path == self.trunk_path:
1175 return True
1177 (head, tail,) = _path_split(svn_path)
1178 if head == self.branches_path:
1179 return True
1181 return False
1183 def is_unremovable(self, svn_path):
1184 """Return True iff the specified path must not be removed."""
1186 return svn_path in [self.trunk_path, self.branches_path, self.tags_path]
1188 def get_branch_path(self, branch_name):
1189 """Return the svnpath for the branch named BRANCH_NAME."""
1191 return _path_join(self.branches_path, _clean_symbolic_name(branch_name))
1193 def get_tag_path(self, tag_name):
1194 """Return the svnpath for the tag named TAG_NAME."""
1196 return _path_join(self.tags_path, _clean_symbolic_name(tag_name))
1198 def _relative_name(self, cvs_path):
1199 """Convert CVS_PATH into a name relative to this project's root directory.
1201 CVS_PATH has to begin (textually) with self.project_cvs_path.
1202 Remove prefix and optional '/'."""
1204 if not cvs_path.startswith(self.project_cvs_path):
1205 raise FatalError(
1206 "_relative_name: '%s' is not a sub-path of '%s'"
1207 % (cvs_path, self.project_cvs_path,))
1208 l = len(self.project_cvs_path)
1209 if cvs_path[l] == os.sep:
1210 l += 1
1211 return cvs_path[l:]
1213 def make_trunk_path(self, cvs_path):
1214 """Return the trunk path for CVS_PATH.
1216 Return the svn path for this file on trunk."""
1218 return _path_join(self.trunk_path, self._relative_name(cvs_path))
1220 def make_branch_path(self, branch_name, cvs_path):
1221 """Return the svn path for CVS_PATH on branch BRANCH_NAME."""
1223 return _path_join(self.get_branch_path(branch_name),
1224 self._relative_name(cvs_path))
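# Illustrative sketch (hypothetical configuration): for a Project whose
# project_cvs_path is 'proj', with trunk_path='trunk',
# branches_path='branches' and tags_path='tags':
#
#   project.make_trunk_path('proj/src/foo.c')           -> 'trunk/src/foo.c'
#   project.make_branch_path('B1/x', 'proj/src/foo.c')  -> 'branches/B1++x/src/foo.c'
#   project.get_tag_path('REL_1_0')                     -> 'tags/REL_1_0'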
1227 class CVSRevision:
1228 def __init__(self, ctx, *args):
1229 """Initialize a new CVSRevision with Ctx object CTX, and ARGS.
1231 If CTX is None, the following members and methods of the
1232 instantiated CVSRevision class object will be unavailable (or
1233 simply will not work correctly, if at all):
1234 cvs_path
1235 svn_path
1236 is_default_branch_revision()
1238 (Note that this class treats CTX as const, because the caller
1239 likely passed in a Borg instance of a Ctx. The reason this class
1240 takes CTX as as a parameter, instead of just instantiating a Ctx
1241 itself, is that this class should be usable outside cvs2svn.)
1243 If there is one argument in ARGS, it is a string, in the format of
1244 a line from a revs file. Do *not* include a trailing newline.
1246 If there are multiple ARGS, there must be 17 of them,
1247 comprising a parsed revs line:
1248 timestamp --> (int) date stamp for this cvs revision
1249 digest --> (string) digest of author+logmsg
1250 prev_timestamp --> (int) date stamp for the previous cvs revision
1251 next_timestamp --> (int) date stamp for the next cvs revision
1252 op --> (char) OP_ADD, OP_CHANGE, or OP_DELETE
1253 prev_rev --> (string or None) previous CVS rev, e.g., "1.2"
1254 rev --> (string) this CVS rev, e.g., "1.3"
1255 next_rev --> (string or None) next CVS rev, e.g., "1.4"
1256 file_in_attic --> (char or None) true if RCS file is in Attic
1257 file_executable --> (char or None) true if RCS file has exec bit set.
1258 file_size --> (int) size of the RCS file
1259 deltatext_code --> (char) 'N' if non-empty deltatext, else 'E'
1260 fname --> (string) relative path of file in CVS repos
1261 mode --> (string or None) "kkv", "kb", etc.
1262 branch_name --> (string or None) branch on which this rev occurred
1263 tags --> (list of strings) all tags on this revision
1264 branches --> (list of strings) all branches rooted in this rev
1266 The two forms of initialization are equivalent.
1268 WARNING: Due to the resync process in pass2, prev_timestamp or
1269 next_timestamp may be incorrect in the c-revs or s-revs files."""
1271 self._ctx = ctx
1272 if len(args) == 17:
1273 (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
1274 self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
1275 self.file_executable, self.file_size, self.deltatext_code,
1276 self.fname,
1277 self.mode, self.branch_name, self.tags, self.branches) = args
1278 elif len(args) == 1:
1279 data = args[0].split(' ', 15)
1280 (self.timestamp, self.digest, self.prev_timestamp, self.next_timestamp,
1281 self.op, self.prev_rev, self.rev, self.next_rev, self.file_in_attic,
1282 self.file_executable, self.file_size, self.deltatext_code,
1283 self.mode, self.branch_name, numtags, remainder) = data
1284 # Patch up data items which are not simple strings
1285 self.timestamp = int(self.timestamp, 16)
1286 if self.prev_timestamp == "*":
1287 self.prev_timestamp = 0
1288 else:
1289 self.prev_timestamp = int(self.prev_timestamp)
1290 if self.next_timestamp == "*":
1291 self.next_timestamp = 0
1292 else:
1293 self.next_timestamp = int(self.next_timestamp)
1294 if self.prev_rev == "*":
1295 self.prev_rev = None
1296 if self.next_rev == "*":
1297 self.next_rev = None
1298 if self.file_in_attic == "*":
1299 self.file_in_attic = None
1300 if self.file_executable == "*":
1301 self.file_executable = None
1302 self.file_size = int(self.file_size)
1303 if self.mode == "*":
1304 self.mode = None
1305 if self.branch_name == "*":
1306 self.branch_name = None
1307 numtags = int(numtags)
1308 tags_and_numbranches_and_remainder = remainder.split(' ', numtags + 1)
1309 self.tags = tags_and_numbranches_and_remainder[:-2]
1310 numbranches = int(tags_and_numbranches_and_remainder[-2])
1311 remainder = tags_and_numbranches_and_remainder[-1]
1312 branches_and_fname = remainder.split(' ', numbranches)
1313 self.branches = branches_and_fname[:-1]
1314 self.fname = branches_and_fname[-1]
1315 else:
1316 raise TypeError, 'CVSRevision() takes 2 or 18 arguments (%d given)' % \
1317 (len(args) + 1)
1318 if ctx is not None:
1319 self.cvs_path = ctx.cvs_repository.get_cvs_path(self.fname)
1320 if self.branch_name:
1321 self.svn_path = ctx.project.make_branch_path(self.branch_name,
1322 self.cvs_path)
1323 else:
1324 self.svn_path = ctx.project.make_trunk_path(self.cvs_path)
1326 # The 'primary key' of a CVS Revision is the revision number + the
1327 # filename. To provide a unique key (say, for a dict), we just glom
1328 # them together in a string. By passing in self.prev_rev or
1329 # self.next_rev, you can get the unique key for their respective
1330 # CVSRevisions.
1331 def unique_key(self, revnum="0"):
1332 if revnum == "0":
1333 revnum = self.rev
1334 elif revnum is None:
1335 return None
1336 return revnum + "/" + self.fname
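# Illustrative sketch (hypothetical file): for a CVSRevision with rev '1.3'
# and fname 'proj/src/foo.c,v':
#
#   c_rev.unique_key()                 -> '1.3/proj/src/foo.c,v'
#   c_rev.unique_key(c_rev.prev_rev)   -> '1.2/proj/src/foo.c,v'   (if prev_rev is '1.2')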
1338 def __str__(self):
1339 return ('%08lx %s %s %s %s %s %s %s %s %s %d %s %s %s %d%s%s %d%s%s %s'
1340 % (self.timestamp, self.digest, self.prev_timestamp or "*",
1341 self.next_timestamp or "*", self.op, (self.prev_rev or "*"),
1342 self.rev, (self.next_rev or "*"), (self.file_in_attic or "*"),
1343 (self.file_executable or "*"),
1344 self.file_size,
1345 self.deltatext_code, (self.mode or "*"),
1346 (self.branch_name or "*"),
1347 len(self.tags), self.tags and " " or "", " ".join(self.tags),
1348 len(self.branches), self.branches and " " or "",
1349 " ".join(self.branches),
1350 self.fname, ))
1352 # Returns true if this CVSRevision is the opening CVSRevision for
1353 # NAME (for this RCS file).
1354 def opens_symbolic_name(self, name):
1355 if name in self.tags:
1356 return 1
1357 if name in self.branches:
1358 # If this c_rev opens a branch and our op is OP_DELETE, then
1359 # that means that the file that this c_rev belongs to was
1360 # created on the branch, so for all intents and purposes, this
1361 # c_rev is *technically* not an opening. See Issue #62 for more
1362 # information.
1363 if self.op != OP_DELETE:
1364 return 1
1365 return 0
1367 def is_default_branch_revision(self):
1368 """Return 1 if SELF.rev of SELF.cvs_path is a default branch
1369 revision according to DEFAULT_BRANCHES_DB (see the conditions
1370 documented there), else return None."""
1372 val = self._ctx._default_branches_db.get(self.cvs_path, None)
1373 if val is not None:
1374 val_last_dot = val.rindex(".")
1375 our_last_dot = self.rev.rindex(".")
1376 default_branch = val[:val_last_dot]
1377 our_branch = self.rev[:our_last_dot]
1378 default_rev_component = int(val[val_last_dot + 1:])
1379 our_rev_component = int(self.rev[our_last_dot + 1:])
1380 if (default_branch == our_branch
1381 and our_rev_component <= default_rev_component):
1382 return 1
1383 # else
1384 return None
1386 def rcs_path(self):
1387 """Returns the actual filesystem path to the RCS file of this
1388 CVSRevision."""
1390 if self.file_in_attic is None:
1391 return self.fname
1392 else:
1393 basepath, filename = os.path.split(self.fname)
1394 return os.path.join(basepath, 'Attic', filename)
1396 def filename(self):
1397 """Return the last path component of self.fname, minus the ',v'."""
1399 return os.path.split(self.fname)[-1][:-2]
1402 class SymbolDatabase:
1403 """This database records information on all symbols in the RCS
1404 files. It is created in pass 1 and it is used in pass 2."""
1406 def __init__(self):
1407 # A hash that maps tag names to commit counts
1408 self.tags = { }
1409 # A hash that maps branch names to lists of the format
1410 # [ create_count, commit_count, blockers ], where blockers
1411 # is a hash that lists the symbols that depend on the
1412 # the branch. The blockers hash is used as a set, so the
1413 # values are not used.
1414 self.branches = { }
1416 def register_tag_creation(self, name):
1417 """Register the creation of the tag NAME."""
1419 self.tags[name] = self.tags.get(name, 0) + 1
1421 def _branch(self, name):
1422 """Helper function to get a branch node that will create and
1423 initialize the node if it does not exist."""
1425 if not self.branches.has_key(name):
1426 self.branches[name] = [ 0, 0, { } ]
1427 return self.branches[name]
1429 def register_branch_creation(self, name):
1430 """Register the creation of the branch NAME."""
1432 self._branch(name)[0] += 1
1434 def register_branch_commit(self, name):
1435 """Register a commit on the branch NAME."""
1437 self._branch(name)[1] += 1
1439 def register_branch_blocker(self, name, blocker):
1440 """Register BLOCKER as a blocker on the branch NAME."""
1442 self._branch(name)[2][blocker] = None
1444 def branch_has_commit(self, name):
1445 """Return non-zero if NAME has commits. Returns 0 if name
1446 is not a branch or if it has no commits."""
1448 return self.branches.has_key(name) and self.branches[name][1]
1450 def find_excluded_symbols(self, regexp_list):
1451 """Returns a hash of all symbols that match the regexps in
1452 REGEXP_LIST. The hash is used as a set so the values are
1453 not used."""
1455 excludes = { }
1456 for tag in self.tags:
1457 if match_regexp_list(regexp_list, tag):
1458 excludes[tag] = None
1459 for branch in self.branches:
1460 if match_regexp_list(regexp_list, branch):
1461 excludes[branch] = None
1462 return excludes
1464 def find_branch_exclude_blockers(self, branch, excludes):
1465 """Find all blockers of BRANCH, excluding the ones in the hash
1466 EXCLUDES."""
1468 blockers = { }
1469 if excludes.has_key(branch):
1470 for blocker in self.branches[branch][2]:
1471 if not excludes.has_key(blocker):
1472 blockers[blocker] = None
1473 return blockers
1475 def find_blocked_excludes(self, excludes):
1476 """Find all branches not in EXCLUDES that have blocking symbols that
1477 are not themselves excluded. Return a hash that maps branch names
1478 to a hash of blockers. The hash of blockers is used as a set so the
1479 values are not used."""
1481 blocked_branches = { }
1482 for branch in self.branches:
1483 blockers = self.find_branch_exclude_blockers(branch, excludes)
1484 if blockers:
1485 blocked_branches[branch] = blockers
1486 return blocked_branches
1488 def find_mismatches(self, excludes=None):
1489 """Find all symbols that are defined as both tags and branches,
1490 excluding the ones in EXCLUDES. Returns a list of 4-tuples with
1491 the symbol name, tag count, branch count and commit count."""
1493 if excludes is None:
1494 excludes = { }
1495 mismatches = [ ]
1496 for branch in self.branches:
1497 if not excludes.has_key(branch) and self.tags.has_key(branch):
1498 mismatches.append((branch, # name
1499 self.tags[branch], # tag count
1500 self.branches[branch][0], # branch count
1501 self.branches[branch][1])) # commit count
1502 return mismatches
1504 def read(self):
1505 """Read the symbol database from files."""
1507 f = open(temp(TAGS_LIST))
1508 while 1:
1509 line = f.readline()
1510 if not line:
1511 break
1512 tag, count = line.split()
1513 self.tags[tag] = int(count)
1515 f = open(temp(BRANCHES_LIST))
1516 while 1:
1517 line = f.readline()
1518 if not line:
1519 break
1520 words = line.split()
1521 self.branches[words[0]] = [ int(words[1]), int(words[2]), { } ]
1522 for blocker in words[3:]:
1523 self.branches[words[0]][2][blocker] = None
1525 def write(self):
1526 """Store the symbol database to files."""
1528 f = open(temp(TAGS_LIST), "w")
1529 Cleanup().register(temp(TAGS_LIST), pass2)
1530 for tag, count in self.tags.items():
1531 f.write("%s %d\n" % (tag, count))
1533 f = open(temp(BRANCHES_LIST), "w")
1534 Cleanup().register(temp(BRANCHES_LIST), pass2)
1535 for branch, info in self.branches.items():
1536 f.write("%s %d %d" % (branch, info[0], info[1]))
1537 if info[2]:
1538 f.write(" ")
1539 f.write(" ".join(info[2].keys()))
1540 f.write("\n")
1543 class FileDataCollector(cvs2svn_rcsparse.Sink):
1544 """Class responsible for collecting RCS data for a particular file.
1546 Any collected data that need to be remembered are stored into the
1547 referenced CollectData instance."""
1549 def __init__(self, collect_data, canonical_name, filename):
1550 """Create an object that is prepared to receive data for FILENAME.
1551 FILENAME is the absolute filesystem path to the file in question,
1552 and CANONICAL_NAME is FILENAME with the 'Attic' component removed
1553 (if the file is indeed in the Attic). COLLECT_DATA is used to
1554 store the information collected about the file."""
1556 self.collect_data = collect_data
1558 self.fname = canonical_name
1560 # We calculate and save some file metadata here, where we can do
1561 # it only once per file, instead of waiting until later where we
1562 # would have to do the same calculations once per CVS *revision*.
1564 self.cvs_path = Ctx().cvs_repository.get_cvs_path(self.fname)
1566 # If the paths are not the same, then that means that the
1567 # canonical_name has had the 'Attic' component stripped out.
1568 self.file_in_attic = None
1569 if canonical_name != filename:
1570 self.file_in_attic = 1
1572 file_stat = os.stat(filename)
1573 # The size of our file in bytes
1574 self.file_size = file_stat[stat.ST_SIZE]
1576 # Whether or not the executable bit is set.
1577 self.file_executable = None
1578 if file_stat[0] & stat.S_IXUSR:
1579 self.file_executable = 1
1581 # revision -> [timestamp, author, old-timestamp]
1582 self.rev_data = { }
1584 # Maps revision number (key) to the revision number of the
1585 # previous revision along this line of development.
1587 # For the first revision R on a branch, we consider the revision
1588 # from which R sprouted to be the 'previous'.
1590 # Note that this revision can't be determined arithmetically (due
1591 # to cvsadmin -o, which is why this is necessary).
1593 # If the key has no previous revision, then store None as key's
1594 # value.
1595 self.prev_rev = { }
1597 # This dict is essentially self.prev_rev with the values mapped in
1598 # the other direction, so following key -> value will yield you
1599 # the next revision number.
1601 # Unlike self.prev_rev, if the key has no next revision, then the
1602 # key is not present.
1603 self.next_rev = { }
1605 # Track the state of each revision so that in set_revision_info,
1606 # we can determine if our op is an add/change/delete. We can do
1607 # this because in set_revision_info, we'll have all of the
1608 # revisions for a file at our fingertips, and we need to examine
1609 # the state of our prev_rev to determine if we're an add or a
1610 # change--without the state of the prev_rev, we are unable to
1611 # distinguish between an add and a change.
1612 self.rev_state = { }
1614 # Hash mapping branch numbers, like '1.7.2', to branch names,
1615 # like 'Release_1_0_dev'.
1616 self.branch_names = { }
1618 # RCS flags (used for keyword expansion).
1619 self.mode = None
1621 # Hash mapping revision numbers, like '1.7', to lists of names
1622 # indicating which branches sprout from that revision, like
1623 # ['Release_1_0_dev', 'experimental_driver', ...].
1624 self.branchlist = { }
1626 # Like self.branchlist, but the values are lists of tag names that
1627 # apply to the key revision.
1628 self.taglist = { }
1630 # If set, this is an RCS branch number -- rcsparse calls this the
1631 # "principal branch", but CVS and RCS refer to it as the "default
1632 # branch", so that's what we call it, even though the rcsparse API
1633 # setter method is still 'set_principal_branch'.
1634 self.default_branch = None
1636 # If the RCS file doesn't have a default branch anymore, but does
1637 # have vendor revisions, then we make an educated guess that those
1638 # revisions *were* the head of the default branch up until the
1639 # commit of 1.2, at which point the file's default branch became
1640 # trunk. This records the date at which 1.2 was committed.
1641 self.first_non_vendor_revision_date = None
1643 # A list of all symbols defined for the current file. Used to
1644 # prevent multiple definitions of a symbol, something which can
1645 # easily happen when --symbol-transform is used.
1646 self.defined_symbols = { }
1648 def set_principal_branch(self, branch):
1649 self.default_branch = branch
1651 def set_expansion(self, mode):
1652 self.mode = mode
1654 def set_branch_name(self, branch_number, name):
1655 """Record that BRANCH_NUMBER is the branch number for branch NAME,
1656 and that NAME sprouts from BRANCH_NUMBER.
1657 BRANCH_NUMBER is an RCS branch number with an odd number of components,
1658 for example '1.7.2' (never '1.7.0.2')."""
1660 if not self.branch_names.has_key(branch_number):
1661 self.branch_names[branch_number] = name
1662 # The branchlist is keyed on the revision number from which the
1663 # branch sprouts, so strip off the odd final component.
1664 sprout_rev = branch_number[:branch_number.rfind(".")]
1665 self.branchlist.setdefault(sprout_rev, []).append(name)
1666 self.collect_data.symbol_db.register_branch_creation(name)
1667 else:
1668 sys.stderr.write("%s: in '%s':\n"
1669 " branch '%s' already has name '%s',\n"
1670 " cannot also have name '%s', ignoring the latter\n"
1671 % (warning_prefix, self.fname, branch_number,
1672 self.branch_names[branch_number], name))
1674 def rev_to_branch_name(self, revision):
1675 """Return the name of the branch on which REVISION lies.
1676 REVISION is a non-branch revision number with an even number of
1677 components, for example '1.7.2.1' (never '1.7.2' nor '1.7.0.2').
1678 For the convenience of callers, REVISION can also be a trunk
1679 revision such as '1.2', in which case just return None."""
1681 if trunk_rev.match(revision):
1682 return None
1683 return self.branch_names.get(revision[:revision.rindex(".")])
1685 def define_tag(self, name, revision):
1686 """Record a bidirectional mapping between symbolic NAME and REVISION.
1687 REVISION is an unprocessed revision number from the RCS file's
1688 header, for example '1.7', '1.7.0.2', '1.1.1', or '1.1.1.1'.
1689 This function will determine what kind of symbolic name it is by
1690 inspection, and record it in the right places."""
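# An illustrative sketch (the exact regexes used below are defined
# elsewhere in this file): a REVISION of the form '1.7.0.2' is a CVS
# magic branch number, one like '1.1.1' is an RCS branch number, and
# anything else (e.g. '1.7' or '1.1.1.1') is recorded as a tag on
# that revision.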
1692 for (pattern, replacement) in Ctx().symbol_transforms:
1693 newname = pattern.sub(replacement, name)
1694 if newname != name:
1695 Log().write(LOG_WARN, " symbol '%s' transformed to '%s'"
1696 % (name, newname))
1697 name = newname
1698 if self.defined_symbols.has_key(name):
1699 err = "%s: Multiple definitions of the symbol '%s' in '%s'" \
1700 % (error_prefix, name, self.fname)
1701 sys.stderr.write(err + "\n")
1702 self.collect_data.fatal_errors.append(err)
1703 self.defined_symbols[name] = None
1704 m = cvs_branch_tag.match(revision)
1705 if m:
1706 self.set_branch_name(m.group(1) + m.group(2), name)
1707 elif rcs_branch_tag.match(revision):
1708 self.set_branch_name(revision, name)
1709 else:
1710 self.taglist.setdefault(revision, []).append(name)
1711 self.collect_data.symbol_db.register_tag_creation(name)
1713 def define_revision(self, revision, timestamp, author, state,
1714 branches, next):
1715 # Record the state of our revision for later calculations
1716 self.rev_state[revision] = state
1718 # store the rev_data as a list in case we have to jigger the timestamp
1719 self.rev_data[revision] = [int(timestamp), author, None]
1721 # When on trunk, the RCS 'next' revision number points to what
1722 # humans might consider to be the 'previous' revision number. For
1723 # example, 1.3's RCS 'next' is 1.2.
1725 # However, on a branch, the RCS 'next' revision number really does
1726 # point to what humans would consider to be the 'next' revision
1727 # number. For example, 1.1.2.1's RCS 'next' would be 1.1.2.2.
1729 # In other words, in RCS, 'next' always means "where to find the next
1730 # deltatext that you need this revision to retrieve."
1732 # That said, we don't *want* RCS's behavior here, so we determine
1733 # whether we're on trunk or a branch and set self.prev_rev
1734 # accordingly.
1736 # One last thing. Note that if REVISION is a branch revision,
1737 # instead of mapping REVISION to NEXT, we instead map NEXT to
1738 # REVISION. Since we loop over all revisions in the file before
1739 # doing anything with the data we gather here, this 'reverse
1740 # assignment' effectively does the following:
1742 # 1. Gives us no 'prev' value for REVISION (in this
1743 # iteration... it may have been set in a previous iteration)
1745 # 2. Sets the 'prev' value for the revision with number NEXT to
1746 # REVISION. So when we come around to the branch revision whose
1747 # revision value is NEXT, its 'prev' and 'prev_rev' are already
1748 # set.
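# A hypothetical example of the two cases below: for trunk revision
# '1.3' whose RCS 'next' is '1.2', we record prev_rev['1.3'] = '1.2'
# and next_rev['1.2'] = '1.3'; for branch revision '1.1.2.1' whose
# RCS 'next' is '1.1.2.2', we record prev_rev['1.1.2.2'] = '1.1.2.1'
# and next_rev['1.1.2.1'] = '1.1.2.2'.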
1749 if trunk_rev.match(revision):
1750 self.prev_rev[revision] = next
1751 self.next_rev[next] = revision
1752 elif next:
1753 self.prev_rev[next] = revision
1754 self.next_rev[revision] = next
1756 for b in branches:
1757 self.prev_rev[b] = revision
1759 # Ratchet up the highest vendor head revision, if necessary.
1760 if self.default_branch:
1761 default_branch_root = self.default_branch + "."
1762 if ((revision.find(default_branch_root) == 0)
1763 and (default_branch_root.count('.') == revision.count('.'))):
1764 # This revision is on the default branch, so record that it is
1765 # the new highest default branch head revision.
1766 self.collect_data.default_branches_db[self.cvs_path] = revision
1767 else:
1768 # No default branch, so make an educated guess.
1769 if revision == '1.2':
1770 # This is probably the time when the file stopped having a
1771 # default branch, so make a note of it.
1772 self.first_non_vendor_revision_date = timestamp
1773 else:
1774 m = vendor_revision.match(revision)
1775 if m and ((not self.first_non_vendor_revision_date)
1776 or (timestamp < self.first_non_vendor_revision_date)):
1777 # We're looking at a vendor revision, and it wasn't
1778 # committed after this file lost its default branch, so bump
1779 # the maximum trunk vendor revision in the permanent record.
1780 self.collect_data.default_branches_db[self.cvs_path] = revision
1782 if not trunk_rev.match(revision):
1783 # Check for unlabeled branches, record them. We tried to collect
1784 # all branch names when we parsed the symbolic name header
1785 # earlier, of course, but that didn't catch unlabeled branches.
1786 # If a branch is unlabeled, this is our first encounter with it,
1787 # so we have to record its data now.
1788 branch_number = revision[:revision.rindex(".")]
1789 if not self.branch_names.has_key(branch_number):
1790 branch_name = "unlabeled-" + branch_number
1791 self.set_branch_name(branch_number, branch_name)
1793 # Register the commit on this non-trunk branch
1794 branch_name = self.branch_names[branch_number]
1795 self.collect_data.symbol_db.register_branch_commit(branch_name)
1797 def tree_completed(self):
1798 """The revision tree has been parsed. Analyze it for consistency."""
1800 # Our algorithm depends upon the timestamps on the revisions occurring
1801 # monotonically over time. That is, we want to see rev 1.34 occur in
1802 # time before rev 1.35. If we inserted 1.35 *first* (due to the time-
1803 # sorting), and then tried to insert 1.34, we'd be screwed.
1805 # To perform the analysis, we simply visit all of the 'previous'
1806 # links that we have recorded and validate that the previous
1807 # revision's timestamp is earlier than the specified revision's.
1809 # if we have to resync some nodes, then we restart the scan. just keep
1810 # looping as long as we need to restart.
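# A worked sketch with made-up numbers: suppose
#   self.prev_rev == {'1.1': None, '1.2': '1.1'}
#   self.rev_data == {'1.1': [100, 'jrandom', None],
#                     '1.2': [ 90, 'jrandom', None]}
# Revision 1.1 (the 'previous' of 1.2) appears to be newer than 1.2,
# so the loop below pushes 1.1 back to timestamp 89 and remembers its
# old timestamp:
#   self.rev_data == {'1.1': [89, 'jrandom', 100],
#                     '1.2': [90, 'jrandom', None]}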
1811 while 1:
1812 for current, prev in self.prev_rev.items():
1813 if not prev:
1814 # no previous revision exists (i.e. the initial revision)
1815 continue
1816 t_c = self.rev_data[current][0]
1817 t_p = self.rev_data[prev][0]
1818 if t_p >= t_c:
1819 # the previous revision occurred later than the current revision.
1820 # shove the previous revision back in time (and any before it that
1821 # may need to shift).
1823 # We sync backwards and not forwards because any given CVS
1824 # Revision has only one previous revision. However, a CVS
1825 # Revision can *be* a previous revision for many other
1826 # revisions (e.g., a revision that is the source of multiple
1827 # branches). This becomes relevant when we do the secondary
1828 # synchronization in pass 2--we can make certain that we
1829 # don't resync a revision earlier than its previous
1830 # revision, but it would be non-trivial to make sure that we
1831 # don't resync revision R *after* any revisions that have R
1832 # as a previous revision.
1833 while t_p >= t_c:
1834 self.rev_data[prev][0] = t_c - 1 # new timestamp
1835 self.rev_data[prev][2] = t_p # old timestamp
1836 delta = t_c - 1 - t_p
1837 msg = "PASS1 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
1838 % (self.cvs_path, prev, time.ctime(t_p), delta)
1839 Log().write(LOG_VERBOSE, msg)
1840 if (delta > COMMIT_THRESHOLD
1841 or delta < (COMMIT_THRESHOLD * -1)):
1842 Log().write(LOG_WARN,
1843 "%s: Significant timestamp change for '%s' "
1844 "(%d seconds)"
1845 % (warning_prefix, self.cvs_path, delta))
1846 current = prev
1847 prev = self.prev_rev[current]
1848 if not prev:
1849 break
1850 t_c -= 1 # self.rev_data[current][0]
1851 t_p = self.rev_data[prev][0]
1853 # break from the for-loop
1854 break
1855 else:
1856 # finished the for-loop (no resyncing was performed)
1857 return
1859 def set_revision_info(self, revision, log, text):
1860 timestamp, author, old_ts = self.rev_data[revision]
1861 digest = sha.new(log + '\0' + author).hexdigest()
1862 if old_ts:
1863 # the timestamp on this revision was changed. log it for later
1864 # resynchronization of other files' revisions that occurred
1865 # for this time and log message.
1866 self.collect_data.resync.write('%08lx %s %08lx\n'
1867 % (old_ts, digest, timestamp))
1869 # "...Give back one kadam to honor the Hebrew God whose Ark this is."
1870 # -- Imam to Indy and Sallah, in 'Raiders of the Lost Ark'
1872 # If revision 1.1 appears to have been created via 'cvs add'
1873 # instead of 'cvs import', then this file probably never had a
1874 # default branch, so retroactively remove its record in the
1875 # default branches db. The test is that the log message CVS uses
1876 # for 1.1 in imports is "Initial revision\n" with no period.
1877 if revision == '1.1' and log != 'Initial revision\n':
1878 try:
1879 del self.collect_data.default_branches_db[self.cvs_path]
1880 except KeyError:
1881 pass
1883 # Get the timestamps of the previous and next revisions
1884 prev_rev = self.prev_rev[revision]
1885 prev_timestamp, ign, ign = self.rev_data.get(prev_rev, [0, None, None])
1887 next_rev = self.next_rev.get(revision)
1888 next_timestamp, ign, ign = self.rev_data.get(next_rev, [0, None, None])
1890 # How to tell if a CVSRevision is an add, a change, or a deletion:
1892 # It's a delete if RCS state is 'dead'
1894 # It's an add if RCS state is 'Exp.' and
1895 # - we either have no previous revision
1896 # or
1897 # - we have a previous revision whose state is 'dead'
1899 # Anything else is a change.
1900 if self.rev_state[revision] == 'dead':
1901 op = OP_DELETE
1902 elif ((self.prev_rev.get(revision, None) is None)
1903 or (self.rev_state[self.prev_rev[revision]] == 'dead')):
1904 op = OP_ADD
1905 else:
1906 op = OP_CHANGE
1908 def is_branch_revision(rev):
1909 """Return True if this revision is not a trunk revision,
1910 else return False."""
1912 if rev.count('.') >= 3:
1913 return True
1914 return False
1916 def is_same_line_of_development(rev1, rev2):
1917 """Return True if rev1 and rev2 are on the same line of
1918 development (i.e., both on trunk, or both on the same branch);
1919 return False otherwise. Either rev1 or rev2 can be None, in
1920 which case automatically return False."""
1922 if rev1 is None or rev2 is None:
1923 return False
1924 if rev1.count('.') == 1 and rev2.count('.') == 1:
1925 return True
1926 if rev1[0:rev1.rfind('.')] == rev2[0:rev2.rfind('.')]:
1927 return True
1928 return False
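# Hypothetical examples: '1.2' and '1.5' are both on trunk, so they
# are on the same line of development; '1.7.2.1' and '1.7.2.4' share
# the branch '1.7.2', so they are too; '1.7' and '1.7.2.1' are not.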
1930 # There can be an odd situation where the tip revision of a branch
1931 # is alive, but every predecessor on the branch is in state 'dead',
1932 # yet the revision from which the branch sprouts is alive. (This
1933 # is sort of a mirror image of the more common case of adding a
1934 # file on a branch, in which the first revision on the branch is
1935 # alive while the revision from which it sprouts is dead.)
1937 # In this odd situation, we must mark the first live revision on
1938 # the branch as an OP_CHANGE instead of an OP_ADD, because it
1939 # reflects, however indirectly, a change w.r.t. the source
1940 # revision from which the branch sprouts.
1942 # This is issue #89.
1943 cur_num = revision
1944 if is_branch_revision(revision) and self.rev_state[revision] != 'dead':
1945 while 1:
1946 prev_num = self.prev_rev.get(cur_num, None)
1947 if not cur_num or not prev_num:
1948 break
1949 if (not is_same_line_of_development(cur_num, prev_num)
1950 and self.rev_state[cur_num] == 'dead'
1951 and self.rev_state[prev_num] != 'dead'):
1952 op = OP_CHANGE
1953 cur_num = self.prev_rev.get(cur_num, None)
1955 if text:
1956 deltatext_code = DELTATEXT_NONEMPTY
1957 else:
1958 deltatext_code = DELTATEXT_EMPTY
1960 c_rev = CVSRevision(Ctx(), timestamp, digest, prev_timestamp,
1961 next_timestamp, op,
1962 prev_rev, revision, next_rev,
1963 self.file_in_attic, self.file_executable,
1964 self.file_size,
1965 deltatext_code, self.fname,
1966 self.mode, self.rev_to_branch_name(revision),
1967 self.taglist.get(revision, []),
1968 self.branchlist.get(revision, []))
1969 self.collect_data.revs.write(str(c_rev) + "\n")
1970 StatsKeeper().record_c_rev(c_rev)
1972 if not self.collect_data.metadata_db.has_key(digest):
1973 self.collect_data.metadata_db[digest] = (author, log)
1975 def parse_completed(self):
1976 # Walk through all branches and tags and register them with
1977 # their parent branch in the symbol database.
1978 for revision, symbols in self.taglist.items() + self.branchlist.items():
1979 for symbol in symbols:
1980 name = self.rev_to_branch_name(revision)
1981 if name is not None:
1982 self.collect_data.symbol_db.register_branch_blocker(name, symbol)
1984 self.collect_data.num_files += 1
1987 class CollectData:
1988 """Repository for data collected by parsing the CVS repository files.
1990 This class manages the databases into which information collected
1991 from the CVS repository is stored. The data are stored into this
1992 class by FileDataCollector instances, one of which is created for
1993 each file to be parsed."""
1995 def __init__(self):
1996 self.revs = open(temp(DATAFILE + REVS_SUFFIX), 'w')
1997 Cleanup().register(temp(DATAFILE + REVS_SUFFIX), pass2)
1998 self.resync = open(temp(DATAFILE + RESYNC_SUFFIX), 'w')
1999 Cleanup().register(temp(DATAFILE + RESYNC_SUFFIX), pass2)
2000 self.default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
2001 DB_OPEN_NEW)
2002 Cleanup().register(temp(DEFAULT_BRANCHES_DB), pass5)
2003 self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_NEW)
2004 Cleanup().register(temp(METADATA_DB), pass8)
2005 self.fatal_errors = []
2006 self.num_files = 0
2007 self.symbol_db = SymbolDatabase()
2009 # 1 if we've collected data for at least one file, None otherwise.
2010 self.found_valid_file = None
2012 def write_symbol_db(self):
2013 self.symbol_db.write()
2016 class SymbolingsLogger:
2017 """Manage the file that contains lines for symbol openings and
2018 closings.
2020 This data will later be used to determine valid SVNRevision ranges
2021 from which a file can be copied when creating a branch or tag in
2022 Subversion. Do this by finding "Openings" and "Closings" for each
2023 file copied onto a branch or tag.
2025 An "Opening" is the CVSRevision from which a given branch/tag
2026 sprouts on a path.
2028 The "Closing" for that branch/tag and path is the next CVSRevision
2029 on the same line of development as the opening.
2031 For example, on file 'foo.c', branch BEE has branch number 1.2.2 and
2032 obviously sprouts from revision 1.2. Therefore, 1.2 is the opening
2033 for BEE on path 'foo.c', and 1.3 is the closing for BEE on path
2034 'foo.c'. Note that there may be many revisions chronologically
2035 between 1.2 and 1.3, for example, revisions on branches of 'foo.c',
2036 perhaps even including on branch BEE itself. But 1.3 is the next
2037 revision *on the same line* as 1.2, that is why it is the closing
2038 revision for those symbolic names of which 1.2 is the opening.
2040 The reason for doing all this hullabaloo is to make branch and tag
2041 creation as efficient as possible by minimizing the number of copies
2042 and deletes per creation. For example, revisions 1.2 and 1.3 of
2043 foo.c might correspond to revisions 17 and 30 in Subversion. That
2044 means that when creating branch BEE, there is some motivation to do
2045 the copy from one of 17-30. Now if there were another file,
2046 'bar.c', whose opening and closing CVSRevisions for BEE corresponded
2047 to revisions 24 and 39 in Subversion, we would know that the ideal
2048 thing would be to copy the branch from somewhere between 24 and 29,
2049 inclusive."""
2052 def __init__(self):
2053 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS), 'w')
2054 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS), pass6)
2055 self.closings = open(temp(SYMBOL_CLOSINGS_TMP), 'w')
2056 Cleanup().register(temp(SYMBOL_CLOSINGS_TMP), pass5)
2058 # The keys of this dictionary are *source* cvs_paths for which
2059 # we've encountered an 'opening' on the default branch. The
2060 # values are the (uncleaned) symbolic names that this path has
2061 # opened.
2062 self.open_paths_with_default_branches = { }
2064 def log_revision(self, c_rev, svn_revnum):
2065 """Log any openings found in C_REV, and if C_REV.next_rev is not
2066 None, a closing. The opening uses SVN_REVNUM, but the closing (if
2067 any) will have its revnum determined later."""
2069 for name in c_rev.tags + c_rev.branches:
2070 self._note_default_branch_opening(c_rev, name)
2071 if c_rev.op != OP_DELETE:
2072 self._log(name, svn_revnum,
2073 c_rev.cvs_path, c_rev.branch_name, OPENING)
2075 # If our c_rev has a next_rev, then that's the closing rev for
2076 # this source revision. Log it to closings for later processing
2077 # since we don't know the svn_revnum yet.
2078 if c_rev.next_rev is not None:
2079 self.closings.write('%s %s\n' %
2080 (name, c_rev.unique_key(c_rev.next_rev)))
2082 def _log(self, name, svn_revnum, cvs_path, branch_name, type):
2083 """Write out a single line to the symbol_openings_closings file
2084 representing that SVN_REVNUM of CVS_PATH on BRANCH_NAME is either the
2085 opening or closing (TYPE) of NAME (a symbolic name).
2087 TYPE should only be one of the following global constants:
2088 OPENING or CLOSING."""
2090 # 8 places gives us 999,999,999 SVN revs. That *should* be enough.
2091 self.symbolings.write(
2092 '%s %.8d %s %s %s\n'
2093 % (name, svn_revnum, type, branch_name or '*', cvs_path))
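# For example, a single hypothetical line for an opening of symbol
# 'BEE' at Subversion r17 on trunk (branch_name None, hence '*')
# could look like:
#   BEE 00000017 <OPENING code> * proj/foo.c
# where the OPENING and CLOSING codes are constants defined elsewhere
# in this file.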
2095 def close(self):
2096 """Iterate through the closings file, lookup the svn_revnum for
2097 each closing CVSRevision, and write a proper line out to the
2098 symbolings file."""
2100 # Use this to get the c_rev of our rev_key
2101 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_READ)
2103 self.closings.close()
2104 for line in fileinput.FileInput(temp(SYMBOL_CLOSINGS_TMP)):
2105 (name, rev_key) = line.rstrip().split(" ", 1)
2106 svn_revnum = Ctx()._persistence_manager.get_svn_revnum(rev_key)
2108 c_rev = cvs_revs_db.get_revision(rev_key)
2109 self._log(name, svn_revnum, c_rev.cvs_path, c_rev.branch_name, CLOSING)
2111 self.symbolings.close()
2113 def _note_default_branch_opening(self, c_rev, symbolic_name):
2114 """If C_REV is a default branch revision, log C_REV.cvs_path as an
2115 opening for SYMBOLIC_NAME."""
2117 self.open_paths_with_default_branches.setdefault(
2118 c_rev.cvs_path, []).append(symbolic_name)
2120 def log_default_branch_closing(self, c_rev, svn_revnum):
2121 """If self.open_paths_with_default_branches contains
2122 C_REV.cvs_path, then call log each name in
2123 self.open_paths_with_default_branches[C_REV.cvs_path] as a closing
2124 with SVN_REVNUM as the closing revision number."""
2126 path = c_rev.cvs_path
2127 if self.open_paths_with_default_branches.has_key(path):
2128 # log each symbol as a closing
2129 for name in self.open_paths_with_default_branches[path]:
2130 self._log(name, svn_revnum, path, None, CLOSING)
2131 # Remove them from the openings list as we're done with them.
2132 del self.open_paths_with_default_branches[path]
2135 class PersistenceManager:
2136 """The PersistenceManager allows us to effectively store SVNCommits
2137 to disk and retrieve them later using only their subversion revision
2138 number as the key. It also returns the subversion revision number
2139 for a given CVSRevision's unique key.
2141 All information pertinent to each SVNCommit is stored in a series of
2142 on-disk databases so that SVNCommits can be retrieved on-demand.
2144 MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.
2145 In 'new' mode, PersistenceManager will initialize a new set of on-disk
2146 databases and be fully-featured.
2147 In 'read' mode, PersistenceManager will open existing on-disk databases
2148 and the set_* methods will be unavailable."""
2150 def __init__(self, mode):
2151 self.mode = mode
2152 if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
2153 raise RuntimeError, "Invalid 'mode' argument to PersistenceManager"
2154 self.svn2cvs_db = Database(temp(SVN_REVNUMS_TO_CVS_REVS), mode)
2155 Cleanup().register(temp(SVN_REVNUMS_TO_CVS_REVS), pass8)
2156 self.cvs2svn_db = Database(temp(CVS_REVS_TO_SVN_REVNUMS), mode)
2157 Cleanup().register(temp(CVS_REVS_TO_SVN_REVNUMS), pass8)
2158 self.svn_commit_metadata = Database(temp(METADATA_DB), DB_OPEN_READ)
2159 self.cvs_revisions = CVSRevisionDatabase(DB_OPEN_READ)
2160 ###PERF kff Elsewhere there are comments about sucking the tags db
2161 ### into memory. That seems like a good idea.
2162 if not Ctx().trunk_only:
2163 self.tags_db = TagsDatabase(DB_OPEN_READ)
2165 # "branch_name" -> svn_revnum in which branch was last filled.
2166 # This is used by CVSCommit._pre_commit, to prevent creating a fill
2167 # revision which would have nothing to do.
2168 self.last_filled = {}
2170 def get_svn_revnum(self, cvs_rev_unique_key):
2171 """Return the Subversion revision number in which
2172 CVS_REV_UNIQUE_KEY was committed, or SVN_INVALID_REVNUM if there
2173 is no mapping for CVS_REV_UNIQUE_KEY."""
2175 return int(self.cvs2svn_db.get(cvs_rev_unique_key, SVN_INVALID_REVNUM))
2177 def get_svn_commit(self, svn_revnum):
2178 """Return an SVNCommit that corresponds to SVN_REVNUM.
2180 If no SVNCommit exists for revnum SVN_REVNUM, then return None.
2182 This method can throw SVNCommitInternalInconsistencyError."""
2184 svn_commit = SVNCommit("Retrieved from disk", svn_revnum)
2185 (c_rev_keys, motivating_revnum, name, date) = self.svn2cvs_db.get(
2186 str(svn_revnum), (None, None, None, None))
2187 if c_rev_keys is None:
2188 return None
2190 digest = None
2191 for key in c_rev_keys:
2192 c_rev = self.cvs_revisions.get_revision(key)
2193 svn_commit.add_revision(c_rev)
2194 # Set the author and log message for this commit by using
2195 # CVSRevision metadata, but only if we haven't done so already.
2196 if digest is None:
2197 digest = c_rev.digest
2198 author, log_msg = self.svn_commit_metadata[digest]
2199 svn_commit.set_author(author)
2200 svn_commit.set_log_msg(log_msg)
2202 svn_commit.set_date(date)
2204 # If we're doing a trunk-only conversion, we don't need to do any more
2205 # work.
2206 if Ctx().trunk_only:
2207 return svn_commit
2209 if name:
2210 if svn_commit.cvs_revs:
2211 raise SVNCommit.SVNCommitInternalInconsistencyError(
2212 "An SVNCommit cannot have cvs_revisions *and* a corresponding\n"
2213 "symbolic name ('%s') to fill."
2214 % (_clean_symbolic_name(name),))
2215 svn_commit.set_symbolic_name(name)
2216 if name in self.tags_db:
2217 svn_commit.is_tag = 1
2219 if motivating_revnum is not None:
2220 svn_commit.set_motivating_revnum(motivating_revnum)
2222 return svn_commit
2224 def put_svn_commit(self, svn_revnum, cvs_revs,
2225 date, name, motivating_revnum):
2226 """Record the bidirectional mapping between SVN_REVNUM and
2227 CVS_REVS and record associated attributes."""
2229 if self.mode == DB_OPEN_READ:
2230 raise RuntimeError, \
2231 'Write operation attempted on read-only PersistenceManager'
2233 for c_rev in cvs_revs:
2234 Log().write(LOG_VERBOSE, " ", c_rev.unique_key())
2236 self.svn2cvs_db[str(svn_revnum)] = ([x.unique_key() for x in cvs_revs],
2237 motivating_revnum, name, date)
2239 for c_rev in cvs_revs:
2240 self.cvs2svn_db[c_rev.unique_key()] = svn_revnum
2242 # If it is not a primary commit, then record last_filled. name is
2243 # allowed to be None.
2244 if name or motivating_revnum:
2245 self.last_filled[name] = svn_revnum
2248 class CVSCommit:
2249 """Each instance of this class contains a number of CVS Revisions
2250 that correspond to one or more Subversion Commits. After all CVS
2251 Revisions are added to the grouping, calling process_revisions will
2252 generate a Subversion Commit (or Commits) for the set of CVS
2253 Revisions in the grouping."""
2255 def __init__(self, digest, author, log):
2256 self.digest = digest
2257 self.author = author
2258 self.log = log
2260 # Symbolic names for which the last source revision has already
2261 # been seen and for which the CVSRevisionAggregator has already
2262 # generated a fill SVNCommit. See self.process_revisions().
2263 self.done_symbols = [ ]
2265 self.files = { }
2266 # Lists of CVSRevisions
2267 self.changes = [ ]
2268 self.deletes = [ ]
2270 # Start out with a t_min higher than any incoming time T, and a
2271 # t_max lower than any incoming T. This way the first T will
2272 # push t_min down to T, and t_max up to T, naturally (without any
2273 # special-casing), and successive times will then ratchet them
2274 # outward as appropriate.
2275 self.t_min = 1L<<32
2276 self.t_max = 0
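# For instance, if the first CVSRevision added has timestamp 1000,
# t_min drops from 1<<32 to 1000 and t_max rises from 0 to 1000; a
# later revision at timestamp 1005 then only raises t_max, giving the
# range [1000, 1005].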
2278 # This will be set to the SVNCommit that occurs in self._commit.
2279 self.motivating_commit = None
2281 # This is a list of all non-primary commits motivated by the main
2282 # commit. We gather these so that we can set their dates to the
2283 # same date as the primary commit.
2284 self.secondary_commits = [ ]
2286 # State for handling default branches.
2288 # Here is a tempting, but ultimately nugatory, bit of logic, which
2289 # I share with you so you may appreciate the less attractive, but
2290 # refreshingly non-nugatory, logic which follows it:
2292 # If some of the commits in this txn happened on a non-trunk
2293 # default branch, then those files will have to be copied into
2294 # trunk manually after being changed on the branch (because the
2295 # RCS "default branch" appears as head, i.e., trunk, in practice).
2296 # As long as those copies don't overwrite any trunk paths that
2297 # were also changed in this commit, then we can do the copies in
2298 # the same revision, because they won't cover changes that don't
2299 # appear anywhere/anywhen else. However, if some of the trunk dst
2300 # paths *did* change in this commit, then immediately copying the
2301 # branch changes would lose those trunk mods forever. So in this
2302 # case, we need to do at least that copy in its own revision. And
2303 # for simplicity's sake, if we're creating the new revision for
2304 # even one file, then we just do all such copies together in the
2305 # new revision.
2307 # Doesn't that sound nice?
2309 # Unfortunately, Subversion doesn't support copies with sources
2310 # in the current txn. All copies must be based in committed
2311 # revisions. Therefore, we generate the above-described new
2312 # revision unconditionally.
2314 # This is a list of c_revs, and a c_rev is appended for each
2315 # default branch commit that will need to be copied to trunk (or
2316 # deleted from trunk) in some generated revision following the
2317 # "regular" revision.
2318 self.default_branch_cvs_revisions = [ ]
2320 def __cmp__(self, other):
2321 # Commits should be sorted by t_max. If both self and other have
2322 # the same t_max, break the tie using t_min, and lastly, digest.
2323 # If all those are equal, then compare based on ids, to ensure
2324 # that no two instances compare equal.
2325 return (cmp(self.t_max, other.t_max) or cmp(self.t_min, other.t_min)
2326 or cmp(self.digest, other.digest) or cmp(id(self), id(other)))
2328 def __hash__(self):
2329 return id(self)
2331 def has_file(self, fname):
2332 return self.files.has_key(fname)
2334 def revisions(self):
2335 return self.changes + self.deletes
2337 def opens_symbolic_name(self, name):
2338 """Returns true if any CVSRevision in this commit is on a tag or a
2339 branch or is the origin of a tag or branch."""
2341 for c_rev in self.revisions():
2342 if c_rev.opens_symbolic_name(name):
2343 return 1
2344 return 0
2346 def add_revision(self, c_rev):
2347 # Record the time range of this commit.
2349 # ### ISSUE: It's possible, though unlikely, that the time range
2350 # of a commit could get gradually expanded to be arbitrarily
2351 # longer than COMMIT_THRESHOLD. I'm not sure this is a huge
2352 # problem, and anyway deciding where to break it up would be a
2353 # judgement call. For now, we just print a warning in commit() if
2354 # this happens.
2355 if c_rev.timestamp < self.t_min:
2356 self.t_min = c_rev.timestamp
2357 if c_rev.timestamp > self.t_max:
2358 self.t_max = c_rev.timestamp
2360 if c_rev.op == OP_DELETE:
2361 self.deletes.append(c_rev)
2362 else:
2363 # OP_CHANGE or OP_ADD
2364 self.changes.append(c_rev)
2366 self.files[c_rev.fname] = 1
2368 def _pre_commit(self):
2369 """Generates any SVNCommits that must exist before the main commit."""
2371 # There may be multiple c_revs in this commit that would cause
2372 # branch B to be filled, but we only want to fill B once. On the
2373 # other hand, there might be multiple branches committed on in
2374 # this commit. Whatever the case, we should count exactly one
2375 # commit per branch, because we only fill a branch once per
2376 # CVSCommit. This list tracks which branches we've already
2377 # counted.
2378 accounted_for_sym_names = [ ]
2380 def fill_needed(c_rev, pm):
2381 """Return 1 if this is the first commit on a new branch (for
2382 this file) and we need to fill the branch; else return 0
2383 (meaning that some other file's first commit on the branch has
2384 already done the fill for us).
2386 If C_REV.op is OP_ADD, only return 1 if the branch that this
2387 commit is on has no last filled revision.
2389 PM is a PersistenceManager to query."""
2391 # Different '.' counts indicate that c_rev is now on a different
2392 # line of development (and may need a fill)
2393 if c_rev.rev.count('.') != c_rev.prev_rev.count('.'):
2394 svn_revnum = pm.get_svn_revnum(c_rev.unique_key(c_rev.prev_rev))
2395 # It should be the case that when we have a file F that
2396 # is added on branch B (thus, F on trunk is in state
2397 # 'dead'), we generate an SVNCommit to fill B iff the branch
2398 # has never been filled before.
2400 # If this c_rev.op == OP_ADD, *and* the branch has never
2401 # been filled before, then fill it now. Otherwise, no need to
2402 # fill it.
2403 if c_rev.op == OP_ADD:
2404 if pm.last_filled.get(c_rev.branch_name, None) is None:
2405 return 1
2406 elif c_rev.op == OP_CHANGE:
2407 if svn_revnum > pm.last_filled.get(c_rev.branch_name, 0):
2408 return 1
2409 elif c_rev.op == OP_DELETE:
2410 if pm.last_filled.get(c_rev.branch_name, None) is None:
2411 return 1
2412 return 0
2414 for c_rev in self.changes + self.deletes:
2415 # If a commit is on a branch, we must ensure that the branch
2416 # path being committed exists (in HEAD of the Subversion
2417 # repository). If it doesn't exist, we will need to fill the
2418 # branch. After the fill, the path on which we're committing
2419 # will exist.
2420 if c_rev.branch_name \
2421 and c_rev.branch_name not in accounted_for_sym_names \
2422 and c_rev.branch_name not in self.done_symbols \
2423 and fill_needed(c_rev, Ctx()._persistence_manager):
2424 svn_commit = SVNCommit("pre-commit symbolic name '%s'"
2425 % c_rev.branch_name)
2426 svn_commit.set_symbolic_name(c_rev.branch_name)
2427 self.secondary_commits.append(svn_commit)
2428 accounted_for_sym_names.append(c_rev.branch_name)
2430 def _commit(self):
2431 """Generates the primary SVNCommit that corresponds to this
2432 CVSCommit."""
2434 # Generate an SVNCommit unconditionally. Even if the only change
2435 # in this CVSCommit is a deletion of an already-deleted file (that
2436 # is, a CVS revision in state 'dead' whose predecessor was also in
2437 # state 'dead'), the conversion will still generate a Subversion
2438 # revision containing the log message for the second dead
2439 # revision, because we don't want to lose that information.
2440 svn_commit = SVNCommit("commit")
2441 self.motivating_commit = svn_commit
2443 for c_rev in self.changes:
2444 svn_commit.add_revision(c_rev)
2445 # Only make a change if we need to. When 1.1.1.1 has an empty
2446 # deltatext, the explanation is almost always that we're looking
2447 # at an imported file whose 1.1 and 1.1.1.1 are identical. On
2448 # such imports, CVS creates an RCS file where 1.1 has the
2449 # content, and 1.1.1.1 has an empty deltatext, i.e, the same
2450 # content as 1.1. There's no reason to reflect this non-change
2451 # in the repository, so we want to do nothing in this case. (If
2452 # we were really paranoid, we could make sure 1.1's log message
2453 # is the CVS-generated "Initial revision\n", but I think the
2454 # conditions below are strict enough.)
2455 if not ((c_rev.deltatext_code == DELTATEXT_EMPTY)
2456 and (c_rev.rev == "1.1.1.1")):
2457 if c_rev.is_default_branch_revision():
2458 self.default_branch_cvs_revisions.append(c_rev)
2460 for c_rev in self.deletes:
2461 # When a file is added on a branch, CVS not only adds the file
2462 # on the branch, but generates a trunk revision (typically
2463 # 1.1) for that file in state 'dead'. We only want to add
2464 # this revision if the log message is not the standard cvs
2465 # fabricated log message.
2466 if c_rev.prev_rev is None:
2467 # c_rev.branches may be empty if the originating branch
2468 # has been excluded.
2469 if not c_rev.branches:
2470 continue
2471 cvs_generated_msg = ('file %s was initially added on branch %s.\n'
2472 % (c_rev.filename(),
2473 c_rev.branches[0]))
2474 author, log_msg = \
2475 Ctx()._persistence_manager.svn_commit_metadata[c_rev.digest]
2476 if log_msg == cvs_generated_msg:
2477 continue
2479 svn_commit.add_revision(c_rev)
2480 if c_rev.is_default_branch_revision():
2481 self.default_branch_cvs_revisions.append(c_rev)
2483 # There is a slight chance that we didn't actually register any
2484 # CVSRevisions with our SVNCommit (see loop over self.deletes
2485 # above), so if we have no CVSRevisions, we don't flush the
2486 # svn_commit to disk and roll back our revnum.
2487 if len(svn_commit.cvs_revs) > 0:
2488 svn_commit.flush()
2489 else:
2490 # We will not be flushing this SVNCommit, so rollback the
2491 # SVNCommit revision counter.
2492 SVNCommit.revnum -= 1
2494 if not Ctx().trunk_only:
2495 for c_rev in self.revisions():
2496 Ctx()._symbolings_logger.log_revision(c_rev, svn_commit.revnum)
2498 def _post_commit(self):
2499 """Generates any SVNCommits that we can perform now that _commit
2500 has happened. That is, handle non-trunk default branches.
2501 Sometimes an RCS file has a non-trunk default branch, so a commit
2502 on that default branch would be visible in a default CVS checkout
2503 of HEAD. If we don't copy that commit over to Subversion's trunk,
2504 then there will be no Subversion tree which corresponds to that
2505 CVS checkout. Of course, in order to copy the path over, we may
2506 first need to delete the existing trunk there."""
2508 # Only generate a commit if we have default branch revs
2509 if len(self.default_branch_cvs_revisions):
2510 # Generate an SVNCommit for all of our default branch c_revs.
2511 svn_commit = SVNCommit("post-commit default branch(es)")
2512 svn_commit.set_motivating_revnum(self.motivating_commit.revnum)
2513 for c_rev in self.default_branch_cvs_revisions:
2514 svn_commit.add_revision(c_rev)
2515 Ctx()._symbolings_logger.log_default_branch_closing(c_rev,
2516 svn_commit.revnum)
2517 self.secondary_commits.append(svn_commit)
2519 def process_revisions(self, done_symbols):
2520 """Process all the CVSRevisions that this instance has, creating
2521 one or more SVNCommits in the process. Generate fill SVNCommits
2522 only for symbols not in DONE_SYMBOLS (avoids unnecessary
2523 fills).
2525 Return the primary SVNCommit that corresponds to this CVSCommit.
2526 The returned SVNCommit is the commit that motivated any other
2527 SVNCommits generated in this CVSCommit."""
2529 self.done_symbols = done_symbols
2530 seconds = self.t_max - self.t_min + 1
2532 Log().write(LOG_VERBOSE, '-' * 60)
2533 Log().write(LOG_VERBOSE, 'CVS Revision grouping:')
2534 if seconds == 1:
2535 Log().write(LOG_VERBOSE, ' Start time: %s (duration: 1 second)'
2536 % time.ctime(self.t_max))
2537 else:
2538 Log().write(LOG_VERBOSE, ' Start time: %s' % time.ctime(self.t_min))
2539 Log().write(LOG_VERBOSE, ' End time: %s (duration: %d seconds)'
2540 % (time.ctime(self.t_max), seconds))
2542 if seconds > COMMIT_THRESHOLD + 1:
2543 Log().write(LOG_WARN, '%s: grouping spans more than %d seconds'
2544 % (warning_prefix, COMMIT_THRESHOLD))
2546 if Ctx().trunk_only: # Only do the primary commit if we're trunk-only
2547 self._commit()
2548 return self.motivating_commit
2550 self._pre_commit()
2551 self._commit()
2552 self._post_commit()
2554 for svn_commit in self.secondary_commits:
2555 svn_commit.set_date(self.motivating_commit.get_date())
2556 svn_commit.flush()
2558 return self.motivating_commit
2561 class SVNCommit:
2562 """This represents one commit to the Subversion Repository. There
2563 are three types of SVNCommits:
2565 1. Commits one or more CVSRevisions (cannot fill a symbolic name).
2567 2. Creates or fills a symbolic name (cannot commit CVSRevisions).
2569 3. Updates trunk to reflect the contents of a particular branch
2570 (this is to handle RCS default branches)."""
2572 # The revision number to assign to the next new SVNCommit.
2573 # We start at 2 because SVNRepositoryMirror uses the first commit
2574 # to create trunk, tags, and branches.
2575 revnum = 2
2577 class SVNCommitInternalInconsistencyError(Exception):
2578 """Exception raised if we encounter an impossible state in the
2579 SVNCommit Databases."""
2581 pass
2583 def __init__(self, description="", revnum=None, cvs_revs=None):
2584 """Instantiate an SVNCommit. DESCRIPTION is for debugging only.
2585 If REVNUM, the SVNCommit will correspond to that revision number;
2586 and if CVS_REVS, then they must be the exact set of CVSRevisions for
2587 REVNUM.
2589 It is an error to pass CVS_REVS without REVNUM, but you may pass
2590 REVNUM without CVS_REVS, and then add a revision at a time by
2591 invoking add_revision()."""
2593 self._description = description
2595 # Revprop metadata for this commit.
2597 # These initial values are placeholders. At least the log and the
2598 # date should be different by the time these are used.
2600 # They are private because their values should be returned encoded
2601 # in UTF8, but callers aren't required to set them in UTF8.
2602 # Therefore, accessor methods are used to set them, and
2603 # self.get_revprops() is used to get them, in dictionary form.
2604 self._author = Ctx().username
2605 self._log_msg = "This log message means an SVNCommit was used too soon."
2606 self._max_date = 0 # Latest date seen so far.
2608 self.cvs_revs = cvs_revs or []
2609 if revnum:
2610 self.revnum = revnum
2611 else:
2612 self.revnum = SVNCommit.revnum
2613 SVNCommit.revnum += 1
2615 # The (uncleaned) symbolic name that is filled in this SVNCommit, if any.
2616 self.symbolic_name = None
2618 # If this commit is a default branch synchronization, this
2619 # variable represents the subversion revision number of the
2620 # *primary* commit where the default branch changes actually
2621 # happened. It is None otherwise.
2623 # It is possible for multiple synchronization commits to refer to
2624 # the same motivating commit revision number, and it is possible
2625 # for a single synchronization commit to contain CVSRevisions on
2626 # multiple different default branches.
2627 self.motivating_revnum = None
2629 # is_tag is true only if this commit is a fill of a symbolic name
2630 # that is a tag, None in all other cases.
2631 self.is_tag = None
2633 def set_symbolic_name(self, symbolic_name):
2634 """Set self.symbolic_name to SYMBOLIC_NAME."""
2636 self.symbolic_name = symbolic_name
2638 def set_motivating_revnum(self, revnum):
2639 """Set self.motivating_revnum to REVNUM."""
2641 self.motivating_revnum = revnum
2643 def set_author(self, author):
2644 """Set this SVNCommit's author to AUTHOR (a locally-encoded string).
2645 This is the only way to set an SVNCommit's author."""
2647 self._author = author
2649 def set_log_msg(self, msg):
2650 """Set this SVNCommit's log message to MSG (a locally-encoded string).
2651 This is the only way to set an SVNCommit's log message."""
2653 self._log_msg = msg
2655 def set_date(self, date):
2656 """Set this SVNCommit's date to DATE (an integer).
2657 Note that self.add_revision() updates this automatically based on
2658 a CVSRevision; so you may not need to call this at all, and even
2659 if you do, the value may be overwritten by a later call to
2660 self.add_revision()."""
2662 self._max_date = date
2664 def get_date(self):
2665 """Returns this SVNCommit's date as an integer."""
2667 return self._max_date
2669 def get_revprops(self):
2670 """Return the Subversion revprops for this SVNCommit."""
2672 date = format_date(self._max_date)
2673 try:
2674 utf8_author = None
2675 if self._author is not None:
2676 utf8_author = to_utf8(self._author)
2677 utf8_log = to_utf8(self.get_log_msg())
2678 return { 'svn:author' : utf8_author,
2679 'svn:log' : utf8_log,
2680 'svn:date' : date }
2681 except UnicodeError:
2682 Log().write(LOG_WARN, '%s: problem encoding author or log message:'
2683 % warning_prefix)
2684 Log().write(LOG_WARN, " author: '%s'" % self._author)
2685 Log().write(LOG_WARN, " log: '%s'" % self.get_log_msg().rstrip())
2686 Log().write(LOG_WARN, " date: '%s'" % date)
2687 Log().write(LOG_WARN,
2688 "(subversion rev %s) Related files:" % self.revnum)
2689 for c_rev in self.cvs_revs:
2690 Log().write(LOG_WARN, " ", c_rev.fname)
2692 Log().write(LOG_WARN, "Consider rerunning with one or more ",
2693 "'--encoding' parameters.\n")
2694 # It's better to fall back to the original (unknown encoding) data
2695 # than to either 1) quit or 2) record nothing at all.
2696 return { 'svn:author' : self._author,
2697 'svn:log' : self.get_log_msg(),
2698 'svn:date' : date }
2700 def add_revision(self, cvs_rev):
2701 self.cvs_revs.append(cvs_rev)
2702 if cvs_rev.timestamp > self._max_date:
2703 self._max_date = cvs_rev.timestamp
2705 def flush(self):
2706 Log().write(LOG_NORMAL, "Creating Subversion r%d (%s)"
2707 % (self.revnum, self._description))
2708 Ctx()._persistence_manager.put_svn_commit(self.revnum,
2709 self.cvs_revs,
2710 self._max_date,
2711 self.symbolic_name,
2712 self.motivating_revnum)
2714 def __str__(self):
2715 """ Print a human-readable description of this SVNCommit. This
2716 description is not intended to be machine-parseable (although
2717 we're not going to stop you if you try!)"""
2719 ret = "SVNCommit #: " + str(self.revnum) + "\n"
2720 if self.symbolic_name:
2721 ret += (" symbolic name: " + _clean_symbolic_name(self.symbolic_name)
2722 + "\n")
2723 else:
2724 ret += " NO symbolic name\n"
2725 ret += " debug description: " + self._description + "\n"
2726 ret += " cvs_revs:\n"
2727 for c_rev in self.cvs_revs:
2728 ret += " " + c_rev.unique_key() + "\n"
2729 return ret
2731 def get_log_msg(self):
2732 """Returns the actual log message for a primary commit, and the
2733 appropriate manufactured log message for a secondary commit."""
2735 if self.symbolic_name is not None:
2736 return self._log_msg_for_symbolic_name_commit()
2737 elif self.motivating_revnum is not None:
2738 return self._log_msg_for_default_branch_commit()
2739 else:
2740 return self._log_msg
2742 def _log_msg_for_symbolic_name_commit(self):
2743 """Creates a log message for a manufactured commit that fills
2744 self.symbolic_name. If self.is_tag is true, write the log message
2745 as though for a tag, else write it as though for a branch."""
2747 type = 'branch'
2748 if self.is_tag:
2749 type = 'tag'
2751 # In Python 2.2.3, we could use textwrap.fill(). Oh well :-).
2752 space_or_newline = ' '
2753 cleaned_symbolic_name = _clean_symbolic_name(self.symbolic_name)
2754 if len(cleaned_symbolic_name) >= 13:
2755 space_or_newline = '\n'
2757 return "This commit was manufactured by cvs2svn to create %s%s'%s'." \
2758 % (type, space_or_newline, cleaned_symbolic_name)
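# Illustrative results (hypothetical symbol names): a short name gives
#   "This commit was manufactured by cvs2svn to create branch 'BEE'."
# while a cleaned name of 13 or more characters pushes the quoted name
# onto its own line:
#   "This commit was manufactured by cvs2svn to create tag
#   'RELEASE_1_0_FINAL'."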
2760 def _log_msg_for_default_branch_commit(self):
2761 """Creates a log message for a manufactured commit that
2762 synchronizes a non-trunk default branch with trunk."""
2764 msg = 'This commit was generated by cvs2svn to compensate for ' \
2765 'changes in r%d,\n' \
2766 'which included commits to RCS files with non-trunk default ' \
2767 'branches.\n' % self.motivating_revnum
2768 return msg
2771 class CVSRevisionAggregator:
2772 """This class groups CVSRevisions into CVSCommits that represent
2773 at least one SVNCommit."""
2775 def __init__(self):
2776 self.metadata_db = Database(temp(METADATA_DB), DB_OPEN_READ)
2777 if not Ctx().trunk_only:
2778 self.last_revs_db = Database(temp(SYMBOL_LAST_CVS_REVS_DB),
2779 DB_OPEN_READ)
2781 # A map { key : CVSCommit } of CVS commits currently being
2782 # accumulated. If the CVSCommit is still open to further
2783 # CVSRevisions, then key is CVSRevision.digest. If not (because
2784 # an inbound commit wanted to affect a file that was already
2785 # within the CVSCommit), then key is CVSRevision.digest plus some
2786 # number of appended '-'.
2787 self.cvs_commits = {}
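# For example (with a hypothetical digest 'abc123'), self.cvs_commits
# could look like { 'abc123': <open CVSCommit>,
#                   'abc123-': <closed CVSCommit with the same digest> }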
2789 # List of ready commits.
2790 self.ready_queue = [ ]
2792 # A map { symbol : None } of symbolic names for which the last
2793 # source CVSRevision has already been processed but which haven't
2794 # been closed yet.
2795 self.pending_symbols = {}
2797 # A list of closed symbols. That is, we've already encountered
2798 # the last CVSRevision that is a source for that symbol, the final
2799 # fill for this symbol has been done, and we never need to fill it
2800 # again.
2801 self.done_symbols = [ ]
2803 # This variable holds the most recently created primary svn_commit
2804 # object. CVSRevisionAggregator maintains this variable merely
2805 # for its date, so that it can set dates for the SVNCommits
2806 # created in self._attempt_to_commit_symbols().
2807 self.latest_primary_svn_commit = None
2809 Ctx()._symbolings_logger = SymbolingsLogger()
2810 Ctx()._persistence_manager = PersistenceManager(DB_OPEN_NEW)
2811 Ctx()._default_branches_db = SDatabase(temp(DEFAULT_BRANCHES_DB),
2812 DB_OPEN_READ)
2814 def _extract_ready_commits(self, timestamp):
2815 """Extract and return any active commits that expire by TIMESTAMP."""
2817 for digest_key, cvs_commit in self.cvs_commits.items():
2818 if cvs_commit.t_max + COMMIT_THRESHOLD < timestamp:
2819 self.ready_queue.append(cvs_commit)
2820 del self.cvs_commits[digest_key]
2822 def _commit_ready_commits(self):
2823 """Sort the commits from self.ready_queue by time, then process them."""
2825 self.ready_queue.sort()
2826 while self.ready_queue:
2827 cvs_commit = self.ready_queue[0]
2828 del self.ready_queue[0]
2829 self.latest_primary_svn_commit = \
2830 cvs_commit.process_revisions(self.done_symbols)
2831 self._attempt_to_commit_symbols()
2833 def process_revision(self, c_rev):
2834 # Each time we read a new line, scan the accumulating commits to
2835 # see if any are ready for processing.
2836 self._extract_ready_commits(c_rev.timestamp)
2838 for digest_key, cvs_commit in self.cvs_commits.items():
2839 # If the inbound commit is on the same file as a pending commit,
2840 # close the pending commit to further changes. Don't flush it though,
2841 # as there may be other pending commits dated before this one.
2842 # ### ISSUE: the has_file() check below is not optimal.
2843 # It does fix the dataloss bug where revisions would get lost
2844 # if checked in too quickly, but it can also break apart the
2845 # commits. The correct fix would require tracking the dependencies
2846 # between change sets and committing them in proper order.
2847 if cvs_commit.has_file(c_rev.fname):
2848 unused_id = digest_key + '-'
2849 # Find a string that is not already a key in
2850 # the self.cvs_commits dict
2851 while self.cvs_commits.has_key(unused_id):
2852 unused_id += '-'
2853 self.cvs_commits[unused_id] = cvs_commit
2854 del self.cvs_commits[digest_key]
2856 # Add this item into the set of still-available commits.
2857 if self.cvs_commits.has_key(c_rev.digest):
2858 cvs_commit = self.cvs_commits[c_rev.digest]
2859 else:
2860 author, log = self.metadata_db[c_rev.digest]
2861 cvs_commit = CVSCommit(c_rev.digest, author, log)
2862 self.cvs_commits[c_rev.digest] = cvs_commit
2863 cvs_commit.add_revision(c_rev)
2865 # Any elements in self.ready_queue at this point need to be
2866 # processed, because this latest rev couldn't possibly be part of
2867 # any of them.
2868 self._commit_ready_commits()
2870 self._add_pending_symbols(c_rev)
2872 def flush(self):
2873 """Commit anything left in self.cvs_commits. Then inform the
2874 SymbolingsLogger that all commits are done."""
2876 self._extract_ready_commits(1L<<32)
2877 self._commit_ready_commits()
2879 if not Ctx().trunk_only:
2880 Ctx()._symbolings_logger.close()
2882 def _add_pending_symbols(self, c_rev):
2883 """Add to self.pending_symbols any symbols from C_REV for which
2884 C_REV is the last CVSRevision.
2886 If we're not doing a trunk-only conversion, get the symbolic names
2887 that this c_rev is the last *source* CVSRevision for and add them
2888 to those left over from previous passes through the aggregator."""
2890 if not Ctx().trunk_only:
2891 for sym in self.last_revs_db.get(c_rev.unique_key(), []):
2892 self.pending_symbols[sym] = None
2894 def _attempt_to_commit_symbols(self):
2895 """Generate one SVNCommit for each symbol in self.pending_symbols
2896 that doesn't have an opening CVSRevision in either self.ready_queue
2897 or self.cvs_commits.values()."""
2899 # Make a list of all symbols from self.pending_symbols that do not
2900 # have *source* CVSRevisions in the pending commit queues
2901 # (self.cvs_commits or self.ready_queue):
2902 closeable_symbols = []
2903 pending_commits = self.cvs_commits.values() + self.ready_queue
2904 for sym in self.pending_symbols:
2905 for cvs_commit in pending_commits:
2906 if cvs_commit.opens_symbolic_name(sym):
2907 break
2908 else:
2909 closeable_symbols.append(sym)
2911 # Sort the closeable symbols so that we will always process the
2912 # symbols in the same order, regardless of the order in which the
2913 # dict hashing algorithm hands them back to us. We do this so
2914 # that our tests will get the same results on all platforms.
2915 closeable_symbols.sort()
2916 for sym in closeable_symbols:
2917 svn_commit = SVNCommit("closing tag/branch '%s'" % sym)
2918 svn_commit.set_symbolic_name(sym)
2919 svn_commit.set_date(self.latest_primary_svn_commit.get_date())
2920 svn_commit.flush()
2921 self.done_symbols.append(sym)
2922 del self.pending_symbols[sym]
2925 class SymbolingsReader:
2926 """Provides an interface to the SYMBOL_OPENINGS_CLOSINGS_SORTED file
2927 and the SYMBOL_OFFSETS_DB. Does the heavy lifting of finding and
2928 returning the correct opening and closing Subversion revision
2929 numbers for a given symbolic name."""
2931 def __init__(self):
2932 """Opens the SYMBOL_OPENINGS_CLOSINGS_SORTED for reading, and
2933 reads the offsets database into memory."""
2935 self.symbolings = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
2936 # The offsets_db is really small, and we need to read and write
2937 # from it a fair bit, so suck it into memory
2938 offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_READ)
2939 self.offsets = { }
2940 for key in offsets_db:
2941 #print " ZOO:", key, offsets_db[key]
2942 self.offsets[key] = offsets_db[key]
2944 def filling_guide_for_symbol(self, symbolic_name, svn_revnum):
2945 """Given SYMBOLIC_NAME and SVN_REVNUM, return a new
2946 SymbolicNameFillingGuide object.
2948 Note that if we encounter an opening rev in this fill, but the
2949 corresponding closing rev takes place later than SVN_REVNUM, the
2950 closing will not be passed to SymbolicNameFillingGuide in this
2951 fill (and will be discarded when encountered in a later fill).
2952 This is perfectly fine, because we can still do a valid fill
2953 without the closing--we always try to fill what we can as soon as
2954 we can."""
2956 openings_closings_map = OpeningsClosingsMap(symbolic_name)
2958 # It's possible to have a branch start with a file that was added
2959 # on a branch
2960 if self.offsets.has_key(symbolic_name):
2961 # set our read offset for self.symbolings to the offset for
2962 # symbolic_name
2963 self.symbolings.seek(self.offsets[symbolic_name])
2965 while 1:
2966 fpos = self.symbolings.tell()
2967 line = self.symbolings.readline().rstrip()
2968 if not line:
2969 break
2970 name, revnum, type, branch_name, cvs_path = line.split(" ", 4)
2971 if branch_name == '*':
2972 svn_path = Ctx().project.make_trunk_path(cvs_path)
2973 else:
2974 svn_path = Ctx().project.make_branch_path(branch_name, cvs_path)
2975 revnum = int(revnum)
2976 if revnum > svn_revnum or name != symbolic_name:
2977 break
2978 openings_closings_map.register(svn_path, revnum, type)
2980 # get current offset of the read marker and set it to the offset
2981 # for the beginning of the line we just read if we used anything
2982 # we read.
2983 if not openings_closings_map.is_empty():
2984 self.offsets[symbolic_name] = fpos
2986 return SymbolicNameFillingGuide(openings_closings_map)
2989 class SvnRevisionRange:
2990 """The range of subversion revision numbers from which a path can be
2991 copied. self.opening_revnum is the number of the earliest such
2992 revision, and self.closing_revnum is one higher than the number of
2993 the last such revision. If self.closing_revnum is None, then no
2994 closings were registered."""
2996 def __init__(self, opening_revnum):
2997 self.opening_revnum = opening_revnum
2998 self.closing_revnum = None
3000 def add_closing(self, closing_revnum):
3001 # When we have a non-trunk default branch, we may have multiple
3002 # closings--only register the first closing we encounter.
3003 if self.closing_revnum is None:
3004 self.closing_revnum = closing_revnum
3006 def __str__(self):
3007 if self.closing_revnum is None:
3008 return '[%d:]' % (self.opening_revnum,)
3009 else:
3010 return '[%d:%d]' % (self.opening_revnum, self.closing_revnum,)
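# For example, SvnRevisionRange(17) followed by add_closing(30) prints
# as '[17:30]' and means the path may be copied from any of r17
# through r29; with no closing registered it prints as '[17:]'.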
3013 class OpeningsClosingsMap:
3014 """A dictionary of openings and closings for a symbolic name in the
3015 current SVNCommit.
3017 The user should call self.register() for the openings and closings,
3018 then self.get_node_tree() to retrieve the information as a
3019 SymbolicNameFillingGuide."""
3021 def __init__(self, symbolic_name):
3022 """Initialize OpeningsClosingsMap and prepare it for receiving
3023 openings and closings."""
3025 self.name = symbolic_name
3027 # A dictionary of SVN_PATHS to SvnRevisionRange objects.
3028 self.things = { }
3030 def register(self, svn_path, svn_revnum, type):
3031 """Register an opening or closing revision for this symbolic name.
3032 SVN_PATH is the source path that needs to be copied into
3033 self.symbolic_name, and SVN_REVNUM is either the first svn
3034 revision number that we can copy from (our opening), or the last
3035 (not inclusive) svn revision number that we can copy from (our
3036 closing). TYPE indicates whether this path is an opening or a
3037 closing.
3039 The opening for a given SVN_PATH must be passed before the closing
3040 for it to have any effect... any closing encountered before a
3041 corresponding opening will be discarded.
3043 It is not necessary to pass a corresponding closing for every
3044 opening."""
3046 # Always log an OPENING
3047 if type == OPENING:
3048 self.things[svn_path] = SvnRevisionRange(svn_revnum)
3049 # Only log a closing if we've already registered the opening for that
3050 # path.
3051 elif type == CLOSING and self.things.has_key(svn_path):
3052 self.things[svn_path].add_closing(svn_revnum)
3054 def is_empty(self):
3055 """Return true if we haven't accumulated any openings or closings,
3056 false otherwise."""
3058 return not len(self.things)
3060 def get_things(self):
3061 """Return a list of (svn_path, SvnRevisionRange) tuples for all
3062 svn_paths with registered openings or closings."""
3064 return self.things.items()
3067 class SymbolicNameFillingGuide:
3068 """A node tree representing the source paths to be copied to fill
3069 self.symbolic_name in the current SVNCommit.
3071 self._node_tree is the root of the directory tree, in the form {
3072 path_component : subnode }. Leaf nodes are instances of
3073 SvnRevisionRange. Intermediate (directory) nodes are dictionaries
3074 mapping relative names to subnodes.
3076 By walking self._node_tree and calling self.get_best_revnum() on
3077 each node, the caller can determine what subversion revision number
3078 to copy the path corresponding to that node from. self._node_tree
3079 should be treated as read-only.
3081 The caller can then descend to sub-nodes to see if their "best
3082 revnum" differs from their parents' and if it does, take appropriate
3083 actions to "patch up" the subtrees."""
3085 def __init__(self, openings_closings_map):
3086 """Initializes a SymbolicNameFillingGuide for SYMBOLIC_NAME and
3087 store into it the openings and closings from
3088 OPENINGS_CLOSINGS_MAP."""
3090 self.name = openings_closings_map.name
3092 # The dictionary that holds our node tree as a map
3093 # { path_component : subnode }.
3094 self._node_tree = { }
3096 for svn_path, svn_revision_range in openings_closings_map.get_things():
3097 (head, tail) = _path_split(svn_path)
3098 self._get_node_for_path(head)[tail] = svn_revision_range
3100 #self.print_node_tree(self._node_tree)
3102 def _get_node_for_path(self, svn_path):
3103 """Return the node key for svn_path, creating new nodes as needed."""
3105 # Walk down the path, one node at a time.
3106 node = self._node_tree
3107 for component in svn_path.split('/'):
3108 if node.has_key(component):
3109 node = node[component]
3110 else:
3111 old_node = node
3112 node = {}
3113 old_node[component] = node
3115 return node
3117 def get_best_revnum(self, node, preferred_revnum):
3118 """Determine the best subversion revision number to use when
3119 copying the source tree beginning at NODE. Returns a
3120 subversion revision number.
3122 PREFERRED_REVNUM is passed to best_rev and used to calculate the
3123 best_revnum."""
3125 def score_revisions(svn_revision_ranges):
3126 """Return a list of revisions and scores based on
3127 SVN_REVISION_RANGES. The returned list looks like:
3129 [(REV1, SCORE1), (REV2, SCORE2), ...]
3131 where the tuples are sorted by revision number.
3132 SVN_REVISION_RANGES is a list of SvnRevisionRange objects.
3134 For each svn revision that appears as either an opening_revnum
3135 or closing_revnum for one of the svn_revision_ranges, output a
3136 tuple indicating how many of the SvnRevisionRanges include that
3137 svn_revision in their ranges. A score thus indicates that copying
3138 the corresponding revision (or any following revision up to the
3139 next revision in the list) of the object in question would yield
3140 that many correct paths at or underneath the object. There may
3141 be other paths underneath it which are not correct and would
3142 need to be deleted or recopied; those can only be detected by
3143 descending and examining their scores.
3145 If OPENINGS is empty, return the empty list."""
3147 openings = [ x.opening_revnum
3148 for x in svn_revision_ranges ]
3149 closings = [ x.closing_revnum
3150 for x in svn_revision_ranges
3151 if x.closing_revnum is not None ]
3153 # First look for easy out.
3154 if not openings:
3155 return []
3157 # Create a list with both openings (which increment the total)
3158 # and closings (which decrement the total):
3159 things = [(rev,1) for rev in openings] + [(rev,-1) for rev in closings]
3160 # Sort by revision number:
3161 things.sort()
3162 # Initialize output list with zeroth element of things. This
3163 # element must exist, because it was already verified that
3164 # openings is not empty.
3165 scores = [ things[0] ]
3166 total = scores[-1][1]
3167 for (rev, change) in things[1:]:
3168 total += change
3169 if rev == scores[-1][0]:
3170 # Same revision as last entry; modify last entry:
3171 scores[-1] = (rev, total)
3172 else:
3173 # Previously-unseen revision; create new entry:
3174 scores.append((rev, total))
3175 return scores
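# Worked example (hypothetical ranges): for the three ranges [2:6],
# [3:6] and [3:], the openings are [2, 3, 3] and the closings are
# [6, 6], so score_revisions() returns [(2, 1), (3, 3), (6, 1)] --
# copying at r3, r4 or r5 would pick up all three paths, while copying
# at r2, or at r6 and later, would pick up only one.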
3177 def best_rev(scores, preferred_rev):
3178 """Return the revision with the highest score from SCORES, a list
3179 returned by score_revisions(). When the maximum score is shared
3180 by multiple revisions, the oldest revision is selected, unless
3181 PREFERRED_REV is one of the possibilities, in which case, it is
3182 selected."""
3184 max_score = 0
3185 preferred_rev_score = -1
3186 rev = SVN_INVALID_REVNUM
3187 if preferred_rev is None:
3188 # Comparison order of different types is arbitrary. Do not
3189 # expect None to compare less than int values below.
3190 preferred_rev = SVN_INVALID_REVNUM
3191 for revnum, count in scores:
3192 if count > max_score:
3193 max_score = count
3194 rev = revnum
3195 if revnum <= preferred_rev:
3196 preferred_rev_score = count
3197 if preferred_rev_score == max_score:
3198 rev = preferred_rev
3199 return rev, max_score
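# Continuing the hypothetical example above: best_rev([(2, 1), (3, 3),
# (6, 1)], 4) returns (4, 3), because r4 shares the maximum score of 3
# with r3 and is the preferred revision, while best_rev with
# preferred_rev=7 returns (3, 3), the oldest revision with the maximum
# score.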
3201 # Aggregate openings and closings from the rev tree
3202 svn_revision_ranges = self._list_revnums(node)
3204 # Score the lists
3205 scores = score_revisions(svn_revision_ranges)
3207 revnum, max_score = best_rev(scores, preferred_revnum)
3209 if revnum == SVN_INVALID_REVNUM:
3210 raise FatalError("failed to find a revision "
3211 + "to copy from when copying %s" % name)
3212 return revnum, max_score
3214 def _list_revnums(self, node):
3215 """Return a list of all the SvnRevisionRanges (including
3216 duplicates) for all leaf nodes at and under NODE."""
3218 if isinstance(node, SvnRevisionRange):
3219 # It is a leaf node.
3220 return [ node ]
3221 else:
3222 # It is an intermediate node.
3223 revnums = []
3224 for key, subnode in node.items():
3225 revnums.extend(self._list_revnums(subnode))
3226 return revnums
3228 def get_sources(self):
3229 """Return the list of sources for this symbolic name.
3231 The Project instance defines what are legitimate sources. Raise
3232 an exception if a change occurred outside of the source
3233 directories."""
3235 return self._get_sub_sources('', self._node_tree)
3237 def _get_sub_sources(self, start_svn_path, start_node):
3238 """Return the list of sources for this symbolic name, starting the
3239 search at path START_SVN_PATH, which is node START_NODE. This is
3240 a helper method, called by get_sources() (see)."""
3242 project = Ctx().project
3243 if isinstance(start_node, SvnRevisionRange):
3244 # This implies that a change was found outside of the
3245 # legitimate sources. This should never happen.
3246 raise
3247 elif project.is_source(start_svn_path):
3248 # This is a legitimate source. Add it to list.
3249 return [ FillSource(start_svn_path, start_node) ]
3250 else:
3251 # This is a directory that is not a legitimate source. (That's
3252 # OK because it hasn't changed directly.) But directories
3253 # within it have been changed, so we need to search recursively
3254 # to find their enclosing sources.
3255 sources = []
3256 for entry, node in start_node.items():
3257 svn_path = _path_join(start_svn_path, entry)
3258 sources.extend(self._get_sub_sources(svn_path, node))
3260 return sources
3262 def print_node_tree(self, node, name='/', indent_depth=0):
3263 """For debugging purposes. Prints all nodes in TREE that are
3264 rooted at NODE. INDENT_DEPTH is used to indent the output of
3265 recursive calls."""
3267 if not indent_depth:
3268 print "TREE", "=" * 75
3269 if isinstance(node, SvnRevisionRange):
3270 print "TREE:", " " * (indent_depth * 2), name, node
3271 else:
3272 print "TREE:", " " * (indent_depth * 2), name
3273 for key, value in node.items():
3274 self.print_node_tree(value, key, (indent_depth + 1))
3277 class FillSource:
3278 """Representation of a fill source used by the symbol filler in
3279 SVNRepositoryMirror."""
3281 def __init__(self, prefix, node):
3282 """Create an unscored fill source with a prefix and a key."""
3284 self.prefix = prefix
3285 self.node = node
3286 self.score = None
3287 self.revnum = None
3289 def set_score(self, score, revnum):
3290 """Set the SCORE and REVNUM."""
3292 self.score = score
3293 self.revnum = revnum
3295 def __cmp__(self, other):
3296 """Comparison operator used to sort FillSources in descending
3297 score order."""
3299 if self.score is None or other.score is None:
3300 raise TypeError, 'Tried to compare unscored FillSource'
3301 return cmp(other.score, self.score)
3304 class SVNRepositoryMirror:
3305 """Mirror a Subversion Repository as it is constructed, one
3306 SVNCommit at a time. The mirror is skeletal; it does not contain
3307 file contents. The creation of a dumpfile or Subversion repository
3308 is handled by delegates. See self.add_delegate method for how to
3309 set delegates.
3311 The structure of the repository is kept in two databases and one
3312 hash. The revs_db database maps revisions to root node keys, and
3313 the nodes_db database maps node keys to nodes. A node is a hash
3314 from directory names to keys. Both the revs_db and the nodes_db are
3315 stored on disk and each access is expensive.
3317 The nodes_db database only has the keys for old revisions. The
3318 revision that is being constructed is kept in memory in the new_nodes
3319 hash which is cheap to access.
3321 You must invoke _start_commit between SVNCommits.
3323 *** WARNING *** All path arguments to methods in this class CANNOT
3324 have leading or trailing slashes."""
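# A rough sketch of the two databases (the node keys shown here are
# hypothetical; real keys come from self.key_generator):
#
#   revs_db:  { '7' : 'key42', ... }                  # revnum -> root node key
#   nodes_db: { 'key42' : { 'trunk' : 'key43', ... }, # node key -> directory hash
#               'key43' : { 'foo.c' : 'key44', ... } }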
3326 class SVNRepositoryMirrorPathExistsError(Exception):
3327 """Exception raised if an attempt is made to add a path to the
3328 repository mirror and that path already exists in the youngest
3329 revision of the repository."""
3331 pass
3333 class SVNRepositoryMirrorUnexpectedOperationError(Exception):
3334 """Exception raised if a CVSRevision is found to have an unexpected
3335 operation (OP) value."""
3337 pass
3339 class SVNRepositoryMirrorInvalidFillOperationError(Exception):
3340 """Exception raised if an empty SymbolicNameFillingGuide is returned
3341 during a fill where the branch in question already exists."""
3343 pass
3345 def __init__(self):
3346 """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""
3348 self.key_generator = KeyGenerator()
3350 self.delegates = [ ]
3352 # This corresponds to the 'revisions' table in a Subversion fs.
3353 self.revs_db = SDatabase(temp(SVN_MIRROR_REVISIONS_DB), DB_OPEN_NEW)
3354 Cleanup().register(temp(SVN_MIRROR_REVISIONS_DB), pass8)
3356 # This corresponds to the 'nodes' table in a Subversion fs. (We
3357 # don't need a 'representations' or 'strings' table because we
3358 # only track metadata, not file contents.)
3359 self.nodes_db = Database(temp(SVN_MIRROR_NODES_DB), DB_OPEN_NEW)
3360 Cleanup().register(temp(SVN_MIRROR_NODES_DB), pass8)
3362 # Start at revision 0 without a root node. It will be created
3363 # by _open_writable_root_node.
3364 self.youngest = 0
3365 self.new_root_key = None
3366 self.new_nodes = { }
3368 if not Ctx().trunk_only:
3369 ###PERF IMPT: Suck this into memory.
3370 self.tags_db = TagsDatabase(DB_OPEN_READ)
3371 self.symbolings_reader = SymbolingsReader()
3373 def _initialize_repository(self, date):
3374 """Initialize the repository by creating the directories for
3375 trunk, tags, and branches. This method should only be called
3376 after all delegates are added to the repository mirror."""
3378 # Make a 'fake' SVNCommit so we can take advantage of the revprops
3379 # magic therein
3380 svn_commit = SVNCommit("Initialization", 1)
3381 svn_commit.set_date(date)
3382 svn_commit.set_log_msg("New repository initialized by cvs2svn.")
3384 self._start_commit(svn_commit)
3385 self._mkdir(Ctx().project.trunk_path)
3386 if not Ctx().trunk_only:
3387 self._mkdir(Ctx().project.branches_path)
3388 self._mkdir(Ctx().project.tags_path)
3390 def _start_commit(self, svn_commit):
3391 """Start a new commit."""
3393 if self.youngest > 0:
3394 self._end_commit()
3396 self.youngest = svn_commit.revnum
3397 self.new_root_key = None
3398 self.new_nodes = { }
3400 self._invoke_delegates('start_commit', svn_commit)
3402 def _end_commit(self):
3403 """Called at the end of each commit. This method copies the newly
3404 created nodes to the on-disk nodes db."""
3406 if self.new_root_key is None:
3407 # No changes were made in this revision, so we make the root node
3408 # of the new revision be the same as the last one.
3409 self.revs_db[str(self.youngest)] = self.revs_db[str(self.youngest - 1)]
3410 else:
3411 self.revs_db[str(self.youngest)] = self.new_root_key
3412 # Copy the new nodes to the nodes_db
3413 for key, value in self.new_nodes.items():
3414 self.nodes_db[key] = value
3416 def _get_node(self, key):
3417 """Returns the node contents for KEY which may refer to either
3418 self.nodes_db or self.new_nodes."""
3420 if self.new_nodes.has_key(key):
3421 return self.new_nodes[key]
3422 else:
3423 return self.nodes_db[key]
3425 def _open_readonly_node(self, path, revnum):
3426 """Open a readonly node for PATH at revision REVNUM. Returns the
3427 node key and node contents if the path exists, else (None, None)."""
3429 # Get the root key
3430 if revnum == self.youngest:
3431 if self.new_root_key is None:
3432 node_key = self.revs_db[str(self.youngest - 1)]
3433 else:
3434 node_key = self.new_root_key
3435 else:
3436 node_key = self.revs_db[str(revnum)]
3438 for component in path.split('/'):
3439 node_contents = self._get_node(node_key)
3440 node_key = node_contents.get(component, None)
3441 if node_key is None:
3442 return None
3444 return node_key
3446 def _open_writable_root_node(self):
3447 """Open a writable root node. The current root node is returned
3448 immediately if it is already writable. If not, create a new one by
3449 copying the contents of the root node of the previous version."""
3451 if self.new_root_key is not None:
3452 return self.new_root_key, self.new_nodes[self.new_root_key]
3454 if self.youngest < 2:
3455 new_contents = { }
3456 else:
3457 new_contents = self.nodes_db[self.revs_db[str(self.youngest - 1)]]
3458 self.new_root_key = self.key_generator.gen_key()
3459 self.new_nodes = { self.new_root_key: new_contents }
3461 return self.new_root_key, new_contents
3463 def _open_writable_node(self, svn_path, create):
3464 """Open a writable node for the path SVN_PATH, creating SVN_PATH
3465 and any missing directories if CREATE is True."""
3467 parent_key, parent_contents = self._open_writable_root_node()
3469 # Walk down the path, one node at a time.
3470 path_so_far = None
3471 components = svn_path.split('/')
3472 for i in range(len(components)):
3473 component = components[i]
3474 path_so_far = _path_join(path_so_far, component)
3475 this_key = parent_contents.get(component, None)
3476 if this_key is not None:
3477 # The component exists.
3478 this_contents = self.new_nodes.get(this_key, None)
3479 if this_contents is None:
3480 # Suck the node from the nodes_db, but update the key
3481 this_contents = self.nodes_db[this_key]
3482 this_key = self.key_generator.gen_key()
3483 self.new_nodes[this_key] = this_contents
3484 parent_contents[component] = this_key
3485 elif create:
3486 # The component does not exist, so we create it.
3487 this_contents = { }
3488 this_key = self.key_generator.gen_key()
3489 self.new_nodes[this_key] = this_contents
3490 parent_contents[component] = this_key
3491 if i < len(components) - 1:
3492 self._invoke_delegates('mkdir', path_so_far)
3493 else:
3494 # The component does not exist and we are not instructed to
3495 # create it, so we give up.
3496 return None, None
3498 parent_key = this_key
3499 parent_contents = this_contents
3501 return this_key, this_contents
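# Note the copy-on-write behavior above: every node reached along
# SVN_PATH is re-keyed into self.new_nodes before being modified, so the
# nodes recorded for earlier revisions in self.nodes_db are never
# touched.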
3503 def _path_exists(self, path):
3504 """If PATH exists in self.youngest of the svn repository mirror,
3505 return True, else return False.
3507 PATH must not start with '/'."""
3509 return self._open_readonly_node(path, self.youngest) is not None
3511 def _fast_delete_path(self, parent_path, parent_contents, component):
3512 """Delete COMPONENT from the parent direcory PARENT_PATH with the
3513 contents PARENT_CONTENTS. Do nothing if COMPONENT does not exist
3514 in PARENT_CONTENTS."""
3516 if parent_contents.has_key(component):
3517 del parent_contents[component]
3518 self._invoke_delegates('delete_path',
3519 _path_join(parent_path, component))
3521 def _delete_path(self, svn_path, should_prune=False):
3522 """Delete PATH from the tree. If SHOULD_PRUNE is true, then delete
3523 all ancestor directories that are made empty when SVN_PATH is deleted.
3524 In other words, SHOULD_PRUNE is like the -P option to 'cvs checkout'.
3526 NOTE: This function ignores requests to delete the root directory
3527 or any directory for which Ctx().project.is_unremovable() returns
3528 True, either directly or by pruning."""
3530 if svn_path == '' or Ctx().project.is_unremovable(svn_path):
3531 return
3533 (parent_path, entry,) = _path_split(svn_path)
3534 if parent_path:
3535 parent_key, parent_contents = \
3536 self._open_writable_node(parent_path, False)
3537 else:
3538 parent_key, parent_contents = self._open_writable_root_node()
3540 if parent_key is not None:
3541 self._fast_delete_path(parent_path, parent_contents, entry)
3542 # The following recursion makes pruning an O(n^2) operation in the
3543 # worst case (where n is the depth of SVN_PATH), but the worst case
3544 # is probably rare, and the constant cost is pretty low. Another
3545 # drawback is that we issue a delete for each path and not just
3546 # a single delete for the topmost directory pruned.
3547 if should_prune and len(parent_contents) == 0:
3548 self._delete_path(parent_path, True)
3550 def _mkdir(self, path):
3551 """Create PATH in the repository mirror at the youngest revision."""
3553 self._open_writable_node(path, True)
3554 self._invoke_delegates('mkdir', path)
3556 def _change_path(self, cvs_rev):
3557 """Register a change in self.youngest for the CVS_REV's svn_path
3558 in the repository mirror."""
3560 # We do not have to update the nodes because our mirror is only
3561 # concerned with the presence or absence of paths, and a file
3562 # content change does not cause any path changes.
3563 self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))
3565 def _add_path(self, cvs_rev):
3566 """Add the CVS_REV's svn_path to the repository mirror."""
3568 self._open_writable_node(cvs_rev.svn_path, True)
3569 self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))
3571 def _copy_path(self, src_path, dest_path, src_revnum):
3572 """Copy SRC_PATH at subversion revision number SRC_REVNUM to
3573 DEST_PATH. In the youngest revision of the repository, DEST_PATH's
3574 parent *must* exist, but DEST_PATH *cannot* exist.
3576 Return the node key and the contents of the new node at DEST_PATH
3577 as a dictionary."""
3579 # get the contents of the node of our src_path
3580 src_key = self._open_readonly_node(src_path, src_revnum)
3581 src_contents = self._get_node(src_key)
3583 # Get the parent path and the base path of the dest_path
3584 (dest_parent, dest_basename,) = _path_split(dest_path)
3585 dest_parent_key, dest_parent_contents = \
3586 self._open_writable_node(dest_parent, False)
3588 if dest_parent_contents.has_key(dest_basename):
3589 msg = "Attempt to add path '%s' to repository mirror " % dest_path
3590 msg += "when it already exists in the mirror."
3591 raise self.SVNRepositoryMirrorPathExistsError, msg
3593 dest_parent_contents[dest_basename] = src_key
3594 self._invoke_delegates('copy_path', src_path, dest_path, src_revnum)
3596 # Yes sir, src_key and src_contents are also the contents of the
3597 # destination. This is a cheap copy, remember! :-)
3598 return src_key, src_contents
3600 def _fill_symbolic_name(self, svn_commit):
3601 """Performs all copies necessary to create as much of the the tag
3602 or branch SVN_COMMIT.symbolic_name as possible given the current
3603 revision of the repository mirror.
3605 The symbolic name is guaranteed to exist in the Subversion
3606 repository by the end of this call, even if there are no paths
3607 under it."""
3609 symbol_fill = self.symbolings_reader.filling_guide_for_symbol(
3610 svn_commit.symbolic_name, self.youngest)
3611 # Get the list of sources for the symbolic name.
3612 sources = symbol_fill.get_sources()
3614 if sources:
3615 if svn_commit.symbolic_name in self.tags_db:
3616 dest_prefix = Ctx().project.get_tag_path(svn_commit.symbolic_name)
3617 else:
3618 dest_prefix = Ctx().project.get_branch_path(svn_commit.symbolic_name)
3620 dest_key = self._open_writable_node(dest_prefix, False)[0]
3621 self._fill(symbol_fill, dest_prefix, dest_key, sources)
3622 else:
3623 # We can only get here for a branch whose first commit is an add
3624 # (as opposed to a copy).
3625 dest_path = Ctx().project.get_branch_path(symbol_fill.name)
3626 if not self._path_exists(dest_path):
3627 # If our symbol_fill was empty, that means that our first
3628 # commit on the branch was to a file added on the branch, and
3629 # that this is our first fill of that branch.
3631 # This case is covered by test 16.
3633 # ...we create the branch by copying trunk from our
3634 # current revision number minus 1.
3635 source_path = Ctx().project.trunk_path
3636 entries = self._copy_path(source_path, dest_path,
3637 svn_commit.revnum - 1)[1]
3638 # Now since we've just copied trunk to a branch that's
3639 # *supposed* to be empty, we delete any entries in the
3640 # copied directory.
3641 for entry in entries:
3642 del_path = dest_path + '/' + entry
3643 # Delete but don't prune.
3644 self._delete_path(del_path)
3645 else:
3646 msg = "Error filling branch '" \
3647 + _clean_symbolic_name(symbol_fill.name) + "'.\n"
3648 msg += "Received an empty SymbolicNameFillingGuide and\n"
3649 msg += "attempted to create a branch that already exists."
3650 raise self.SVNRepositoryMirrorInvalidFillOperationError, msg
3652 def _fill(self, symbol_fill, dest_prefix, dest_key, sources,
3653 path = None, parent_source_prefix = None,
3654 preferred_revnum = None, prune_ok = None):
3655 """Fill the tag or branch at DEST_PREFIX + PATH with items from
3656 SOURCES, and recurse into the child items.
3658 DEST_PREFIX is the prefix of the destination directory, e.g.
3659 '/tags/my_tag' or '/branches/my_branch', and SOURCES is a list of
3660 FillSource classes that are candidates to be copied to the
3661 destination. DEST_KEY is the key in self.nodes_db to the
3662 destination, or None if the destination does not yet exist.
3664 PATH is the path relative to DEST_PREFIX. If PATH is None, we
3665 are at the top level, e.g. '/tags/my_tag'.
3667 PARENT_SOURCE_PREFIX is the source prefix that was used to copy
3668 the parent directory, and PREFERRED_REVNUM is an int which is the
3669 source revision number that the caller (who may have copied KEY's
3670 parent) used to perform its copy. If PREFERRED_REVNUM is None,
3671 then no revision is preferable to any other (which probably means
3672 that no copies have happened yet).
3674 PRUNE_OK means that a copy has been made in this recursion, and
3675 it's safe to prune directories that are not in
3676 SYMBOL_FILL._node_tree, provided that said directory has a source
3677 prefix matching PARENT_SOURCE_PREFIX.
3679 PATH, PARENT_SOURCE_PREFIX, PRUNE_OK, and PREFERRED_REVNUM
3680 should only be passed in by recursive calls."""
3682 # Calculate scores and revnums for all sources
3683 for source in sources:
3684 src_revnum, score = symbol_fill.get_best_revnum(source.node,
3685 preferred_revnum)
3686 source.set_score(score, src_revnum)
3688 # Sort the sources in descending score order so that we will make
3689 # an eventual copy from the source with the highest score.
3690 sources.sort()
3691 copy_source = sources[0]
3693 src_path = _path_join(copy_source.prefix, path)
3694 dest_path = _path_join(dest_prefix, path)
3696 # Figure out if we shall copy to this destination and delete any
3697 # destination path that is in the way.
3698 do_copy = 0
3699 if dest_key is None:
3700 do_copy = 1
3701 elif prune_ok and (parent_source_prefix != copy_source.prefix or
3702 copy_source.revnum != preferred_revnum):
3703 # We are about to replace the destination, so we need to remove
3704 # it before we perform the copy.
3705 self._delete_path(dest_path)
3706 do_copy = 1
3708 if do_copy:
3709 dest_key, dest_entries = self._copy_path(src_path, dest_path,
3710 copy_source.revnum)
3711 prune_ok = 1
3712 else:
3713 dest_entries = self._get_node(dest_key)
3715 # Create the SRC_ENTRIES hash from SOURCES. The keys are path
3716 # elements and the values are lists of FillSource classes where
3717 # this path element exists.
3718 src_entries = {}
3719 for source in sources:
3720 if isinstance(source.node, SvnRevisionRange):
3721 continue
3722 for entry, node in source.node.items():
3723 src_entries.setdefault(entry, []).append(
3724 FillSource(source.prefix, node))
3726 if prune_ok:
3727 # Delete the entries in DEST_ENTRIES that are not in src_entries.
3728 delete_list = [ ]
3729 for entry in dest_entries:
3730 if not src_entries.has_key(entry):
3731 delete_list.append(entry)
3732 if delete_list:
3733 if not self.new_nodes.has_key(dest_key):
3734 dest_key, dest_entries = self._open_writable_node(dest_path, True)
3735 # Sort the delete list to get "diffable" dumpfiles.
3736 delete_list.sort()
3737 for entry in delete_list:
3738 self._fast_delete_path(dest_path, dest_entries, entry)
3740 # Recurse into the SRC_ENTRIES keys sorted in alphabetical order.
3741 src_keys = src_entries.keys()
3742 src_keys.sort()
3743 for src_key in src_keys:
3744 next_dest_key = dest_entries.get(src_key, None)
3745 self._fill(symbol_fill, dest_prefix, next_dest_key,
3746 src_entries[src_key], _path_join(path, src_key),
3747 copy_source.prefix, sources[0].revnum, prune_ok)
3749 def _synchronize_default_branch(self, svn_commit):
3750 """Propagate any changes that happened on a non-trunk default
3751 branch to the trunk of the repository. See
3752 CVSCommit._post_commit() for details on why this is necessary."""
3754 for cvs_rev in svn_commit.cvs_revs:
3755 svn_trunk_path = Ctx().project.make_trunk_path(cvs_rev.cvs_path)
3756 if cvs_rev.op == OP_ADD or cvs_rev.op == OP_CHANGE:
3757 if self._path_exists(svn_trunk_path):
3758 # Delete the path on trunk...
3759 self._delete_path(svn_trunk_path)
3760 # ...and copy over from branch
3761 self._copy_path(cvs_rev.svn_path, svn_trunk_path,
3762 svn_commit.motivating_revnum)
3763 elif cvs_rev.op == OP_DELETE:
3764 # delete trunk path
3765 self._delete_path(svn_trunk_path)
3766 else:
3767 msg = ("Unknown CVSRevision operation '%s' in default branch sync."
3768 % cvs_rev.op)
3769 raise self.SVNRepositoryMirrorUnexpectedOperationError, msg
3771 def commit(self, svn_commit):
3772 """Add an SVNCommit to the SVNRepository, incrementing the
3773 Repository revision number, and changing the repository. Invoke
3774 the delegates' start_commit() method."""
3776 if svn_commit.revnum == 2:
3777 self._initialize_repository(svn_commit.get_date())
3779 self._start_commit(svn_commit)
3781 if svn_commit.symbolic_name:
3782 Log().write(LOG_VERBOSE, "Filling symbolic name:",
3783 _clean_symbolic_name(svn_commit.symbolic_name))
3784 self._fill_symbolic_name(svn_commit)
3785 elif svn_commit.motivating_revnum:
3786 Log().write(LOG_VERBOSE, "Synchronizing default_branch motivated by %d"
3787 % svn_commit.motivating_revnum)
3788 self._synchronize_default_branch(svn_commit)
3789 else: # This actually commits CVSRevisions
3790 if len(svn_commit.cvs_revs) > 1: plural = "s"
3791 else: plural = ""
3792 Log().write(LOG_VERBOSE, "Committing %d CVSRevision%s"
3793 % (len(svn_commit.cvs_revs), plural))
3794 for cvs_rev in svn_commit.cvs_revs:
3795 # See comment in CVSCommit._commit() for what this is all
3796 # about. Note that although asking self._path_exists() is
3797 # somewhat expensive, we only do it if the first two (cheap)
3798 # tests succeed first.
3799 if not ((cvs_rev.deltatext_code == DELTATEXT_EMPTY)
3800 and (cvs_rev.rev == "1.1.1.1")
3801 and self._path_exists(cvs_rev.svn_path)):
3802 if cvs_rev.op == OP_ADD:
3803 self._add_path(cvs_rev)
3804 elif cvs_rev.op == OP_CHANGE:
3805 # Fix for Issue #74:
3807 # Here's the scenario. You have file FOO that is imported
3808 # on a non-trunk vendor branch. So in r1.1 and r1.1.1.1,
3809 # the file exists.
3811 # Moving forward in time, FOO is deleted on the default
3812 # branch (r1.1.1.2). cvs2svn determines that this delete
3813 # also needs to happen on trunk, so FOO is deleted on
3814 # trunk.
3816 # Along comes r1.2, whose op is OP_CHANGE (because r1.1 is
3817 # not 'dead', we assume it's a change). However, since
3818 # our trunk file has been deleted, svnadmin blows up--you
3819 # can't change a file that doesn't exist!
3821 # Soooo... we just check the path, and if it doesn't
3822 # exist, we do an add... if the path does exist, it's
3823 # business as usual.
3824 if not self._path_exists(cvs_rev.svn_path):
3825 self._add_path(cvs_rev)
3826 else:
3827 self._change_path(cvs_rev)
3829 if cvs_rev.op == OP_DELETE:
3830 self._delete_path(cvs_rev.svn_path, Ctx().prune)
3832 def cleanup(self):
3833 """Callback for the Cleanup.register in self.__init__."""
3835 self.revs_db = None
3836 self.nodes_db = None
3838 def add_delegate(self, delegate):
3839 """Adds DELEGATE to self.delegates.
3841 For every delegate you add, as soon as SVNRepositoryMirror
3842 performs a repository action method, SVNRepositoryMirror will call
3843 the delegate's corresponding repository action method. Multiple
3844 delegates will be called in the order that they are added. See
3845 SVNRepositoryMirrorDelegate for more information."""
3847 self.delegates.append(delegate)
3849 def _invoke_delegates(self, method, *args):
3850 """Iterate through each of our delegates, in the order that they
3851 were added, and call the delegate's method named METHOD with the
3852 arguments in ARGS."""
3854 for delegate in self.delegates:
3855 getattr(delegate, method)(*args)
3857 def finish(self):
3858 """Calls the delegate finish method."""
3860 self._end_commit()
3861 self._invoke_delegates('finish')
3862 self.cleanup()
3865 class SVNCommitItem:
3866 """A wrapper class for CVSRevision objects upon which
3867 Subversion-related data (such as properties) may be hung."""
3869 def __init__(self, c_rev, svn_props_changed):
3870 """Initialize instance and record the properties for this file.
3871 SVN_PROPS_CHANGED indicates whether the svn: properties are known
3872 to have changed since the last revision.
3874 The properties are set by the SVNPropertySetters in
3875 Ctx().svn_property_setters, then we read a couple of the
3876 properties back out for our own purposes."""
3878 self.c_rev = c_rev
3879 # Did the svn properties change for this file (i.e., do they have
3880 # to be written to the dumpfile?)
3881 self.svn_props_changed = svn_props_changed
3883 # The properties for this item as a map { key : value }. If VALUE
3884 # is None, no property should be set.
3885 self.svn_props = { }
3887 for svn_property_setter in Ctx().svn_property_setters:
3888 svn_property_setter.set_properties(self)
3890 # Remember if we need to filter the EOLs. We could actually use
3891 # self.svn_props now, since it is initialized for each revision.
3892 self.needs_eol_filter = \
3893 self.svn_props.get('svn:eol-style', None) is not None
3895 self.has_keywords = self.svn_props.get('svn:keywords', None) is not None
3898 class SVNRepositoryMirrorDelegate:
3899 """Abstract superclass for any delegate to SVNRepositoryMirror.
3900 Subclasses must implement all of the methods below.
3902 For each method, a subclass implements, in its own way, the
3903 Subversion operation implied by the method's name. For example, for
3904 the add_path method, the DumpfileDelegate would write out a
3905 "Node-add:" command to a Subversion dumpfile, the StdoutDelegate
3906 would merely print that the path is being added to the repository,
3907 and the RepositoryDelegate would actually cause the path to be added
3908 to the Subversion repository that it is creating."""
3911 def start_commit(self, svn_commit):
3912 """Perform any actions needed to start SVNCommit SVN_COMMIT;
3913 see subclass implementation for details."""
3915 raise NotImplementedError
3917 def mkdir(self, path):
3918 """PATH is a string; see subclass implementation for details."""
3920 raise NotImplementedError
3922 def add_path(self, s_item):
3923 """S_ITEM is an SVNCommitItem; see subclass implementation for
3924 details."""
3926 raise NotImplementedError
3928 def change_path(self, s_item):
3929 """S_ITEM is an SVNCommitItem; see subclass implementation for
3930 details."""
3932 raise NotImplementedError
3934 def delete_path(self, path):
3935 """PATH is a string; see subclass implementation for
3936 details."""
3938 raise NotImplementedError
3940 def copy_path(self, src_path, dest_path, src_revnum):
3941 """SRC_PATH and DEST_PATH are both strings, and SRC_REVNUM is a
3942 subversion revision number (int); see subclass implementation for
3943 details."""
3945 raise NotImplementedError
3947 def finish(self):
3948 """Perform any cleanup necessary after all revisions have been
3949 committed."""
3951 raise NotImplementedError
3954 class DumpfileDelegate(SVNRepositoryMirrorDelegate):
3955 """Create a Subversion dumpfile."""
3957 def __init__(self, dumpfile_path=None):
3958 """Return a new DumpfileDelegate instance, attached to a dumpfile
3959 DUMPFILE_PATH (Ctx().dumpfile, if None), using Ctx().encoding."""
3961 if dumpfile_path:
3962 self.dumpfile_path = dumpfile_path
3963 else:
3964 self.dumpfile_path = Ctx().dumpfile
3966 self.dumpfile = open(self.dumpfile_path, 'wb')
3967 self._write_dumpfile_header(self.dumpfile)
3969 def _write_dumpfile_header(self, dumpfile):
3970 # Initialize the dumpfile with the standard headers.
3972 # Since the CVS repository doesn't have a UUID, and the Subversion
3973 # repository will be created with one anyway, we don't specify a
3974 # UUID in the dumpfile.
3975 dumpfile.write('SVN-fs-dump-format-version: 2\n\n')
3977 def _utf8_path(self, path):
3978 """Return a copy of PATH encoded in UTF-8."""
3980 pieces = path.split('/')
3981 # Convert each path component separately (as they may each use
3982 # different encodings).
3983 for i in range(len(pieces)):
3984 try:
3985 # Log messages can be converted with the 'replace' strategy,
3986 # but we can't afford any lossiness here.
3987 pieces[i] = to_utf8(pieces[i], 'strict')
3988 except UnicodeError:
3989 raise FatalError(
3990 "Unable to convert a path '%s' to internal encoding.\n"
3991 "Consider rerunning with one or more '--encoding' parameters."
3992 % (path,))
3993 return '/'.join(pieces)
3995 def _string_for_prop(self, name, value):
3996 """Return a property in the form needed for the dumpfile."""
3998 return 'K %d\n%s\nV %d\n%s\n' % (len(name), name, len(value), value)
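# For example, _string_for_prop('svn:log', 'hi') yields the string
# 'K 7\nsvn:log\nV 2\nhi\n', matching the K/V property blocks shown in
# the sample revision header below.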
4000 def start_commit(self, svn_commit):
4001 """Emit the start of SVN_COMMIT (an SVNCommit)."""
4003 self.revision = svn_commit.revnum
4005 # The start of a new commit typically looks like this:
4007 # Revision-number: 1
4008 # Prop-content-length: 129
4009 # Content-length: 129
4011 # K 7
4012 # svn:log
4013 # V 27
4014 # Log message for revision 1.
4015 # K 10
4016 # svn:author
4017 # V 7
4018 # jrandom
4019 # K 8
4020 # svn:date
4021 # V 27
4022 # 2003-04-22T22:57:58.132837Z
4023 # PROPS-END
4025 # Notice that the length headers count everything -- not just the
4026 # length of the data but also the lengths of the lengths, including
4027 # the 'K ' or 'V ' prefixes.
4029 # The reason there are both Prop-content-length and Content-length
4030 # is that the former includes just props, while the latter includes
4031 # everything. That's the generic header form for any entity in a
4032 # dumpfile. But since revisions only have props, the two lengths
4033 # are always the same for revisions.
4035 # Calculate the output needed for the property definitions.
4036 props = svn_commit.get_revprops()
4037 prop_names = props.keys()
4038 prop_names.sort()
4039 prop_strings = []
4040 for propname in prop_names:
4041 if props[propname] is not None:
4042 prop_strings.append(self._string_for_prop(propname, props[propname]))
4044 all_prop_strings = ''.join(prop_strings) + 'PROPS-END\n'
4045 total_len = len(all_prop_strings)
4047 # Print the revision header and props
4048 self.dumpfile.write('Revision-number: %d\n'
4049 'Prop-content-length: %d\n'
4050 'Content-length: %d\n'
4051 '\n'
4052 % (self.revision, total_len, total_len))
4054 self.dumpfile.write(all_prop_strings)
4055 self.dumpfile.write('\n')
4057 def mkdir(self, path):
4058 """Emit the creation of directory PATH."""
4060 self.dumpfile.write("Node-path: %s\n"
4061 "Node-kind: dir\n"
4062 "Node-action: add\n"
4063 "\n"
4064 "\n" % self._utf8_path(path))
4066 def _add_or_change_path(self, s_item, op):
4067 """Emit the addition or change corresponding to S_ITEM.
4068 OP is either the constant OP_ADD or OP_CHANGE."""
4070 # Validation stuffs
4071 if op == OP_ADD:
4072 action = 'add'
4073 elif op == OP_CHANGE:
4074 action = 'change'
4075 else:
4076 raise FatalError("_add_or_change_path() called with bad op ('%s')"
4077 % (op,))
4079 # Convenience variables
4080 c_rev = s_item.c_rev
4082 # The property handling here takes advantage of an undocumented
4083 # but IMHO consistent feature of the Subversion dumpfile-loading
4084 # code. When a node's properties aren't mentioned (that is, the
4085 # "Prop-content-length:" header is absent, no properties are
4086 # listed at all, and there is no "PROPS-END\n" line) then no
4087 # change is made to the node's properties.
4089 # This is consistent with the way dumpfiles behave w.r.t. text
4090 # content changes, so I'm comfortable relying on it. If you
4091 # commit a change to *just* the properties of some node that
4092 # already has text contents from a previous revision, then in the
4093 # dumpfile output for the prop change, no "Text-content-length:"
4094 # nor "Text-content-md5:" header will be present, and the text of
4095 # the file will not be given. But this does not cause the file's
4096 # text to be erased! It simply remains unchanged.
4098 # This works out great for cvs2svn, due to lucky coincidences:
4100 # For files, the only properties we ever set are set in the first
4101 # revision; all other revisions (including on branches) inherit
4102 # from that. After the first revision, we never change file
4103 # properties, therefore, there is no need to remember the full set
4104 # of properties on a given file once we've set it.
4106 # For directories, the only property we set is "svn:ignore", and
4107 # while we may change it after the first revision, we always do so
4108 # based on the contents of a ".cvsignore" file -- in other words,
4109 # CVS is doing the remembering for us, so we still don't have to
4110 # preserve the previous value of the property ourselves.
4112 # Calculate the (sorted-by-name) property string and length, if any.
4113 if s_item.svn_props_changed:
4114 svn_props = s_item.svn_props
4115 prop_contents = ''
4116 prop_names = svn_props.keys()
4117 prop_names.sort()
4118 for pname in prop_names:
4119 pvalue = svn_props[pname]
4120 if pvalue is not None:
4121 prop_contents += self._string_for_prop(pname, pvalue)
4122 prop_contents += 'PROPS-END\n'
4123 props_header = 'Prop-content-length: %d\n' % len(prop_contents)
4124 else:
4125 prop_contents = ''
4126 props_header = ''
4128 # treat .cvsignore as a directory property
4129 dir_path, basename = os.path.split(c_rev.svn_path)
4130 if basename == ".cvsignore":
4131 ignore_vals = generate_ignores(c_rev)
4132 ignore_contents = '\n'.join(ignore_vals)
4133 ignore_contents = ('K 10\nsvn:ignore\nV %d\n%s\n' % \
4134 (len(ignore_contents), ignore_contents))
4135 ignore_contents += 'PROPS-END\n'
4136 ignore_len = len(ignore_contents)
4138 # write headers, then props
4139 self.dumpfile.write('Node-path: %s\n'
4140 'Node-kind: dir\n'
4141 'Node-action: change\n'
4142 'Prop-content-length: %d\n'
4143 'Content-length: %d\n'
4144 '\n'
4145 '%s'
4146 % (self._utf8_path(dir_path), ignore_len,
4147 ignore_len, ignore_contents))
4149 # If the file has keywords, we must prevent CVS/RCS from expanding
4150 # the keywords because they must be unexpanded in the repository,
4151 # or Subversion will get confused.
4152 pipe_cmd, pipe = Ctx().cvs_repository.get_co_pipe(
4153 c_rev, suppress_keyword_substitution=s_item.has_keywords)
4155 self.dumpfile.write('Node-path: %s\n'
4156 'Node-kind: file\n'
4157 'Node-action: %s\n'
4158 '%s' # no property header if no props
4159 'Text-content-length: '
4160 % (self._utf8_path(c_rev.svn_path),
4161 action, props_header))
4163 pos = self.dumpfile.tell()
4165 self.dumpfile.write('0000000000000000\n'
4166 'Text-content-md5: 00000000000000000000000000000000\n'
4167 'Content-length: 0000000000000000\n'
4168 '\n')
4170 if prop_contents:
4171 self.dumpfile.write(prop_contents)
4173 # Insert a filter to convert all EOLs to LFs if necessary
4174 if s_item.needs_eol_filter:
4175 data_reader = LF_EOL_Filter(pipe.stdout)
4176 else:
4177 data_reader = pipe.stdout
4179 # Insert the rev contents, calculating length and checksum as we go.
4180 checksum = md5.new()
4181 length = 0
4182 while True:
4183 buf = data_reader.read(PIPE_READ_SIZE)
4184 if buf == '':
4185 break
4186 checksum.update(buf)
4187 length += len(buf)
4188 self.dumpfile.write(buf)
4190 pipe.stdout.close()
4191 error_output = pipe.stderr.read()
4192 exit_status = pipe.wait()
4193 if exit_status:
4194 raise FatalError("The command '%s' failed with exit status: %s\n"
4195 "and the following output:\n"
4196 "%s" % (pipe_cmd, exit_status, error_output))
4198 # Go back to patch up the length and checksum headers:
4199 self.dumpfile.seek(pos, 0)
4200 # We left 16 zeros for the text length; replace them with the real
4201 # length, padded on the left with spaces:
4202 self.dumpfile.write('%16d' % length)
4203 # 16... + 1 newline + len('Text-content-md5: ') == 35
4204 self.dumpfile.seek(pos + 35, 0)
4205 self.dumpfile.write(checksum.hexdigest())
4206 # 35... + 32 bytes of checksum + 1 newline + len('Content-length: ') == 84
4207 self.dumpfile.seek(pos + 84, 0)
4208 # The content length is the length of property data, text data,
4209 # and any metadata around/inside them.
4210 self.dumpfile.write('%16d' % (length + len(prop_contents)))
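# (For reference, the offsets above check out: len('Text-content-md5: ')
# is 18, so 16 + 1 + 18 == 35, and 35 + 32 + 1 + len('Content-length: ')
# == 35 + 32 + 1 + 16 == 84.)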
4211 # Jump back to the end of the stream
4212 self.dumpfile.seek(0, 2)
4214 # This record is done (write two newlines -- one to terminate
4215 # contents that weren't themselves newline-terminated, one to
4216 # provide a blank line for readability).
4217 self.dumpfile.write('\n\n')
4219 def add_path(self, s_item):
4220 """Emit the addition corresponding to S_ITEM, an SVNCommitItem."""
4222 self._add_or_change_path(s_item, OP_ADD)
4224 def change_path(self, s_item):
4225 """Emit the change corresponding to S_ITEM, an SVNCommitItem."""
4227 self._add_or_change_path(s_item, OP_CHANGE)
4229 def delete_path(self, path):
4230 """Emit the deletion of PATH."""
4232 self.dumpfile.write('Node-path: %s\n'
4233 'Node-action: delete\n'
4234 '\n' % self._utf8_path(path))
4236 def copy_path(self, src_path, dest_path, src_revnum):
4237 """Emit the copying of SRC_PATH at SRC_REV to DEST_PATH."""
4239 # We don't need to include "Node-kind:" for copies; the loader
4240 # ignores it anyway and just uses the source kind instead.
4241 self.dumpfile.write('Node-path: %s\n'
4242 'Node-action: add\n'
4243 'Node-copyfrom-rev: %d\n'
4244 'Node-copyfrom-path: /%s\n'
4245 '\n'
4246 % (self._utf8_path(dest_path),
4247 src_revnum,
4248 self._utf8_path(src_path)))
4250 def finish(self):
4251 """Perform any cleanup necessary after all revisions have been
4252 committed."""
4254 self.dumpfile.close()
4257 class RepositoryDelegate(DumpfileDelegate):
4258 """Creates a new Subversion Repository. DumpfileDelegate does all
4259 of the heavy lifting."""
4261 def __init__(self):
4262 self.svnadmin = Ctx().svnadmin
4263 self.target = Ctx().target
4264 if not Ctx().existing_svnrepos:
4265 Log().write(LOG_NORMAL,"Creating new repository '%s'" % (self.target))
4266 if not Ctx().fs_type:
4267 # User didn't say what kind of repository (bdb, fsfs, etc.).
4268 # We still pass --bdb-txn-nosync. It's a no-op if the default
4269 # repository type doesn't support it, but we definitely want
4270 # it if BDB is the default.
4271 run_command('%s create %s "%s"' % (self.svnadmin,
4272 "--bdb-txn-nosync",
4273 self.target))
4274 elif Ctx().fs_type == 'bdb':
4275 # User explicitly specified bdb.
4277 # Since this is a BDB repository, pass --bdb-txn-nosync,
4278 # because it gives us a 4-5x speed boost (if cvs2svn is
4279 # creating the repository, cvs2svn should be the only program
4280 # accessing the svn repository (until cvs is done, at least)).
4281 # But we'll turn no-sync off in self.finish(), unless
4282 # instructed otherwise.
4283 run_command('%s create %s %s "%s"' % (self.svnadmin,
4284 "--fs-type=bdb",
4285 "--bdb-txn-nosync",
4286 self.target))
4287 else:
4288 # User specified something other than bdb.
4289 run_command('%s create %s "%s"' % (self.svnadmin,
4290 "--fs-type=%s" % Ctx().fs_type,
4291 self.target))
4293 # Since the output of this run is a repository, not a dumpfile,
4294 # the temporary dumpfiles we create should go in the tmpdir.
4295 DumpfileDelegate.__init__(self, temp(Ctx().dumpfile))
4297 # This is 1 if a commit is in progress, otherwise None.
4298 self._commit_in_progress = None
4300 self.dumpfile = open(self.dumpfile_path, 'w+b')
4301 self.loader_pipe = SimplePopen([ self.svnadmin, 'load', '-q',
4302 self.target ], True)
4303 self.loader_pipe.stdout.close()
4304 try:
4305 self._write_dumpfile_header(self.loader_pipe.stdin)
4306 except IOError:
4307 raise FatalError("svnadmin failed with the following output while "
4308 "loading the dumpfile:\n"
4309 + self.loader_pipe.stderr.read())
4311 def _feed_pipe(self):
4312 """Feed the revision stored in the dumpfile to the svnadmin
4313 load pipe."""
4315 self.dumpfile.seek(0)
4316 while 1:
4317 data = self.dumpfile.read(128*1024) # Chunk size is arbitrary
4318 if not len(data):
4319 break
4320 try:
4321 self.loader_pipe.stdin.write(data)
4322 except IOError:
4323 raise FatalError("svnadmin failed with the following output "
4324 "while loading the dumpfile:\n"
4325 + self.loader_pipe.stderr.read())
4327 def start_commit(self, svn_commit):
4328 """Start a new commit. If a commit is already in progress, close
4329 the dumpfile, load it into the svn repository, open a new
4330 dumpfile, and write the header into it."""
4332 if self._commit_in_progress:
4333 self._feed_pipe()
4334 self.dumpfile.seek(0)
4335 self.dumpfile.truncate()
4336 DumpfileDelegate.start_commit(self, svn_commit)
4337 self._commit_in_progress = 1
4339 def finish(self):
4340 """Loads the last commit into the repository."""
4342 self._feed_pipe()
4343 self.dumpfile.close()
4344 self.loader_pipe.stdin.close()
4345 error_output = self.loader_pipe.stderr.read()
4346 exit_status = self.loader_pipe.wait()
4347 if exit_status:
4348 raise FatalError('svnadmin load failed with exit status: %s\n'
4349 'and the following output:\n'
4350 '%s' % (exit_status, error_output,))
4351 os.remove(self.dumpfile_path)
4353 # If this is a BDB repository, and we created the repository, and
4354 # --bdb-no-sync wasn't passed, then comment out the DB_TXN_NOSYNC
4355 # line in the DB_CONFIG file, because txn syncing should be on by
4356 # default in BDB repositories.
4358 # We determine if this is a BDB repository by looking for the
4359 # DB_CONFIG file, which doesn't exist in FSFS, rather than by
4360 # checking Ctx().fs_type. That way this code will Do The Right
4361 # Thing in all circumstances.
4362 db_config = os.path.join(self.target, "db/DB_CONFIG")
4363 if (not Ctx().existing_svnrepos and not Ctx().bdb_txn_nosync
4364 and os.path.exists(db_config)):
4365 no_sync = 'set_flags DB_TXN_NOSYNC\n'
4367 contents = open(db_config, 'r').readlines()
4368 index = contents.index(no_sync)
4369 contents[index] = '# ' + no_sync
4370 open(db_config, 'w').writelines(contents)
4373 class StdoutDelegate(SVNRepositoryMirrorDelegate):
4374 """Makes no changes to the disk, but writes out information to
4375 STDOUT about what the SVNRepositoryMirror is doing. Of course, our
4376 print statements will state that we're doing something, when in
4377 reality, we aren't doing anything other than printing out that we're
4378 doing something. Kind of zen, really."""
4380 def __init__(self, total_revs):
4381 self.total_revs = total_revs
4383 def start_commit(self, svn_commit):
4384 """Prints out the Subversion revision number of the commit that is
4385 being started."""
4387 Log().write(LOG_VERBOSE, "=" * 60)
4388 Log().write(LOG_NORMAL, "Starting Subversion r%d / %d" %
4389 (svn_commit.revnum, self.total_revs))
4391 def mkdir(self, path):
4392 """Print a line stating that we are creating directory PATH."""
4394 Log().write(LOG_VERBOSE, " New Directory", path)
4396 def add_path(self, s_item):
4397 """Print a line stating that we are 'adding' s_item.c_rev.svn_path."""
4399 Log().write(LOG_VERBOSE, " Adding", s_item.c_rev.svn_path)
4401 def change_path(self, s_item):
4402 """Print a line stating that we are 'changing' s_item.c_rev.svn_path."""
4404 Log().write(LOG_VERBOSE, " Changing", s_item.c_rev.svn_path)
4406 def delete_path(self, path):
4407 """Print a line stating that we are 'deleting' PATH."""
4409 Log().write(LOG_VERBOSE, " Deleting", path)
4411 def copy_path(self, src_path, dest_path, src_revnum):
4412 """Print a line stating that we are 'copying' revision SRC_REVNUM
4413 of SRC_PATH to DEST_PATH."""
4415 Log().write(LOG_VERBOSE, " Copying revision", src_revnum, "of", src_path)
4416 Log().write(LOG_VERBOSE, " to", dest_path)
4418 def finish(self):
4419 """State that we are done creating our repository."""
4421 Log().write(LOG_VERBOSE, "Finished creating Subversion repository.")
4422 Log().write(LOG_QUIET, "Done.")
4425 def pass1():
4426 OS_SEP_PLUS_ATTIC = os.sep + 'Attic'
4427 Log().write(LOG_QUIET, "Examining all CVS ',v' files...")
4428 cd = CollectData()
4430 def visit_file(baton, dirname, files):
4431 cd = baton
4432 for fname in files:
4433 verify_filename_legal(fname)
4434 if not fname.endswith(',v'):
4435 continue
4436 cd.found_valid_file = 1
4437 pathname = os.path.join(dirname, fname)
4438 if dirname.endswith(OS_SEP_PLUS_ATTIC):
4439 # drop the 'Attic' portion from the pathname for the canonical name.
4440 fdc = FileDataCollector(cd, os.path.join(dirname[:-6], fname),
4441 pathname)
4442 else:
4443 # If this file also exists in the attic, it's a fatal error
4444 attic_path = os.path.join(dirname, 'Attic', fname)
4445 if os.path.exists(attic_path):
4446 err = "%s: A CVS repository cannot contain both %s and %s" \
4447 % (error_prefix, pathname, attic_path)
4448 sys.stderr.write(err + '\n')
4449 cd.fatal_errors.append(err)
4450 fdc = FileDataCollector(cd, pathname, pathname)
4451 Log().write(LOG_NORMAL, pathname)
4452 try:
4453 cvs2svn_rcsparse.parse(open(pathname, 'rb'), fdc)
4454 except (cvs2svn_rcsparse.common.RCSParseError, ValueError,
4455 RuntimeError):
4456 err = "%s: '%s' is not a valid ,v file" \
4457 % (error_prefix, pathname)
4458 sys.stderr.write(err + '\n')
4459 cd.fatal_errors.append(err)
4460 except:
4461 Log().write(LOG_WARN,
4462 "Exception occurred while parsing %s" % pathname)
4463 raise
4465 os.path.walk(Ctx().project.project_cvs_repos_path, visit_file, cd)
4466 Log().write(LOG_VERBOSE, 'Processed', cd.num_files, 'files')
4468 cd.write_symbol_db()
4470 if len(cd.fatal_errors) > 0:
4471 raise FatalException("Pass 1 complete.\n"
4472 + "=" * 75 + "\n"
4473 + "Error summary:\n"
4474 + "\n".join(cd.fatal_errors) + "\n"
4475 + "Exited due to fatal error(s).\n")
4477 if cd.found_valid_file is None:
4478 raise FatalException(
4479 "\n"
4480 "No RCS files found in your CVS Repository!\n"
4481 "Are you absolutely certain you are pointing cvs2svn\n"
4482 "at a CVS repository?\n"
4483 "\n"
4484 "Exited due to fatal error(s).\n")
4486 StatsKeeper().reset_c_rev_info()
4487 StatsKeeper().archive()
4488 Log().write(LOG_QUIET, "Done")
4491 def pass2():
4492 """Pass 2: clean up the revision information."""
4494 symbol_db = SymbolDatabase()
4495 symbol_db.read()
4497 # Expand the exclusion regexps into a hash of excluded symbol names
4498 excludes = symbol_db.find_excluded_symbols(Ctx().excludes)
4500 error_detected = 0
4502 Log().write(LOG_QUIET, "Checking for blocked exclusions...")
4503 blocked_excludes = symbol_db.find_blocked_excludes(excludes)
4504 if blocked_excludes:
4505 for branch, blockers in blocked_excludes.items():
4506 sys.stderr.write(error_prefix + ": The branch '%s' cannot be "
4507 "excluded because the following symbols depend "
4508 "on it:\n" % (branch))
4509 for blocker in blockers:
4510 sys.stderr.write(" '%s'\n" % (blocker))
4511 sys.stderr.write("\n")
4512 error_detected = 1
4514 Log().write(LOG_QUIET, "Checking for forced tags with commits...")
4515 invalid_forced_tags = [ ]
4516 for forced_tag in Ctx().forced_tags:
4517 if excludes.has_key(forced_tag):
4518 continue
4519 if symbol_db.branch_has_commit(forced_tag):
4520 invalid_forced_tags.append(forced_tag)
4521 if invalid_forced_tags:
4522 sys.stderr.write(error_prefix + ": The following branches cannot be "
4523 "forced to be tags because they have commits:\n")
4524 for tag in invalid_forced_tags:
4525 sys.stderr.write(" '%s'\n" % (tag))
4526 sys.stderr.write("\n")
4527 error_detected = 1
4529 Log().write(LOG_QUIET, "Checking for tag/branch mismatches...")
4530 mismatches = symbol_db.find_mismatches(excludes)
4531 def is_not_forced(mismatch):
4532 name = mismatch[0]
4533 return not (name in Ctx().forced_tags or name in Ctx().forced_branches)
4534 mismatches = filter(is_not_forced, mismatches)
4535 if mismatches:
4536 sys.stderr.write(error_prefix + ": The following symbols are tags "
4537 "in some files and branches in others.\nUse "
4538 "--force-tag, --force-branch and/or --exclude to "
4539 "resolve the symbols.\n")
4540 for name, tag_count, branch_count, commit_count in mismatches:
4541 sys.stderr.write(" '%s' is a tag in %d files, a branch in "
4542 "%d files and has commits in %d files.\n"
4543 % (name, tag_count, branch_count, commit_count))
4544 error_detected = 1
4546 # Bail out now if we found errors
4547 if error_detected:
4548 sys.exit(1)
4550 # Create the tags database
4551 tags_db = TagsDatabase(DB_OPEN_NEW)
4552 for tag in symbol_db.tags:
4553 if tag not in Ctx().forced_branches:
4554 tags_db.add(tag)
4555 for tag in Ctx().forced_tags:
4556 tags_db.add(tag)
4558 Log().write(LOG_QUIET, "Re-synchronizing CVS revision timestamps...")
4560 # We may have recorded some changes in revisions' timestamp. We need to
4561 # scan for any other files which may have had the same log message and
4562 # occurred at "the same time" and change their timestamps, too.
4564 # read the resync data file
4565 def read_resync(fname):
4566 """Read the .resync file into memory."""
4568 ### note that we assume that we can hold the entire resync file in
4569 ### memory. really large repositories with whacky timestamps could
4570 ### bust this assumption. should that ever happen, then it is possible
4571 ### to split the resync file into pieces and make multiple passes,
4572 ### using each piece.
4575 # A digest maps to a sequence of lists which specify a lower and upper
4576 # time bound for matching up the commit. We keep a sequence of these
4577 # because a number of checkins with the same log message (e.g. an empty
4578 # log message) could need to be remapped. We also make them a list
4579 # because we will dynamically expand the lower/upper bound as we find
4580 # commits that fall into a particular msg and time range.
4582 # resync == digest -> [ [old_time_lower, old_time_upper, new_time], ... ]
4584 resync = { }
4586 for line in fileinput.FileInput(fname):
4587 t1 = int(line[:8], 16)
4588 digest = line[9:DIGEST_END_IDX]
4589 t2 = int(line[DIGEST_END_IDX+1:], 16)
4590 t1_l = t1 - COMMIT_THRESHOLD/2
4591 t1_u = t1 + COMMIT_THRESHOLD/2
4592 resync.setdefault(digest, []).append([t1_l, t1_u, t2])
4594 # For each digest, sort the resync items in it in increasing order,
4595 # based on the lower time bound.
4596 for val in resync.values():
4597 val.sort()
4599 return resync
4601 resync = read_resync(temp(DATAFILE + RESYNC_SUFFIX))
4603 output = open(temp(DATAFILE + CLEAN_REVS_SUFFIX), 'w')
4604 Cleanup().register(temp(DATAFILE + CLEAN_REVS_SUFFIX), pass3)
4606 tweaked_timestamps_db = Database(temp(TWEAKED_TIMESTAMPS_DB), DB_OPEN_NEW)
4607 Cleanup().register(temp(TWEAKED_TIMESTAMPS_DB), pass2)
4609 # process the revisions file, looking for items to clean up
4610 for line in fileinput.FileInput(temp(DATAFILE + REVS_SUFFIX)):
4611 c_rev = CVSRevision(Ctx(), line[:-1])
4613 # Skip this entire revision if it's on an excluded branch
4614 if excludes.has_key(c_rev.branch_name):
4615 continue
4617 new_prev_ts = None
4618 if c_rev.prev_rev is not None:
4619 new_prev_ts = tweaked_timestamps_db.get(
4620 c_rev.unique_key(c_rev.prev_rev), None)
4621 if new_prev_ts:
4622 c_rev.prev_timestamp = new_prev_ts
4624 new_next_ts = None
4625 if c_rev.next_rev is not None:
4626 new_next_ts = tweaked_timestamps_db.get(
4627 c_rev.unique_key(c_rev.next_rev), None)
4628 if new_next_ts:
4629 c_rev.next_timestamp = new_next_ts
4631 # Remove all references to excluded tags and branches
4632 def not_excluded(symbol, excludes=excludes):
4633 return not excludes.has_key(symbol)
4634 c_rev.branches = filter(not_excluded, c_rev.branches)
4635 c_rev.tags = filter(not_excluded, c_rev.tags)
4637 # Convert all branches that are forced to be tags
4638 for forced_tag in Ctx().forced_tags:
4639 if forced_tag in c_rev.branches:
4640 c_rev.branches.remove(forced_tag)
4641 c_rev.tags.append(forced_tag)
4643 # Convert all tags that are forced to be branches
4644 for forced_branch in Ctx().forced_branches:
4645 if forced_branch in c_rev.tags:
4646 c_rev.tags.remove(forced_branch)
4647 c_rev.branches.append(forced_branch)
4649 # see if this is "near" any of the resync records we
4650 # have recorded for this digest [of the log message].
4651 for record in resync.get(c_rev.digest, []):
4652 if record[2] == c_rev.timestamp:
4653 # This means that either c_rev is the same revision that
4654 # caused the resync record to exist, or c_rev is a different
4655 # CVS revision that happens to have the same timestamp. In
4656 # either case, we don't have to do anything, so we...
4657 continue
4659 if record[0] <= c_rev.timestamp <= record[1]:
4660 # bingo! We probably want to remap the time on this c_rev,
4661 # unless the remapping would be useless because the new time
4662 # would fall outside the COMMIT_THRESHOLD window for this
4663 # commit group.
4664 new_timestamp = record[2]
4665 # If the new timestamp is earlier than that of our previous revision
4666 if new_timestamp < c_rev.prev_timestamp:
4667 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4668 + " to time %s, which is before the time of the previous"
4669 + " revision %s (%s):")
4670 Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4671 c_rev.cvs_path, new_timestamp,
4672 c_rev.prev_rev, c_rev.prev_timestamp))
4673 # If resyncing our rev to c_rev.prev_timestamp + 1 will place
4674 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4675 # attempted resync time, then sync back to c_rev.prev_timestamp
4676 # + 1...
4677 if ((c_rev.prev_timestamp + 1) - new_timestamp) < COMMIT_THRESHOLD:
4678 new_timestamp = c_rev.prev_timestamp + 1
4679 Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4680 new_timestamp))
4681 else:
4682 Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4683 warning_prefix)
4684 continue
4686 # If the new timestamp is later than that of our next revision
4687 elif c_rev.next_timestamp and new_timestamp > c_rev.next_timestamp:
4688 desc = ("%s: Attempt to set timestamp of revision %s on file %s"
4689 + " to time %s, which is after the time of the next"
4690 + " revision %s (%s):")
4691 Log().write(LOG_WARN, desc % (warning_prefix, c_rev.rev,
4692 c_rev.cvs_path, new_timestamp,
4693 c_rev.next_rev, c_rev.next_timestamp))
4694 # If resyncing our rev to c_rev.next_timestamp - 1 will place
4695 # the timestamp of c_rev within COMMIT_THRESHOLD of the
4696 # attempted resync time, then sync forward to c_rev.next_timestamp
4697 # - 1...
4698 if (new_timestamp - (c_rev.next_timestamp - 1)) < COMMIT_THRESHOLD:
4699 new_timestamp = c_rev.next_timestamp - 1
4700 Log().write(LOG_WARN, "%s: Time set to %s" % (warning_prefix,
4701 new_timestamp))
4702 else:
4703 Log().write(LOG_WARN, "%s: Timestamp left untouched" %
4704 warning_prefix)
4705 continue
4707 # Fix for Issue #71: Avoid resyncing two consecutive revisions
4708 # to the same timestamp.
4709 elif (new_timestamp == c_rev.prev_timestamp
4710 or new_timestamp == c_rev.next_timestamp):
4711 continue
4713 # adjust the time range. we want the COMMIT_THRESHOLD from the
4714 # bounds of the earliest/latest commit in this group.
4715 record[0] = min(record[0], c_rev.timestamp - COMMIT_THRESHOLD/2)
4716 record[1] = max(record[1], c_rev.timestamp + COMMIT_THRESHOLD/2)
4718 msg = "PASS2 RESYNC: '%s' (%s): old time='%s' delta=%ds" \
4719 % (c_rev.cvs_path, c_rev.rev, time.ctime(c_rev.timestamp),
4720 new_timestamp - c_rev.timestamp)
4721 Log().write(LOG_VERBOSE, msg)
4723 c_rev.timestamp = new_timestamp
4724 tweaked_timestamps_db[c_rev.unique_key()] = new_timestamp
4726 # stop looking for hits
4727 break
4729 output.write(str(c_rev) + "\n")
4730 Log().write(LOG_QUIET, "Done")
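# A minimal, self-contained sketch (not called by any pass) of the core
# remapping decision made above.  It omits the clamping of the new time
# to prev_timestamp + 1 / next_timestamp - 1 that pass2 performs, and
# the helper's name and signature are illustrative only.
def _example_resync_decision(timestamp, prev_timestamp, next_timestamp, record):
  """Return the remapped time for TIMESTAMP, or None to leave it alone.

  RECORD is a [old_time_lower, old_time_upper, new_time] list as built
  by read_resync() inside pass2."""
  old_lower, old_upper, new_time = record
  if new_time == timestamp:
    # Already at the target time; nothing to do.
    return None
  if not (old_lower <= timestamp <= old_upper):
    # TIMESTAMP falls outside this record's window.
    return None
  if prev_timestamp is not None and new_time <= prev_timestamp:
    # Remapping would reorder the revision with its predecessor.
    return None
  if next_timestamp is not None and new_time >= next_timestamp:
    # Remapping would reorder the revision with its successor.
    return None
  return new_time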
4733 def pass3():
4734 Log().write(LOG_QUIET, "Sorting CVS revisions...")
4735 sort_file(temp(DATAFILE + CLEAN_REVS_SUFFIX),
4736 temp(DATAFILE + SORTED_REVS_SUFFIX))
4737 Cleanup().register(temp(DATAFILE + SORTED_REVS_SUFFIX), pass5)
4738 Log().write(LOG_QUIET, "Done")
4741 def pass4():
4742 """Iterate through sorted revs, storing them in a database.
4743 If we're not doing a trunk-only conversion, generate the
4744 LastSymbolicNameDatabase, which contains the last CVSRevision
4745 that is a source for each tag or branch."""
4747 Log().write(LOG_QUIET,
4748 "Copying CVS revision data from flat file to database...")
4749 cvs_revs_db = CVSRevisionDatabase(DB_OPEN_NEW)
4750 if not Ctx().trunk_only:
4751 Log().write(LOG_QUIET,
4752 "Finding last CVS revisions for all symbolic names...")
4753 last_sym_name_db = LastSymbolicNameDatabase()
4754 else:
4755 # This is to avoid testing Ctx().trunk_only every time around the loop
4756 class DummyLSNDB:
4757 def noop(*args): pass
4758 log_revision = noop
4759 create_database = noop
4760 last_sym_name_db = DummyLSNDB()
4762 for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
4763 c_rev = CVSRevision(Ctx(), line[:-1])
4764 cvs_revs_db.log_revision(c_rev)
4765 last_sym_name_db.log_revision(c_rev)
4766 StatsKeeper().record_c_rev(c_rev)
4768 last_sym_name_db.create_database()
4769 StatsKeeper().archive()
4770 Log().write(LOG_QUIET, "Done")
4773 def pass5():
4774 """Generate the SVNCommit <-> CVSRevision mapping databases.
4775 CVSCommit._commit also calls SymbolingsLogger to register
4776 CVSRevisions that represent an opening or closing for a path on a
4777 branch or tag. See SymbolingsLogger for more details."""
4779 Log().write(LOG_QUIET, "Mapping CVS revisions to Subversion commits...")
4781 aggregator = CVSRevisionAggregator()
4782 for line in fileinput.FileInput(temp(DATAFILE + SORTED_REVS_SUFFIX)):
4783 c_rev = CVSRevision(Ctx(), line[:-1])
4784 if not (Ctx().trunk_only and c_rev.branch_name is not None):
4785 aggregator.process_revision(c_rev)
4786 aggregator.flush()
4788 StatsKeeper().set_svn_rev_count(SVNCommit.revnum - 1)
4789 StatsKeeper().archive()
4790 Log().write(LOG_QUIET, "Done")
4793 def pass6():
4794 Log().write(LOG_QUIET, "Sorting symbolic name source revisions...")
4796 if not Ctx().trunk_only:
4797 sort_file(temp(SYMBOL_OPENINGS_CLOSINGS),
4798 temp(SYMBOL_OPENINGS_CLOSINGS_SORTED))
4799 Cleanup().register(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), pass8)
4800 Log().write(LOG_QUIET, "Done")
4803 def pass7():
4804 Log().write(LOG_QUIET, "Determining offsets for all symbolic names...")
4806 def generate_offsets_for_symbolings():
4807 """This function iterates through all the lines in
4808 SYMBOL_OPENINGS_CLOSINGS_SORTED, writing out a file mapping
4809 SYMBOLIC_NAME to the file offset in SYMBOL_OPENINGS_CLOSINGS_SORTED
4810 where SYMBOLIC_NAME is first encountered. This will allow us to
4811 seek to the various offsets in the file and sequentially read only
4812 the openings and closings that we need."""
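# For example (hypothetical data), if the sorted file contained
#
#   BRANCH_A 12 <cvs-rev-key>
#   BRANCH_A 15 <cvs-rev-key>
#   TAG_B 20 <cvs-rev-key>
#
# then offsets_db would map 'BRANCH_A' to the byte offset of the first
# BRANCH_A line and 'TAG_B' to the byte offset of the TAG_B line, letting
# later passes seek() directly to a symbol's openings and closings.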
4814 ###PERF This is a fine example of a db that can be in-memory and
4815 #just flushed to disk when we're done. Later, it can just be sucked
4816 #back into memory.
4817 offsets_db = Database(temp(SYMBOL_OFFSETS_DB), DB_OPEN_NEW)
4818 Cleanup().register(temp(SYMBOL_OFFSETS_DB), pass8)
4820 file = open(temp(SYMBOL_OPENINGS_CLOSINGS_SORTED), 'r')
4821 old_sym = ""
4822 while 1:
4823 fpos = file.tell()
4824 line = file.readline()
4825 if not line:
4826 break
4827 sym, svn_revnum, cvs_rev_key = line.split(" ", 2)
4828 if sym != old_sym:
4829 Log().write(LOG_VERBOSE, " ", sym)
4830 old_sym = sym
4831 offsets_db[sym] = fpos
4833 if not Ctx().trunk_only:
4834 generate_offsets_for_symbolings()
4835 Log().write(LOG_QUIET, "Done.")
4838 def pass8():
4839 svncounter = 2 # Repository initialization is 1.
4840 repos = SVNRepositoryMirror()
4841 persistence_manager = PersistenceManager(DB_OPEN_READ)
4843 if Ctx().target:
4844 if not Ctx().dry_run:
4845 repos.add_delegate(RepositoryDelegate())
4846 Log().write(LOG_QUIET, "Starting Subversion Repository.")
4847 else:
4848 if not Ctx().dry_run:
4849 repos.add_delegate(DumpfileDelegate())
4850 Log().write(LOG_QUIET, "Starting Subversion Dumpfile.")
4852 repos.add_delegate(StdoutDelegate(StatsKeeper().svn_rev_count()))
4854 while 1:
4855 svn_commit = persistence_manager.get_svn_commit(svncounter)
4856 if not svn_commit:
4857 break
4858 repos.commit(svn_commit)
4859 svncounter += 1
4861 repos.finish()
4863 _passes = [
4864 pass1,
4865 pass2,
4866 pass3,
4867 pass4,
4868 pass5,
4869 pass6,
4870 pass7,
4871 pass8,
4872 ]
4875 class Ctx:
4876 """Session state for this run of cvs2svn. For example, run-time
4877 options are stored here. This class is a Borg, see
4878 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66531."""
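# Because every instance shares __shared_state, code anywhere in this
# file can, for example, set
#
#   Ctx().trunk_only = 1
#
# and any Ctx() constructed later will see the same value; no context
# object needs to be passed around explicitly.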
4880 __shared_state = { }
4882 def __init__(self):
4883 self.__dict__ = self.__shared_state
4884 if self.__dict__:
4885 return
4886 # Else, initialize to defaults.
4887 self.target = None
4888 self.dumpfile = DUMPFILE
4889 self.tmpdir = '.'
4890 self.verbose = 0
4891 self.quiet = 0
4892 self.prune = 1
4893 self.existing_svnrepos = 0
4894 self.dump_only = 0
4895 self.dry_run = 0
4896 self.trunk_only = 0
4897 self.trunk_base = "trunk"
4898 self.tags_base = "tags"
4899 self.branches_base = "branches"
4900 self.encoding = ["ascii"]
4901 self.mime_types_file = None
4902 self.auto_props_file = None
4903 self.auto_props_ignore_case = False
4904 self.no_default_eol = 0
4905 self.eol_from_mime_type = 0
4906 self.keywords_off = 0
4907 self.use_cvs = None
4908 self.svnadmin = "svnadmin"
4909 self.username = None
4910 self.print_help = 0
4911 self.skip_cleanup = 0
4912 self.bdb_txn_nosync = 0
4913 self.fs_type = None
4914 self.forced_branches = []
4915 self.forced_tags = []
4916 self.excludes = []
4917 self.symbol_transforms = []
4918 self.svn_property_setters = []
4921 class SVNPropertySetter:
4922 """Abstract class for objects that can set properties on a SVNCommitItem."""
4924 def set_properties(self, s_item):
4925 """Set any properties that can be determined for S_ITEM."""
4927 raise NotImplementedError
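# The concrete setters below are appended to Ctx().svn_property_setters
# in main(), in an order that matters because several of them only act
# when a property has not already been set.  A sketch of how they are
# presumably applied (the actual driver code lives elsewhere in this
# file; names here are illustrative only):
#
#   for setter in Ctx().svn_property_setters:
#     setter.set_properties(s_item)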
4930 class CVSRevisionNumberSetter(SVNPropertySetter):
4931 """Set the cvs2svn:cvs-rev property to the CVS revision number."""
4933 def set_properties(self, s_item):
4934 s_item.svn_props['cvs2svn:cvs-rev'] = s_item.c_rev.rev
4935 s_item.svn_props_changed = True
4938 class ExecutablePropertySetter(SVNPropertySetter):
4939 """Set the svn:executable property based on c_rev.file_executable."""
4941 def set_properties(self, s_item):
4942 if s_item.c_rev.file_executable:
4943 s_item.svn_props['svn:executable'] = '*'
4946 class BinaryFileEOLStyleSetter(SVNPropertySetter):
4947 """Set the eol-style for binary files to None."""
4949 def set_properties(self, s_item):
4950 if s_item.c_rev.mode == 'b':
4951 s_item.svn_props['svn:eol-style'] = None
4954 class MimeMapper(SVNPropertySetter):
4955 """A class that provides mappings from file names to MIME types."""
4957 def __init__(self, mime_types_file):
4958 self.mappings = { }
4960 for line in fileinput.input(mime_types_file):
4961 if line.startswith("#"):
4962 continue
4964 # format of a line is something like
4965 # text/plain c h cpp
4966 extensions = line.split()
4967 if len(extensions) < 2:
4968 continue
4969 type = extensions.pop(0)
4970 for ext in extensions:
4971 if self.mappings.has_key(ext) and self.mappings[ext] != type:
4972 sys.stderr.write("%s: ambiguous MIME mapping for *.%s (%s or %s)\n"
4973 % (warning_prefix, ext, self.mappings[ext], type))
4974 self.mappings[ext] = type
4976 def set_properties(self, s_item):
4977 basename, extension = os.path.splitext(
4978 os.path.basename(s_item.c_rev.cvs_path)
4979 )
4981 # Extension includes the dot, so strip it (will leave extension
4982 # empty if filename ends with a dot, which is ok):
4983 extension = extension[1:]
4985 # If there is no extension (or the file ends with a period), use
4986 # the base name for mapping. This allows us to set mappings for
4987 # files such as README or Makefile:
4988 if not extension:
4989 extension = basename
4991 mime_type = self.mappings.get(extension, None)
4992 if mime_type is not None:
4993 s_item.svn_props['svn:mime-type'] = mime_type
4996 class AutoPropsPropertySetter(SVNPropertySetter):
4997 """Set arbitrary svn properties based on an auto-props configuration.
4999 This class supports case-sensitive or case-insensitive pattern
5000 matching. The 'correct' behavior is not quite clear, because
5001 subversion itself does an inconsistent job of handling case in
5002 auto-props patterns; see
5003 http://subversion.tigris.org/issues/show_bug.cgi?id=2036.
5005 If a property specified in auto-props has already been set to a
5006 different value, print a warning and leave the old property value
5007 unchanged."""
5009 class Pattern:
5010 """Describes the properties to be set for files matching a pattern."""
5012 def __init__(self, pattern, propdict):
5013 # A glob-like pattern:
5014 self.pattern = pattern
5015 # A dictionary of properties that should be set:
5016 self.propdict = propdict
5018 def match(self, basename):
5019 """Does the file with the specified basename match pattern?"""
5021 return fnmatch.fnmatch(basename, self.pattern)
5023 def __init__(self, configfilename, ignore_case):
5024 config = ConfigParser.ConfigParser()
5025 if ignore_case:
5026 self.transform_case = self.squash_case
5027 else:
5028 config.optionxform = self.preserve_case
5029 self.transform_case = self.preserve_case
5031 config.readfp(file(configfilename))
5032 self.patterns = []
5033 for section in config.sections():
5034 if self.transform_case(section) == 'auto-props':
5035 for pattern in config.options(section):
5036 value = config.get(section, pattern)
5037 if value:
5038 self._add_pattern(pattern, value)
5040 def squash_case(self, s):
5041 return s.lower()
5043 def preserve_case(self, s):
5044 return s
5046 def _add_pattern(self, pattern, value):
5047 props = value.split(';')
5048 propdict = {}
5049 for prop in props:
5050 s = prop.split('=', 1)
5051 if len(s) == 1:
5052 propdict[s[0]] = None
5053 else:
5054 propdict[s[0]] = s[1]
5055 self.patterns.append(
5056 self.Pattern(self.transform_case(pattern), propdict))
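# For example (hypothetical config), an [auto-props] line such as
#
#   *.sh = svn:executable;svn:eol-style=native
#
# becomes Pattern('*.sh', {'svn:executable': None,
#                          'svn:eol-style': 'native'}),
# with the pattern lowercased first when --auto-props-ignore-case is used.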
5058 def get_propdict(self, path):
5059 basename = self.transform_case(os.path.basename(path))
5060 propdict = {}
5061 for pattern in self.patterns:
5062 if pattern.match(basename):
5063 for (key,value) in pattern.propdict.items():
5064 if propdict.has_key(key):
5065 if propdict[key] != value:
5066 Log().write(
5067 LOG_WARN,
5068 "Contradictory values set for property '%s' for file %s."
5069 % (key, path,))
5070 else:
5071 propdict[key] = value
5073 return propdict
5075 def set_properties(self, s_item):
5076 propdict = self.get_propdict(s_item.c_rev.cvs_path)
5077 for (k,v) in propdict.items():
5078 if s_item.svn_props.has_key(k):
5079 if s_item.svn_props[k] != v:
5080 Log().write(
5081 LOG_WARN,
5082 "Property '%s' already set to %r for file %s; "
5083 "auto-props value (%r) ignored."
5084 % (k, s_item.svn_props[k], s_item.c_rev.cvs_path, v,))
5085 else:
5086 s_item.svn_props[k] = v
5089 class BinaryFileDefaultMimeTypeSetter(SVNPropertySetter):
5090 """If the file is binary and its svn:mime-type property is not yet
5091 set, set it to 'application/octet-stream'."""
5093 def set_properties(self, s_item):
5094 if not s_item.svn_props.has_key('svn:mime-type') \
5095 and s_item.c_rev.mode == 'b':
5096 s_item.svn_props['svn:mime-type'] = 'application/octet-stream'
5099 class EOLStyleFromMimeTypeSetter(SVNPropertySetter):
5100 """Set svn:eol-style based on svn:mime-type.
5102 If svn:mime-type is known but svn:eol-style is not, then set
5103 svn:eol-style based on svn:mime-type as follows: if svn:mime-type
5104 starts with 'text/', then set svn:eol-style to native; otherwise,
5105 force it to remain unset. See also issue #39."""
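# For example, an item whose svn:mime-type is already 'text/x-python'
# gets svn:eol-style 'native', while one whose svn:mime-type is
# 'image/png' gets svn:eol-style explicitly set to None, so that the
# DefaultEOLStyleSetter registered later in main() leaves it alone.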
5107 def set_properties(self, s_item):
5108 if not s_item.svn_props.has_key('svn:eol-style') \
5109 and s_item.svn_props.get('svn:mime-type', None) is not None:
5110 if s_item.svn_props['svn:mime-type'].startswith("text/"):
5111 s_item.svn_props['svn:eol-style'] = 'native'
5112 else:
5113 s_item.svn_props['svn:eol-style'] = None
5116 class DefaultEOLStyleSetter(SVNPropertySetter):
5117 """Set the eol-style if one has not already been set."""
5119 def __init__(self, value):
5120 """Initialize with the specified default VALUE."""
5122 self.value = value
5124 def set_properties(self, s_item):
5125 if not s_item.svn_props.has_key('svn:eol-style'):
5126 s_item.svn_props['svn:eol-style'] = self.value
5129 class KeywordsPropertySetter(SVNPropertySetter):
5130 """If the svn:keywords property is not yet set, set it based on the
5131 file's mode. See issue #2."""
5133 def __init__(self, value):
5134 """Use VALUE for the value of the svn:keywords property if it is
5135 to be set."""
5137 self.value = value
5139 def set_properties(self, s_item):
5140 if not s_item.svn_props.has_key('svn:keywords') \
5141 and s_item.c_rev.mode in [None, 'kv', 'kvl']:
5142 s_item.svn_props['svn:keywords'] = self.value
5145 def convert(start_pass, end_pass):
5146 """Convert a CVS repository to an SVN repository."""
5148 cleanup = Cleanup()
5149 times = [ None ] * (end_pass + 1)
5150 times[start_pass - 1] = time.time()
5151 StatsKeeper().set_start_time(time.time())
5152 for i in range(start_pass - 1, end_pass):
5153 Log().write(LOG_QUIET, '----- pass %d -----' % (i + 1))
5154 _passes[i]()
5155 times[i + 1] = time.time()
5156 StatsKeeper().log_duration_for_pass(times[i + 1] - times[i], i + 1)
5157 # Dispose of items in Ctx() not intended to live past the end of the pass
5158 # (Identified by exactly one leading underscore)
5159 for attr in dir(Ctx()):
5160 if (len(attr) > 2 and attr[0] == '_' and attr[1] != '_'
5161 and attr[:6] != "_Ctx__"):
5162 delattr(Ctx(), attr)
5163 if not Ctx().skip_cleanup:
5164 cleanup.cleanup(_passes[i])
5165 StatsKeeper().set_end_time(time.time())
5167 Log().write(LOG_QUIET, StatsKeeper())
5168 if end_pass < 4:
5169 Log().write(LOG_QUIET,
5170 '(These are unaltered CVS repository stats and do not\n'
5171 ' reflect tags or branches excluded via --exclude)\n')
5172 Log().write(LOG_NORMAL, StatsKeeper().timings())
5175 def normalize_ttb_path(opt, path):
5176 """Normalize a path to be used for --trunk, --tags, or --branches.
5178 1. Strip leading, trailing, and duplicated '/'.
5179 2. Verify that the path is not empty.
5181 Return the normalized path.
5183 If the path is invalid, raise a FatalError."""
5185 norm_path = _path_join(*path.split('/'))
5186 if not norm_path:
5187 raise FatalError("cannot pass an empty path to %s." % (opt,))
5188 return norm_path
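# For example, assuming _path_join() drops the empty components produced
# by split('/') (as the docstring above describes),
# normalize_ttb_path('--trunk', '/my//project/trunk/') returns
# 'my/project/trunk', while normalize_ttb_path('--tags', '//') raises
# FatalError because nothing is left after stripping.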
5191 def verify_paths_disjoint(*paths):
5192 """Verify that all of the paths in the argument list are disjoint.
5194 If any of the paths is nested in another one (i.e., in the sense
5195 that 'a/b/c/d' is nested in 'a/b'), or any two paths are identical,
5196 raise a FatalError."""
5198 paths = [(path.split('/'), path) for path in paths]
5199 # If all overlapping elements are equal, a shorter list is
5200 # considered "less than" a longer one. Therefore if any paths are
5201 # nested, this sort will leave at least one such pair adjacent, in
5202 # the order [nest,nestling].
5203 paths.sort()
5204 for i in range(1, len(paths)):
5205 split_path1, path1 = paths[i - 1]
5206 split_path2, path2 = paths[i]
5207 if len(split_path1) <= len(split_path2) \
5208 and split_path2[:len(split_path1)] == split_path1:
5209 raise FatalError("paths %s and %s are not disjoint." % (path1, path2,))
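# For example, verify_paths_disjoint('trunk', 'branches', 'tags') returns
# quietly, whereas verify_paths_disjoint('a/b', 'a/b/c'), or any call with
# two identical paths, raises FatalError, since one path is nested inside
# (or equal to) another.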
5212 def usage():
5213 print 'USAGE: %s [-v] [-s svn-repos-path] [-p pass] cvs-repos-path' \
5214 % os.path.basename(sys.argv[0])
5215 print ' --help, -h print this usage message and exit with success'
5216 print ' --version print the version number'
5217 print ' -q quiet'
5218 print ' -v verbose'
5219 print ' -s PATH path for SVN repos'
5220 print ' -p START[:END] start at pass START, end at pass END of %d' \
5221 % len(_passes)
5222 print ' If only START is given, run only pass START'
5223 print ' (implicitly enables --skip-cleanup)'
5224 print ' --existing-svnrepos load into existing SVN repository'
5225 print ' --dumpfile=PATH name of intermediate svn dumpfile'
5226 print ' --tmpdir=PATH directory to use for tmp data (defaults to cwd)'
5227 print ' --profile profile with \'hotshot\' (into file cvs2svn.hotshot)'
5228 print ' --dry-run do not create a repository or a dumpfile;'
5229 print ' just print what would happen.'
5230 print ' --use-cvs use CVS instead of RCS \'co\' to extract data'
5231 print ' (only use this if having problems with RCS)'
5232 print ' --svnadmin=PATH path to the svnadmin program'
5233 print ' --trunk-only convert only trunk commits, not tags or branches'
5234 print ' --trunk=PATH path for trunk (default: %s)' \
5235 % Ctx().trunk_base
5236 print ' --branches=PATH path for branches (default: %s)' \
5237 % Ctx().branches_base
5238 print ' --tags=PATH path for tags (default: %s)' \
5239 % Ctx().tags_base
5240 print ' --no-prune don\'t prune empty directories'
5241 print ' --dump-only just produce a dumpfile, don\'t commit to a repos'
5242 print ' --encoding=ENC encoding of paths and log messages in CVS repos'
5243 print ' This option may be passed multiple times; the values'
5244 print ' will be treated as an ordered list of encodings to'
5245 print ' attempt (with "ascii" as a hardcoded last resort)'
5246 print ' --force-branch=NAME force NAME to be a branch'
5247 print ' --force-tag=NAME force NAME to be a tag'
5248 print ' --exclude=REGEXP exclude branches and tags matching REGEXP'
5249 print ' --symbol-transform=P:S transform symbol names from P to S where P and S'
5250 print ' use Python regexp and reference syntax respectively'
5251 print ' --username=NAME username for cvs2svn-synthesized commits'
5252 print ' --skip-cleanup prevent the deletion of intermediate files'
5253 print ' --bdb-txn-nosync pass --bdb-txn-nosync to "svnadmin create"'
5254 print ' --fs-type=TYPE pass --fs-type=TYPE to "svnadmin create"'
5255 print ' --cvs-revnums record CVS revision numbers as file properties'
5256 print ' --auto-props=FILE set file properties from the auto-props section'
5257 print ' of a file in svn config format'
5258 print ' --auto-props-ignore-case ignore case when matching auto-props patterns'
5259 print ' --mime-types=FILE specify an apache-style mime.types file for'
5260 print ' setting svn:mime-type'
5261 print ' --eol-from-mime-type set svn:eol-style from mime type if known'
5262 print ' --no-default-eol don\'t set svn:eol-style to \'native\' for'
5263 print ' non-binary files with undetermined mime types'
5264 print ' --keywords-off don\'t set svn:keywords on any files (by default,'
5265 print ' cvs2svn sets svn:keywords on non-binary files to'
5266 print ' "%s")' % SVN_KEYWORDS_VALUE
5269 def main():
5270 # Convenience var, so we don't have to keep instantiating this Borg.
5271 ctx = Ctx()
5273 profiling = None
5274 start_pass = 1
5275 end_pass = len(_passes)
5277 try:
5278 opts, args = getopt.getopt(sys.argv[1:], 'p:s:qvh',
5279 [ "help", "create", "trunk=",
5280 "username=", "existing-svnrepos",
5281 "branches=", "tags=", "encoding=",
5282 "force-branch=", "force-tag=", "exclude=",
5283 "use-cvs", "mime-types=",
5284 "auto-props=", "auto-props-ignore-case",
5285 "eol-from-mime-type", "no-default-eol",
5286 "trunk-only", "no-prune", "dry-run",
5287 "dump-only", "dumpfile=", "tmpdir=",
5288 "svnadmin=", "skip-cleanup", "cvs-revnums",
5289 "bdb-txn-nosync", "fs-type=",
5290 "version", "profile",
5291 "keywords-off", "symbol-transform="])
5292 except getopt.GetoptError, e:
5293 sys.stderr.write(error_prefix + ': ' + str(e) + '\n\n')
5294 usage()
5295 sys.exit(1)
5297 for opt, value in opts:
5298 if opt == '--version':
5299 print '%s version %s' % (os.path.basename(sys.argv[0]), VERSION)
5300 sys.exit(0)
5301 elif opt == '-p':
5302 # Don't clean up if we're doing incrementals.
5303 ctx.skip_cleanup = 1
5304 if value.find(':') > 0:
5305 start_pass, end_pass = map(int, value.split(':'))
5306 else:
5307 end_pass = start_pass = int(value)
5308 if start_pass > len(_passes) or start_pass < 1:
5309 raise FatalError(
5310 'illegal value (%d) for starting pass. Must be 1 through %d.'
5311 % (int(start_pass), len(_passes),))
5312 if end_pass < start_pass or end_pass > len(_passes):
5313 raise FatalError(
5314 'illegal value (%d) for ending pass. Must be %d through %d.'
5315 % (int(end_pass), int(start_pass), len(_passes),))
5316 elif (opt == '--help') or (opt == '-h'):
5317 ctx.print_help = 1
5318 elif opt == '-v':
5319 Log().log_level = LOG_VERBOSE
5320 ctx.verbose = 1
5321 elif opt == '-q':
5322 Log().log_level = LOG_QUIET
5323 ctx.quiet = 1
5324 elif opt == '-s':
5325 ctx.target = value
5326 elif opt == '--existing-svnrepos':
5327 ctx.existing_svnrepos = 1
5328 elif opt == '--dumpfile':
5329 ctx.dumpfile = value
5330 elif opt == '--tmpdir':
5331 ctx.tmpdir = value
5332 elif opt == '--use-cvs':
5333 ctx.use_cvs = 1
5334 elif opt == '--svnadmin':
5335 ctx.svnadmin = value
5336 elif opt == '--trunk-only':
5337 ctx.trunk_only = 1
5338 elif opt == '--trunk':
5339 ctx.trunk_base = normalize_ttb_path(opt, value)
5340 elif opt == '--branches':
5341 ctx.branches_base = normalize_ttb_path(opt, value)
5342 elif opt == '--tags':
5343 ctx.tags_base = normalize_ttb_path(opt, value)
5344 elif opt == '--no-prune':
5345 ctx.prune = None
5346 elif opt == '--dump-only':
5347 ctx.dump_only = 1
5348 elif opt == '--dry-run':
5349 ctx.dry_run = 1
5350 elif opt == '--encoding':
5351 ctx.encoding.insert(-1, value)
5352 elif opt == '--force-branch':
5353 ctx.forced_branches.append(value)
5354 elif opt == '--force-tag':
5355 ctx.forced_tags.append(value)
5356 elif opt == '--exclude':
5357 try:
5358 ctx.excludes.append(re.compile('^' + value + '$'))
5359 except re.error, e:
5360 raise FatalError("'%s' is not a valid regexp." % (value,))
5361 elif opt == '--mime-types':
5362 ctx.mime_types_file = value
5363 elif opt == '--auto-props':
5364 ctx.auto_props_file = value
5365 elif opt == '--auto-props-ignore-case':
5366 ctx.auto_props_ignore_case = True
5367 elif opt == '--eol-from-mime-type':
5368 ctx.eol_from_mime_type = 1
5369 elif opt == '--no-default-eol':
5370 ctx.no_default_eol = 1
5371 elif opt == '--keywords-off':
5372 ctx.keywords_off = 1
5373 elif opt == '--username':
5374 ctx.username = value
5375 elif opt == '--skip-cleanup':
5376 ctx.skip_cleanup = 1
5377 elif opt == '--cvs-revnums':
5378 ctx.svn_property_setters.append(CVSRevisionNumberSetter())
5379 elif opt == '--bdb-txn-nosync':
5380 ctx.bdb_txn_nosync = 1
5381 elif opt == '--fs-type':
5382 ctx.fs_type = value
5383 elif opt == '--create':
5384 sys.stderr.write(warning_prefix +
5385 ': The behaviour produced by the --create option is now the '
5386 'default,\nand passing the option is deprecated.\n')
5387 elif opt == '--profile':
5388 profiling = 1
5389 elif opt == '--symbol-transform':
5390 [pattern, replacement] = value.split(":")
5391 try:
5392 pattern = re.compile(pattern)
5393 except re.error, e:
5394 raise FatalError("'%s' is not a valid regexp." % (pattern,))
5395 ctx.symbol_transforms.append((pattern, replacement,))
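# As a hypothetical example, --symbol-transform='RELEASE_(.*):release-\1'
# stores the compiled pattern together with the replacement string
# 'release-\1'; according to usage(), later stages use these to rewrite
# symbol names such as RELEASE_1_0 into release-1_0.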
5397 if ctx.print_help:
5398 usage()
5399 sys.exit(0)
5401 # Consistency check for options and arguments.
5402 if len(args) == 0:
5403 usage()
5404 sys.exit(1)
5406 if len(args) > 1:
5407 sys.stderr.write(error_prefix +
5408 ": must pass only one CVS repository.\n")
5409 usage()
5410 sys.exit(1)
5412 cvsroot = args[0]
5414 if ctx.use_cvs:
5415 ctx.cvs_repository = CVSRepositoryViaCVS(cvsroot)
5416 else:
5417 ctx.cvs_repository = CVSRepositoryViaRCS(cvsroot)
5419 if (not ctx.target) and (not ctx.dump_only) and (not ctx.dry_run):
5420 raise FatalError("must pass one of '-s' or '--dump-only'.")
5422 def not_both(opt1val, opt1name, opt2val, opt2name):
5423 if opt1val and opt2val:
5424 raise FatalError("cannot pass both '%s' and '%s'."
5425 % (opt1name, opt2name,))
5427 not_both(ctx.target, '-s',
5428 ctx.dump_only, '--dump-only')
5430 not_both(ctx.dump_only, '--dump-only',
5431 ctx.existing_svnrepos, '--existing-svnrepos')
5433 not_both(ctx.bdb_txn_nosync, '--bdb-txn-nosync',
5434 ctx.existing_svnrepos, '--existing-svnrepos')
5436 not_both(ctx.dump_only, '--dump-only',
5437 ctx.bdb_txn_nosync, '--bdb-txn-nosync')
5439 not_both(ctx.quiet, '-q',
5440 ctx.verbose, '-v')
5442 not_both(ctx.fs_type, '--fs-type',
5443 ctx.existing_svnrepos, '--existing-svnrepos')
5445 if ctx.fs_type and ctx.fs_type != 'bdb' and ctx.bdb_txn_nosync:
5446 raise FatalError("cannot pass --bdb-txn-nosync with --fs-type=%s."
5447 % ctx.fs_type)
5449 # Create the default project (using ctx.trunk, ctx.branches, and ctx.tags):
5450 ctx.project = Project(ctx.cvs_repository.cvs_repos_path,
5451 ctx.trunk_base, ctx.branches_base, ctx.tags_base)
5453 if ctx.existing_svnrepos and not os.path.isdir(ctx.target):
5454 raise FatalError("the svn-repos-path '%s' is not an "
5455 "existing directory." % ctx.target)
5457 if not ctx.dump_only and not ctx.existing_svnrepos \
5458 and (not ctx.dry_run) and os.path.exists(ctx.target):
5459 raise FatalError("the svn-repos-path '%s' exists.\n"
5460 "Remove it, or pass '--existing-svnrepos'."
5461 % ctx.target)
5463 if ctx.target and not ctx.dry_run:
5464 # Verify that svnadmin can be executed. The 'help' subcommand
5465 # should be harmless.
5466 try:
5467 check_command_runs([ctx.svnadmin, 'help'], 'svnadmin')
5468 except CommandFailedException, e:
5469 raise FatalError(
5470 '%s\n'
5471 'svnadmin could not be executed. Please ensure that it is\n'
5472 'installed and/or use the --svnadmin option.' % (e,))
5474 ctx.svn_property_setters.append(ExecutablePropertySetter())
5476 ctx.svn_property_setters.append(BinaryFileEOLStyleSetter())
5478 if ctx.mime_types_file:
5479 ctx.svn_property_setters.append(MimeMapper(ctx.mime_types_file))
5481 if ctx.auto_props_file:
5482 ctx.svn_property_setters.append(AutoPropsPropertySetter(
5483 ctx.auto_props_file, ctx.auto_props_ignore_case))
5485 ctx.svn_property_setters.append(BinaryFileDefaultMimeTypeSetter())
5487 if ctx.eol_from_mime_type:
5488 ctx.svn_property_setters.append(EOLStyleFromMimeTypeSetter())
5490 if ctx.no_default_eol:
5491 ctx.svn_property_setters.append(DefaultEOLStyleSetter(None))
5492 else:
5493 ctx.svn_property_setters.append(DefaultEOLStyleSetter('native'))
5495 if not ctx.keywords_off:
5496 ctx.svn_property_setters.append(
5497 KeywordsPropertySetter(SVN_KEYWORDS_VALUE))
5499 # Make sure the tmp directory exists. Note that we don't check if
5500 # it's empty -- we want to be able to use, for example, "." to hold
5501 tempfiles. But if we *did* want to check if it were empty, we'd do
5502 # something like os.stat(ctx.tmpdir)[stat.ST_NLINK], of course :-).
5503 if not os.path.exists(ctx.tmpdir):
5504 os.mkdir(ctx.tmpdir)
5505 elif not os.path.isdir(ctx.tmpdir):
5506 raise FatalError(
5507 "cvs2svn tried to use '%s' for temporary files, but that path\n"
5508 " exists and is not a directory. Please make it be a directory,\n"
5509 " or specify some other directory for temporary files."
5510 % (ctx.tmpdir,))
5512 # But do lock the tmpdir, to avoid process clash.
5513 try:
5514 os.mkdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5515 except OSError, e:
5516 if e.errno == errno.EACCES:
5517 raise FatalError("Permission denied:"
5518 + " No write access to directory '%s'." % ctx.tmpdir)
5519 if e.errno == errno.EEXIST:
5520 raise FatalError(
5521 "cvs2svn is using directory '%s' for temporary files, but\n"
5522 " subdirectory '%s/cvs2svn.lock' exists, indicating that another\n"
5523 " cvs2svn process is currently using '%s' as its temporary\n"
5524 " workspace. If you are certain that is not the case,\n"
5525 " then remove the '%s/cvs2svn.lock' subdirectory."
5526 % (ctx.tmpdir, ctx.tmpdir, ctx.tmpdir, ctx.tmpdir,))
5527 raise
5528 try:
5529 if profiling:
5530 import hotshot
5531 prof = hotshot.Profile('cvs2svn.hotshot')
5532 prof.runcall(convert, start_pass, end_pass)
5533 prof.close()
5534 else:
5535 convert(start_pass, end_pass)
5536 finally:
5537 try: os.rmdir(os.path.join(ctx.tmpdir, 'cvs2svn.lock'))
5538 except: pass
5541 if __name__ == '__main__':
5542 try:
5543 main()
5544 except FatalException, e:
5545 sys.stderr.write(str(e))
5546 sys.exit(1)